detect qr codes from URL and cid

This commit is contained in:
heyethereum
2024-07-28 00:03:53 +08:00
parent a95e5e8fcd
commit 31dbb35d00
5 changed files with 368 additions and 236 deletions

View File

@@ -103,6 +103,12 @@
<artifactId>javase</artifactId> <artifactId>javase</artifactId>
<version>3.5.3</version> <version>3.5.3</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.18.1</version>
</dependency>
</dependencies> </dependencies>

View File

@@ -1,17 +1,7 @@
package com.safeqr.app.gmail.controller; package com.safeqr.app.gmail.controller;
import com.google.api.client.auth.oauth2.BearerToken; import com.google.api.services.gmail.model.*;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.services.gmail.Gmail;
import com.google.api.services.gmail.model.MessagePart;
import com.google.zxing.BinaryBitmap;
import com.google.zxing.LuminanceSource;
import com.google.zxing.MultiFormatReader;
import com.google.zxing.Result;
import com.google.zxing.client.j2se.BufferedImageLuminanceSource;
import com.google.zxing.common.HybridBinarizer;
import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.binary.Base64;
import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
import com.google.api.client.auth.oauth2.AuthorizationCodeRequestUrl; import com.google.api.client.auth.oauth2.AuthorizationCodeRequestUrl;
import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.auth.oauth2.Credential;
@@ -23,8 +13,6 @@ import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory; import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory; import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.gmail.GmailScopes; import com.google.api.services.gmail.GmailScopes;
import com.google.api.services.gmail.model.ListMessagesResponse;
import com.google.api.services.gmail.model.Message;
import com.safeqr.app.gmail.service.GmailService; import com.safeqr.app.gmail.service.GmailService;
import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletRequest;
import org.slf4j.Logger; import org.slf4j.Logger;
@@ -36,20 +24,10 @@ import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.*;
import org.springframework.web.servlet.view.RedirectView; import org.springframework.web.servlet.view.RedirectView;
import javax.imageio.ImageIO;
import static com.safeqr.app.constants.APIConstants.*; import static com.safeqr.app.constants.APIConstants.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@RestController @RestController
@@ -57,7 +35,6 @@ import java.util.regex.Pattern;
public class GmailController { public class GmailController {
private static final Logger logger = LoggerFactory.getLogger(GmailController.class); private static final Logger logger = LoggerFactory.getLogger(GmailController.class);
GmailService gmailService; GmailService gmailService;
private static HttpTransport httpTransport = new NetHttpTransport();
private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance(); private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();
private static com.google.api.services.gmail.Gmail client; private static com.google.api.services.gmail.Gmail client;
@@ -84,13 +61,14 @@ public class GmailController {
return new RedirectView(authorize()); return new RedirectView(authorize());
} }
private String authorize() throws Exception { private String authorize() throws Exception {
AuthorizationCodeRequestUrl authorizationUrl; AuthorizationCodeRequestUrl authorizationUrl;
if (flow == null) { if (flow == null) {
GoogleClientSecrets.Details web = new GoogleClientSecrets.Details(); GoogleClientSecrets.Details web = new GoogleClientSecrets.Details();
web.setClientId(clientId); web.setClientId(clientId);
web.setClientSecret(clientSecret); web.setClientSecret(clientSecret);
clientSecrets = new GoogleClientSecrets().setWeb(web); clientSecrets = new GoogleClientSecrets().setWeb(web);
httpTransport = GoogleNetHttpTransport.newTrustedTransport(); HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, JSON_FACTORY, clientSecrets, flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, JSON_FACTORY, clientSecrets,
Collections.singleton(GmailScopes.GMAIL_READONLY)) Collections.singleton(GmailScopes.GMAIL_READONLY))
.build(); .build();
@@ -107,24 +85,14 @@ public class GmailController {
@GetMapping(value = "/gmail/callback", params = "code") @GetMapping(value = "/gmail/callback", params = "code")
public ResponseEntity<String> oauth2Callback(@RequestParam(value = "code") String code) { public ResponseEntity<String> oauth2Callback(@RequestParam(value = "code") String code) {
// System.out.println("code->" + code + " userId->" + userId + "
// query->" + query);
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
// String message;
try { try {
TokenResponse response = flow.newTokenRequest(code).setRedirectUri(redirectUri).execute(); TokenResponse response = flow.newTokenRequest(code).setRedirectUri(redirectUri).execute();
credential = flow.createAndStoreCredential(response, "userID"); credential = flow.createAndStoreCredential(response, "userID");
logger.info(credential.getAccessToken()); logger.info(credential.getAccessToken());
logger.info(credential.getRefreshToken()); logger.info(credential.getRefreshToken());
logger.info(credential.toString()); logger.info(credential.toString());
} catch (Exception e) { } catch (Exception e) {
System.out.println("exception cached ");
e.printStackTrace(); e.printStackTrace();
} }
@@ -132,104 +100,13 @@ public class GmailController {
} }
@GetMapping(value = "/gmail/getEmails", produces = MediaType.APPLICATION_JSON_VALUE) @GetMapping(value = "/gmail/getEmails", produces = MediaType.APPLICATION_JSON_VALUE)
public ResponseEntity<?> getUserEmails(@RequestHeader(name = "accessToken") String accessToken) throws IOException { public ResponseEntity<?> getUserEmails(@RequestHeader(name = "accessToken") String accessToken) throws IOException, InterruptedException {
logger.info("Invoking GET User Emails endpoints"); logger.info("Invoking GET Scan User Emails endpoints");
if (accessToken == null || accessToken.isEmpty()) { if (accessToken == null || accessToken.isEmpty()) {
return new ResponseEntity<>("Access token is missing", HttpStatus.BAD_REQUEST); return new ResponseEntity<>("Access token is missing", HttpStatus.BAD_REQUEST);
} }
JSONObject json; return new ResponseEntity<>(gmailService.getEmail(accessToken).toString(), HttpStatus.OK);
try {
json = getEmail(accessToken);
} catch (Exception e) {
logger.error("Error getting emails: ", e);
return new ResponseEntity<>("Failed to get emails", HttpStatus.INTERNAL_SERVER_ERROR);
}
return new ResponseEntity<>(json.toString(), HttpStatus.OK);
}
/**
 * Creates a Gmail API client that sends the given OAuth access token as a bearer
 * Authorization header on each request.
 *
 * @param accessToken OAuth access token supplied by the caller
 * @return a Gmail client built on the shared transport and JSON factory
 */
private Gmail getGmailService(String accessToken) {
    Credential bearerCredential = new Credential(BearerToken.authorizationHeaderAccessMethod());
    bearerCredential.setAccessToken(accessToken);

    Gmail.Builder clientBuilder = new Gmail.Builder(httpTransport, JSON_FACTORY, bearerCredential);
    clientBuilder.setApplicationName(APPLICATION_NAME);
    return clientBuilder.build();
}
/**
 * Lists the caller's messages and checks the first listed message for an embedded code.
 *
 * <p>Only the first entry of the list response is fetched and inspected; the remaining
 * messages are not examined.
 *
 * @param accessToken OAuth access token used to build the Gmail client
 * @return a JSON object whose {@code emails_with_qr_codes} array holds the id of the
 *         inspected message when {@code containsQRCode} reports a hit, otherwise an empty array
 * @throws IOException if a Gmail API call fails
 */
private JSONObject getEmail(String accessToken) throws IOException {
    Gmail gmail = getGmailService(accessToken);
    logger.info("service-> {}", gmail);

    List<Message> listed = gmail.users().messages().list("me").execute().getMessages();
    JSONArray flaggedIds = new JSONArray();

    boolean nothingListed = (listed == null) || listed.isEmpty();
    if (!nothingListed) {
        String firstId = listed.get(0).getId();
        Message fullMessage = gmail.users().messages().get("me", firstId).setFormat("full").execute();
        logger.info("message-> {}", fullMessage);
        if (containsQRCode(fullMessage)) {
            flaggedIds.put(fullMessage.getId());
        }
    }

    JSONObject result = new JSONObject();
    result.put("emails_with_qr_codes", flaggedIds);
    return result;
}
/**
 * Reports whether any top-level {@code text/html} part of the message embeds a decodable code image.
 *
 * <p>Only the direct children of the payload are inspected; nested multipart containers
 * are not descended into.
 *
 * @param message a message fetched with its payload populated
 * @return {@code true} as soon as one HTML part passes {@code scanForQRCode}
 * @throws IOException declared for interface compatibility with existing callers
 */
private boolean containsQRCode(Message message) throws IOException {
    List<MessagePart> parts = message.getPayload().getParts();
    if (parts == null) {
        return false;
    }
    for (MessagePart part : parts) {
        if (!"text/html".equals(part.getMimeType())) {
            continue;
        }
        // An HTML part can carry no inline data (e.g. it is stored as an attachment); skip it
        // instead of failing on a null payload.
        String encoded = part.getBody() == null ? null : part.getBody().getData();
        if (encoded == null) {
            continue;
        }
        // Decode with an explicit charset so the result does not depend on the platform default.
        String html = new String(Base64.decodeBase64(encoded), java.nio.charset.StandardCharsets.UTF_8);
        if (scanForQRCode(html)) {
            return true;
        }
    }
    return false;
}
/**
 * Scans the HTML for {@code <img>} tags whose {@code src} is an inline {@code data:image}
 * URI and tries to decode each one with {@code MultiFormatReader}.
 *
 * <p>Images that cannot be read or decoded are skipped (best effort); the scan continues
 * with the next tag.
 *
 * @param htmlContent decoded HTML body of an email part
 * @return {@code true} if at least one inline image decodes successfully
 */
private boolean scanForQRCode(String htmlContent) {
    // Extract all img tags
    Pattern pattern = Pattern.compile("<img[^>]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>");
    Matcher matcher = pattern.matcher(htmlContent);
    while (matcher.find()) {
        String src = matcher.group(1);
        if (!src.startsWith("data:image")) {
            continue;
        }
        // A data URI without a payload separator (or with nothing after it) has no image
        // to decode; skip it rather than indexing past the end of a split array.
        int separator = src.indexOf(',');
        if (separator < 0 || separator == src.length() - 1) {
            continue;
        }
        byte[] imageBytes = Base64.decodeBase64(src.substring(separator + 1));
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageBytes));
            if (image == null) {
                // ImageIO found no reader for these bytes: not an image format it understands.
                continue;
            }
            LuminanceSource source = new BufferedImageLuminanceSource(image);
            BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
            Result result = new MultiFormatReader().decode(bitmap);
            if (result != null) {
                return true;
            }
        } catch (Exception e) {
            // Unreadable image or no code found: move on to the next image.
            logger.debug("Skipping inline image that could not be decoded", e);
        }
    }
    return false;
}
@GetMapping(value = "/gmail/authenticate", produces = MediaType.APPLICATION_JSON_VALUE)
public ResponseEntity<?> authenticate() {
logger.info("Invoking gmail authenticate endpoint");
return ResponseEntity.ok(Map.of("version", "SafeQR v1.0.2"));
} }
} }

View File

@@ -1,32 +1,242 @@
package com.safeqr.app.gmail.service; package com.safeqr.app.gmail.service;
import com.google.api.client.auth.oauth2.BearerToken;
import com.google.api.client.http.HttpTransport;
import com.google.api.services.gmail.model.*;
import com.google.zxing.*;
import com.google.zxing.client.j2se.BufferedImageLuminanceSource;
import com.google.zxing.common.HybridBinarizer;
import com.google.zxing.multi.qrcode.QRCodeMultiReader;
import org.apache.commons.codec.binary.Base64;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp;
import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver;
import com.google.api.client.googleapis.auth.oauth2.GoogleAuthorizationCodeFlow;
import com.google.api.client.googleapis.auth.oauth2.GoogleClientSecrets;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport; import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory; import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory; import com.google.api.client.json.gson.GsonFactory;
import com.google.api.client.util.store.FileDataStoreFactory;
import com.google.api.services.gmail.Gmail; import com.google.api.services.gmail.Gmail;
import com.google.api.services.gmail.GmailScopes;
import com.google.api.services.gmail.model.Label; import javax.imageio.ImageIO;
import com.google.api.services.gmail.model.ListLabelsResponse; import java.awt.image.BufferedImage;
import java.io.FileNotFoundException; import java.io.*;
import java.io.IOException; import java.net.URI;
import java.io.InputStream; import java.net.URISyntaxException;
import java.io.InputStreamReader; import java.net.http.HttpClient;
import java.security.GeneralSecurityException; import java.net.http.HttpRequest;
import java.util.Collections; import java.net.http.HttpResponse;
import java.util.List; import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.safeqr.app.constants.APIConstants.APPLICATION_NAME;
@Service @Service
public class GmailService { public class GmailService {
private static final Logger logger = LoggerFactory.getLogger(GmailService.class); private static final Logger logger = LoggerFactory.getLogger(GmailService.class);
private static final HttpTransport httpTransport = new NetHttpTransport();
private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();
/**
 * Builds a Gmail API client for a single request, authenticated with the given
 * OAuth access token sent as a bearer Authorization header.
 *
 * @param accessToken OAuth access token supplied by the caller
 * @return a Gmail client using the shared transport and JSON factory
 */
private Gmail getGmailService(String accessToken) {
    Credential tokenCredential = new Credential(BearerToken.authorizationHeaderAccessMethod());
    tokenCredential.setAccessToken(accessToken);

    Gmail.Builder gmailBuilder = new Gmail.Builder(httpTransport, JSON_FACTORY, tokenCredential);
    gmailBuilder.setApplicationName(APPLICATION_NAME);
    return gmailBuilder.build();
}
/**
 * Scans the caller's listed messages for QR codes found in inline (cid:) attachments
 * and in remotely hosted images referenced from HTML parts.
 *
 * <p>The list call is executed once and no page token is followed, so only the messages
 * returned by that single response are scanned.
 *
 * @param accessToken OAuth access token used to build the Gmail client
 * @return a JSON object whose {@code qr_codes} array holds one entry (a list of decoded
 *         texts) per scanned image
 * @throws IOException if a Gmail API call fails
 * @throws InterruptedException if the thread is interrupted while downloading an image
 */
public JSONObject getEmail(String accessToken) throws IOException, InterruptedException {
    JSONObject json = new JSONObject();
    JSONArray emailArray = new JSONArray();

    // Build the Gmail service
    Gmail service = getGmailService(accessToken);
    logger.info("service-> {}", service);

    // Get the list of messages
    ListMessagesResponse listResponse = service.users().messages().list("me").execute();
    List<Message> messages = listResponse.getMessages();
    if (messages == null) {
        // The response carries no message list for an empty mailbox; treat it as "nothing to scan".
        messages = List.of();
    }

    for (Message listed : messages) {
        Message message = service.users().messages().get("me", listed.getId()).setFormat("full").execute();
        if (message.getPayload() == null) {
            // Nothing to inspect without a payload.
            continue;
        }
        List<MessagePart> parts = message.getPayload().getParts();

        Set<String> attachmentIds = new HashSet<>();
        Set<String> imageUrls = new HashSet<>();
        processPartsRecursively(parts, attachmentIds, imageUrls);

        // Extract and log the email subject
        String subject = getSubject(message);
        logger.info("Email Subject-> {}", subject);

        if (attachmentIds.isEmpty() && imageUrls.isEmpty()) {
            continue;
        }

        String messageId = message.getId();
        logger.info("messageId-> {}", messageId);
        String historyId = String.valueOf(message.getHistoryId());
        logger.info("historyId-> {}", historyId);

        // The collected "attachment ids" are the cid: references found in the HTML;
        // resolve each one to the Gmail attachment id of the part that declares it.
        for (String attachmentId : attachmentIds) {
            Optional<String> attachment = findAttachmentIdByCid(parts, attachmentId);
            logger.info("attachment-> {}", attachment);
            if (attachment.isPresent()) {
                List<String> qrCodeValue = processAttachment(service, messageId, attachment.get());
                emailArray.put(qrCodeValue);
            }
        }

        for (String imageUrl : imageUrls) {
            List<String> qrCodeValue = scanQRCodeFromUrl(imageUrl);
            if (qrCodeValue != null) {
                emailArray.put(qrCodeValue);
            }
        }
    }

    logger.info("Total Emails-> {}", messages.size());
    json.put("qr_codes", emailArray);
    return json;
}
/**
 * Returns the value of the first header named exactly {@code Subject} on the message payload.
 *
 * @param message a message whose payload and headers are populated
 * @return the subject text, or {@code "No Subject"} when no such header exists or its value is null
 */
private String getSubject(Message message) {
    for (MessagePartHeader header : message.getPayload().getHeaders()) {
        if ("Subject".equals(header.getName())) {
            String subjectText = header.getValue();
            return subjectText != null ? subjectText : "No Subject";
        }
    }
    return "No Subject";
}
/**
 * Searches the parts depth-first for the one whose Content-ID matches {@code cid} and
 * returns its attachment id.
 *
 * <p>For each part, a match on the part itself takes precedence over a match in its sub-parts.
 *
 * @param parts parts to search; must not be null
 * @param cid content id taken from a {@code cid:} image reference
 * @return the attachment id of the first matching part, or empty if none matches
 */
private Optional<String> findAttachmentIdByCid(List<MessagePart> parts, String cid) {
    for (MessagePart part : parts) {
        Optional<String> ownMatch = findAttachmentIdInCurrentPart(part, cid);

        // The nested lookup is evaluated regardless of ownMatch, mirroring the order of work
        // of the combined-stream formulation.
        List<MessagePart> children = part.getParts();
        Optional<String> nestedMatch = Optional.empty();
        if (children != null) {
            nestedMatch = findAttachmentIdByCid(children, cid);
        }

        if (ownMatch.isPresent()) {
            return ownMatch;
        }
        if (nestedMatch.isPresent()) {
            return nestedMatch;
        }
    }
    return Optional.empty();
}
/**
 * Returns the attachment id of {@code part} when one of its headers is a Content-ID
 * header matching {@code cid}.
 *
 * @param part the message part to inspect
 * @param cid content id to look for
 * @return the part's attachment id, or empty when the part has no headers, no header
 *         matches, or the matching part has no attachment id
 */
private Optional<String> findAttachmentIdInCurrentPart(MessagePart part, String cid) {
    List<MessagePartHeader> partHeaders = part.getHeaders();
    if (partHeaders == null) {
        return Optional.empty();
    }
    for (MessagePartHeader candidate : partHeaders) {
        if (isContentIdHeader(candidate, cid)) {
            return Optional.ofNullable(part.getBody().getAttachmentId());
        }
    }
    return Optional.empty();
}
/**
 * Tells whether {@code header} is a Content-ID header that identifies exactly {@code cid}.
 *
 * <p>A {@code cid:} URL refers to the Content-ID value without its surrounding angle
 * brackets, so the brackets are stripped and the remainder is compared for equality.
 * A substring test would let {@code img1} match {@code <img10@host>}.
 *
 * @param header the header to test
 * @param cid content id taken from a {@code cid:} reference (without the scheme)
 * @return {@code true} only for a Content-ID header whose id equals {@code cid}
 */
private boolean isContentIdHeader(MessagePartHeader header, String cid) {
    if (cid == null || !"Content-ID".equalsIgnoreCase(header.getName())) {
        return false;
    }
    String rawValue = header.getValue();
    if (rawValue == null) {
        return false;
    }
    String contentId = rawValue.strip();
    if (contentId.length() >= 2 && contentId.startsWith("<") && contentId.endsWith(">")) {
        contentId = contentId.substring(1, contentId.length() - 1);
    }
    return contentId.equals(cid.strip());
}
/**
 * Walks the part tree and collects, from every {@code text/html} part, the {@code cid:}
 * references and the image URLs found in its HTML.
 *
 * @param parts parts to walk; {@code null} is treated as empty
 * @param attachmentIds receives the content ids referenced by {@code cid:} image sources
 * @param imageURLs receives the image URLs referenced by image sources
 */
private void processPartsRecursively(List<MessagePart> parts, Set<String> attachmentIds, Set<String> imageURLs) {
    if (parts == null) {
        return;
    }
    for (MessagePart part : parts) {
        // Constant-first comparison: a part without a MIME type must not abort the walk.
        if ("text/html".equalsIgnoreCase(part.getMimeType())) {
            String encoded = part.getBody() == null ? null : part.getBody().getData();
            if (encoded == null) {
                // HTML part without inline data (e.g. stored as an attachment): nothing to parse.
                continue;
            }
            // Decode with an explicit charset so the result does not depend on the platform default.
            String html = new String(Base64.decodeBase64(encoded), java.nio.charset.StandardCharsets.UTF_8);
            attachmentIds.addAll(extractCIDsFromHtml(html));
            imageURLs.addAll(extractImageUrlsFromHtml(html));
        } else if (part.getParts() != null) {
            // Recursive call to handle nested parts
            processPartsRecursively(part.getParts(), attachmentIds, imageURLs);
        }
    }
}
/**
 * Downloads the image at {@code imageUrl} and decodes any QR codes in it.
 *
 * <p>Scanning is best effort per image: a malformed URL or a failed download is logged
 * and reported as "no result", so one bad image reference does not abort the whole scan.
 *
 * @param imageUrl image URL taken from email HTML
 * @return the decoded texts (possibly empty), or {@code null} when the image could not be
 *         obtained; callers rely on {@code null} to skip the entry
 * @throws IOException declared for interface compatibility; download failures are handled here
 * @throws InterruptedException if the thread is interrupted while downloading
 */
private List<String> scanQRCodeFromUrl(String imageUrl) throws IOException, InterruptedException {
    try {
        BufferedImage image = downloadImageFromUrl(imageUrl);
        if (image != null) {
            return decodeQRCodes(image);
        }
    } catch (URISyntaxException | IllegalArgumentException | IOException e) {
        // IllegalArgumentException covers URIs rejected while building the HTTP request
        // (bad syntax via URI.create, unsupported scheme, missing host).
        logger.error("Error while scanning QR code from URL", e);
    }
    return null;
}
/**
 * Downloads the image at the given URL, following redirects.
 *
 * <p>NOTE(security): the URL comes from email HTML, i.e. untrusted input. Only http/https
 * URLs with a host are requested, but internal addresses are not filtered out here.
 * Confirm whether an allow/deny list is required where this service is deployed.
 *
 * @param imageUrl image URL; spaces are percent-encoded before parsing
 * @return the decoded image, or {@code null} when the URL is not an http(s) URL with a
 *         host, the response status is not 200, or the body is not a readable image
 * @throws IOException if the request fails or times out
 * @throws InterruptedException if the thread is interrupted while waiting for the response
 * @throws URISyntaxException if the URL cannot be parsed
 */
private BufferedImage downloadImageFromUrl(String imageUrl) throws IOException, InterruptedException, URISyntaxException {
    logger.info("imageUrl-> {}", imageUrl);

    // new URI(...) reports bad syntax through the declared checked exception, unlike
    // URI.create which throws an unchecked IllegalArgumentException.
    URI uri = new URI(imageUrl.replace(" ", "%20"));
    String scheme = uri.getScheme();
    boolean httpScheme = "http".equalsIgnoreCase(scheme) || "https".equalsIgnoreCase(scheme);
    if (!httpScheme || uri.getHost() == null) {
        // The request builder would reject these with an unchecked exception.
        logger.error("Unsupported image URL, skipping download: {}", imageUrl);
        return null;
    }

    HttpClient client = HttpClient.newBuilder()
            .followRedirects(HttpClient.Redirect.ALWAYS)
            .connectTimeout(java.time.Duration.ofSeconds(10))
            .build();
    HttpRequest request = HttpRequest.newBuilder()
            .uri(uri)
            // Bound the wait so an unresponsive image host cannot stall the scan indefinitely.
            .timeout(java.time.Duration.ofSeconds(20))
            .GET()
            .build();

    HttpResponse<byte[]> response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
    if (response.statusCode() != 200) {
        logger.error("Failed to download image. HTTP response code: {}", response.statusCode());
        return null;
    }
    return ImageIO.read(new ByteArrayInputStream(response.body()));
}
/**
 * Fetches an attachment from Gmail and decodes any QR codes in it.
 *
 * @param service Gmail client to fetch the attachment with
 * @param messageId id of the message owning the attachment
 * @param attachmentId Gmail attachment id
 * @return the decoded QR code texts; empty when the attachment has no data, is not a
 *         readable image, or contains no QR code
 * @throws IOException if the Gmail API call or image decoding fails
 */
private List<String> processAttachment(Gmail service, String messageId, String attachmentId) throws IOException {
    MessagePartBody attachPart = service.users().messages().attachments().get("me", messageId, attachmentId).execute();

    String encoded = attachPart.getData();
    if (encoded == null) {
        logger.info("Attachment {} of message {} has no data", attachmentId, messageId);
        return new ArrayList<>();
    }

    byte[] imageBytes = Base64.decodeBase64(encoded);
    BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageBytes));
    if (image == null) {
        // ImageIO found no reader for these bytes (not an image format it understands).
        logger.info("Attachment {} of message {} is not a readable image", attachmentId, messageId);
        return new ArrayList<>();
    }
    // The image is decoded straight from memory; no debug copy is written to disk.
    return decodeQRCodes(image);
}
/**
 * Decodes every QR code that can be located in the image.
 *
 * <p>Decoding is restricted to the QR_CODE format with the TRY_HARDER hint enabled.
 * Decoder failures are logged and never propagated.
 *
 * @param image the image to scan
 * @return the text of each decoded QR code, in the order the reader reports them;
 *         empty when none is found or decoding fails
 */
private List<String> decodeQRCodes(BufferedImage image) {
    BinaryBitmap binarized = new BinaryBitmap(new HybridBinarizer(new BufferedImageLuminanceSource(image)));

    // Set up decoding hints
    Map<DecodeHintType, Object> decodeHints = new EnumMap<>(DecodeHintType.class);
    decodeHints.put(DecodeHintType.POSSIBLE_FORMATS, List.of(BarcodeFormat.QR_CODE));
    decodeHints.put(DecodeHintType.TRY_HARDER, Boolean.TRUE);

    List<String> decodedTexts = new ArrayList<>();
    try {
        Result[] hits = new QRCodeMultiReader().decodeMultiple(binarized, decodeHints);
        if (hits == null) {
            logger.info("No QR codes found in the image");
        } else {
            for (Result hit : hits) {
                String text = hit.getText();
                decodedTexts.add(text);
                logger.info("Detected QR code: {}", text);
            }
        }
    } catch (NotFoundException e) {
        logger.info("No QR codes found in the image");
    } catch (Exception e) {
        logger.error("Error decoding QR codes", e);
    }

    logger.info("Total QR codes found: {}", decodedTexts.size());
    return decodedTexts;
}
/**
 * Collects the content ids referenced by {@code <img src="cid:...">} tags in the HTML.
 *
 * @param html HTML to parse
 * @return the distinct content ids with the {@code cid:} prefix removed
 */
private Set<String> extractCIDsFromHtml(String html) {
    Set<String> contentIds = new HashSet<>();
    for (String source : Jsoup.parse(html).select("img[src^=cid:]").eachAttr("src")) {
        if (source.startsWith("cid:")) {
            // Remove "cid:" prefix
            contentIds.add(source.substring(4));
        }
    }
    return contentIds;
}
/**
 * Collects the {@code src} values of all {@code <img>} tags that {@code isImageUrl} accepts.
 *
 * @param html HTML to parse
 * @return the distinct accepted image sources, exactly as written in the HTML
 */
private Set<String> extractImageUrlsFromHtml(String html) {
    Set<String> imageSources = new HashSet<>();
    for (String source : Jsoup.parse(html).select("img[src]").eachAttr("src")) {
        if (isImageUrl(source)) {
            imageSources.add(source);
        }
    }
    return imageSources;
}
/**
 * Checks whether {@code url} is a downloadable image URL.
 *
 * <p>The URL must use the http or https scheme. References such as {@code cid:logo.png}
 * or protocol-relative {@code //host/a.png} are rejected because they cannot be fetched
 * over HTTP as written. The image extension (.jpg, .jpeg, .png, .gif, .bmp) may end
 * either the whole URL or the part before any query string or fragment, so
 * {@code a.png?w=100} is accepted as well.
 *
 * @param url candidate URL; {@code null} is rejected
 * @return {@code true} if the URL is an http(s) URL with a recognised image extension
 */
private boolean isImageUrl(String url) {
    if (url == null) {
        return false;
    }
    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
    String lowerUrl = url.toLowerCase(Locale.ROOT);
    if (!lowerUrl.startsWith("http://") && !lowerUrl.startsWith("https://")) {
        return false;
    }

    // Cut at the first '?' or '#', whichever comes first.
    int cut = lowerUrl.length();
    for (char delimiter : new char[] {'?', '#'}) {
        int at = lowerUrl.indexOf(delimiter);
        if (at >= 0 && at < cut) {
            cut = at;
        }
    }
    String withoutQuery = lowerUrl.substring(0, cut);

    return Stream.of(".jpg", ".jpeg", ".png", ".gif", ".bmp")
            .anyMatch(ext -> lowerUrl.endsWith(ext) || withoutQuery.endsWith(ext));
}
} }

View File

@@ -5,6 +5,7 @@ import com.safeqr.app.qrcode.entity.EmailEntity;
import com.safeqr.app.qrcode.entity.QRCodeEntity; import com.safeqr.app.qrcode.entity.QRCodeEntity;
import com.safeqr.app.qrcode.entity.URLEntity; import com.safeqr.app.qrcode.entity.URLEntity;
import com.safeqr.app.qrcode.service.URLVerificationService; import com.safeqr.app.qrcode.service.URLVerificationService;
import jakarta.transaction.Transactional;
import lombok.*; import lombok.*;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -26,7 +27,7 @@ public final class URLModel extends QRCodeModel<URLEntity> {
this.urlVerificationService = urlVerificationService; this.urlVerificationService = urlVerificationService;
details = null; details = null;
} }
@Transactional
@Override @Override
public void setDetails() { public void setDetails() {
String url = data.getContents(); String url = data.getContents();
@@ -35,11 +36,10 @@ public final class URLModel extends QRCodeModel<URLEntity> {
urlVerificationService.countAndTrackRedirects(url, details); urlVerificationService.countAndTrackRedirects(url, details);
// set qrCode Identifier // set qrCode Identifier
details.setQrCodeId(data.getId()); details.setQrCodeId(data.getId());
// Insert into URL table // Insert into URL table
urlVerificationService.insertDB(details); urlVerificationService.insertDB(details);
} catch (IOException | URISyntaxException e) { } catch (IOException e) {
logger.error("Error: ", e); logger.error("Error: ", e);
} }
} }

View File

@@ -15,6 +15,7 @@ import org.springframework.stereotype.Service;
import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.HttpsURLConnection;
import java.io.IOException; import java.io.IOException;
import java.net.*; import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
@@ -37,112 +38,150 @@ public class URLVerificationService {
urlRepository.save(urlEntity); urlRepository.save(urlEntity);
} }
// Function to breakdown URL into subdomain, domain, topLevelDomain, query params, fragment // Function to breakdown URL into subdomain, domain, topLevelDomain, query params, fragment
public URLEntity breakdownURL(String urlString) throws MalformedURLException, URISyntaxException { public URLEntity breakdownURL(String urlString) throws MalformedURLException {
URI uri = new URI(urlString);
URL url = uri.toURL();
URLEntity urlObj = new URLEntity(); URLEntity urlObj = new URLEntity();
try {
// Ensure the URL is properly encoded
String encodedUrl = encodeUrl(urlString);
URI uri = new URI(encodedUrl);
URL url = uri.toURL();
String host = url.getHost(); String host = url.getHost();
// split host into subdomain, domain, topLevelDomain // split host into subdomain, domain, topLevelDomain
String[] hostParts = host.split("\\."); String[] hostParts = host.split("\\.");
String subdomain = ""; String subdomain = "";
if (hostParts.length >= 2) { if (hostParts.length >= 2) {
// set topLevelDomain to the last part of the host // set topLevelDomain to the last part of the host
urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]); urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]);
// set domain to the second last part of the host // set domain to the second last part of the host
urlObj.setDomain(hostParts[hostParts.length - 2]); urlObj.setDomain(hostParts[hostParts.length - 2]);
// set subdomain to the first part of the host // set subdomain to the first part of the host
if (hostParts.length > 2) { if (hostParts.length > 2) {
subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2)); subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2));
}
} }
} // set subdomain to URL host
// set subdomain to URL host urlObj.setSubdomain(subdomain);
urlObj.setSubdomain(subdomain);
String path = url.getPath(); String path = url.getPath();
//set path to URL path if it's not empty, otherwise set it to root path //set path to URL path if it's not empty, otherwise set it to root path
urlObj.setPath(path.isEmpty() ? "/" : path); urlObj.setPath(path.isEmpty() ? "/" : path);
String query = url.getQuery(); String query = url.getQuery();
Map<String, String> queryParams = new HashMap<>(); Map<String, String> queryParams = new HashMap<>();
if (query != null) { if (query != null) {
// split query params into key value pairs // split query params into key value pairs
for (String param : query.split("&")) { for (String param : query.split("&")) {
String[] pair = param.split("="); String[] pair = param.split("=");
queryParams.put(pair[0], pair.length > 1 ? pair[1] : ""); queryParams.put(pair[0], pair.length > 1 ? pair[1] : "");
}
logger.info("queryParams: {}", queryParams);
} }
logger.info("queryParams: {}", queryParams); // set query params to URL query
urlObj.setQuery(queryParams.toString());
// set fragment to URL ref
urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse(""));
} catch (URISyntaxException | MalformedURLException e) {
logger.error("Error in breaking down URL: {}", e.getMessage());
} }
// set query params to URL query
urlObj.setQuery(queryParams.toString());
// set fragment to URL ref
urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse(""));
return urlObj; return urlObj;
} }
private String encodeUrl(String urlString) throws MalformedURLException {
try {
URL url = new URL(urlString);
String protocol = url.getProtocol();
String host = url.getHost();
int port = url.getPort();
String path = url.getPath();
String query = url.getQuery();
String ref = url.getRef();
public void countAndTrackRedirects(String urlString, URLEntity details) throws IOException, URISyntaxException { StringBuilder encodedUrl = new StringBuilder();
URI uri = new URI(urlString); encodedUrl.append(protocol).append("://").append(host);
URL url = uri.toURL(); if (port != -1) {
List<String> redirectChain = new ArrayList<>(); encodedUrl.append(":").append(port);
List<String> hstsHeaderList = new ArrayList<>();
List<Boolean> sslStrippingList = new ArrayList<>();
// Add the initial URL to the chain
redirectChain.add(urlString);
boolean redirected;
int redirectCount = 0;
do {
URLConnection testConnection = url.openConnection();
if (!(testConnection instanceof HttpURLConnection)) {
// Handle non-HTTP connections (like mailto:)
logger.info("Non-HTTP URL encountered: {}", url);
hstsHeaderList.add(INFO_HSTS_NOT_APPLICABLE);
sslStrippingList.add(false);
break;
} }
HttpURLConnection connection = (HttpURLConnection) url.openConnection(); encodedUrl.append(URLEncoder.encode(path, StandardCharsets.UTF_8).replace("%2F", "/"));
connection.setRequestMethod("GET");
connection.setInstanceFollowRedirects(false);
int responseCode = connection.getResponseCode(); if (query != null) {
redirected = (responseCode >= 300 && responseCode < 400); encodedUrl.append("?").append(URLEncoder.encode(query, StandardCharsets.UTF_8).replace("%3D", "=").replace("%26", "&"));
}
if (ref != null) {
encodedUrl.append("#").append(URLEncoder.encode(ref, StandardCharsets.UTF_8));
}
// Checks for HSTS Header return encodedUrl.toString();
hstsHeaderList.add(detectHSTSHeader(url, connection)); } catch (Exception e) {
throw new MalformedURLException("Failed to encode URL: " + e.getMessage());
}
}
// Handle redirects public void countAndTrackRedirects(String urlString, URLEntity details) throws IOException {
if (redirected) { try {
// Location header contains the URL to redirect to URI uri = new URI(urlString);
String newUrl = connection.getHeaderField("Location"); URL url = uri.toURL();
if (newUrl == null) { List<String> redirectChain = new ArrayList<>();
List<String> hstsHeaderList = new ArrayList<>();
List<Boolean> sslStrippingList = new ArrayList<>();
// Add the initial URL to the chain
redirectChain.add(urlString);
boolean redirected;
int redirectCount = 0;
do {
URLConnection testConnection = url.openConnection();
if (!(testConnection instanceof HttpURLConnection)) {
// Handle non-HTTP connections (like mailto:)
logger.info("Non-HTTP URL encountered: {}", url);
hstsHeaderList.add(INFO_HSTS_NOT_APPLICABLE);
sslStrippingList.add(false);
break; break;
} }
URI newUri = uri.resolve(newUrl); HttpURLConnection connection = (HttpURLConnection) url.openConnection();
// check for SSL stripping during redirect connection.setRequestMethod("GET");
sslStrippingList.add(checkRedirectForSSLStripping(uri, newUri)); connection.setInstanceFollowRedirects(false);
// Handle relative URLs int responseCode = connection.getResponseCode();
uri = uri.resolve(newUrl); redirected = (responseCode >= 300 && responseCode < 400);
url = uri.toURL();
redirectChain.add(url.toString());
redirectCount++;
logger.info("Redirect #{}: {}",redirectCount, newUrl);
} else {
// No redirect, so no SSL stripping
sslStrippingList.add(false);
}
connection.disconnect(); // Checks for HSTS Header
} while (redirected && redirectCount < MAX_REDIRECT_COUNT); hstsHeaderList.add(detectHSTSHeader(url, connection));
details.setRedirect(redirectChain.size() - 1); // Handle redirects
details.setRedirectChain(redirectChain); if (redirected) {
details.setSslStripping(sslStrippingList); // Location header contains the URL to redirect to
details.setHstsHeader(hstsHeaderList); String newUrl = connection.getHeaderField("Location");
if (newUrl == null) {
break;
}
URI newUri = uri.resolve(newUrl);
// check for SSL stripping during redirect
sslStrippingList.add(checkRedirectForSSLStripping(uri, newUri));
// Handle relative URLs
uri = uri.resolve(newUrl);
url = uri.toURL();
redirectChain.add(url.toString());
redirectCount++;
logger.info("Redirect #{}: {}",redirectCount, newUrl);
} else {
// No redirect, so no SSL stripping
sslStrippingList.add(false);
}
connection.disconnect();
} while (redirected && redirectCount < MAX_REDIRECT_COUNT);
details.setRedirect(redirectChain.size() - 1);
details.setRedirectChain(redirectChain);
details.setSslStripping(sslStrippingList);
details.setHstsHeader(hstsHeaderList);
} catch (URISyntaxException e){
logger.error("Error in breaking down URL: {}", e.getMessage());
}
} }
// Function to check if the redirect is from HTTPS to HTTP // Function to check if the redirect is from HTTPS to HTTP
private boolean checkRedirectForSSLStripping(URI originalUri, URI newUri) { private boolean checkRedirectForSSLStripping(URI originalUri, URI newUri) {