diff --git a/pom.xml b/pom.xml index 1590d99..99309cc 100644 --- a/pom.xml +++ b/pom.xml @@ -103,6 +103,12 @@ javase 3.5.3 + + + org.jsoup + jsoup + 1.18.1 + diff --git a/src/main/java/com/safeqr/app/gmail/controller/GmailController.java b/src/main/java/com/safeqr/app/gmail/controller/GmailController.java index 7be12ac..08b55e2 100644 --- a/src/main/java/com/safeqr/app/gmail/controller/GmailController.java +++ b/src/main/java/com/safeqr/app/gmail/controller/GmailController.java @@ -1,17 +1,7 @@ package com.safeqr.app.gmail.controller; -import com.google.api.client.auth.oauth2.BearerToken; -import com.google.api.client.http.javanet.NetHttpTransport; -import com.google.api.services.gmail.Gmail; -import com.google.api.services.gmail.model.MessagePart; -import com.google.zxing.BinaryBitmap; -import com.google.zxing.LuminanceSource; -import com.google.zxing.MultiFormatReader; -import com.google.zxing.Result; -import com.google.zxing.client.j2se.BufferedImageLuminanceSource; -import com.google.zxing.common.HybridBinarizer; +import com.google.api.services.gmail.model.*; import org.apache.commons.codec.binary.Base64; -import org.json.JSONArray; import org.json.JSONObject; import com.google.api.client.auth.oauth2.AuthorizationCodeRequestUrl; import com.google.api.client.auth.oauth2.Credential; @@ -23,8 +13,6 @@ import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.gson.GsonFactory; import com.google.api.services.gmail.GmailScopes; -import com.google.api.services.gmail.model.ListMessagesResponse; -import com.google.api.services.gmail.model.Message; import com.safeqr.app.gmail.service.GmailService; import jakarta.servlet.http.HttpServletRequest; import org.slf4j.Logger; @@ -36,20 +24,10 @@ import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; import org.springframework.web.servlet.view.RedirectView; - -import javax.imageio.ImageIO; - import static com.safeqr.app.constants.APIConstants.*; - -import java.awt.image.BufferedImage; -import java.io.ByteArrayInputStream; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.*; + @RestController @@ -57,7 +35,6 @@ import java.util.regex.Pattern; public class GmailController { private static final Logger logger = LoggerFactory.getLogger(GmailController.class); GmailService gmailService; - private static HttpTransport httpTransport = new NetHttpTransport(); private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance(); private static com.google.api.services.gmail.Gmail client; @@ -84,13 +61,14 @@ public class GmailController { return new RedirectView(authorize()); } private String authorize() throws Exception { + AuthorizationCodeRequestUrl authorizationUrl; if (flow == null) { GoogleClientSecrets.Details web = new GoogleClientSecrets.Details(); web.setClientId(clientId); web.setClientSecret(clientSecret); clientSecrets = new GoogleClientSecrets().setWeb(web); - httpTransport = GoogleNetHttpTransport.newTrustedTransport(); + HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport(); flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, JSON_FACTORY, clientSecrets, Collections.singleton(GmailScopes.GMAIL_READONLY)) .build(); @@ -107,24 +85,14 @@ public class GmailController { @GetMapping(value = "/gmail/callback", params = "code") public ResponseEntity oauth2Callback(@RequestParam(value = "code") String code) { - - // System.out.println("code->" + code + " userId->" + userId + " - // query->" + query); JSONObject json = new JSONObject(); - - - // String message; try { TokenResponse response = flow.newTokenRequest(code).setRedirectUri(redirectUri).execute(); credential = flow.createAndStoreCredential(response, "userID"); logger.info(credential.getAccessToken()); logger.info(credential.getRefreshToken()); logger.info(credential.toString()); - - } catch (Exception e) { - - System.out.println("exception cached "); e.printStackTrace(); } @@ -132,104 +100,13 @@ public class GmailController { } @GetMapping(value = "/gmail/getEmails", produces = MediaType.APPLICATION_JSON_VALUE) - public ResponseEntity getUserEmails(@RequestHeader(name = "accessToken") String accessToken) throws IOException { - logger.info("Invoking GET User Emails endpoints"); + public ResponseEntity getUserEmails(@RequestHeader(name = "accessToken") String accessToken) throws IOException, InterruptedException { + logger.info("Invoking GET Scan User Emails endpoints"); if (accessToken == null || accessToken.isEmpty()) { return new ResponseEntity<>("Access token is missing", HttpStatus.BAD_REQUEST); } - JSONObject json; - try { - json = getEmail(accessToken); - } catch (Exception e) { - logger.error("Error getting emails: ", e); - return new ResponseEntity<>("Failed to get emails", HttpStatus.INTERNAL_SERVER_ERROR); - } - - return new ResponseEntity<>(json.toString(), HttpStatus.OK); - } - - private Gmail getGmailService(String accessToken) { - Credential userCredentials = new Credential(BearerToken.authorizationHeaderAccessMethod()).setAccessToken(accessToken); - return new Gmail.Builder(httpTransport, JSON_FACTORY, userCredentials) - .setApplicationName(APPLICATION_NAME) - .build(); - } - - private JSONObject getEmail(String accessToken) throws IOException { - JSONObject json = new JSONObject(); - JSONArray emailArray = new JSONArray(); - - // Build the Gmail service - Gmail service = getGmailService(accessToken); - logger.info("service-> {}", service); - - // Get the list of messages - ListMessagesResponse listResponse = service.users().messages().list("me").execute(); - List messages = listResponse.getMessages(); - - if (messages != null && !messages.isEmpty()) { - Message message = service.users().messages().get("me", messages.get(0).getId()).setFormat("full").execute(); - logger.info("message-> {}", message); - - if (containsQRCode(message)) { - emailArray.put(message.getId()); - } - } - - json.put("emails_with_qr_codes", emailArray); - return json; - } - private boolean containsQRCode(Message message) throws IOException { - if (message.getPayload().getParts() != null) { - for (MessagePart part : message.getPayload().getParts()) { - if ("text/html".equals(part.getMimeType())) { - String data = new String(Base64.decodeBase64(part.getBody().getData())); - if (scanForQRCode(data)) { - return true; - } - } - } - } - return false; - } - - private boolean scanForQRCode(String htmlContent) { - // Extract all img tags - Pattern pattern = Pattern.compile("]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>"); - Matcher matcher = pattern.matcher(htmlContent); - - while (matcher.find()) { - String src = matcher.group(1); - if (src.startsWith("data:image")) { - // It's a base64 encoded image - String base64Image = src.split(",")[1]; - byte[] imageBytes = Base64.decodeBase64(base64Image); - - try { - BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageBytes)); - LuminanceSource source = new BufferedImageLuminanceSource(image); - BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source)); - - Result result = new MultiFormatReader().decode(bitmap); - if (result != null) { - // QR Code detected - return true; - } - } catch (Exception e) { - // If there's an error reading the image or it's not a QR code, continue to the next image - continue; - } - } - } - return false; - } - - - @GetMapping(value = "/gmail/authenticate", produces = MediaType.APPLICATION_JSON_VALUE) - public ResponseEntity authenticate() { - logger.info("Invoking gmail authenticate endpoint"); - return ResponseEntity.ok(Map.of("version", "SafeQR v1.0.2")); + return new ResponseEntity<>(gmailService.getEmail(accessToken).toString(), HttpStatus.OK); } } diff --git a/src/main/java/com/safeqr/app/gmail/service/GmailService.java b/src/main/java/com/safeqr/app/gmail/service/GmailService.java index 9555728..26117c0 100644 --- a/src/main/java/com/safeqr/app/gmail/service/GmailService.java +++ b/src/main/java/com/safeqr/app/gmail/service/GmailService.java @@ -1,32 +1,242 @@ package com.safeqr.app.gmail.service; +import com.google.api.client.auth.oauth2.BearerToken; +import com.google.api.client.http.HttpTransport; +import com.google.api.services.gmail.model.*; +import com.google.zxing.*; +import com.google.zxing.client.j2se.BufferedImageLuminanceSource; +import com.google.zxing.common.HybridBinarizer; +import com.google.zxing.multi.qrcode.QRCodeMultiReader; +import org.apache.commons.codec.binary.Base64; +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import com.google.api.client.auth.oauth2.Credential; -import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp; -import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver; -import com.google.api.client.googleapis.auth.oauth2.GoogleAuthorizationCodeFlow; -import com.google.api.client.googleapis.auth.oauth2.GoogleClientSecrets; -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; import com.google.api.client.http.javanet.NetHttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.gson.GsonFactory; -import com.google.api.client.util.store.FileDataStoreFactory; import com.google.api.services.gmail.Gmail; -import com.google.api.services.gmail.GmailScopes; -import com.google.api.services.gmail.model.Label; -import com.google.api.services.gmail.model.ListLabelsResponse; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.security.GeneralSecurityException; -import java.util.Collections; -import java.util.List; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.*; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.safeqr.app.constants.APIConstants.APPLICATION_NAME; @Service public class GmailService { private static final Logger logger = LoggerFactory.getLogger(GmailService.class); + private static final HttpTransport httpTransport = new NetHttpTransport(); + private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance(); + + private Gmail getGmailService(String accessToken) { + Credential userCredentials = new Credential(BearerToken.authorizationHeaderAccessMethod()).setAccessToken(accessToken); + return new Gmail.Builder(httpTransport, JSON_FACTORY, userCredentials) + .setApplicationName(APPLICATION_NAME) + .build(); + } + + public JSONObject getEmail(String accessToken) throws IOException, InterruptedException { + JSONObject json = new JSONObject(); + JSONArray emailArray = new JSONArray(); + + // Build the Gmail service + Gmail service = getGmailService(accessToken); + logger.info("service-> {}", service); + + // Get the list of messages + ListMessagesResponse listResponse = service.users().messages().list("me").execute(); + List messages = listResponse.getMessages(); + + for (Message message : messages) { + message = service.users().messages().get("me", message.getId()).setFormat("full").execute(); + List parts = message.getPayload().getParts(); + Set attachmentIds = new HashSet<>(); + Set imageUrls = new HashSet<>(); + processPartsRecursively(parts, attachmentIds, imageUrls); + + // Extract and log the email subject + String subject = getSubject(message); + logger.info("Email Subject-> {}", subject); + + if (attachmentIds.isEmpty() && imageUrls.isEmpty()) + continue; + + String messageId = message.getId(); + logger.info("messageId-> {}", messageId); + String historyId = String.valueOf(message.getHistoryId()); + logger.info("historyId-> {}", historyId); + + for (String attachmentId : attachmentIds) { + Optional attachment = findAttachmentIdByCid(parts, attachmentId); + logger.info("attachment-> {}", attachment); + if (attachment.isPresent()) { + List qrCodeValue = processAttachment(service, messageId, attachment.get()); + emailArray.put(qrCodeValue); + } + } + for (String imageUrl : imageUrls) { + List qrCodeValue = scanQRCodeFromUrl(imageUrl); + if (qrCodeValue != null) { + emailArray.put(qrCodeValue); + } + } + } + logger.info("Total Emails-> {}", messages.size()); + json.put("qr_codes", emailArray); + return json; + } + + private String getSubject(Message message) { + return message.getPayload().getHeaders().stream() + .filter(header -> "Subject".equals(header.getName())) + .findFirst() + .map(MessagePartHeader::getValue) + .orElse("No Subject"); + } + private Optional findAttachmentIdByCid(List parts, String cid) { + return parts.stream() + .flatMap(part -> Stream.concat(findAttachmentIdInCurrentPart(part, cid).stream(), Optional.ofNullable(part.getParts()) + .flatMap(subParts -> findAttachmentIdByCid(subParts, cid)).stream())) + .findFirst(); + } + + private Optional findAttachmentIdInCurrentPart(MessagePart part, String cid) { + return Optional.ofNullable(part.getHeaders()) + .flatMap(headers -> headers.stream() + .filter(header -> isContentIdHeader(header, cid)) + .findFirst() + .map(header -> part.getBody().getAttachmentId())); + } + + private boolean isContentIdHeader(MessagePartHeader header, String cid) { + return "Content-ID".equalsIgnoreCase(header.getName()) && header.getValue().contains(cid); + } + // Recursive method to handle nested parts to search for CID URIs + private void processPartsRecursively(List parts, Set attachmentIds, Set imageURLs) { + if (parts != null) { + for (MessagePart part : parts) { + if (part.getMimeType().equalsIgnoreCase("text/html")) { + String html = new String(Base64.decodeBase64(part.getBody().getData())); + attachmentIds.addAll(extractCIDsFromHtml(html)); + imageURLs.addAll(extractImageUrlsFromHtml(html)); + } else if (part.getParts() != null) { + // Recursive call to handle nested parts + processPartsRecursively(part.getParts(), attachmentIds, imageURLs); + } + } + } + } + private List scanQRCodeFromUrl(String imageUrl) throws IOException, InterruptedException { + try { + BufferedImage image = downloadImageFromUrl(imageUrl); + if (image != null) { + return decodeQRCodes(image); + } + } catch(URISyntaxException e) { + logger.error("Error while scanning QR code from URL", e); + } + return null; + } + // Download the image from the given URL + private BufferedImage downloadImageFromUrl(String imageUrl) throws IOException, InterruptedException, URISyntaxException { + HttpClient client = HttpClient.newBuilder() + .followRedirects(HttpClient.Redirect.ALWAYS) + .build(); + logger.info("imageUrl-> {}", imageUrl); + // Encode the URL + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create(imageUrl.replace(" ", "%20"))) + .GET() + .build(); + + HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofByteArray()); + if (response.statusCode() == 200) { + byte[] imageBytes = response.body(); + return ImageIO.read(new ByteArrayInputStream(imageBytes)); + } else { + logger.error("Failed to download image. HTTP response code: {}", response.statusCode()); + } + return null; + } + private List processAttachment(Gmail service, String messageId, String attachmentId) throws IOException { + MessagePartBody attachPart = service.users().messages().attachments().get("me", messageId, attachmentId).execute(); + byte[] imageBytes = Base64.decodeBase64(attachPart.getData()); + BufferedImage image = ImageIO.read(new ByteArrayInputStream(imageBytes)); + ImageIO.write(image, "png", new File("debug_image.png")); + return decodeQRCodes(image); + } + + private List decodeQRCodes(BufferedImage image) { + List qrCodeValues = new ArrayList<>(); + LuminanceSource source = new BufferedImageLuminanceSource(image); + BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source)); + + // Set up decoding hints + Map hints = new EnumMap<>(DecodeHintType.class); + hints.put(DecodeHintType.TRY_HARDER, Boolean.TRUE); + hints.put(DecodeHintType.POSSIBLE_FORMATS, List.of(BarcodeFormat.QR_CODE)); + + try { + QRCodeMultiReader multiReader = new QRCodeMultiReader(); + Result[] results = multiReader.decodeMultiple(bitmap, hints); + + if (results != null) { + for (Result result : results) { + qrCodeValues.add(result.getText()); + logger.info("Detected QR code: {}", result.getText()); + } + } else { + logger.info("No QR codes found in the image"); + } + } catch (NotFoundException e) { + logger.info("No QR codes found in the image"); + } catch (Exception e) { + logger.error("Error decoding QR codes", e); + } + + logger.info("Total QR codes found: {}", qrCodeValues.size()); + return qrCodeValues; + } + + //Extract CIDs from HTML + private Set extractCIDsFromHtml(String html) { + Document doc = Jsoup.parse(html); + Elements imgs = doc.select("img[src^=cid:]"); + + return imgs.stream() + .map(img -> img.attr("src")) + .filter(src -> src.startsWith("cid:")) + .map(src -> src.substring(4)) // Remove "cid:" prefix + .collect(Collectors.toSet()); + } + //Extract image URLs from HTML + private Set extractImageUrlsFromHtml(String html) { + Document doc = Jsoup.parse(html); + Elements imgs = doc.select("img[src]"); + + return imgs.stream() + .map(img -> img.attr("src")) + .filter(this::isImageUrl) + .collect(Collectors.toSet()); + } + // Check if the URL is an image URL + private boolean isImageUrl(String url) { + String lowerUrl = url.toLowerCase(); + return lowerUrl.endsWith(".jpg") || lowerUrl.endsWith(".jpeg") || lowerUrl.endsWith(".png") || lowerUrl.endsWith(".gif") || lowerUrl.endsWith(".bmp"); + } } diff --git a/src/main/java/com/safeqr/app/qrcode/model/URLModel.java b/src/main/java/com/safeqr/app/qrcode/model/URLModel.java index 5c32d74..a646dce 100644 --- a/src/main/java/com/safeqr/app/qrcode/model/URLModel.java +++ b/src/main/java/com/safeqr/app/qrcode/model/URLModel.java @@ -5,6 +5,7 @@ import com.safeqr.app.qrcode.entity.EmailEntity; import com.safeqr.app.qrcode.entity.QRCodeEntity; import com.safeqr.app.qrcode.entity.URLEntity; import com.safeqr.app.qrcode.service.URLVerificationService; +import jakarta.transaction.Transactional; import lombok.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,7 +27,7 @@ public final class URLModel extends QRCodeModel { this.urlVerificationService = urlVerificationService; details = null; } - + @Transactional @Override public void setDetails() { String url = data.getContents(); @@ -35,11 +36,10 @@ public final class URLModel extends QRCodeModel { urlVerificationService.countAndTrackRedirects(url, details); // set qrCode Identifier details.setQrCodeId(data.getId()); - // Insert into URL table urlVerificationService.insertDB(details); - } catch (IOException | URISyntaxException e) { + } catch (IOException e) { logger.error("Error: ", e); } } diff --git a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java index 23d4f60..c77f0ce 100644 --- a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java +++ b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java @@ -15,6 +15,7 @@ import org.springframework.stereotype.Service; import javax.net.ssl.HttpsURLConnection; import java.io.IOException; import java.net.*; +import java.nio.charset.StandardCharsets; import java.util.*; @@ -37,112 +38,150 @@ public class URLVerificationService { urlRepository.save(urlEntity); } // Function to breakdown URL into subdomain, domain, topLevelDomain, query params, fragment - public URLEntity breakdownURL(String urlString) throws MalformedURLException, URISyntaxException { - URI uri = new URI(urlString); - URL url = uri.toURL(); + public URLEntity breakdownURL(String urlString) throws MalformedURLException { URLEntity urlObj = new URLEntity(); + try { + // Ensure the URL is properly encoded + String encodedUrl = encodeUrl(urlString); + URI uri = new URI(encodedUrl); + URL url = uri.toURL(); - String host = url.getHost(); - // split host into subdomain, domain, topLevelDomain - String[] hostParts = host.split("\\."); - String subdomain = ""; + String host = url.getHost(); + // split host into subdomain, domain, topLevelDomain + String[] hostParts = host.split("\\."); + String subdomain = ""; - if (hostParts.length >= 2) { - // set topLevelDomain to the last part of the host - urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]); - // set domain to the second last part of the host - urlObj.setDomain(hostParts[hostParts.length - 2]); - // set subdomain to the first part of the host - if (hostParts.length > 2) { - subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2)); + if (hostParts.length >= 2) { + // set topLevelDomain to the last part of the host + urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]); + // set domain to the second last part of the host + urlObj.setDomain(hostParts[hostParts.length - 2]); + // set subdomain to the first part of the host + if (hostParts.length > 2) { + subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2)); + } } - } - // set subdomain to URL host - urlObj.setSubdomain(subdomain); + // set subdomain to URL host + urlObj.setSubdomain(subdomain); - String path = url.getPath(); - //set path to URL path if it's not empty, otherwise set it to root path - urlObj.setPath(path.isEmpty() ? "/" : path); + String path = url.getPath(); + //set path to URL path if it's not empty, otherwise set it to root path + urlObj.setPath(path.isEmpty() ? "/" : path); - String query = url.getQuery(); - Map queryParams = new HashMap<>(); - if (query != null) { - // split query params into key value pairs - for (String param : query.split("&")) { - String[] pair = param.split("="); - queryParams.put(pair[0], pair.length > 1 ? pair[1] : ""); + String query = url.getQuery(); + Map queryParams = new HashMap<>(); + if (query != null) { + // split query params into key value pairs + for (String param : query.split("&")) { + String[] pair = param.split("="); + queryParams.put(pair[0], pair.length > 1 ? pair[1] : ""); + } + logger.info("queryParams: {}", queryParams); } - logger.info("queryParams: {}", queryParams); + // set query params to URL query + urlObj.setQuery(queryParams.toString()); + // set fragment to URL ref + urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse("")); + } catch (URISyntaxException | MalformedURLException e) { + logger.error("Error in breaking down URL: {}", e.getMessage()); } - // set query params to URL query - urlObj.setQuery(queryParams.toString()); - // set fragment to URL ref - urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse("")); - return urlObj; } + private String encodeUrl(String urlString) throws MalformedURLException { + try { + URL url = new URL(urlString); + String protocol = url.getProtocol(); + String host = url.getHost(); + int port = url.getPort(); + String path = url.getPath(); + String query = url.getQuery(); + String ref = url.getRef(); - public void countAndTrackRedirects(String urlString, URLEntity details) throws IOException, URISyntaxException { - URI uri = new URI(urlString); - URL url = uri.toURL(); - List redirectChain = new ArrayList<>(); - List hstsHeaderList = new ArrayList<>(); - List sslStrippingList = new ArrayList<>(); - - // Add the initial URL to the chain - redirectChain.add(urlString); - boolean redirected; - int redirectCount = 0; - - do { - URLConnection testConnection = url.openConnection(); - - if (!(testConnection instanceof HttpURLConnection)) { - // Handle non-HTTP connections (like mailto:) - logger.info("Non-HTTP URL encountered: {}", url); - hstsHeaderList.add(INFO_HSTS_NOT_APPLICABLE); - sslStrippingList.add(false); - break; + StringBuilder encodedUrl = new StringBuilder(); + encodedUrl.append(protocol).append("://").append(host); + if (port != -1) { + encodedUrl.append(":").append(port); } - HttpURLConnection connection = (HttpURLConnection) url.openConnection(); - connection.setRequestMethod("GET"); - connection.setInstanceFollowRedirects(false); + encodedUrl.append(URLEncoder.encode(path, StandardCharsets.UTF_8).replace("%2F", "/")); - int responseCode = connection.getResponseCode(); - redirected = (responseCode >= 300 && responseCode < 400); + if (query != null) { + encodedUrl.append("?").append(URLEncoder.encode(query, StandardCharsets.UTF_8).replace("%3D", "=").replace("%26", "&")); + } + if (ref != null) { + encodedUrl.append("#").append(URLEncoder.encode(ref, StandardCharsets.UTF_8)); + } - // Checks for HSTS Header - hstsHeaderList.add(detectHSTSHeader(url, connection)); + return encodedUrl.toString(); + } catch (Exception e) { + throw new MalformedURLException("Failed to encode URL: " + e.getMessage()); + } + } - // Handle redirects - if (redirected) { - // Location header contains the URL to redirect to - String newUrl = connection.getHeaderField("Location"); - if (newUrl == null) { + public void countAndTrackRedirects(String urlString, URLEntity details) throws IOException { + try { + URI uri = new URI(urlString); + URL url = uri.toURL(); + List redirectChain = new ArrayList<>(); + List hstsHeaderList = new ArrayList<>(); + List sslStrippingList = new ArrayList<>(); + + // Add the initial URL to the chain + redirectChain.add(urlString); + boolean redirected; + int redirectCount = 0; + + do { + URLConnection testConnection = url.openConnection(); + + if (!(testConnection instanceof HttpURLConnection)) { + // Handle non-HTTP connections (like mailto:) + logger.info("Non-HTTP URL encountered: {}", url); + hstsHeaderList.add(INFO_HSTS_NOT_APPLICABLE); + sslStrippingList.add(false); break; } - URI newUri = uri.resolve(newUrl); - // check for SSL stripping during redirect - sslStrippingList.add(checkRedirectForSSLStripping(uri, newUri)); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.setInstanceFollowRedirects(false); - // Handle relative URLs - uri = uri.resolve(newUrl); - url = uri.toURL(); - redirectChain.add(url.toString()); - redirectCount++; - logger.info("Redirect #{}: {}",redirectCount, newUrl); - } else { - // No redirect, so no SSL stripping - sslStrippingList.add(false); - } + int responseCode = connection.getResponseCode(); + redirected = (responseCode >= 300 && responseCode < 400); - connection.disconnect(); - } while (redirected && redirectCount < MAX_REDIRECT_COUNT); + // Checks for HSTS Header + hstsHeaderList.add(detectHSTSHeader(url, connection)); - details.setRedirect(redirectChain.size() - 1); - details.setRedirectChain(redirectChain); - details.setSslStripping(sslStrippingList); - details.setHstsHeader(hstsHeaderList); + // Handle redirects + if (redirected) { + // Location header contains the URL to redirect to + String newUrl = connection.getHeaderField("Location"); + if (newUrl == null) { + break; + } + URI newUri = uri.resolve(newUrl); + // check for SSL stripping during redirect + sslStrippingList.add(checkRedirectForSSLStripping(uri, newUri)); + + // Handle relative URLs + uri = uri.resolve(newUrl); + url = uri.toURL(); + redirectChain.add(url.toString()); + redirectCount++; + logger.info("Redirect #{}: {}",redirectCount, newUrl); + } else { + // No redirect, so no SSL stripping + sslStrippingList.add(false); + } + + connection.disconnect(); + } while (redirected && redirectCount < MAX_REDIRECT_COUNT); + + details.setRedirect(redirectChain.size() - 1); + details.setRedirectChain(redirectChain); + details.setSslStripping(sslStrippingList); + details.setHstsHeader(hstsHeaderList); + } catch (URISyntaxException e){ + logger.error("Error in breaking down URL: {}", e.getMessage()); + } } // Function to check if the redirect is from HTTPS to HTTP private boolean checkRedirectForSSLStripping(URI originalUri, URI newUri) {