diff --git a/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java b/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java index 476f223..42b11ec 100644 --- a/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java +++ b/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java @@ -1,6 +1,7 @@ package com.safeqr.app.qrcode.entity; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import io.hypersistence.utils.hibernate.type.array.ListArrayType; import jakarta.persistence.*; @@ -11,6 +12,7 @@ import lombok.Builder; import org.hibernate.annotations.Type; import org.hibernate.annotations.UuidGenerator; +import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -34,28 +36,82 @@ public class URLEntity { private String domain; + @JsonInclude(JsonInclude.Include.NON_EMPTY) private String subdomain; private String topLevelDomain; private String path; - @JsonProperty private String query; + @JsonInclude(JsonInclude.Include.NON_EMPTY) private String fragment; private int redirect = 0; + @JsonInclude(JsonInclude.Include.NON_EMPTY) @Type(ListArrayType.class) @Column(name = "hsts_header", columnDefinition = "text[]") - private List hstsHeader; + private List hstsHeader = new ArrayList<>(); + @JsonInclude(JsonInclude.Include.NON_EMPTY) @Type(ListArrayType.class) @Column(name = "ssl_stripping", columnDefinition = "boolean[]") - private List sslStripping; + private List sslStripping = new ArrayList<>(); + @JsonInclude(JsonInclude.Include.NON_EMPTY) @Type(ListArrayType.class) @Column(name = "redirect_chain", columnDefinition = "text[]") - private List redirectChain; + private List redirectChain = new ArrayList<>(); + + @Column(name = "hostname_embedding") + private Integer hostnameEmbedding = 0; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name = "javascript_check") + private String javascriptCheck = ""; + + 
@JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name = "shortening_service") + private String shorteningService = ""; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name = "has_ip_address") + private String hasIpAddress = ""; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Type(ListArrayType.class) + @Column(name = "tracking_descriptions", columnDefinition = "text[]") + private List trackingDescriptions = new ArrayList<>(); + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name = "url_encoding") + private String urlEncoding = ""; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name = "dns_error") + private String dnsError = ""; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @Column(name="ssl_error") + private String sslError = ""; + + // Custom getter for hostnameEmbedding: null-safe; returns null (so NON_NULL omits it from JSON) when the value is unset or 0 + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getHostnameEmbedding() { + return hostnameEmbedding == null || hostnameEmbedding == 0 ? null : hostnameEmbedding; + } + // Custom getter for path: null-safe; returns null (so NON_NULL omits it from JSON) when the path is unset or empty + @JsonInclude(JsonInclude.Include.NON_NULL) + public String getPath() { + return path == null || path.isEmpty() ? null : path; + } + + // Custom getter for query: null-safe; returns null (so NON_NULL omits it from JSON) when the query is unset or the literal "{}" + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty + public String getQuery() { + return "{}".equals(query) ? 
null : query; + } } diff --git a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java index 5649f1a..d355e23 100644 --- a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java +++ b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java @@ -2,7 +2,6 @@ package com.safeqr.app.qrcode.service; import static com.safeqr.app.constants.CommonConstants.*; -import com.safeqr.app.exceptions.ResourceNotFoundExceptions; import com.safeqr.app.qrcode.dto.request.QRCodePayload; import com.safeqr.app.qrcode.dto.URLVerificationResponse; import com.safeqr.app.qrcode.entity.URLEntity; @@ -13,14 +12,21 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import javax.net.ssl.HttpsURLConnection; +import javax.net.ssl.SSLHandshakeException; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.net.*; import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; @Service public class URLVerificationService { + private static final int CONNECTION_TIMEOUT_MS = 10000; + private static final int READ_TIMEOUT_MS = 10000; private static final Logger logger = LoggerFactory.getLogger(URLVerificationService.class); private final URLRepository urlRepository; @Autowired @@ -28,6 +34,29 @@ public class URLVerificationService { this.urlRepository = urlRepository; } + // Regular expression pattern for shortening services + private static final String SHORTENING_PATTERN = + "bit\\.ly|goo\\.gl|shorte\\.st|go2l\\.ink|x\\.co|ow\\.ly|t\\.co|tinyurl|tr\\.im|is\\.gd|cli\\.gs|" + + "yfrog\\.com|migre\\.me|ff\\.im|tiny\\.cc|url4\\.eu|twit\\.ac|su\\.pr|twurl\\.nl|snipurl\\.com|" + + "short\\.to|BudURL\\.com|ping\\.fm|post\\.ly|Just\\.as|bkite\\.com|snipr\\.com|fic\\.kr|loopt\\.us|" + + 
"doiop\\.com|short\\.ie|kl\\.am|wp\\.me|rubyurl\\.com|om\\.ly|to\\.ly|bit\\.do|t\\.co|lnkd\\.in|" + + "db\\.tt|qr\\.ae|adf\\.ly|goo\\.gl|bitly\\.com|cur\\.lv|tinyurl\\.com|ow\\.ly|bit\\.ly|ity\\.im|" + + "q\\.gs|is\\.gd|po\\.st|bc\\.vc|twitthis\\.com|u\\.to|j\\.mp|buzurl\\.com|cutt\\.us|u\\.bb|yourls\\.org|" + + "x\\.co|prettylinkpro\\.com|scrnch\\.me|filoops\\.info|vzturl\\.com|qr\\.net|1url\\.com|tweez\\.me|v\\.gd|" + + "tr\\.im|link\\.zip\\.net"; + + // Regular expression pattern to match various IP address formats; each alternative is separated by an explicit OR: dotted-decimal IPv4 followed by a slash, hex-octet IPv4 (0x..) followed by a slash, full 8-group IPv6, IPv4 with a port, and IPv4 with an optional /prefix + private static final String IP_PATTERN = + "(([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\." + + "([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\/)|" + + "(([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\." + + "([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\/)|" + + "((0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\/)|" + + "(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}|" + + "([0-9]+(?:\\.[0-9]+){3}:[0-9]+)|" + + "((?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?)"; + + public URLEntity getURLEntityByQRCodeId(UUID qrCodeId) { logger.info("qrCodeId retrieving: {}", qrCodeId); // return urlRepository.findByQrCodeId(qrCodeId) @@ -39,55 +68,172 @@ public class URLVerificationService { urlRepository.save(urlEntity); } // Function to breakdown URL into subdomain, domain, topLevelDomain, query params, fragment - public URLEntity breakdownURL(String urlString) throws MalformedURLException { + public URLEntity breakdownURL(String urlString) { URLEntity urlObj = new URLEntity(); try { - // Ensure the URL is properly encoded - String encodedUrl = encodeUrl(urlString); - URI uri = new URI(encodedUrl); - URL url = uri.toURL(); - + //URL url = new URI(encodeUrl(urlString)).toURL(); + URL url = new URI(urlString).toURL(); String host = url.getHost(); - // split host into subdomain, 
domain, topLevelDomain - String[] hostParts = host.split("\\."); - String subdomain = ""; - if (hostParts.length >= 2) { - // set topLevelDomain to the last part of the host - urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]); - // set domain to the second last part of the host - urlObj.setDomain(hostParts[hostParts.length - 2]); - // set subdomain to the first part of the host - if (hostParts.length > 2) { - subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2)); - } - } - // set subdomain to URL host - urlObj.setSubdomain(subdomain); + // Check for deceptive URL + urlObj.setHostnameEmbedding(checkDeceptiveUrl(url)); - String path = url.getPath(); - //set path to URL path if it's not empty, otherwise set it to root path - urlObj.setPath(path.isEmpty() ? "/" : path); + // Check for Javascript code in url + urlObj.setJavascriptCheck(checkForJavascriptCode(urlString)); - String query = url.getQuery(); - Map queryParams = new HashMap<>(); - if (query != null) { - // split query params into key value pairs - for (String param : query.split("&")) { - String[] pair = param.split("="); - queryParams.put(pair[0], pair.length > 1 ? 
pair[1] : ""); - } - logger.info("queryParams: {}", queryParams); - } - // set query params to URL query - urlObj.setQuery(queryParams.toString()); - // set fragment to URL ref + // Check for url shortener + urlObj.setShorteningService(hasShorteningService(urlString)); + + // Check for IP address + urlObj.setHasIpAddress(hasIPAddress(urlString)); + + populateHostDetails(host, urlObj); + + urlObj.setPath(Optional.ofNullable(url.getPath()).filter(p -> !p.isEmpty()).orElse("")); + + String query = parseQueryParams(url.getQuery()); + urlObj.setQuery(query); urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse("")); - } catch (URISyntaxException | MalformedURLException e) { + + // Check for tracking parameters + urlObj.setTrackingDescriptions(getTrackingDescriptions(url.getQuery())); + + // Check for URL encoding in path and query + String pathEncoding = checkURLEncoding(url.getPath()); + String queryEncoding = query != null ? checkURLEncoding(query) : ""; + + // Combine encoding results + urlObj.setUrlEncoding(pathEncoding.equals("Yes") || queryEncoding.equals("Yes") ? "Yes" : ""); + + } catch (Exception e) { logger.error("Error in breaking down URL: {}", e.getMessage()); } return urlObj; } + + private void populateHostDetails(String host, URLEntity urlObj) { + String[] hostParts = host.split("\\."); + int length = hostParts.length; + + if (length >= 2) { + urlObj.setTopLevelDomain(hostParts[length - 1]); + urlObj.setDomain(hostParts[length - 2]); + urlObj.setSubdomain(length > 2 ? 
String.join(".", Arrays.copyOfRange(hostParts, 0, length - 2)) : ""); + } + } + // Known tracking query parameters (lower-case key) mapped to a human-readable description + private static final Map<String, String> TRACKING_DESCRIPTIONS = Map.ofEntries( + Map.entry("utm_source", "Campaign Source: Identifies which site sent the traffic."), + Map.entry("utm_medium", "Campaign Medium: Identifies what type of link was used."), + Map.entry("utm_campaign", "Campaign Name: Identifies a specific product promotion or campaign."), + Map.entry("utm_term", "Campaign Term: Identifies search terms."), + Map.entry("utm_content", "Campaign Content: Differentiates similar content or links within the same ad."), + Map.entry("gclid", "Google Click Identifier: Used by Google Ads to track clicks."), + Map.entry("fbclid", "Facebook Click Identifier: Used by Facebook to track clicks."), + Map.entry("tracking_id", "Tracking ID: General identifier for tracking purposes."), + Map.entry("affiliate_id", "Affiliate ID: Identifies traffic from affiliates."), + Map.entry("ref", "Referrer: Identifies the referrer site."), + Map.entry("referrer", "Referrer: Identifies the referrer site.") + ); + + // Regex pattern to capture key-value pairs in the query string through the named groups "key" and "value" (the value must be non-empty) + private static final Pattern PARAM_PATTERN = Pattern.compile( + "(?<key>[^=&]+)=(?<value>[^&]+)", + Pattern.CASE_INSENSITIVE + ); + + // Returns one "description: decoded value" entry per known tracking parameter found in the raw query string; returns an empty list when the query is null or empty + private List<String> getTrackingDescriptions(String query) { + if (query == null || query.isEmpty()) { + return Collections.emptyList(); + } + + Matcher matcher = PARAM_PATTERN.matcher(query); + List<String> foundDescriptions = new ArrayList<>(); + + while (matcher.find()) { + // Locale.ROOT keeps the lower-casing (and therefore the map lookup) independent of the JVM default locale + String key = matcher.group("key").toLowerCase(Locale.ROOT); + String value = URLDecoder.decode(matcher.group("value"), StandardCharsets.UTF_8); + if (TRACKING_DESCRIPTIONS.containsKey(key)) { + foundDescriptions.add(TRACKING_DESCRIPTIONS.get(key) + ": " + value); + } + } + + return foundDescriptions; + } + + private int 
checkDeceptiveUrl(URL url) { + String[] parts = url.getHost().split("\\."); + if (parts.length < 3) return 0; + + Set commonTlds = new HashSet<>(Arrays.asList("com", "org", "net", "edu", "gov")); + + for (int i = parts.length - 2; i >= 1; i--) { + if (commonTlds.contains(parts[i]) && !commonTlds.contains(parts[i - 1]) && i != parts.length - 2) { + logger.warn("Potentially deceptive URL detected: {} (Suspicious domain: {}.{})", + url, parts[i - 1], parts[i]); + return 1; + } + } + return 0; + } + + private String checkForJavascriptCode(String url) { + // Decode the URL + String decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8); + + // Patterns to detect 'javascript:', '