From 2746891645049f7a033de453ba8571f833489170 Mon Sep 17 00:00:00 2001 From: ltiongku Date: Mon, 5 Aug 2024 20:45:03 +0800 Subject: [PATCH] added javascript check in url and domain embedding checks --- .../safeqr/app/qrcode/entity/URLEntity.java | 15 ++- .../service/URLVerificationService.java | 108 ++++++++++++------ 2 files changed, 81 insertions(+), 42 deletions(-) diff --git a/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java b/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java index a75ae88..012625c 100644 --- a/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java +++ b/src/main/java/com/safeqr/app/qrcode/entity/URLEntity.java @@ -11,6 +11,7 @@ import lombok.Builder; import org.hibernate.annotations.Type; import org.hibernate.annotations.UuidGenerator; +import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -49,18 +50,24 @@ public class URLEntity { @Type(ListArrayType.class) @Column(name = "hsts_header", columnDefinition = "text[]") - private List hstsHeader; + private List hstsHeader = new ArrayList<>(); @Type(ListArrayType.class) @Column(name = "ssl_stripping", columnDefinition = "boolean[]") - private List sslStripping; + private List sslStripping = new ArrayList<>(); @Type(ListArrayType.class) @Column(name = "redirect_chain", columnDefinition = "text[]") - private List redirectChain; + private List redirectChain = new ArrayList<>(); + + @Column(name = "hostname_embedding") + private int hostnameEmbedding = 0; + + @Column(name = "javascript_check") + private String javascriptCheck = "No Javascript in URL"; @Column(name = "dns_error") - private String dnsError; + private String dnsError = "No Error Found."; @Column(name="certificate_subject_mismatch") private String certificateSubjectMismatch; diff --git a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java index 3cdf75e..6f59b97 100644 --- a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java +++ b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java @@ -17,6 +17,9 @@ import java.io.IOException; import java.net.*; import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; @Service @@ -41,55 +44,84 @@ public class URLVerificationService { urlRepository.save(urlEntity); } // Function to breakdown URL into subdomain, domain, topLevelDomain, query params, fragment - public URLEntity breakdownURL(String urlString) throws MalformedURLException { + public URLEntity breakdownURL(String urlString) { URLEntity urlObj = new URLEntity(); try { - // Ensure the URL is properly encoded - String encodedUrl = encodeUrl(urlString); - URI uri = new URI(encodedUrl); - URL url = uri.toURL(); - + URL url = new URI(encodeUrl(urlString)).toURL(); String host = url.getHost(); - // split host into subdomain, domain, topLevelDomain - String[] hostParts = host.split("\\."); - String subdomain = ""; - if (hostParts.length >= 2) { - // set topLevelDomain to the last part of the host - urlObj.setTopLevelDomain(hostParts[hostParts.length - 1]); - // set domain to the second last part of the host - urlObj.setDomain(hostParts[hostParts.length - 2]); - // set subdomain to the first part of the host - if (hostParts.length > 2) { - subdomain = String.join(".", java.util.Arrays.copyOfRange(hostParts, 0, hostParts.length - 2)); - } - } - // set subdomain to URL host - urlObj.setSubdomain(subdomain); + urlObj.setHostnameEmbedding(checkDeceptiveUrl(url)); + urlObj.setJavascriptCheck(checkForJavascriptCode(urlString)); - String path = url.getPath(); - //set path to URL path if it's not empty, otherwise set it to root path - urlObj.setPath(path.isEmpty() ? "/" : path); + populateHostDetails(host, urlObj); - String query = url.getQuery(); - Map queryParams = new HashMap<>(); - if (query != null) { - // split query params into key value pairs - for (String param : query.split("&")) { - String[] pair = param.split("="); - queryParams.put(pair[0], pair.length > 1 ? pair[1] : ""); - } - logger.info("queryParams: {}", queryParams); - } - // set query params to URL query - urlObj.setQuery(queryParams.toString()); - // set fragment to URL ref + urlObj.setPath(Optional.ofNullable(url.getPath()).filter(p -> !p.isEmpty()).orElse("/")); + urlObj.setQuery(parseQueryParams(url.getQuery())); urlObj.setFragment(Optional.ofNullable(url.getRef()).orElse("")); - } catch (URISyntaxException | MalformedURLException e) { + + } catch (Exception e) { logger.error("Error in breaking down URL: {}", e.getMessage()); } return urlObj; } + + private void populateHostDetails(String host, URLEntity urlObj) { + String[] hostParts = host.split("\\."); + int length = hostParts.length; + + if (length >= 2) { + urlObj.setTopLevelDomain(hostParts[length - 1]); + urlObj.setDomain(hostParts[length - 2]); + urlObj.setSubdomain(length > 2 ? String.join(".", Arrays.copyOfRange(hostParts, 0, length - 2)) : ""); + } + } + + private int checkDeceptiveUrl(URL url) { + String[] parts = url.getHost().split("\\."); + if (parts.length < 3) return 0; + + Set commonTlds = new HashSet<>(Arrays.asList("com", "org", "net", "edu", "gov")); + + for (int i = parts.length - 2; i >= 1; i--) { + if (commonTlds.contains(parts[i]) && !commonTlds.contains(parts[i - 1]) && i != parts.length - 2) { + logger.warn("Potentially deceptive URL detected: {} (Suspicious domain: {}.{})", + url, parts[i - 1], parts[i]); + return 1; + } + } + return 0; + } + + private String checkForJavascriptCode(String url) { + // Patterns to detect 'javascript:', '