diff --git a/pom.xml b/pom.xml index ced849d..0a8cddd 100644 --- a/pom.xml +++ b/pom.xml @@ -121,26 +121,7 @@ jackson-annotations 2.17.2 - - - org.apache.spark - spark-core_2.13 - 3.4.3 - - - - org.apache.spark - spark-sql_2.13 - 3.4.3 - provided - - - - org.apache.spark - spark-mllib_2.13 - 3.4.3 - provided - + diff --git a/src/main/java/com/safeqr/app/constants/APIConstants.java b/src/main/java/com/safeqr/app/constants/APIConstants.java index 36cf11e..9f0b861 100644 --- a/src/main/java/com/safeqr/app/constants/APIConstants.java +++ b/src/main/java/com/safeqr/app/constants/APIConstants.java @@ -13,6 +13,7 @@ public class APIConstants { public static final String API_URL_QRCODE_VIRUS_TOTAL_CHECK = "/qrcodetypes/virusTotalCheck"; public static final String API_URL_QRCODE_REDIRECT_COUNT = "/qrcodetypes/checkRedirects"; public static final String API_URL_QRCODE_GET_QR_DETAILS = "/qrcodetypes/getQRDetails"; + public static final String PREDICTION_API_URL = "http://localhost:8000/predict"; public static final String API_URL_USER_GET = "/user/getUser"; diff --git a/src/main/java/com/safeqr/app/prediction/model/URLFeatures.java b/src/main/java/com/safeqr/app/prediction/model/URLFeatures.java new file mode 100644 index 0000000..e105b94 --- /dev/null +++ b/src/main/java/com/safeqr/app/prediction/model/URLFeatures.java @@ -0,0 +1,199 @@ +package com.safeqr.app.prediction.model; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.safeqr.app.qrcode.model.URLModel; +import lombok.*; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class URLFeatures { + @JsonProperty("domain") + private Integer domain; + + @JsonProperty("subdomain") + private Integer subdomain; + + @JsonProperty("top_level_domain") + private Integer topLevelDomain; + + @JsonProperty("query") + private Integer query; + + @JsonProperty("fragment") + private Integer fragment; + + @JsonProperty("redirect") + private Integer redirect; + + @JsonProperty("path") + private Integer path; + + @JsonProperty("redirect_chain") + private Integer redirectChain; + + @JsonProperty("hsts_header") + private Integer hstsHeader; + + @JsonProperty("ssl_stripping") + private Integer sslStripping; + + @JsonProperty("hostname_embedding") + private Integer hostnameEmbedding; + + @JsonProperty("javascript_check") + private Integer javascriptCheck; + + @JsonProperty("shortening_service") + private Integer shorteningService; + + @JsonProperty("has_ip_address") + private Integer hasIpAddress; + + @JsonProperty("tracking_descriptions") + private Integer trackingDescriptions; + + @JsonProperty("url_encoding") + private Integer urlEncoding; + + @JsonProperty("has_executable") + private Integer hasExecutable; + + @JsonProperty("tls") + private Integer tls; + + @JsonProperty("contents") + private Integer contents; + + public static URLFeatures fromEntity(URLModel urlModel) { + URLFeatures features = URLFeatures.builder() + .build(); + features.setDomain(urlModel.getDetails().getDomain()); + features.setSubdomain(urlModel.getDetails().getSubdomain()); + features.setTopLevelDomain(urlModel.getDetails().getTopLevelDomain()); + features.setQuery(urlModel.getDetails().getQuery()); + features.setFragment(urlModel.getDetails().getFragment()); + features.setPath(urlModel.getDetails().getPath()); + features.setRedirect(urlModel.getDetails().getRedirect()); + features.setRedirectChain(urlModel.getDetails().getRedirectChain()); + features.setHstsHeader(urlModel.getDetails().getHstsHeader()); + features.setSslStripping(urlModel.getDetails().getSslStripping()); + features.setHostnameEmbedding(urlModel.getDetails().getHostnameEmbedding()); + features.setJavascriptCheck(urlModel.getDetails().getJavascriptCheck()); + features.setShorteningService(urlModel.getDetails().getShorteningService()); + features.setHasIpAddress(urlModel.getDetails().getHasIpAddress()); + features.setTrackingDescriptions(urlModel.getDetails().getTrackingDescriptions()); + features.setUrlEncoding(urlModel.getDetails().getUrlEncoding()); + features.setHasExecutable(urlModel.getDetails().getHasExecutable()); + features.setTls(Math.toIntExact(urlModel.getData().getInfo().getId())); + features.setContents(urlModel.getData().getContents()); + + return features; + } + + private void setRedirect(int redirect) { + this.redirect = redirect; + } + + // Custom setter for tls (qr_code_type_id) + public void setTls(Integer tls) { + if (tls != null) { + this.tls = tls == 1 ? 0 : tls == 9 ? 1 : tls.intValue(); + } else { + this.tls = 0; + } + } + + // Custom setter for hostnameEmbedding and other similar columns + public void setHostnameEmbedding(Integer hostnameEmbedding) { + this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1 : 0; + } + + public void setJavascriptCheck(String javascriptCheck) { + this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1 : 0; + } + + public void setShorteningService(String shorteningService) { + this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1 : 0; + } + + public void setHasIpAddress(String hasIpAddress) { + this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1 : 0; + } + + public void setUrlEncoding(String urlEncoding) { + this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1 : 0; + } + + public void setHasExecutable(String hasExecutable) { + this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1 : 0; + } + + public void setTrackingDescriptions(List trackingDescriptions) { + this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1 : 0; + } + + // Custom setter for sslStripping + public void setSslStripping(List sslStripping) { + if (sslStripping != null && !sslStripping.isEmpty() && sslStripping.get(0) != null) { + this.sslStripping = sslStripping.get(0) ? 1 : 0; + } else { + this.sslStripping = 0; + } + } + + // Custom setter for hstsHeader + public void setHstsHeader(List hstsHeader) { + if (hstsHeader == null || hstsHeader.isEmpty()) { + this.hstsHeader = 0; + } else if (hstsHeader.get(0).startsWith("{") && hstsHeader.get(0).endsWith("}")) { + Pattern pattern = Pattern.compile("\"(.*?)\""); + Matcher matcher = pattern.matcher(hstsHeader.get(0)); + if (matcher.find() && matcher.group(1).toLowerCase().contains("no")) { + this.hstsHeader = 0; + } else { + this.hstsHeader = 1; + } + } else { + this.hstsHeader = 1; + } + } + + // Custom setters for calculating string lengths + public void setDomain(String domain) { + this.domain = (domain != null) ? domain.length() : 0; + } + + public void setSubdomain(String subdomain) { + this.subdomain = (subdomain != null) ? subdomain.length() : 0; + } + + public void setTopLevelDomain(String topLevelDomain) { + this.topLevelDomain = (topLevelDomain != null) ? topLevelDomain.length() : 0; + } + + public void setQuery(String query) { + this.query = (query != null) ? query.length() : 0; + } + + public void setFragment(String fragment) { + this.fragment = (fragment != null) ? fragment.length() : 0; + } + + public void setPath(String path) { + this.path = (path != null) ? path.length() : 0; + } + + public void setRedirectChain(List redirectChain) { + this.redirectChain = (redirectChain != null) ? redirectChain.size() : 0; + } + + public void setContents(String contents) { + this.contents = (contents != null) ? contents.length() : 0; + } +} diff --git a/src/main/java/com/safeqr/app/prediction/service/PredictionService.java b/src/main/java/com/safeqr/app/prediction/service/PredictionService.java new file mode 100644 index 0000000..2513d7a --- /dev/null +++ b/src/main/java/com/safeqr/app/prediction/service/PredictionService.java @@ -0,0 +1,63 @@ +package com.safeqr.app.prediction.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.safeqr.app.prediction.model.URLFeatures; +import com.safeqr.app.qrcode.model.URLModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; +import org.springframework.web.client.RestTemplate; +import org.springframework.http.ResponseEntity; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.MediaType; + +import static com.safeqr.app.constants.APIConstants.PREDICTION_API_URL; + +@Service +public class PredictionService { + private static final Logger logger = LoggerFactory.getLogger(PredictionService.class); + + private final RestTemplate restTemplate; + private final ObjectMapper objectMapper; + + public PredictionService(RestTemplate restTemplate, ObjectMapper objectMapper) { + this.restTemplate = restTemplate; + this.objectMapper = objectMapper; + } + + public String predict(URLModel urlModel) { + // Convert URLModel to URLFeatures + URLFeatures features = URLFeatures.fromEntity(urlModel); + logger.info("Prediction request: {}", features); + + // Prepare the HTTP headers + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + + // Create the HTTP entity containing the features and headers + HttpEntity requestEntity = new HttpEntity<>(features, headers); + + // Make the HTTP POST request to the FastAPI prediction endpoint + ResponseEntity response = restTemplate.exchange( + PREDICTION_API_URL, + HttpMethod.POST, + requestEntity, + String.class + ); + + // Use ObjectMapper to deserialize the response and automatically remove quotes + String prediction = response.getBody(); + try { + prediction = objectMapper.readValue(prediction, String.class); + } catch (Exception e) { + logger.error("Failed to parse prediction response", e); + prediction = "Unknown"; + } + logger.info("Prediction response: {}", prediction); + + // Return the prediction + return prediction; + } +} diff --git a/src/main/java/com/safeqr/app/qrcode/controller/QRCodeTypeController.java b/src/main/java/com/safeqr/app/qrcode/controller/QRCodeTypeController.java index 2397e98..aff7a4d 100644 --- a/src/main/java/com/safeqr/app/qrcode/controller/QRCodeTypeController.java +++ b/src/main/java/com/safeqr/app/qrcode/controller/QRCodeTypeController.java @@ -61,10 +61,12 @@ public class QRCodeTypeController { return ResponseEntity.ok(qrCodeTypeService.detectType(payload).block()); } - @PostMapping(API_URL_QRCODE_VERIFY_URL) - public ResponseEntity verifyURL(@RequestBody QRCodePayload payload) { - URLVerificationResponse response = urlVerificationService.verifyURL(payload); - return ResponseEntity.ok(response); + @PostMapping(value = API_URL_QRCODE_VERIFY_URL, produces = MediaType.APPLICATION_JSON_VALUE) + public ResponseEntity verifyURL(@RequestBody QRCodePayload payload, + @RequestHeader(required = false, name = HEADER_USER_ID) String userId) { + logger.info("User Id Invoking verify url endpoint: {}", userId); + return ResponseEntity.ok(qrCodeTypeService.scanQRCode(userId, payload)); + } @PostMapping(API_URL_QRCODE_VIRUS_TOTAL_CHECK) diff --git a/src/main/java/com/safeqr/app/qrcode/model/URLModel.java b/src/main/java/com/safeqr/app/qrcode/model/URLModel.java index db5984a..c5e9721 100644 --- a/src/main/java/com/safeqr/app/qrcode/model/URLModel.java +++ b/src/main/java/com/safeqr/app/qrcode/model/URLModel.java @@ -47,6 +47,6 @@ public final class URLModel extends QRCodeModel { @Override public String retrieveClassification() { - return ""; + return urlVerificationService.getClassification(this); } } diff --git a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java index b4897fd..1a43bec 100644 --- a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java +++ b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java @@ -1,11 +1,10 @@ package com.safeqr.app.qrcode.service; import static com.safeqr.app.constants.CommonConstants.*; - -import com.safeqr.app.qrcode.dto.request.QRCodePayload; -import com.safeqr.app.qrcode.dto.URLVerificationResponse; import com.safeqr.app.qrcode.entity.URLEntity; +import com.safeqr.app.qrcode.model.URLModel; import com.safeqr.app.qrcode.repository.URLRepository; +import com.safeqr.app.prediction.service.PredictionService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -29,9 +28,11 @@ public class URLVerificationService { private static final int READ_TIMEOUT_MS = 10000; private static final Logger logger = LoggerFactory.getLogger(URLVerificationService.class); private final URLRepository urlRepository; + private final PredictionService predictionService; @Autowired - public URLVerificationService(URLRepository urlRepository) { + public URLVerificationService(URLRepository urlRepository, PredictionService predictionService) { this.urlRepository = urlRepository; + this.predictionService = predictionService; } // Regular expression pattern for shortening services @@ -425,22 +426,8 @@ public class URLVerificationService { return INFO_NON_SECURE_CONNECTION; } - public URLVerificationResponse verifyURL(QRCodePayload payload) { - URLVerificationResponse response = new URLVerificationResponse(); - try { - java.net.URL url = new java.net.URL(payload.getData()); - String protocol = url.getProtocol(); - if ("https".equalsIgnoreCase(protocol)) { - response.setSecure(true); - response.setMessage("The connection is secure."); - } else { - response.setSecure(false); - response.setMessage("The connection is not secure."); - } - } catch (Exception e) { - response.setSecure(false); - response.setMessage("Invalid URL."); - } - return response; + // Get Classification using ML Model + public String getClassification(URLModel urlModel){ + return predictionService.predict(urlModel); } } \ No newline at end of file diff --git a/src/main/java/com/safeqr/app/spark/model/URLFeatures.java b/src/main/java/com/safeqr/app/spark/model/URLFeatures.java deleted file mode 100644 index 9910919..0000000 --- a/src/main/java/com/safeqr/app/spark/model/URLFeatures.java +++ /dev/null @@ -1,134 +0,0 @@ -package com.safeqr.app.spark.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class URLFeatures { - private Long domain; - private Long subdomain; - private Long topLevelDomain; - private Long query; - private Long fragment; - private Long redirect; - private Long path; - private Long redirectChain; - private Long hstsHeader; - private Long sslStripping; - private Long hostnameEmbedding; - private Long javascriptCheck; - private Long shorteningService; - private Long hasIpAddress; - private Long trackingDescriptions; - private Long urlEncoding; - private Long hasExecutable; - private Long tls; - private Long contents; - private String target; // This is the label, may be null if predicting - - // Custom setter for tls (qr_code_type_id) - public void setTls(Long tls) { - if (tls != null) { - this.tls = tls == 1 ? 0 : tls == 9 ? 1 : tls; - } else { - this.tls = 0L; - } - } - - // Custom setter for hostnameEmbedding and other similar columns - public void setHostnameEmbedding(Long hostnameEmbedding) { - this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1L : 0L; - } - - public void setJavascriptCheck(Long javascriptCheck) { - this.javascriptCheck = (javascriptCheck != null && javascriptCheck != 0) ? 1L : 0L; - } - - public void setShorteningService(Long shorteningService) { - this.shorteningService = (shorteningService != null && shorteningService != 0) ? 1L : 0L; - } - - public void setHasIpAddress(Long hasIpAddress) { - this.hasIpAddress = (hasIpAddress != null && hasIpAddress != 0) ? 1L : 0L; - } - - public void setUrlEncoding(Long urlEncoding) { - this.urlEncoding = (urlEncoding != null && urlEncoding != 0) ? 1L : 0L; - } - - public void setHasExecutable(Long hasExecutable) { - this.hasExecutable = (hasExecutable != null && hasExecutable != 0) ? 1L : 0L; - } - - public void setTrackingDescriptions(Long trackingDescriptions) { - this.trackingDescriptions = (trackingDescriptions != null && trackingDescriptions != 0) ? 1L : 0L; - } - - // Custom setter for sslStripping - public void setSslStripping(String sslStripping) { - if (sslStripping != null && "true".equalsIgnoreCase(sslStripping)) { - this.sslStripping = 1L; - } else { - this.sslStripping = 0L; - } - } - - // Custom setter for hstsHeader - public void setHstsHeader(String hstsHeader) { - if (hstsHeader == null || "0".equals(hstsHeader)) { - this.hstsHeader = 0L; - } else if (hstsHeader.startsWith("{") && hstsHeader.endsWith("}")) { - Pattern pattern = Pattern.compile("\"(.*?)\""); - Matcher matcher = pattern.matcher(hstsHeader); - if (matcher.find() && matcher.group(1).toLowerCase().contains("no")) { - this.hstsHeader = 0L; - } else { - this.hstsHeader = 1L; - } - } else { - this.hstsHeader = 0L; - } - } - - // Custom setters for calculating string lengths - public void setDomain(String domain) { - this.domain = (domain != null) ? (long) domain.length() : 0L; - } - - public void setSubdomain(String subdomain) { - this.subdomain = (subdomain != null) ? (long) subdomain.length() : 0L; - } - - public void setTopLevelDomain(String topLevelDomain) { - this.topLevelDomain = (topLevelDomain != null) ? (long) topLevelDomain.length() : 0L; - } - - public void setQuery(String query) { - this.query = (query != null) ? (long) query.length() : 0L; - } - - public void setFragment(String fragment) { - this.fragment = (fragment != null) ? (long) fragment.length() : 0L; - } - - public void setPath(String path) { - this.path = (path != null) ? (long) path.length() : 0L; - } - - public void setRedirectChain(String redirectChain) { - this.redirectChain = (redirectChain != null) ? (long) redirectChain.length() : 0L; - } - - public void setContents(String contents) { - this.contents = (contents != null) ? (long) contents.length() : 0L; - } -} diff --git a/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java b/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java new file mode 100644 index 0000000..39605d7 --- /dev/null +++ b/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java @@ -0,0 +1,12 @@ +package com.safeqr.app.utils; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.client.RestTemplate; +@Configuration +public class RestTemplateConfig { + @Bean + public RestTemplate restTemplate() { + return new RestTemplate(); + } +} diff --git a/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc deleted file mode 100644 index 2042fbe..0000000 Binary files a/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/metadata/part-00000 b/src/main/resources/cv_model/bestModel/metadata/part-00000 deleted file mode 100644 index 81a3616..0000000 --- a/src/main/resources/cv_model/bestModel/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.PipelineModel","timestamp":1723422050490,"sparkVersion":"3.4.3","uid":"PipelineModel_4ecdd9f71524","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}} diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc deleted file mode 100644 index 258b61d..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet deleted file mode 100644 index b6f842f..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc deleted file mode 100644 index 9302067..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 deleted file mode 100644 index 76212bf..0000000 --- a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.StringIndexerModel","timestamp":1723422050930,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}} diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc deleted file mode 100644 index 2ccb975..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 deleted file mode 100644 index 05fc83c..0000000 --- a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422054241,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}} diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc deleted file mode 100644 index 7722ab3..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet deleted file mode 100644 index 2664881..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc deleted file mode 100644 index e758ca5..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 deleted file mode 100644 index 9f8fc55..0000000 --- a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1723422054559,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","numTrees":100,"maxDepth":10,"labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008},"numFeatures":19,"numClasses":4,"numTrees":100} diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc deleted file mode 100644 index 339b14a..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet deleted file mode 100644 index bc6c1d3..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/metadata/.part-00000.crc deleted file mode 100644 index 05f028a..0000000 Binary files a/src/main/resources/cv_model/estimator/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/metadata/part-00000 b/src/main/resources/cv_model/estimator/metadata/part-00000 deleted file mode 100644 index 483a4fc..0000000 --- a/src/main/resources/cv_model/estimator/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.Pipeline","timestamp":1723422048367,"sparkVersion":"3.4.3","uid":"Pipeline_58a1fe22f286","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}} diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc deleted file mode 100644 index 853858a..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 deleted file mode 100644 index e72c7fc..0000000 --- a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.StringIndexer","timestamp":1723422049018,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}} diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc deleted file mode 100644 index 8a19476..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 deleted file mode 100644 index 6ef6134..0000000 --- a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422049580,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}} diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc deleted file mode 100644 index df75d0b..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 deleted file mode 100644 index 5168986..0000000 --- a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.classification.RandomForestClassifier","timestamp":1723422050089,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008}} diff --git a/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc b/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc deleted file mode 100644 index 6fde672..0000000 Binary files a/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/evaluator/metadata/_SUCCESS b/src/main/resources/cv_model/evaluator/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/evaluator/metadata/part-00000 b/src/main/resources/cv_model/evaluator/metadata/part-00000 deleted file mode 100644 index ca32523..0000000 --- a/src/main/resources/cv_model/evaluator/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator","timestamp":1723422047764,"sparkVersion":"3.4.3","uid":"MulticlassClassificationEvaluator_f31cf4d2b0db","paramMap":{"metricName":"accuracy","labelCol":"indexed_target"},"defaultParamMap":{"eps":1.0E-15,"beta":1.0,"metricName":"f1","predictionCol":"prediction","labelCol":"label","metricLabel":0.0,"probabilityCol":"probability"}} diff --git a/src/main/resources/cv_model/metadata/.part-00000.crc b/src/main/resources/cv_model/metadata/.part-00000.crc deleted file mode 100644 index 8d903e4..0000000 Binary files a/src/main/resources/cv_model/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/metadata/_SUCCESS b/src/main/resources/cv_model/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/metadata/part-00000 b/src/main/resources/cv_model/metadata/part-00000 deleted file mode 100644 index 4fb6ce0..0000000 --- a/src/main/resources/cv_model/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1723422046660,"sparkVersion":"3.4.3","uid":"CrossValidatorModel_5c96b33c8d82","paramMap":{"seed":-2084793586583917283,"numFolds":5,"foldCol":"","estimatorParamMaps":[[{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"numTrees","value":"100","isJson":"true"},{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"maxDepth","value":"10","isJson":"true"}]]},"defaultParamMap":{"seed":880116102,"numFolds":3,"foldCol":""},"avgMetrics":[0.8736361548764979],"persistSubModels":false}