diff --git a/src/main/java/com/safeqr/app/constants/APIConstants.java b/src/main/java/com/safeqr/app/constants/APIConstants.java index 36cf11e..9f0b861 100644 --- a/src/main/java/com/safeqr/app/constants/APIConstants.java +++ b/src/main/java/com/safeqr/app/constants/APIConstants.java @@ -13,6 +13,7 @@ public class APIConstants { public static final String API_URL_QRCODE_VIRUS_TOTAL_CHECK = "/qrcodetypes/virusTotalCheck"; public static final String API_URL_QRCODE_REDIRECT_COUNT = "/qrcodetypes/checkRedirects"; public static final String API_URL_QRCODE_GET_QR_DETAILS = "/qrcodetypes/getQRDetails"; + public static final String PREDICTION_API_URL = "http://localhost:8000/predict"; public static final String API_URL_USER_GET = "/user/getUser"; diff --git a/src/main/java/com/safeqr/app/spark/model/URLFeatures.java b/src/main/java/com/safeqr/app/prediction/model/URLFeatures.java similarity index 61% rename from src/main/java/com/safeqr/app/spark/model/URLFeatures.java rename to src/main/java/com/safeqr/app/prediction/model/URLFeatures.java index 2c281d3..e105b94 100644 --- a/src/main/java/com/safeqr/app/spark/model/URLFeatures.java +++ b/src/main/java/com/safeqr/app/prediction/model/URLFeatures.java @@ -1,6 +1,6 @@ -package com.safeqr.app.spark.model; +package com.safeqr.app.prediction.model; -import com.safeqr.app.qrcode.entity.URLEntity; +import com.fasterxml.jackson.annotation.JsonProperty; import com.safeqr.app.qrcode.model.URLModel; import lombok.*; @@ -13,26 +13,62 @@ import java.util.regex.Pattern; @NoArgsConstructor @AllArgsConstructor public class URLFeatures { - private Double domain; - private Double subdomain; - private Double topLevelDomain; - private Double query; - private Double fragment; - private Double redirect; - private Double path; - private Double redirectChain; - private Double hstsHeader; - private Double sslStripping; - private Double hostnameEmbedding; - private Double javascriptCheck; - private Double shorteningService; - private Double hasIpAddress; - private Double trackingDescriptions; - private Double urlEncoding; - private Double hasExecutable; - private Double tls; - private Double contents; - private String target; // This is the label, may be null if predicting + @JsonProperty("domain") + private Integer domain; + + @JsonProperty("subdomain") + private Integer subdomain; + + @JsonProperty("top_level_domain") + private Integer topLevelDomain; + + @JsonProperty("query") + private Integer query; + + @JsonProperty("fragment") + private Integer fragment; + + @JsonProperty("redirect") + private Integer redirect; + + @JsonProperty("path") + private Integer path; + + @JsonProperty("redirect_chain") + private Integer redirectChain; + + @JsonProperty("hsts_header") + private Integer hstsHeader; + + @JsonProperty("ssl_stripping") + private Integer sslStripping; + + @JsonProperty("hostname_embedding") + private Integer hostnameEmbedding; + + @JsonProperty("javascript_check") + private Integer javascriptCheck; + + @JsonProperty("shortening_service") + private Integer shorteningService; + + @JsonProperty("has_ip_address") + private Integer hasIpAddress; + + @JsonProperty("tracking_descriptions") + private Integer trackingDescriptions; + + @JsonProperty("url_encoding") + private Integer urlEncoding; + + @JsonProperty("has_executable") + private Integer hasExecutable; + + @JsonProperty("tls") + private Integer tls; + + @JsonProperty("contents") + private Integer contents; public static URLFeatures fromEntity(URLModel urlModel) { URLFeatures features = URLFeatures.builder() @@ -43,6 +79,7 @@ public class URLFeatures { features.setQuery(urlModel.getDetails().getQuery()); features.setFragment(urlModel.getDetails().getFragment()); features.setPath(urlModel.getDetails().getPath()); + features.setRedirect(urlModel.getDetails().getRedirect()); features.setRedirectChain(urlModel.getDetails().getRedirectChain()); features.setHstsHeader(urlModel.getDetails().getHstsHeader()); features.setSslStripping(urlModel.getDetails().getSslStripping()); @@ -59,100 +96,104 @@ public class URLFeatures { return features; } + private void setRedirect(int redirect) { + this.redirect = redirect; + } + // Custom setter for tls (qr_code_type_id) public void setTls(Integer tls) { if (tls != null) { - this.tls = tls == 1 ? 0.0 : tls == 9 ? 1.0 : tls.doubleValue(); + this.tls = tls == 1 ? 0 : tls == 9 ? 1 : tls.intValue(); } else { - this.tls = 0.0; + this.tls = 0; } } // Custom setter for hostnameEmbedding and other similar columns public void setHostnameEmbedding(Integer hostnameEmbedding) { - this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1.0 : 0.0; + this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1 : 0; } public void setJavascriptCheck(String javascriptCheck) { - this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1.0 : 0.0; + this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1 : 0; } public void setShorteningService(String shorteningService) { - this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1.0 : 0.0; + this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1 : 0; } public void setHasIpAddress(String hasIpAddress) { - this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1.0 : 0.0; + this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1 : 0; } public void setUrlEncoding(String urlEncoding) { - this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1.0 : 0.0; + this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1 : 0; } public void setHasExecutable(String hasExecutable) { - this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1.0 : 0.0; + this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1 : 0; } public void setTrackingDescriptions(List trackingDescriptions) { - this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1.0 : 0.0; + this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1 : 0; } // Custom setter for sslStripping public void setSslStripping(List sslStripping) { if (sslStripping != null && !sslStripping.isEmpty() && sslStripping.get(0) != null) { - this.sslStripping = sslStripping.get(0) ? 1.0 : 0.0; + this.sslStripping = sslStripping.get(0) ? 1 : 0; } else { - this.sslStripping = 0.0; + this.sslStripping = 0; } } // Custom setter for hstsHeader public void setHstsHeader(List hstsHeader) { if (hstsHeader == null || hstsHeader.isEmpty()) { - this.hstsHeader = 0.0; + this.hstsHeader = 0; } else if (hstsHeader.get(0).startsWith("{") && hstsHeader.get(0).endsWith("}")) { Pattern pattern = Pattern.compile("\"(.*?)\""); Matcher matcher = pattern.matcher(hstsHeader.get(0)); if (matcher.find() && matcher.group(1).toLowerCase().contains("no")) { - this.hstsHeader = 0.0; + this.hstsHeader = 0; } else { - this.hstsHeader = 1.0; + this.hstsHeader = 1; } } else { - this.hstsHeader = 1.0; + this.hstsHeader = 1; } } // Custom setters for calculating string lengths public void setDomain(String domain) { - this.domain = (domain != null) ? (double) domain.length() : 0.0; + this.domain = (domain != null) ? domain.length() : 0; } public void setSubdomain(String subdomain) { - this.subdomain = (subdomain != null) ? (double) subdomain.length() : 0.0; + this.subdomain = (subdomain != null) ? subdomain.length() : 0; } public void setTopLevelDomain(String topLevelDomain) { - this.topLevelDomain = (topLevelDomain != null) ? (double) topLevelDomain.length() : 0.0; + this.topLevelDomain = (topLevelDomain != null) ? topLevelDomain.length() : 0; } public void setQuery(String query) { - this.query = (query != null) ? (double) query.length() : 0.0; + this.query = (query != null) ? query.length() : 0; } public void setFragment(String fragment) { - this.fragment = (fragment != null) ? (double) fragment.length() : 0.0; + this.fragment = (fragment != null) ? fragment.length() : 0; } public void setPath(String path) { - this.path = (path != null) ? (double) path.length() : 0.0; + this.path = (path != null) ? path.length() : 0; } public void setRedirectChain(List redirectChain) { - this.redirectChain = (redirectChain != null) ? (double) redirectChain.size() : 0.0; + this.redirectChain = (redirectChain != null) ? redirectChain.size() : 0; } public void setContents(String contents) { - this.contents = (contents != null) ? (double) contents.length() : 0.0; + this.contents = (contents != null) ? contents.length() : 0; } } diff --git a/src/main/java/com/safeqr/app/prediction/service/PredictionService.java b/src/main/java/com/safeqr/app/prediction/service/PredictionService.java new file mode 100644 index 0000000..2513d7a --- /dev/null +++ b/src/main/java/com/safeqr/app/prediction/service/PredictionService.java @@ -0,0 +1,63 @@ +package com.safeqr.app.prediction.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.safeqr.app.prediction.model.URLFeatures; +import com.safeqr.app.qrcode.model.URLModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; +import org.springframework.web.client.RestTemplate; +import org.springframework.http.ResponseEntity; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.MediaType; + +import static com.safeqr.app.constants.APIConstants.PREDICTION_API_URL; + +@Service +public class PredictionService { + private static final Logger logger = LoggerFactory.getLogger(PredictionService.class); + + private final RestTemplate restTemplate; + private final ObjectMapper objectMapper; + + public PredictionService(RestTemplate restTemplate, ObjectMapper objectMapper) { + this.restTemplate = restTemplate; + this.objectMapper = objectMapper; + } + + public String predict(URLModel urlModel) { + // Convert URLModel to URLFeatures + URLFeatures features = URLFeatures.fromEntity(urlModel); + logger.info("Prediction request: {}", features); + + // Prepare the HTTP headers + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + + // Create the HTTP entity containing the features and headers + HttpEntity requestEntity = new HttpEntity<>(features, headers); + + // Make the HTTP POST request to the FastAPI prediction endpoint + ResponseEntity response = restTemplate.exchange( + PREDICTION_API_URL, + HttpMethod.POST, + requestEntity, + String.class + ); + + // Use ObjectMapper to deserialize the response and automatically remove quotes + String prediction = response.getBody(); + try { + prediction = objectMapper.readValue(prediction, String.class); + } catch (Exception e) { + logger.error("Failed to parse prediction response", e); + prediction = "Unknown"; + } + logger.info("Prediction response: {}", prediction); + + // Return the prediction + return prediction; + } +} diff --git a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java index 85caae4..1a43bec 100644 --- a/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java +++ b/src/main/java/com/safeqr/app/qrcode/service/URLVerificationService.java @@ -4,7 +4,7 @@ import static com.safeqr.app.constants.CommonConstants.*; import com.safeqr.app.qrcode.entity.URLEntity; import com.safeqr.app.qrcode.model.URLModel; import com.safeqr.app.qrcode.repository.URLRepository; -import com.safeqr.app.spark.service.MLModelService; +import com.safeqr.app.prediction.service.PredictionService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -28,11 +28,11 @@ public class URLVerificationService { private static final int READ_TIMEOUT_MS = 10000; private static final Logger logger = LoggerFactory.getLogger(URLVerificationService.class); private final URLRepository urlRepository; - private final MLModelService mlModelService; + private final PredictionService predictionService; @Autowired - public URLVerificationService(URLRepository urlRepository, MLModelService mlModelService) { + public URLVerificationService(URLRepository urlRepository, PredictionService predictionService) { this.urlRepository = urlRepository; - this.mlModelService = mlModelService; + this.predictionService = predictionService; } // Regular expression pattern for shortening services @@ -428,6 +428,6 @@ public class URLVerificationService { // Get Classification using ML Model public String getClassification(URLModel urlModel){ - return mlModelService.predict(urlModel); + return predictionService.predict(urlModel); } } \ No newline at end of file diff --git a/src/main/java/com/safeqr/app/spark/service/MLModelService.java b/src/main/java/com/safeqr/app/spark/service/MLModelService.java deleted file mode 100644 index 25e5f65..0000000 --- a/src/main/java/com/safeqr/app/spark/service/MLModelService.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.safeqr.app.spark.service; - -import com.safeqr.app.qrcode.model.URLModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Service; - - -@Service -public class MLModelService { - private static final Logger logger = LoggerFactory.getLogger(MLModelService.class); - - public MLModelService() { - - } - - - public String predict(URLModel urlModel) { - - return "haha"; - } -} diff --git a/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java b/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java new file mode 100644 index 0000000..39605d7 --- /dev/null +++ b/src/main/java/com/safeqr/app/utils/RestTemplateConfig.java @@ -0,0 +1,12 @@ +package com.safeqr.app.utils; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.client.RestTemplate; +@Configuration +public class RestTemplateConfig { + @Bean + public RestTemplate restTemplate() { + return new RestTemplate(); + } +} diff --git a/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc deleted file mode 100644 index 2042fbe..0000000 Binary files a/src/main/resources/cv_model/bestModel/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/metadata/part-00000 b/src/main/resources/cv_model/bestModel/metadata/part-00000 deleted file mode 100644 index 81a3616..0000000 --- a/src/main/resources/cv_model/bestModel/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.PipelineModel","timestamp":1723422050490,"sparkVersion":"3.4.3","uid":"PipelineModel_4ecdd9f71524","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}} diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc deleted file mode 100644 index 258b61d..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/.part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet deleted file mode 100644 index b6f842f..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/data/part-00000-82eb0422-116b-4264-a2e1-f02308435abb-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc deleted file mode 100644 index 9302067..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 deleted file mode 100644 index 76212bf..0000000 --- a/src/main/resources/cv_model/bestModel/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.StringIndexerModel","timestamp":1723422050930,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}} diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc deleted file mode 100644 index 2ccb975..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 deleted file mode 100644 index 05fc83c..0000000 --- a/src/main/resources/cv_model/bestModel/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422054241,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}} diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc deleted file mode 100644 index 7722ab3..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/.part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet deleted file mode 100644 index 2664881..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/data/part-00000-93fad09f-9ae6-465a-9d23-54dac5816f4b-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc deleted file mode 100644 index e758ca5..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 deleted file mode 100644 index 9f8fc55..0000000 --- a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1723422054559,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","numTrees":100,"maxDepth":10,"labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008},"numFeatures":19,"numClasses":4,"numTrees":100} diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc deleted file mode 100644 index 339b14a..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/.part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet.crc and /dev/null differ diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/_SUCCESS b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet b/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet deleted file mode 100644 index bc6c1d3..0000000 Binary files a/src/main/resources/cv_model/bestModel/stages/2_RandomForestClassifier_4909b7ca2bbe/treesMetadata/part-00000-8ea1cd09-af6e-47db-a199-9a18743ad514-c000.snappy.parquet and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/metadata/.part-00000.crc deleted file mode 100644 index 05f028a..0000000 Binary files a/src/main/resources/cv_model/estimator/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/metadata/part-00000 b/src/main/resources/cv_model/estimator/metadata/part-00000 deleted file mode 100644 index 483a4fc..0000000 --- a/src/main/resources/cv_model/estimator/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.Pipeline","timestamp":1723422048367,"sparkVersion":"3.4.3","uid":"Pipeline_58a1fe22f286","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}} diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc deleted file mode 100644 index 853858a..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 deleted file mode 100644 index e72c7fc..0000000 --- a/src/main/resources/cv_model/estimator/stages/0_StringIndexer_d3c63289c493/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.StringIndexer","timestamp":1723422049018,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}} diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc deleted file mode 100644 index 8a19476..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 deleted file mode 100644 index 6ef6134..0000000 --- a/src/main/resources/cv_model/estimator/stages/1_VectorAssembler_517fc429fbfb/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422049580,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}} diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc deleted file mode 100644 index df75d0b..0000000 Binary files a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 b/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 deleted file mode 100644 index 5168986..0000000 --- a/src/main/resources/cv_model/estimator/stages/2_RandomForestClassifier_4909b7ca2bbe/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.classification.RandomForestClassifier","timestamp":1723422050089,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008}} diff --git a/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc b/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc deleted file mode 100644 index 6fde672..0000000 Binary files a/src/main/resources/cv_model/evaluator/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/evaluator/metadata/_SUCCESS b/src/main/resources/cv_model/evaluator/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/evaluator/metadata/part-00000 b/src/main/resources/cv_model/evaluator/metadata/part-00000 deleted file mode 100644 index ca32523..0000000 --- a/src/main/resources/cv_model/evaluator/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator","timestamp":1723422047764,"sparkVersion":"3.4.3","uid":"MulticlassClassificationEvaluator_f31cf4d2b0db","paramMap":{"metricName":"accuracy","labelCol":"indexed_target"},"defaultParamMap":{"eps":1.0E-15,"beta":1.0,"metricName":"f1","predictionCol":"prediction","labelCol":"label","metricLabel":0.0,"probabilityCol":"probability"}} diff --git a/src/main/resources/cv_model/metadata/.part-00000.crc b/src/main/resources/cv_model/metadata/.part-00000.crc deleted file mode 100644 index 8d903e4..0000000 Binary files a/src/main/resources/cv_model/metadata/.part-00000.crc and /dev/null differ diff --git a/src/main/resources/cv_model/metadata/_SUCCESS b/src/main/resources/cv_model/metadata/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/resources/cv_model/metadata/part-00000 b/src/main/resources/cv_model/metadata/part-00000 deleted file mode 100644 index 4fb6ce0..0000000 --- a/src/main/resources/cv_model/metadata/part-00000 +++ /dev/null @@ -1 +0,0 @@ -{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1723422046660,"sparkVersion":"3.4.3","uid":"CrossValidatorModel_5c96b33c8d82","paramMap":{"seed":-2084793586583917283,"numFolds":5,"foldCol":"","estimatorParamMaps":[[{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"numTrees","value":"100","isJson":"true"},{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"maxDepth","value":"10","isJson":"true"}]]},"defaultParamMap":{"seed":880116102,"numFolds":3,"foldCol":""},"avgMetrics":[0.8736361548764979],"persistSubModels":false}