add new prediction service
This commit is contained in:
@@ -13,6 +13,7 @@ public class APIConstants {
|
||||
public static final String API_URL_QRCODE_VIRUS_TOTAL_CHECK = "/qrcodetypes/virusTotalCheck";
|
||||
public static final String API_URL_QRCODE_REDIRECT_COUNT = "/qrcodetypes/checkRedirects";
|
||||
public static final String API_URL_QRCODE_GET_QR_DETAILS = "/qrcodetypes/getQRDetails";
|
||||
public static final String PREDICTION_API_URL = "http://localhost:8000/predict";
|
||||
|
||||
|
||||
public static final String API_URL_USER_GET = "/user/getUser";
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
package com.safeqr.app.spark.model;
|
||||
package com.safeqr.app.prediction.model;
|
||||
|
||||
import com.safeqr.app.qrcode.entity.URLEntity;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.safeqr.app.qrcode.model.URLModel;
|
||||
import lombok.*;
|
||||
|
||||
@@ -13,26 +13,62 @@ import java.util.regex.Pattern;
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class URLFeatures {
|
||||
private Double domain;
|
||||
private Double subdomain;
|
||||
private Double topLevelDomain;
|
||||
private Double query;
|
||||
private Double fragment;
|
||||
private Double redirect;
|
||||
private Double path;
|
||||
private Double redirectChain;
|
||||
private Double hstsHeader;
|
||||
private Double sslStripping;
|
||||
private Double hostnameEmbedding;
|
||||
private Double javascriptCheck;
|
||||
private Double shorteningService;
|
||||
private Double hasIpAddress;
|
||||
private Double trackingDescriptions;
|
||||
private Double urlEncoding;
|
||||
private Double hasExecutable;
|
||||
private Double tls;
|
||||
private Double contents;
|
||||
private String target; // This is the label, may be null if predicting
|
||||
@JsonProperty("domain")
|
||||
private Integer domain;
|
||||
|
||||
@JsonProperty("subdomain")
|
||||
private Integer subdomain;
|
||||
|
||||
@JsonProperty("top_level_domain")
|
||||
private Integer topLevelDomain;
|
||||
|
||||
@JsonProperty("query")
|
||||
private Integer query;
|
||||
|
||||
@JsonProperty("fragment")
|
||||
private Integer fragment;
|
||||
|
||||
@JsonProperty("redirect")
|
||||
private Integer redirect;
|
||||
|
||||
@JsonProperty("path")
|
||||
private Integer path;
|
||||
|
||||
@JsonProperty("redirect_chain")
|
||||
private Integer redirectChain;
|
||||
|
||||
@JsonProperty("hsts_header")
|
||||
private Integer hstsHeader;
|
||||
|
||||
@JsonProperty("ssl_stripping")
|
||||
private Integer sslStripping;
|
||||
|
||||
@JsonProperty("hostname_embedding")
|
||||
private Integer hostnameEmbedding;
|
||||
|
||||
@JsonProperty("javascript_check")
|
||||
private Integer javascriptCheck;
|
||||
|
||||
@JsonProperty("shortening_service")
|
||||
private Integer shorteningService;
|
||||
|
||||
@JsonProperty("has_ip_address")
|
||||
private Integer hasIpAddress;
|
||||
|
||||
@JsonProperty("tracking_descriptions")
|
||||
private Integer trackingDescriptions;
|
||||
|
||||
@JsonProperty("url_encoding")
|
||||
private Integer urlEncoding;
|
||||
|
||||
@JsonProperty("has_executable")
|
||||
private Integer hasExecutable;
|
||||
|
||||
@JsonProperty("tls")
|
||||
private Integer tls;
|
||||
|
||||
@JsonProperty("contents")
|
||||
private Integer contents;
|
||||
|
||||
public static URLFeatures fromEntity(URLModel urlModel) {
|
||||
URLFeatures features = URLFeatures.builder()
|
||||
@@ -43,6 +79,7 @@ public class URLFeatures {
|
||||
features.setQuery(urlModel.getDetails().getQuery());
|
||||
features.setFragment(urlModel.getDetails().getFragment());
|
||||
features.setPath(urlModel.getDetails().getPath());
|
||||
features.setRedirect(urlModel.getDetails().getRedirect());
|
||||
features.setRedirectChain(urlModel.getDetails().getRedirectChain());
|
||||
features.setHstsHeader(urlModel.getDetails().getHstsHeader());
|
||||
features.setSslStripping(urlModel.getDetails().getSslStripping());
|
||||
@@ -59,100 +96,104 @@ public class URLFeatures {
|
||||
return features;
|
||||
}
|
||||
|
||||
private void setRedirect(int redirect) {
|
||||
this.redirect = redirect;
|
||||
}
|
||||
|
||||
// Custom setter for tls (qr_code_type_id)
|
||||
public void setTls(Integer tls) {
|
||||
if (tls != null) {
|
||||
this.tls = tls == 1 ? 0.0 : tls == 9 ? 1.0 : tls.doubleValue();
|
||||
this.tls = tls == 1 ? 0 : tls == 9 ? 1 : tls.intValue();
|
||||
} else {
|
||||
this.tls = 0.0;
|
||||
this.tls = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Custom setter for hostnameEmbedding and other similar columns
|
||||
public void setHostnameEmbedding(Integer hostnameEmbedding) {
|
||||
this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1.0 : 0.0;
|
||||
this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setJavascriptCheck(String javascriptCheck) {
|
||||
this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1.0 : 0.0;
|
||||
this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setShorteningService(String shorteningService) {
|
||||
this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1.0 : 0.0;
|
||||
this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setHasIpAddress(String hasIpAddress) {
|
||||
this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1.0 : 0.0;
|
||||
this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setUrlEncoding(String urlEncoding) {
|
||||
this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1.0 : 0.0;
|
||||
this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setHasExecutable(String hasExecutable) {
|
||||
this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1.0 : 0.0;
|
||||
this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
public void setTrackingDescriptions(List<String> trackingDescriptions) {
|
||||
this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1.0 : 0.0;
|
||||
this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1 : 0;
|
||||
}
|
||||
|
||||
// Custom setter for sslStripping
|
||||
public void setSslStripping(List<Boolean> sslStripping) {
|
||||
if (sslStripping != null && !sslStripping.isEmpty() && sslStripping.get(0) != null) {
|
||||
this.sslStripping = sslStripping.get(0) ? 1.0 : 0.0;
|
||||
this.sslStripping = sslStripping.get(0) ? 1 : 0;
|
||||
} else {
|
||||
this.sslStripping = 0.0;
|
||||
this.sslStripping = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Custom setter for hstsHeader
|
||||
public void setHstsHeader(List<String> hstsHeader) {
|
||||
if (hstsHeader == null || hstsHeader.isEmpty()) {
|
||||
this.hstsHeader = 0.0;
|
||||
this.hstsHeader = 0;
|
||||
} else if (hstsHeader.get(0).startsWith("{") && hstsHeader.get(0).endsWith("}")) {
|
||||
Pattern pattern = Pattern.compile("\"(.*?)\"");
|
||||
Matcher matcher = pattern.matcher(hstsHeader.get(0));
|
||||
if (matcher.find() && matcher.group(1).toLowerCase().contains("no")) {
|
||||
this.hstsHeader = 0.0;
|
||||
this.hstsHeader = 0;
|
||||
} else {
|
||||
this.hstsHeader = 1.0;
|
||||
this.hstsHeader = 1;
|
||||
}
|
||||
} else {
|
||||
this.hstsHeader = 1.0;
|
||||
this.hstsHeader = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Custom setters for calculating string lengths
|
||||
public void setDomain(String domain) {
|
||||
this.domain = (domain != null) ? (double) domain.length() : 0.0;
|
||||
this.domain = (domain != null) ? domain.length() : 0;
|
||||
}
|
||||
|
||||
public void setSubdomain(String subdomain) {
|
||||
this.subdomain = (subdomain != null) ? (double) subdomain.length() : 0.0;
|
||||
this.subdomain = (subdomain != null) ? subdomain.length() : 0;
|
||||
}
|
||||
|
||||
public void setTopLevelDomain(String topLevelDomain) {
|
||||
this.topLevelDomain = (topLevelDomain != null) ? (double) topLevelDomain.length() : 0.0;
|
||||
this.topLevelDomain = (topLevelDomain != null) ? topLevelDomain.length() : 0;
|
||||
}
|
||||
|
||||
public void setQuery(String query) {
|
||||
this.query = (query != null) ? (double) query.length() : 0.0;
|
||||
this.query = (query != null) ? query.length() : 0;
|
||||
}
|
||||
|
||||
public void setFragment(String fragment) {
|
||||
this.fragment = (fragment != null) ? (double) fragment.length() : 0.0;
|
||||
this.fragment = (fragment != null) ? fragment.length() : 0;
|
||||
}
|
||||
|
||||
public void setPath(String path) {
|
||||
this.path = (path != null) ? (double) path.length() : 0.0;
|
||||
this.path = (path != null) ? path.length() : 0;
|
||||
}
|
||||
|
||||
public void setRedirectChain(List<String> redirectChain) {
|
||||
this.redirectChain = (redirectChain != null) ? (double) redirectChain.size() : 0.0;
|
||||
this.redirectChain = (redirectChain != null) ? redirectChain.size() : 0;
|
||||
}
|
||||
|
||||
public void setContents(String contents) {
|
||||
this.contents = (contents != null) ? (double) contents.length() : 0.0;
|
||||
this.contents = (contents != null) ? contents.length() : 0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
package com.safeqr.app.prediction.service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
import com.safeqr.app.prediction.model.URLFeatures;
import com.safeqr.app.qrcode.model.URLModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestTemplate;

import static com.safeqr.app.constants.APIConstants.PREDICTION_API_URL;
|
||||
|
||||
@Service
|
||||
public class PredictionService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(PredictionService.class);
|
||||
|
||||
private final RestTemplate restTemplate;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
public PredictionService(RestTemplate restTemplate, ObjectMapper objectMapper) {
|
||||
this.restTemplate = restTemplate;
|
||||
this.objectMapper = objectMapper;
|
||||
}
|
||||
|
||||
public String predict(URLModel urlModel) {
|
||||
// Convert URLModel to URLFeatures
|
||||
URLFeatures features = URLFeatures.fromEntity(urlModel);
|
||||
logger.info("Prediction request: {}", features);
|
||||
|
||||
// Prepare the HTTP headers
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
|
||||
// Create the HTTP entity containing the features and headers
|
||||
HttpEntity<URLFeatures> requestEntity = new HttpEntity<>(features, headers);
|
||||
|
||||
// Make the HTTP POST request to the FastAPI prediction endpoint
|
||||
ResponseEntity<String> response = restTemplate.exchange(
|
||||
PREDICTION_API_URL,
|
||||
HttpMethod.POST,
|
||||
requestEntity,
|
||||
String.class
|
||||
);
|
||||
|
||||
// Use ObjectMapper to deserialize the response and automatically remove quotes
|
||||
String prediction = response.getBody();
|
||||
try {
|
||||
prediction = objectMapper.readValue(prediction, String.class);
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to parse prediction response", e);
|
||||
prediction = "Unknown";
|
||||
}
|
||||
logger.info("Prediction response: {}", prediction);
|
||||
|
||||
// Return the prediction
|
||||
return prediction;
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import static com.safeqr.app.constants.CommonConstants.*;
|
||||
import com.safeqr.app.qrcode.entity.URLEntity;
|
||||
import com.safeqr.app.qrcode.model.URLModel;
|
||||
import com.safeqr.app.qrcode.repository.URLRepository;
|
||||
import com.safeqr.app.spark.service.MLModelService;
|
||||
import com.safeqr.app.prediction.service.PredictionService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
@@ -28,11 +28,11 @@ public class URLVerificationService {
|
||||
private static final int READ_TIMEOUT_MS = 10000;
|
||||
private static final Logger logger = LoggerFactory.getLogger(URLVerificationService.class);
|
||||
private final URLRepository urlRepository;
|
||||
private final MLModelService mlModelService;
|
||||
private final PredictionService predictionService;
|
||||
/**
 * Creates the verification service.
 *
 * @param urlRepository     repository stored in {@code urlRepository}
 * @param predictionService service that {@code getClassification} delegates to
 */
@Autowired
public URLVerificationService(URLRepository urlRepository, PredictionService predictionService) {
    this.urlRepository = urlRepository;
    this.predictionService = predictionService;
}
|
||||
|
||||
// Regular expression pattern for shortening services
|
||||
@@ -428,6 +428,6 @@ public class URLVerificationService {
|
||||
|
||||
// Get Classification using ML Model
|
||||
public String getClassification(URLModel urlModel){
|
||||
return mlModelService.predict(urlModel);
|
||||
return predictionService.predict(urlModel);
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
package com.safeqr.app.spark.service;
|
||||
|
||||
import com.safeqr.app.qrcode.model.URLModel;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
|
||||
@Service
|
||||
public class MLModelService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MLModelService.class);
|
||||
|
||||
public MLModelService() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
public String predict(URLModel urlModel) {
|
||||
|
||||
return "haha";
|
||||
}
|
||||
}
|
||||
12
src/main/java/com/safeqr/app/utils/RestTemplateConfig.java
Normal file
12
src/main/java/com/safeqr/app/utils/RestTemplateConfig.java
Normal file
@@ -0,0 +1,12 @@
|
||||
package com.safeqr.app.utils;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
@Configuration
|
||||
public class RestTemplateConfig {
|
||||
@Bean
|
||||
public RestTemplate restTemplate() {
|
||||
return new RestTemplate();
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.PipelineModel","timestamp":1723422050490,"sparkVersion":"3.4.3","uid":"PipelineModel_4ecdd9f71524","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.feature.StringIndexerModel","timestamp":1723422050930,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422054241,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1723422054559,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","numTrees":100,"maxDepth":10,"labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008},"numFeatures":19,"numClasses":4,"numTrees":100}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.Pipeline","timestamp":1723422048367,"sparkVersion":"3.4.3","uid":"Pipeline_58a1fe22f286","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.feature.StringIndexer","timestamp":1723422049018,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422049580,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.classification.RandomForestClassifier","timestamp":1723422050089,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator","timestamp":1723422047764,"sparkVersion":"3.4.3","uid":"MulticlassClassificationEvaluator_f31cf4d2b0db","paramMap":{"metricName":"accuracy","labelCol":"indexed_target"},"defaultParamMap":{"eps":1.0E-15,"beta":1.0,"metricName":"f1","predictionCol":"prediction","labelCol":"label","metricLabel":0.0,"probabilityCol":"probability"}}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1723422046660,"sparkVersion":"3.4.3","uid":"CrossValidatorModel_5c96b33c8d82","paramMap":{"seed":-2084793586583917283,"numFolds":5,"foldCol":"","estimatorParamMaps":[[{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"numTrees","value":"100","isJson":"true"},{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"maxDepth","value":"10","isJson":"true"}]]},"defaultParamMap":{"seed":880116102,"numFolds":3,"foldCol":""},"avgMetrics":[0.8736361548764979],"persistSubModels":false}
|
||||
Reference in New Issue
Block a user