add new prediction service

This commit is contained in:
heyethereum
2024-08-13 02:34:47 +08:00
parent 53f9acd922
commit c8cfe610a6
45 changed files with 167 additions and 82 deletions

View File

@@ -13,6 +13,7 @@ public class APIConstants {
public static final String API_URL_QRCODE_VIRUS_TOTAL_CHECK = "/qrcodetypes/virusTotalCheck";
public static final String API_URL_QRCODE_REDIRECT_COUNT = "/qrcodetypes/checkRedirects";
public static final String API_URL_QRCODE_GET_QR_DETAILS = "/qrcodetypes/getQRDetails";
public static final String PREDICTION_API_URL = "http://localhost:8000/predict";
public static final String API_URL_USER_GET = "/user/getUser";

View File

@@ -1,6 +1,6 @@
package com.safeqr.app.spark.model;
package com.safeqr.app.prediction.model;
import com.safeqr.app.qrcode.entity.URLEntity;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.safeqr.app.qrcode.model.URLModel;
import lombok.*;
@@ -13,26 +13,62 @@ import java.util.regex.Pattern;
@NoArgsConstructor
@AllArgsConstructor
public class URLFeatures {
private Double domain;
private Double subdomain;
private Double topLevelDomain;
private Double query;
private Double fragment;
private Double redirect;
private Double path;
private Double redirectChain;
private Double hstsHeader;
private Double sslStripping;
private Double hostnameEmbedding;
private Double javascriptCheck;
private Double shorteningService;
private Double hasIpAddress;
private Double trackingDescriptions;
private Double urlEncoding;
private Double hasExecutable;
private Double tls;
private Double contents;
private String target; // This is the label, may be null if predicting
@JsonProperty("domain")
private Integer domain;
@JsonProperty("subdomain")
private Integer subdomain;
@JsonProperty("top_level_domain")
private Integer topLevelDomain;
@JsonProperty("query")
private Integer query;
@JsonProperty("fragment")
private Integer fragment;
@JsonProperty("redirect")
private Integer redirect;
@JsonProperty("path")
private Integer path;
@JsonProperty("redirect_chain")
private Integer redirectChain;
@JsonProperty("hsts_header")
private Integer hstsHeader;
@JsonProperty("ssl_stripping")
private Integer sslStripping;
@JsonProperty("hostname_embedding")
private Integer hostnameEmbedding;
@JsonProperty("javascript_check")
private Integer javascriptCheck;
@JsonProperty("shortening_service")
private Integer shorteningService;
@JsonProperty("has_ip_address")
private Integer hasIpAddress;
@JsonProperty("tracking_descriptions")
private Integer trackingDescriptions;
@JsonProperty("url_encoding")
private Integer urlEncoding;
@JsonProperty("has_executable")
private Integer hasExecutable;
@JsonProperty("tls")
private Integer tls;
@JsonProperty("contents")
private Integer contents;
public static URLFeatures fromEntity(URLModel urlModel) {
URLFeatures features = URLFeatures.builder()
@@ -43,6 +79,7 @@ public class URLFeatures {
features.setQuery(urlModel.getDetails().getQuery());
features.setFragment(urlModel.getDetails().getFragment());
features.setPath(urlModel.getDetails().getPath());
features.setRedirect(urlModel.getDetails().getRedirect());
features.setRedirectChain(urlModel.getDetails().getRedirectChain());
features.setHstsHeader(urlModel.getDetails().getHstsHeader());
features.setSslStripping(urlModel.getDetails().getSslStripping());
@@ -59,100 +96,104 @@ public class URLFeatures {
return features;
}
private void setRedirect(int redirect) {
this.redirect = redirect;
}
// Custom setter for tls (qr_code_type_id)
public void setTls(Integer tls) {
if (tls != null) {
this.tls = tls == 1 ? 0.0 : tls == 9 ? 1.0 : tls.doubleValue();
this.tls = tls == 1 ? 0 : tls == 9 ? 1 : tls.intValue();
} else {
this.tls = 0.0;
this.tls = 0;
}
}
// Custom setter for hostnameEmbedding and other similar columns
public void setHostnameEmbedding(Integer hostnameEmbedding) {
this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1.0 : 0.0;
this.hostnameEmbedding = (hostnameEmbedding != null && hostnameEmbedding != 0) ? 1 : 0;
}
public void setJavascriptCheck(String javascriptCheck) {
this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1.0 : 0.0;
this.javascriptCheck = (javascriptCheck != null && !javascriptCheck.isEmpty()) ? 1 : 0;
}
public void setShorteningService(String shorteningService) {
this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1.0 : 0.0;
this.shorteningService = (shorteningService != null && !shorteningService.isEmpty()) ? 1 : 0;
}
public void setHasIpAddress(String hasIpAddress) {
this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1.0 : 0.0;
this.hasIpAddress = (hasIpAddress != null && !hasIpAddress.isEmpty()) ? 1 : 0;
}
public void setUrlEncoding(String urlEncoding) {
this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1.0 : 0.0;
this.urlEncoding = (urlEncoding != null && !urlEncoding.isEmpty()) ? 1 : 0;
}
public void setHasExecutable(String hasExecutable) {
this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1.0 : 0.0;
this.hasExecutable = (hasExecutable != null && !hasExecutable.isEmpty()) ? 1 : 0;
}
public void setTrackingDescriptions(List<String> trackingDescriptions) {
this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1.0 : 0.0;
this.trackingDescriptions = (trackingDescriptions != null && !trackingDescriptions.isEmpty()) ? 1 : 0;
}
// Custom setter for sslStripping
public void setSslStripping(List<Boolean> sslStripping) {
if (sslStripping != null && !sslStripping.isEmpty() && sslStripping.get(0) != null) {
this.sslStripping = sslStripping.get(0) ? 1.0 : 0.0;
this.sslStripping = sslStripping.get(0) ? 1 : 0;
} else {
this.sslStripping = 0.0;
this.sslStripping = 0;
}
}
// Custom setter for hstsHeader
public void setHstsHeader(List<String> hstsHeader) {
if (hstsHeader == null || hstsHeader.isEmpty()) {
this.hstsHeader = 0.0;
this.hstsHeader = 0;
} else if (hstsHeader.get(0).startsWith("{") && hstsHeader.get(0).endsWith("}")) {
Pattern pattern = Pattern.compile("\"(.*?)\"");
Matcher matcher = pattern.matcher(hstsHeader.get(0));
if (matcher.find() && matcher.group(1).toLowerCase().contains("no")) {
this.hstsHeader = 0.0;
this.hstsHeader = 0;
} else {
this.hstsHeader = 1.0;
this.hstsHeader = 1;
}
} else {
this.hstsHeader = 1.0;
this.hstsHeader = 1;
}
}
// Custom setters for calculating string lengths
public void setDomain(String domain) {
this.domain = (domain != null) ? (double) domain.length() : 0.0;
this.domain = (domain != null) ? domain.length() : 0;
}
public void setSubdomain(String subdomain) {
this.subdomain = (subdomain != null) ? (double) subdomain.length() : 0.0;
this.subdomain = (subdomain != null) ? subdomain.length() : 0;
}
public void setTopLevelDomain(String topLevelDomain) {
this.topLevelDomain = (topLevelDomain != null) ? (double) topLevelDomain.length() : 0.0;
this.topLevelDomain = (topLevelDomain != null) ? topLevelDomain.length() : 0;
}
public void setQuery(String query) {
this.query = (query != null) ? (double) query.length() : 0.0;
this.query = (query != null) ? query.length() : 0;
}
public void setFragment(String fragment) {
this.fragment = (fragment != null) ? (double) fragment.length() : 0.0;
this.fragment = (fragment != null) ? fragment.length() : 0;
}
public void setPath(String path) {
this.path = (path != null) ? (double) path.length() : 0.0;
this.path = (path != null) ? path.length() : 0;
}
public void setRedirectChain(List<String> redirectChain) {
this.redirectChain = (redirectChain != null) ? (double) redirectChain.size() : 0.0;
this.redirectChain = (redirectChain != null) ? redirectChain.size() : 0;
}
public void setContents(String contents) {
this.contents = (contents != null) ? (double) contents.length() : 0.0;
this.contents = (contents != null) ? contents.length() : 0;
}
}

View File

@@ -0,0 +1,63 @@
package com.safeqr.app.prediction.service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.safeqr.app.prediction.model.URLFeatures;
import com.safeqr.app.qrcode.model.URLModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestTemplate;
import static com.safeqr.app.constants.APIConstants.PREDICTION_API_URL;
@Service
public class PredictionService {
private static final Logger logger = LoggerFactory.getLogger(PredictionService.class);
private final RestTemplate restTemplate;
private final ObjectMapper objectMapper;
public PredictionService(RestTemplate restTemplate, ObjectMapper objectMapper) {
this.restTemplate = restTemplate;
this.objectMapper = objectMapper;
}
public String predict(URLModel urlModel) {
// Convert URLModel to URLFeatures
URLFeatures features = URLFeatures.fromEntity(urlModel);
logger.info("Prediction request: {}", features);
// Prepare the HTTP headers
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
// Create the HTTP entity containing the features and headers
HttpEntity<URLFeatures> requestEntity = new HttpEntity<>(features, headers);
// Make the HTTP POST request to the FastAPI prediction endpoint
ResponseEntity<String> response = restTemplate.exchange(
PREDICTION_API_URL,
HttpMethod.POST,
requestEntity,
String.class
);
// Use ObjectMapper to deserialize the response and automatically remove quotes
String prediction = response.getBody();
try {
prediction = objectMapper.readValue(prediction, String.class);
} catch (Exception e) {
logger.error("Failed to parse prediction response", e);
prediction = "Unknown";
}
logger.info("Prediction response: {}", prediction);
// Return the prediction
return prediction;
}
}

View File

@@ -4,7 +4,7 @@ import static com.safeqr.app.constants.CommonConstants.*;
import com.safeqr.app.qrcode.entity.URLEntity;
import com.safeqr.app.qrcode.model.URLModel;
import com.safeqr.app.qrcode.repository.URLRepository;
import com.safeqr.app.spark.service.MLModelService;
import com.safeqr.app.prediction.service.PredictionService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@@ -28,11 +28,11 @@ public class URLVerificationService {
private static final int READ_TIMEOUT_MS = 10000;
private static final Logger logger = LoggerFactory.getLogger(URLVerificationService.class);
private final URLRepository urlRepository;
private final MLModelService mlModelService;
private final PredictionService predictionService;
@Autowired
public URLVerificationService(URLRepository urlRepository, MLModelService mlModelService) {
public URLVerificationService(URLRepository urlRepository, PredictionService predictionService) {
this.urlRepository = urlRepository;
this.mlModelService = mlModelService;
this.predictionService = predictionService;
}
// Regular expression pattern for shortening services
@@ -428,6 +428,6 @@ public class URLVerificationService {
// Get Classification using ML Model
public String getClassification(URLModel urlModel){
return mlModelService.predict(urlModel);
return predictionService.predict(urlModel);
}
}

View File

@@ -1,22 +0,0 @@
package com.safeqr.app.spark.service;
import com.safeqr.app.qrcode.model.URLModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
/**
 * Stub model service: it performs no inference and always answers with the same
 * fixed string.
 */
@Service
public class MLModelService {

    private static final Logger logger = LoggerFactory.getLogger(MLModelService.class);

    /**
     * Returns a fixed placeholder classification.
     *
     * @param urlModel the URL model to classify; it is not inspected
     * @return the constant placeholder string
     */
    public String predict(URLModel urlModel) {
        final String placeholder = "haha";
        return placeholder;
    }
}

View File

@@ -0,0 +1,12 @@
package com.safeqr.app.utils;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
@Configuration
public class RestTemplateConfig {
@Bean
public RestTemplate restTemplate() {
return new RestTemplate();
}
}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.PipelineModel","timestamp":1723422050490,"sparkVersion":"3.4.3","uid":"PipelineModel_4ecdd9f71524","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.feature.StringIndexerModel","timestamp":1723422050930,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422054241,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1723422054559,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","numTrees":100,"maxDepth":10,"labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008},"numFeatures":19,"numClasses":4,"numTrees":100}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.Pipeline","timestamp":1723422048367,"sparkVersion":"3.4.3","uid":"Pipeline_58a1fe22f286","paramMap":{"stageUids":["StringIndexer_d3c63289c493","VectorAssembler_517fc429fbfb","RandomForestClassifier_4909b7ca2bbe"]},"defaultParamMap":{}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.feature.StringIndexer","timestamp":1723422049018,"sparkVersion":"3.4.3","uid":"StringIndexer_d3c63289c493","paramMap":{"outputCol":"indexed_target","inputCol":"target"},"defaultParamMap":{"stringOrderType":"frequencyDesc","handleInvalid":"error","outputCol":"StringIndexer_d3c63289c493__output"}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1723422049580,"sparkVersion":"3.4.3","uid":"VectorAssembler_517fc429fbfb","paramMap":{"inputCols":["domain","subdomain","top_level_domain","query","fragment","redirect","path","redirect_chain","hsts_header","ssl_stripping","hostname_embedding","javascript_check","shortening_service","has_ip_address","tracking_descriptions","url_encoding","has_executable","tls","contents"],"outputCol":"features"},"defaultParamMap":{"handleInvalid":"error","outputCol":"VectorAssembler_517fc429fbfb__output"}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.classification.RandomForestClassifier","timestamp":1723422050089,"sparkVersion":"3.4.3","uid":"RandomForestClassifier_4909b7ca2bbe","paramMap":{"featuresCol":"features","labelCol":"indexed_target"},"defaultParamMap":{"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBins":32,"predictionCol":"prediction","minInstancesPerNode":1,"minWeightFractionPerNode":0.0,"cacheNodeIds":false,"minInfoGain":0.0,"numTrees":20,"maxDepth":5,"impurity":"gini","subsamplingRate":1.0,"leafCol":"","labelCol":"label","probabilityCol":"probability","bootstrap":true,"featureSubsetStrategy":"auto","checkpointInterval":10,"maxMemoryInMB":256,"seed":6182040365248539008}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator","timestamp":1723422047764,"sparkVersion":"3.4.3","uid":"MulticlassClassificationEvaluator_f31cf4d2b0db","paramMap":{"metricName":"accuracy","labelCol":"indexed_target"},"defaultParamMap":{"eps":1.0E-15,"beta":1.0,"metricName":"f1","predictionCol":"prediction","labelCol":"label","metricLabel":0.0,"probabilityCol":"probability"}}

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.tuning.CrossValidatorModel","timestamp":1723422046660,"sparkVersion":"3.4.3","uid":"CrossValidatorModel_5c96b33c8d82","paramMap":{"seed":-2084793586583917283,"numFolds":5,"foldCol":"","estimatorParamMaps":[[{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"numTrees","value":"100","isJson":"true"},{"parent":"RandomForestClassifier_4909b7ca2bbe","name":"maxDepth","value":"10","isJson":"true"}]]},"defaultParamMap":{"seed":880116102,"numFolds":3,"foldCol":""},"avgMetrics":[0.8736361548764979],"persistSubModels":false}