split into 2 diff files and use concurrent thread

This commit is contained in:
heyethereum
2024-08-05 08:37:28 +08:00
parent 5b88a4ca7c
commit 4feabce446
21 changed files with 651265 additions and 11 deletions

View File

@@ -1,5 +1,7 @@
import csv
import os
import requests
import concurrent.futures
# Define the endpoint URL
endpoint_url = "http://localhost:8080/v1/qrcodetypes/scan"
@@ -7,21 +9,55 @@ endpoint_url = "http://localhost:8080/v1/qrcodetypes/scan"
# Path to the CSV file
csv_file_path = "malicious_phish.csv"
# Directory to store the split CSV files
split_files_dir = "split_csv_files"
os.makedirs(split_files_dir, exist_ok=True)
# Function to ensure URL starts with http:// or https://
def ensure_url_prefix(url):
if not (url.startswith("http://") or url.startswith("https://")):
return "https://" + url
return url
# Read the CSV file, prefix URLs, and send POST requests
with open(csv_file_path, newline='') as csvfile:
# Read the CSV file and split into 20 files
def split_csv_file(csv_file_path, split_files_dir, num_splits=20):
with open(csv_file_path, newline='') as csvfile:
reader = list(csv.DictReader(csvfile))
total_rows = len(reader)
rows_per_file = total_rows // num_splits
for i in range(num_splits):
split_file_path = os.path.join(split_files_dir, f"split_file_{i+1}.csv")
with open(split_file_path, 'w', newline='') as split_file:
writer = csv.DictWriter(split_file, fieldnames=['url', 'type'])
writer.writeheader()
start_index = i * rows_per_file
end_index = (i + 1) * rows_per_file if i != num_splits - 1 else total_rows
for row in reader[start_index:end_index]:
row['url'] = ensure_url_prefix(row['url'])
writer.writerow(row)
# Function to process a CSV file and send POST requests
def process_csv_file(csv_file_path):
with open(csv_file_path, newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
url = row['url']
url_with_prefix = ensure_url_prefix(url)
response = requests.post(endpoint_url, json={"data": url_with_prefix})
url = row['url'] # Column header for URL is 'url'
response = requests.post(endpoint_url, json={"data": url})
if response.status_code == 200:
print(f"Successfully sent data: {url_with_prefix}")
print(f"Successfully sent data: {url}")
else:
print(f"Failed to send data: {url_with_prefix}, Status code: {response.status_code}")
print(f"Failed to send data: {url}, Status code: {response.status_code}")
# Split the original CSV file into 20 parts
split_csv_file(csv_file_path, split_files_dir)
# Get the list of split CSV files
split_files = [os.path.join(split_files_dir, file) for file in os.listdir(split_files_dir) if file.endswith('.csv')]
# Execute the requests concurrently
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
futures = [executor.submit(process_csv_file, split_file) for split_file in split_files]
concurrent.futures.wait(futures)
print("Processing completed.")

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff