feat: switch to FlareSolverr + slug-based notification tracking
- All ZIPAIR fetches now go through FlareSolverr at 192.168.10.76:8191
- Dropped sitemap/probe approach; scrape EN notification listing directly
- State file now stores JSON list of seen slugs instead of last integer ID
- Matches keywords against slug and full page content

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
289
check_zipair.py
289
check_zipair.py
@@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
ZIPAIR Singapore Winter Sale Monitor
|
ZIPAIR Singapore Winter Sale Monitor
|
||||||
Checks ZIPAIR's sitemap for new notifications about Singapore ticket sales.
|
Uses FlareSolverr to bypass WAF, scrapes the EN notification listing,
|
||||||
Sends a push notification via ntfy when detected.
|
and fires an ntfy push when a Singapore/winter sale is detected.
|
||||||
|
|
||||||
State is persisted in last_seen.txt (committed back to repo by the workflow).
|
State is persisted in last_seen.txt (committed back to repo by the workflow).
|
||||||
"""
|
"""
|
||||||
@@ -14,170 +14,99 @@ import json
|
|||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.error
|
import urllib.error
|
||||||
import http.cookiejar
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
# ── Config (set via environment variables / Gitea secrets) ──────────────────
|
# ── Config ───────────────────────────────────────────────────────────────────
|
||||||
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
|
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
|
||||||
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth
|
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "")
|
||||||
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
|
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
|
||||||
|
FLARESOLVERR_URL = os.environ.get("FLARESOLVERR_URL", "http://192.168.10.76:8191")
|
||||||
|
|
||||||
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
|
ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification/"
|
||||||
ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
|
ZIPAIR_NOTIF_BASE = "https://www.zipair.net"
|
||||||
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
|
|
||||||
|
|
||||||
# How many IDs above last_seen to probe when sitemap is unavailable
|
|
||||||
PROBE_AHEAD = 20
|
|
||||||
|
|
||||||
# Keywords that must ALL appear (case-insensitive) in a notification page
|
|
||||||
# to trigger an alert. Tune these as needed.
|
|
||||||
TRIGGER_KEYWORDS = ["singapore", "winter"]
|
TRIGGER_KEYWORDS = ["singapore", "winter"]
|
||||||
|
|
||||||
# Browser-like headers to avoid 403
|
# ── FlareSolverr fetch ───────────────────────────────────────────────────────
|
||||||
HEADERS = {
|
|
||||||
"User-Agent": (
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
||||||
"Chrome/124.0.0.0 Safari/537.36"
|
|
||||||
),
|
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
||||||
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
|
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests": "1",
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Helpers ─────────────────────────────────────────────────────────────────
|
def fs_fetch(url: str, timeout_ms: int = 60000) -> str:
|
||||||
|
"""Fetch a URL via FlareSolverr and return the HTML, or empty string on error."""
|
||||||
# Shared cookie jar + opener so session cookies persist across requests
|
payload = json.dumps({
|
||||||
_cookie_jar = http.cookiejar.CookieJar()
|
"cmd": "request.get",
|
||||||
_opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(_cookie_jar))
|
"url": url,
|
||||||
|
"maxTimeout": timeout_ms,
|
||||||
def warm_session():
|
}).encode()
|
||||||
"""Visit the homepage once to pick up any WAF/CDN session cookies."""
|
req = urllib.request.Request(
|
||||||
print("Warming session via homepage …")
|
f"{FLARESOLVERR_URL.rstrip('/')}/v1",
|
||||||
req = urllib.request.Request("https://www.zipair.net/", headers=HEADERS)
|
data=payload,
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
with _opener.open(req, timeout=15):
|
with urllib.request.urlopen(req, timeout=timeout_ms // 1000 + 10) as resp:
|
||||||
pass
|
data = json.loads(resp.read())
|
||||||
print(f" Cookies acquired: {len(list(_cookie_jar))}")
|
status = data.get("solution", {}).get("status", 0)
|
||||||
|
html = data.get("solution", {}).get("response", "")
|
||||||
|
print(f" FlareSolverr: {data.get('status')} | HTTP {status} | {len(html)} bytes")
|
||||||
|
return html
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Homepage fetch failed (non-fatal): {e}", file=sys.stderr)
|
print(f" FlareSolverr error for {url}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
def fetch(url: str, timeout: int = 15) -> str:
|
|
||||||
"""Fetch a URL and return the decoded body, or empty string on error."""
|
|
||||||
req = urllib.request.Request(url, headers=HEADERS)
|
|
||||||
try:
|
|
||||||
with _opener.open(req, timeout=timeout) as resp:
|
|
||||||
raw = resp.read()
|
|
||||||
try:
|
|
||||||
import gzip
|
|
||||||
return gzip.decompress(raw).decode("utf-8", errors="replace")
|
|
||||||
except Exception:
|
|
||||||
return raw.decode("utf-8", errors="replace")
|
|
||||||
except urllib.error.HTTPError as e:
|
|
||||||
print(f" HTTP {e.code} for {url}", file=sys.stderr)
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
print(f" Error fetching {url}: {e}", file=sys.stderr)
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
# ── State (slug-based) ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
def get_notification_ids_from_sitemap() -> list[int]:
|
def read_seen_slugs() -> set:
|
||||||
"""Try sitemap.xml then sitemap_index.xml; return sorted notification IDs."""
|
|
||||||
for sitemap_url in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
|
||||||
print(f"Fetching {sitemap_url} …")
|
|
||||||
xml = fetch(sitemap_url)
|
|
||||||
if xml:
|
|
||||||
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
|
||||||
for sub in sub_sitemaps:
|
|
||||||
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
|
||||||
print(f" Fetching sub-sitemap {sub} …")
|
|
||||||
chunk = fetch(sub)
|
|
||||||
print(f" → {len(chunk)} bytes")
|
|
||||||
xml += chunk
|
|
||||||
time.sleep(0.5)
|
|
||||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
|
||||||
if ids:
|
|
||||||
ids = sorted(set(ids))
|
|
||||||
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
|
||||||
return ids
|
|
||||||
# Debug: show sample URLs from sitemap so we can see the real pattern
|
|
||||||
sample_urls = re.findall(r"<loc>(https?://[^<]{10,})</loc>", xml)[:5]
|
|
||||||
print(f" No notification IDs found. Sample URLs from sitemap:")
|
|
||||||
for u in sample_urls:
|
|
||||||
print(f" {u}")
|
|
||||||
|
|
||||||
# Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered)
|
|
||||||
print("Trying EN notification listing page …")
|
|
||||||
html = fetch("https://www.zipair.net/en/notification/")
|
|
||||||
print(f" → {len(html)} bytes")
|
|
||||||
ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)]
|
|
||||||
if not ids:
|
|
||||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
|
|
||||||
if ids:
|
|
||||||
ids = sorted(set(ids))
|
|
||||||
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})")
|
|
||||||
return ids
|
|
||||||
# Debug: show a snippet around "notification" in the HTML
|
|
||||||
lower = html.lower()
|
|
||||||
pos = lower.find("notification")
|
|
||||||
if pos != -1:
|
|
||||||
print(f" Sample HTML around 'notification': {repr(html[pos:pos+300])}")
|
|
||||||
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def probe_for_new_ids(last_seen: int) -> list[int]:
|
|
||||||
"""When sitemap is unavailable, probe notification pages above last_seen.
|
|
||||||
If last_seen is 0 we have no anchor — skip probe to avoid spamming."""
|
|
||||||
if last_seen == 0:
|
|
||||||
print(" last_seen=0 and no sitemap — cannot probe without an anchor ID.")
|
|
||||||
return []
|
|
||||||
print(f" Probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD} …")
|
|
||||||
found = []
|
|
||||||
for nid in range(last_seen + 1, last_seen + PROBE_AHEAD + 1):
|
|
||||||
url = ZIPAIR_NOTIF.format(id=nid)
|
|
||||||
html = fetch(url)
|
|
||||||
time.sleep(0.5)
|
|
||||||
if html:
|
|
||||||
print(f" ID {nid} exists.")
|
|
||||||
found.append(nid)
|
|
||||||
else:
|
|
||||||
print(f" ID {nid} not found (or blocked), stopping probe.")
|
|
||||||
break
|
|
||||||
return found
|
|
||||||
|
|
||||||
|
|
||||||
def read_last_seen() -> int:
|
|
||||||
"""Read the last-seen notification ID from the state file."""
|
|
||||||
try:
|
try:
|
||||||
with open(STATE_FILE) as f:
|
with open(STATE_FILE) as f:
|
||||||
return int(f.read().strip())
|
raw = f.read().strip()
|
||||||
|
# new format: JSON list of slugs
|
||||||
|
data = json.loads(raw)
|
||||||
|
if isinstance(data, list):
|
||||||
|
return set(data)
|
||||||
except Exception:
|
except Exception:
|
||||||
return 0
|
pass
|
||||||
|
return set()
|
||||||
|
|
||||||
|
|
||||||
def write_last_seen(n: int):
|
def write_seen_slugs(slugs: set):
|
||||||
"""Persist the last-seen notification ID."""
|
|
||||||
with open(STATE_FILE, "w") as f:
|
with open(STATE_FILE, "w") as f:
|
||||||
f.write(str(n))
|
json.dump(sorted(slugs), f)
|
||||||
print(f"State updated: last_seen = {n}")
|
print(f"State updated: {len(slugs)} slug(s) tracked.")
|
||||||
|
|
||||||
|
# ── Notification discovery ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def get_notification_slugs() -> list[tuple[str, str]]:
    """Fetch the notification listing page and extract notification links.

    The listing at ZIPAIR_NOTIF_LIST is retrieved through fs_fetch() and
    scanned for ``href`` attributes pointing at notification pages.

    Returns:
        A list of ``(slug, full_url)`` tuples in the order the links appear
        in the HTML, de-duplicated by slug (the first occurrence wins).
        ``full_url`` is ZIPAIR_NOTIF_BASE joined with the matched path.
        An empty list is returned when the listing fetch yields no HTML.
    """
    print("Fetching notification listing via FlareSolverr …")
    html = fs_fetch(ZIPAIR_NOTIF_LIST)
    if not html:
        return []

    # Match href="/<locale>/notification/<slug-or-id>" for the locales
    # en, ja, ko, th, zh-tw and zh-cn. The slug may not contain '"', '?',
    # '#' or '/', and the href must end right after it, so links with a
    # trailing slash or a query string are not picked up.
    # findall returns (full_path, slug) pairs because of the two groups.
    links = re.findall(
        r'href="(/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/([^"?#/]+))"',
        html,
    )

    # De-duplicate by slug while preserving page order; the same slug can
    # be linked more than once (and under more than one locale prefix).
    seen = set()
    result = []
    for path, slug in links:
        if slug and slug not in seen:
            seen.add(slug)
            result.append((slug, ZIPAIR_NOTIF_BASE + path))
    print(f" Found {len(result)} notification(s) on listing page.")
    return result
|
||||||
|
|
||||||
|
# ── Keyword check ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def matches_keywords(text: str) -> bool:
|
def matches_keywords(text: str) -> bool:
|
||||||
"""Return True if all TRIGGER_KEYWORDS appear in text."""
|
|
||||||
lower = text.lower()
|
lower = text.lower()
|
||||||
return all(kw in lower for kw in TRIGGER_KEYWORDS)
|
return all(kw in lower for kw in TRIGGER_KEYWORDS)
|
||||||
|
|
||||||
|
# ── ntfy ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def send_ntfy(notif_id: int, snippet: str):
|
def send_ntfy(slug: str, url: str):
|
||||||
"""Fire a push notification via ntfy."""
|
|
||||||
title = "✈️ ZIPAIR SIN→TYO Tickets On Sale!"
|
title = "✈️ ZIPAIR SIN→TYO Tickets On Sale!"
|
||||||
message = (
|
message = (
|
||||||
f"A new ZIPAIR announcement about Singapore winter sales was detected "
|
f"New ZIPAIR Singapore/winter announcement detected. "
|
||||||
f"(notification #{notif_id}). "
|
f"Check: {url}"
|
||||||
f"Check: https://www.zipair.net/en/notification/{notif_id}"
|
|
||||||
)
|
)
|
||||||
payload = json.dumps({
|
payload = json.dumps({
|
||||||
"topic": NTFY_URL.rstrip("/").rsplit("/", 1)[-1],
|
"topic": NTFY_URL.rstrip("/").rsplit("/", 1)[-1],
|
||||||
@@ -185,12 +114,8 @@ def send_ntfy(notif_id: int, snippet: str):
|
|||||||
"message": message,
|
"message": message,
|
||||||
"priority": 5,
|
"priority": 5,
|
||||||
"tags": ["airplane", "moneybag"],
|
"tags": ["airplane", "moneybag"],
|
||||||
"click": f"https://www.zipair.net/en/notification/{notif_id}",
|
"click": url,
|
||||||
"actions": [{
|
"actions": [{"action": "view", "label": "Open ZIPAIR", "url": url}],
|
||||||
"action": "view",
|
|
||||||
"label": "Open ZIPAIR",
|
|
||||||
"url": f"https://www.zipair.net/en/notification/{notif_id}",
|
|
||||||
}],
|
|
||||||
}).encode()
|
}).encode()
|
||||||
|
|
||||||
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
|
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
|
||||||
@@ -202,7 +127,6 @@ def send_ntfy(notif_id: int, snippet: str):
|
|||||||
)
|
)
|
||||||
if NTFY_TOKEN:
|
if NTFY_TOKEN:
|
||||||
req.add_header("Authorization", f"Bearer {NTFY_TOKEN}")
|
req.add_header("Authorization", f"Bearer {NTFY_TOKEN}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||||
print(f"ntfy response: {resp.status} {resp.reason}")
|
print(f"ntfy response: {resp.status} {resp.reason}")
|
||||||
@@ -210,68 +134,55 @@ def send_ntfy(notif_id: int, snippet: str):
|
|||||||
print(f"Failed to send ntfy: {e}", file=sys.stderr)
|
print(f"Failed to send ntfy: {e}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
||||||
# ── Main ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
print(f"\n[{datetime.utcnow().isoformat()}Z] ZIPAIR monitor starting …")
|
print(f"\n[{datetime.utcnow().isoformat()}Z] ZIPAIR monitor starting …")
|
||||||
print(f" Keywords : {TRIGGER_KEYWORDS}")
|
print(f" Keywords : {TRIGGER_KEYWORDS}")
|
||||||
print(f" ntfy URL : {NTFY_URL}")
|
print(f" ntfy URL : {NTFY_URL}")
|
||||||
|
print(f" FlareSolverr : {FLARESOLVERR_URL}")
|
||||||
|
|
||||||
warm_session()
|
seen_slugs = read_seen_slugs()
|
||||||
time.sleep(1)
|
print(f" Known slugs : {len(seen_slugs)}")
|
||||||
|
|
||||||
last_seen = read_last_seen()
|
notifications = get_notification_slugs()
|
||||||
print(f" Last seen notification ID: {last_seen}")
|
if not notifications:
|
||||||
|
print("Could not retrieve notification list; exiting.")
|
||||||
ids = get_notification_ids_from_sitemap()
|
|
||||||
|
|
||||||
if not ids:
|
|
||||||
# Sitemap completely blocked — probe directly
|
|
||||||
new_ids = probe_for_new_ids(last_seen)
|
|
||||||
if not new_ids:
|
|
||||||
print("No new notifications found via probe either.")
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
else:
|
|
||||||
new_ids = [i for i in ids if i > last_seen]
|
all_slugs = {slug for slug, _ in notifications}
|
||||||
if not new_ids:
|
new_entries = [(slug, url) for slug, url in notifications if slug not in seen_slugs]
|
||||||
|
|
||||||
|
if not new_entries:
|
||||||
print("No new notifications since last check. All good.")
|
print("No new notifications since last check. All good.")
|
||||||
write_last_seen(max(ids))
|
write_seen_slugs(all_slugs)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
print(f" {len(new_ids)} new notification(s) to check: {new_ids}")
|
print(f" {len(new_entries)} new notification(s): {[s for s,_ in new_entries]}")
|
||||||
found_match = None
|
found_match = None
|
||||||
|
|
||||||
for nid in new_ids:
|
for slug, url in new_entries:
|
||||||
url = ZIPAIR_NOTIF.format(id=nid)
|
print(f" Checking {slug} …")
|
||||||
print(f" Fetching notification #{nid} …")
|
# Check slug itself first (fast, no extra fetch needed)
|
||||||
text = fetch(url)
|
if matches_keywords(slug):
|
||||||
time.sleep(1)
|
print(f" ✅ MATCH in slug: {slug}")
|
||||||
|
found_match = (slug, url)
|
||||||
if not text:
|
|
||||||
print(f" Could not fetch #{nid}, skipping.")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if matches_keywords(text):
|
|
||||||
print(f" ✅ MATCH in notification #{nid}!")
|
|
||||||
lower = text.lower()
|
|
||||||
pos = lower.find("singapore")
|
|
||||||
snippet = text[max(0, pos - 50): pos + 200].strip()
|
|
||||||
found_match = (nid, snippet)
|
|
||||||
break
|
break
|
||||||
else:
|
# Fetch full page and check content
|
||||||
print(f" No match in #{nid}.")
|
text = fs_fetch(url)
|
||||||
|
time.sleep(1)
|
||||||
|
if matches_keywords(text):
|
||||||
|
print(f" ✅ MATCH in page content: {slug}")
|
||||||
|
found_match = (slug, url)
|
||||||
|
break
|
||||||
|
print(f" No match.")
|
||||||
|
|
||||||
# Advance state to highest ID we've confirmed exists
|
write_seen_slugs(all_slugs)
|
||||||
if ids:
|
|
||||||
write_last_seen(max(ids))
|
|
||||||
elif new_ids:
|
|
||||||
write_last_seen(max(new_ids))
|
|
||||||
|
|
||||||
if found_match:
|
if found_match:
|
||||||
nid, snippet = found_match
|
slug, url = found_match
|
||||||
print(f"\n🚨 Sending ntfy push for notification #{nid}")
|
print(f"\n🚨 Sending ntfy push for {slug}")
|
||||||
send_ntfy(nid, snippet)
|
send_ntfy(slug, url)
|
||||||
print("Done — notification sent!")
|
print("Done — notification sent!")
|
||||||
else:
|
else:
|
||||||
print("\nNo Singapore winter sale announcement found yet.")
|
print("\nNo Singapore winter sale announcement found yet.")
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
0
|
[]
|
||||||
|
|||||||
Reference in New Issue
Block a user