#!/usr/bin/env python3
"""
ZIPAIR Singapore Winter Sale Monitor

Uses FlareSolverr to bypass WAF, scrapes the EN notification listing,
and fires an ntfy push when a Singapore/winter sale is detected.

State is persisted in last_seen.txt (committed back to repo by the workflow).
"""

# NOTE(review): os, re and sys are required by the code in this file; their
# import lines were outside the visible part of the change — confirm order.
import os
import re
import sys
import json
import time
import urllib.request
import urllib.error
from datetime import datetime

# ── Config (set via environment variables / Gitea secrets) ───────────────────
# `or` (rather than a .get() default) is used throughout so that a variable
# exported as an empty string, e.g. from an unset secret, still falls back to
# the default instead of yielding "".
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "")  # optional bearer token
STATE_FILE = os.environ.get("STATE_FILE") or "last_seen.txt"
FLARESOLVERR_URL = os.environ.get("FLARESOLVERR_URL") or "http://192.168.10.76:8191"

ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification/"
ZIPAIR_NOTIF_BASE = "https://www.zipair.net"

# Keywords that must ALL appear (case-insensitive) in a notification slug or
# page to trigger an alert.
TRIGGER_KEYWORDS = ["singapore", "winter"]
# ── FlareSolverr fetch ───────────────────────────────────────────────────────

def fs_fetch(url: str, timeout_ms: int = 60000) -> str:
    """Fetch *url* through FlareSolverr and return the page HTML.

    Args:
        url: Page to fetch.
        timeout_ms: Value sent as FlareSolverr's ``maxTimeout`` (milliseconds).
            The local HTTP timeout is this value in seconds plus 10, so that
            FlareSolverr gets the chance to report its own timeout first.

    Returns:
        The HTML of the fetched page, or "" when the request failed, the
        reply was malformed, FlareSolverr did not report ``"ok"``, or the
        target answered with an HTTP status >= 400.
    """
    payload = json.dumps({
        "cmd": "request.get",
        "url": url,
        "maxTimeout": timeout_ms,
    }).encode()
    req = urllib.request.Request(
        f"{FLARESOLVERR_URL.rstrip('/')}/v1",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout_ms // 1000 + 10) as resp:
            data = json.loads(resp.read())
    except Exception as e:
        # Deliberate best-effort boundary: any transport or decoding problem
        # is logged and reported to the caller as "no content".
        print(f"  FlareSolverr error for {url}: {e}", file=sys.stderr)
        return ""

    if not isinstance(data, dict):
        print(f"  FlareSolverr error for {url}: unexpected reply shape", file=sys.stderr)
        return ""

    # "solution" may be missing or null when FlareSolverr failed.
    solution = data.get("solution")
    if not isinstance(solution, dict):
        solution = {}
    fs_status = data.get("status")
    http_status = solution.get("status", 0)
    html = solution.get("response")
    if not isinstance(html, str):
        html = ""
    print(f"  FlareSolverr: {fs_status} | HTTP {http_status} | {len(html)} bytes")

    if fs_status != "ok":
        print(f"  FlareSolverr could not fetch {url}: {data.get('message')}", file=sys.stderr)
        return ""
    if isinstance(http_status, int) and http_status >= 400:
        # An error/block page must not be mistaken for real page content.
        print(f"  HTTP {http_status} for {url}; ignoring body.", file=sys.stderr)
        return ""
    return html


# ── State (slug-based) ───────────────────────────────────────────────────────

def read_seen_slugs(path: str = "") -> set:
    """Return the set of notification slugs recorded in the state file.

    Args:
        path: State file to read; defaults to ``STATE_FILE`` when empty.

    Returns:
        The stored slugs. An empty set is returned when the file is missing
        (first run), unreadable, not valid JSON, or still in the legacy
        format (a bare integer ID such as ``0``).
    """
    state_path = path or STATE_FILE
    try:
        with open(state_path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return set()  # first run: nothing seen yet
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"  Could not read state from {state_path}: {e}", file=sys.stderr)
        return set()
    if not isinstance(data, list):
        # Legacy format (integer last-seen ID): start over with slugs.
        return set()
    return {item for item in data if isinstance(item, str)}


def write_seen_slugs(slugs: set, path: str = ""):
    """Persist *slugs* as a sorted JSON list.

    Args:
        slugs: Slugs to record as already processed.
        path: State file to write; defaults to ``STATE_FILE`` when empty.
    """
    state_path = path or STATE_FILE
    with open(state_path, "w", encoding="utf-8") as f:
        json.dump(sorted(slugs), f)
        f.write("\n")  # keep the committed file newline-terminated
    print(f"State updated: {len(slugs)} slug(s) tracked.")


# ── Notification discovery ───────────────────────────────────────────────────

# Matches /en/notification/some-slug or /en/notification/123 (any supported
# language prefix), optionally followed by a trailing slash, query or fragment.
_NOTIF_HREF_RE = re.compile(
    r'href="(/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/([^"?#/]+))/?(?:[?#][^"]*)?"'
)


def parse_notification_links(html: str, base_url: str = "") -> list[tuple[str, str]]:
    """Extract ``(slug, absolute_url)`` pairs from listing-page HTML.

    Duplicated slugs keep their first occurrence; document order is preserved.

    Args:
        html: HTML of the notification listing page.
        base_url: Site origin to prefix to each path; defaults to
            ``ZIPAIR_NOTIF_BASE`` when empty.
    """
    origin = (base_url or ZIPAIR_NOTIF_BASE).rstrip("/")
    links = {}
    for link_path, slug in _NOTIF_HREF_RE.findall(html):
        links.setdefault(slug, origin + link_path)
    return list(links.items())


def get_notification_slugs() -> list[tuple[str, str]]:
    """Fetch the EN notification listing and return ``(slug, full_url)`` pairs.

    Returns an empty list when the listing could not be fetched or contains
    no notification links.
    """
    print("Fetching notification listing via FlareSolverr …")
    html = fs_fetch(ZIPAIR_NOTIF_LIST)
    if not html:
        return []
    result = parse_notification_links(html)
    print(f"  Found {len(result)} notification(s) on listing page.")
    return result


# ── Keyword check ────────────────────────────────────────────────────────────

def matches_keywords(text: str, keywords=None) -> bool:
    """Return True if every keyword appears in *text*, ignoring case.

    Args:
        text: Slug or page content to search.
        keywords: Keywords to require; defaults to ``TRIGGER_KEYWORDS``.
    """
    wanted = TRIGGER_KEYWORDS if keywords is None else keywords
    lower = text.lower()
    return all(kw.lower() in lower for kw in wanted)


# ── ntfy ─────────────────────────────────────────────────────────────────────

def send_ntfy(slug: str, url: str):
    """Fire a max-priority ntfy push that links to *url*.

    The topic is the last path segment of ``NTFY_URL``; the JSON payload is
    posted to the part of ``NTFY_URL`` before it. Exits the process with
    status 1 when the push cannot be delivered.

    Args:
        slug: Slug of the matching notification (not part of the message).
        url: Notification page opened by the push's click/view action.
    """
    title = "✈️ ZIPAIR SIN→TYO Tickets On Sale!"
    message = (
        f"New ZIPAIR Singapore/winter announcement detected. "
        f"Check: {url}"
    )
    # NOTE(review): the "title" entry and the Request construction below were
    # not visible in the reviewed change; reconstructed to match ntfy's JSON
    # publishing format — confirm against the deployed script.
    payload = json.dumps({
        "topic": NTFY_URL.rstrip("/").rsplit("/", 1)[-1],
        "title": title,
        "message": message,
        "priority": 5,
        "tags": ["airplane", "moneybag"],
        "click": url,
        "actions": [{"action": "view", "label": "Open ZIPAIR", "url": url}],
    }).encode()

    base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
    req = urllib.request.Request(
        base_url,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    if NTFY_TOKEN:
        req.add_header("Authorization", f"Bearer {NTFY_TOKEN}")
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            print(f"ntfy response: {resp.status} {resp.reason}")
    except Exception as e:
        print(f"Failed to send ntfy: {e}", file=sys.stderr)
        sys.exit(1)


# ── Main ─────────────────────────────────────────────────────────────────────

def main():
    """Run one monitoring pass: discover, check, notify, persist state.

    New notifications are checked in listing order; the first one whose slug
    or page content matches the keywords triggers a single push. State is
    written only after the push succeeded, and slugs whose page could not be
    fetched are left out of the state so they are retried on the next run.
    """
    from datetime import timezone  # local: the file imports only `datetime`

    # Aware "now" (utcnow() is deprecated), printed in the same naive-ISO + "Z" form.
    started = datetime.now(timezone.utc).replace(tzinfo=None)
    print(f"\n[{started.isoformat()}Z] ZIPAIR monitor starting …")
    print(f"  Keywords     : {TRIGGER_KEYWORDS}")
    print(f"  ntfy URL     : {NTFY_URL}")
    print(f"  FlareSolverr : {FLARESOLVERR_URL}")

    seen_slugs = read_seen_slugs()
    print(f"  Known slugs  : {len(seen_slugs)}")

    notifications = get_notification_slugs()
    if not notifications:
        print("Could not retrieve notification list; exiting.")
        sys.exit(0)

    all_slugs = {slug for slug, _ in notifications}
    new_entries = [(slug, url) for slug, url in notifications if slug not in seen_slugs]

    if not new_entries:
        print("No new notifications since last check. All good.")
        write_seen_slugs(all_slugs)
        sys.exit(0)

    print(f"  {len(new_entries)} new notification(s): {[s for s, _ in new_entries]}")

    found_match = None
    unchecked = set()  # slugs whose page could not be fetched this run
    for slug, url in new_entries:
        print(f"  Checking {slug} …")
        # Check slug itself first (fast, no extra fetch needed)
        if matches_keywords(slug):
            print(f"    ✅ MATCH in slug: {slug}")
            found_match = (slug, url)
            break
        # Fetch full page and check content
        text = fs_fetch(url)
        time.sleep(1)
        if not text:
            print(f"    Could not fetch {slug}; will retry on the next run.", file=sys.stderr)
            unchecked.add(slug)
            continue
        if matches_keywords(text):
            print(f"    ✅ MATCH in page content: {slug}")
            found_match = (slug, url)
            break
        print("    No match.")

    if found_match:
        slug, url = found_match
        print(f"\n🚨 Sending ntfy push for {slug}")
        # send_ntfy exits non-zero on failure, i.e. before the state below is
        # saved, so an undelivered alert is retried on the next run.
        send_ntfy(slug, url)
        print("Done — notification sent!")
    else:
        print("\nNo Singapore winter sale announcement found yet.")

    write_seen_slugs(all_slugs - unchecked)


if __name__ == "__main__":
    main()