- Use `or` so empty NTFY_URL env var falls back to hardcoded default
- Remove Sec-Fetch-* headers that can trigger Cloudflare bot detection
- Try sitemap_index.xml as second sitemap attempt
- When both sitemaps are blocked, probe notification IDs sequentially above last_seen as a last resort

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
238 lines
8.2 KiB
Python
238 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
ZIPAIR Singapore Winter Sale Monitor
|
|
Checks ZIPAIR's sitemap for new notifications about Singapore ticket sales.
|
|
Sends a push notification via ntfy when detected.
|
|
|
|
State is persisted in last_seen.txt (committed back to repo by the workflow).
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import json
|
|
import time
|
|
import urllib.request
|
|
import urllib.error
|
|
from datetime import datetime
|
|
|
|
# ── Config (set via environment variables / Gitea secrets) ──────────────────
# NTFY_URL and STATE_FILE use `or` instead of a .get() default so that an
# environment variable that is set but empty still falls back to the default;
# .get("X", default) would return the empty string in that case.
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "")  # optional, if your ntfy requires auth
STATE_FILE = os.environ.get("STATE_FILE") or "last_seen.txt"

ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
# Template for a single notification page; format with id=<notification ID>.
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"

# How many IDs above last_seen to probe when sitemap is unavailable
PROBE_AHEAD = 20

# Keywords that must ALL appear (case-insensitive) in a notification page
# to trigger an alert. Tune these as needed.
TRIGGER_KEYWORDS = ["singapore", "winter"]
|
|
|
|
# Browser-like headers to avoid 403.
# Accept-Encoding advertises only gzip: the response body is decompressed by
# hand with the gzip module, and the standard library has no brotli ("br")
# decoder, so offering "br" or "deflate" invites replies that would be
# decoded into unreadable text.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
    "Accept-Encoding": "gzip",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}
|
|
|
|
# ── Helpers ─────────────────────────────────────────────────────────────────
|
|
|
|
def fetch(url: str, timeout: int = 15) -> str:
    """Fetch a URL and return the decoded body, or empty string on error.

    Args:
        url: Address to request.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The response body decoded as UTF-8 (undecodable bytes are replaced).
        A gzip-compressed body is decompressed first. Returns ``""`` when the
        request itself fails; the failure is reported on stderr, not raised.
    """
    # Function-scope imports: this helper is the only place that needs them.
    import gzip
    import zlib

    # Only advertise an encoding that can be undone below. urlopen does not
    # decompress bodies, and the standard library cannot decode brotli.
    request_headers = {**HEADERS, "Accept-Encoding": "gzip"}
    req = urllib.request.Request(url, headers=request_headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        print(f" HTTP {e.code} for {url}", file=sys.stderr)
        return ""
    except Exception as e:
        # Best-effort by design: any transport failure means "no body".
        print(f" Error fetching {url}: {e}", file=sys.stderr)
        return ""

    # Recognise gzip by its two-byte magic number instead of attempting to
    # decompress every response and swallowing whatever goes wrong.
    if raw[:2] == b"\x1f\x8b":
        try:
            raw = gzip.decompress(raw)
        except (OSError, EOFError, zlib.error) as e:
            # Not valid gzip after all: fall back to the bytes as received.
            print(f" Could not gunzip response from {url}: {e}", file=sys.stderr)

    return raw.decode("utf-8", errors="replace")
|
|
|
|
|
|
def get_notification_ids_from_sitemap() -> list[int]:
    """Collect notification IDs from the site's sitemap(s).

    Each top-level sitemap URL is tried in turn. For the first one that
    yields any notification links (including links found in the sub-sitemaps
    it references), the distinct IDs are returned in ascending order.

    Returns:
        Sorted, de-duplicated notification IDs, or an empty list when no
        sitemap could be fetched or none contained a notification link.
    """
    top_level = (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX)

    for candidate in top_level:
        print(f"Fetching {candidate} …")
        body = fetch(candidate)
        if not body:
            continue

        # A sitemap index lists further sitemaps in <loc> tags; pull those in
        # as well, skipping the two top-level URLs themselves.
        chunks = [body]
        for child in re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", body):
            if child in top_level:
                continue
            print(f" Fetching sub-sitemap {child} …")
            chunks.append(fetch(child))
            time.sleep(0.5)  # pause between sub-sitemap requests

        combined = "".join(chunks)
        unique_ids = sorted(
            {int(num) for num in re.findall(r"/notification/(\d+)", combined)}
        )
        if unique_ids:
            print(f" Found {len(unique_ids)} notification IDs (max={unique_ids[-1]})")
            return unique_ids

    return []
|
|
|
|
|
|
def probe_for_new_ids(last_seen: int) -> list[int]:
    """When sitemap is unavailable, probe notification pages above last_seen.

    Requests the pages for ``last_seen + 1`` through
    ``last_seen + PROBE_AHEAD`` in order and stops at the first ID whose page
    cannot be fetched or does not reference its own notification URL.

    Args:
        last_seen: Highest notification ID already processed.

    Returns:
        The consecutive run of IDs directly above ``last_seen`` that were
        found, in ascending order (possibly empty).
    """
    print(f" Sitemap unavailable — probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD} …")
    found = []
    for nid in range(last_seen + 1, last_seen + PROBE_AHEAD + 1):
        html = fetch(ZIPAIR_NOTIF.format(id=nid))
        time.sleep(0.5)  # pause between probe requests
        # The page must mention its own URL. The negative lookahead stops
        # "/notification/12" from being satisfied by a link to
        # "/notification/123" on some other page.
        if html and re.search(rf"/notification/{nid}(?!\d)", html):
            print(f" ID {nid} exists.")
            found.append(nid)
        else:
            print(f" ID {nid} not found, stopping probe.")
            break
    return found
|
|
|
|
|
|
def read_last_seen() -> int:
    """Read the last-seen notification ID from the state file.

    Returns:
        The integer stored in ``STATE_FILE``, or ``0`` when the file does not
        exist or cannot be read or parsed. A missing file is treated as a
        normal first run; any other problem is reported on stderr so that a
        corrupted state file does not go unnoticed.
    """
    try:
        with open(STATE_FILE, encoding="utf-8") as f:
            return int(f.read().strip())
    except FileNotFoundError:
        # No state yet: start from the beginning.
        return 0
    except (OSError, ValueError) as e:
        # ValueError covers non-numeric content and undecodable bytes.
        print(f" Could not read state from {STATE_FILE}: {e}", file=sys.stderr)
        return 0
|
|
|
|
|
|
def write_last_seen(n: int):
    """Store *n* in the state file as the last-seen notification ID.

    The file is overwritten with the decimal text of *n* (no trailing
    newline) and a confirmation line is printed.
    """
    text = str(n)
    with open(STATE_FILE, "w") as state:
        state.write(text)
    print(f"State updated: last_seen = {n}")
|
|
|
|
|
|
def matches_keywords(text: str) -> bool:
    """Report whether every entry of TRIGGER_KEYWORDS occurs in *text*.

    *text* is lower-cased before the substring checks; the keywords are
    compared as written.
    """
    haystack = text.lower()
    for keyword in TRIGGER_KEYWORDS:
        if keyword not in haystack:
            return False
    return True
|
|
|
|
|
|
def send_ntfy(notif_id: int, snippet: str):
    """Publish a max-priority ntfy push announcing notification *notif_id*.

    NTFY_URL is split at its last "/" into a server base and a topic; the
    message is POSTed as JSON to the server base with the topic in the body.
    A bearer token is attached when NTFY_TOKEN is set.

    Args:
        notif_id: ID of the ZIPAIR notification to link to.
        snippet: Accepted for the caller's convenience; not used in the
            message that is sent.

    Exits the process with status 1 if the request fails.
    """
    notif_url = f"https://www.zipair.net/en/notification/{notif_id}"

    # "<server>/<topic>" -> server part and topic part.
    url_parts = NTFY_URL.rstrip("/").rsplit("/", 1)
    server = url_parts[0]
    topic = url_parts[-1]

    body = {
        "topic": topic,
        "title": "✈️ ZIPAIR SIN→TYO Tickets On Sale!",
        "message": (
            "A new ZIPAIR announcement about Singapore winter sales was detected "
            f"(notification #{notif_id}). "
            f"Check: {notif_url}"
        ),
        "priority": 5,
        "tags": ["airplane", "moneybag"],
        "click": notif_url,
        "actions": [
            {"action": "view", "label": "Open ZIPAIR", "url": notif_url},
        ],
    }

    request_headers = {"Content-Type": "application/json"}
    if NTFY_TOKEN:
        request_headers["Authorization"] = f"Bearer {NTFY_TOKEN}"

    req = urllib.request.Request(
        f"{server}/",
        data=json.dumps(body).encode(),
        headers=request_headers,
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            print(f"ntfy response: {resp.status} {resp.reason}")
    except Exception as e:
        print(f"Failed to send ntfy: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
# ── Main ────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Run one monitoring pass and exit.

    Steps:
      1. Read the last-seen notification ID.
      2. List notification IDs from the sitemap, or probe IDs directly above
         the last-seen one when the sitemap yields nothing.
      3. Fetch each new notification in ascending order and stop at the
         first one containing all trigger keywords.
      4. If there was a match, send the push notification.
      5. Record the highest ID that was actually examined.

    The push is sent before the state is written: send_ntfy exits the
    process on failure, so a failed push leaves the state untouched and the
    same notification is picked up again on the next run. After a match only
    the IDs up to the match are recorded, so later notifications are examined
    by the next run instead of being skipped.

    Always terminates via sys.exit: status 0 on a normal pass, status 1 (from
    send_ntfy) when the push could not be delivered.
    """
    from datetime import timezone

    # Naive UTC timestamp, so the literal "Z" suffix below stays correct.
    started = datetime.now(timezone.utc).replace(tzinfo=None)
    print(f"\n[{started.isoformat()}Z] ZIPAIR monitor starting …")
    print(f" Keywords : {TRIGGER_KEYWORDS}")
    print(f" ntfy URL : {NTFY_URL}")

    last_seen = read_last_seen()
    print(f" Last seen notification ID: {last_seen}")

    ids = get_notification_ids_from_sitemap()

    if not ids:
        # Sitemap completely blocked — probe directly
        new_ids = probe_for_new_ids(last_seen)
        if not new_ids:
            print("No new notifications found via probe either.")
            sys.exit(0)
    else:
        new_ids = [i for i in ids if i > last_seen]
        if not new_ids:
            print("No new notifications since last check. All good.")
            # State is left alone: every sitemap ID is <= last_seen here, so
            # writing max(ids) could only move the state backwards.
            sys.exit(0)

    print(f" {len(new_ids)} new notification(s) to check: {new_ids}")
    found_match = None
    last_checked = last_seen

    # new_ids is ascending in both branches above.
    for nid in new_ids:
        url = ZIPAIR_NOTIF.format(id=nid)
        print(f" Fetching notification #{nid} …")
        text = fetch(url)
        time.sleep(1)  # pause between page requests
        last_checked = nid

        if not text:
            # NOTE(review): a page that fails to load is skipped and the
            # state still advances past it, so it is not retried later.
            print(f" Could not fetch #{nid}, skipping.")
            continue

        if matches_keywords(text):
            print(f" ✅ MATCH in notification #{nid}!")
            lower = text.lower()
            pos = lower.find("singapore")
            snippet = text[max(0, pos - 50): pos + 200].strip()
            found_match = (nid, snippet)
            break

        print(f" No match in #{nid}.")

    if found_match:
        nid, snippet = found_match
        print(f"\n🚨 Sending ntfy push for notification #{nid}")
        send_ntfy(nid, snippet)  # exits with status 1 on failure
        print("Done — notification sent!")
    else:
        print("\nNo Singapore winter sale announcement found yet.")

    # Advance state only as far as the IDs examined in this run.
    write_last_seen(last_checked)

    sys.exit(0)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|