fix: ntfy URL fallback, drop Sec-Fetch headers, probe IDs when sitemap blocked

- Use `or` so empty NTFY_URL env var falls back to hardcoded default
- Remove Sec-Fetch-* headers that can trigger Cloudflare bot detection
- Try sitemap_index.xml as second sitemap attempt
- When both sitemaps are blocked, probe notification IDs sequentially
  above last_seen as a last resort

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 16:12:20 +08:00
parent ec71d6a504
commit e9d7fdf7cc

View File

@@ -17,14 +17,17 @@ import urllib.error
from datetime import datetime
# ── Config (set via environment variables / Gitea secrets) ──────────────────
# `or` instead of a .get() default: an NTFY_URL that is set but empty is falsy,
# so it falls back to the hardcoded topic URL just like an unset variable.
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "")  # optional, if your ntfy requires auth
# Path of the state file; presumably holds the last seen notification ID
# (read/written by helpers outside this section — confirm there).
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")

# Discovery sources for notification IDs.
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification"
ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
# URL template for a single notification page; fill in with .format(id=...).
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"

# How many IDs above last_seen to probe when sitemap is unavailable
PROBE_AHEAD = 20

# Keywords that must ALL appear (case-insensitive) in a notification page
# to trigger an alert. Tune these as needed.
TRIGGER_KEYWORDS = ["singapore", "winter"]
@@ -40,10 +43,6 @@ HEADERS = {
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Referer": "https://www.zipair.net/",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Upgrade-Insecure-Requests": "1",
}
@@ -55,7 +54,6 @@ def fetch(url: str, timeout: int = 15) -> str:
try:
with urllib.request.urlopen(req, timeout=timeout) as resp:
raw = resp.read()
# handle gzip transparently (urlopen usually does, but just in case)
try:
import gzip
return gzip.decompress(raw).decode("utf-8", errors="replace")
@@ -70,26 +68,42 @@ def fetch(url: str, timeout: int = 15) -> str:
def get_notification_ids_from_sitemap() -> list[int]:
    """Collect notification IDs from the ZIPAIR sitemaps.

    Tries ``ZIPAIR_SITEMAP`` first and ``ZIPAIR_SITEMAP_INDEX`` second. For
    each one that can be fetched, every ``<loc>`` URL containing "sitemap" is
    fetched as a sub-sitemap (one level deep, not recursively) and searched
    too. The first attempt that yields at least one ``/notification/<id>``
    link wins.

    Returns:
        Sorted, de-duplicated notification IDs, or an empty list when neither
        sitemap produced any (callers then fall back to probing).
    """
    for sitemap_url in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
        print(f"Fetching {sitemap_url}")
        xml = fetch(sitemap_url)
        if not xml:
            continue

        # Search each document on its own rather than concatenating them, so
        # a match can never be formed across the seam between two documents.
        documents = [xml]

        # sitemap index may reference sub-sitemaps — fetch those too
        for sub in re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml):
            if sub in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
                continue
            print(f" Fetching sub-sitemap {sub}")
            sub_xml = fetch(sub)
            if sub_xml:  # a failed sub-sitemap fetch is simply skipped
                documents.append(sub_xml)
            time.sleep(0.5)

        ids = sorted(
            {
                int(m)
                for doc in documents
                for m in re.findall(r"/notification/(\d+)", doc)
            }
        )
        if ids:
            print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
            return ids

    return []
def probe_for_new_ids(last_seen: int) -> list[int]:
    """Probe notification pages directly for IDs above ``last_seen``.

    Last-resort discovery for when the sitemaps yield nothing. Requests the
    ``ZIPAIR_NOTIF`` page for each ID from ``last_seen + 1`` up to
    ``last_seen + PROBE_AHEAD`` in ascending order and stops at the first ID
    that does not look like a real page, so a gap in the ID sequence ends
    the probe early.

    Args:
        last_seen: Highest notification ID already processed.

    Returns:
        The consecutive run of IDs directly above ``last_seen`` whose pages
        could be fetched and reference their own ID, in ascending order.
        Empty when the very first probe fails.
    """
    print(f" Sitemap unavailable — probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD}")
    found = []
    for nid in range(last_seen + 1, last_seen + PROBE_AHEAD + 1):
        html = fetch(ZIPAIR_NOTIF.format(id=nid))
        time.sleep(0.5)  # pause between consecutive requests
        # The page counts as existing only if it references its own ID. The
        # (?!\d) lookahead rejects longer IDs that merely share the prefix:
        # a plain substring test for "/notification/12" would also succeed on
        # a page that only links to "/notification/120".
        if html and re.search(rf"/notification/{nid}(?!\d)", html):
            print(f" ID {nid} exists.")
            found.append(nid)
        else:
            print(f" ID {nid} not found, stopping probe.")
            break
    return found
def read_last_seen() -> int:
@@ -136,7 +150,6 @@ def send_ntfy(notif_id: int, snippet: str):
}],
}).encode()
# Build the POST request to the ntfy server base URL
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
req = urllib.request.Request(
f"{base_url}/",
@@ -166,10 +179,14 @@ def main():
print(f" Last seen notification ID: {last_seen}")
ids = get_notification_ids_from_sitemap()
if not ids:
print("No notification IDs found; exiting.")
sys.exit(0)
if not ids:
# Sitemap completely blocked — probe directly
new_ids = probe_for_new_ids(last_seen)
if not new_ids:
print("No new notifications found via probe either.")
sys.exit(0)
else:
new_ids = [i for i in ids if i > last_seen]
if not new_ids:
print("No new notifications since last check. All good.")
@@ -183,7 +200,7 @@ def main():
url = ZIPAIR_NOTIF.format(id=nid)
print(f" Fetching notification #{nid}")
text = fetch(url)
time.sleep(1) # be polite
time.sleep(1)
if not text:
print(f" Could not fetch #{nid}, skipping.")
@@ -191,7 +208,6 @@ def main():
if matches_keywords(text):
print(f" ✅ MATCH in notification #{nid}!")
# Grab a short snippet for context
lower = text.lower()
pos = lower.find("singapore")
snippet = text[max(0, pos - 50): pos + 200].strip()
@@ -200,8 +216,11 @@ def main():
else:
print(f" No match in #{nid}.")
# Always advance the state to the latest ID we've seen
# Advance state to highest ID we've confirmed exists
if ids:
write_last_seen(max(ids))
elif new_ids:
write_last_seen(max(new_ids))
if found_match:
nid, snippet = found_match