fix: ntfy URL fallback, drop Sec-Fetch headers, probe IDs when sitemap blocked

- Use `or` so empty NTFY_URL env var falls back to hardcoded default
- Remove Referer and Sec-Fetch-* headers that can trigger Cloudflare bot detection
- Try sitemap_index.xml as second sitemap attempt
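- When both sitemaps are blocked, probe up to PROBE_AHEAD (20) notification IDs
  sequentially above last_seen as a last resort, stopping at the first miss;
  this replaces the old notification listing page fallback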

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 16:12:20 +08:00
parent ec71d6a504
commit e9d7fdf7cc

View File

@@ -17,13 +17,16 @@ import urllib.error
from datetime import datetime from datetime import datetime
# ── Config (set via environment variables / Gitea secrets) ────────────────── # ── Config (set via environment variables / Gitea secrets) ──────────────────
NTFY_URL = os.environ.get("NTFY_URL", "https://ntfy.isky-homelab.com/zipair") NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt") STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml" ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification" ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}" ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
# How many IDs above last_seen to probe when sitemap is unavailable
PROBE_AHEAD = 20
# Keywords that must ALL appear (case-insensitive) in a notification page # Keywords that must ALL appear (case-insensitive) in a notification page
# to trigger an alert. Tune these as needed. # to trigger an alert. Tune these as needed.
@@ -40,10 +43,6 @@ HEADERS = {
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8", "Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
"Accept-Encoding": "gzip, deflate, br", "Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive", "Connection": "keep-alive",
"Referer": "https://www.zipair.net/",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Upgrade-Insecure-Requests": "1", "Upgrade-Insecure-Requests": "1",
} }
@@ -55,7 +54,6 @@ def fetch(url: str, timeout: int = 15) -> str:
try: try:
with urllib.request.urlopen(req, timeout=timeout) as resp: with urllib.request.urlopen(req, timeout=timeout) as resp:
raw = resp.read() raw = resp.read()
# handle gzip transparently (urlopen usually does, but just in case)
try: try:
import gzip import gzip
return gzip.decompress(raw).decode("utf-8", errors="replace") return gzip.decompress(raw).decode("utf-8", errors="replace")
@@ -70,26 +68,42 @@ def fetch(url: str, timeout: int = 15) -> str:
def get_notification_ids_from_sitemap() -> list[int]: def get_notification_ids_from_sitemap() -> list[int]:
"""Parse the ZIPAIR sitemap and return all notification IDs found.""" """Try sitemap.xml then sitemap_index.xml; return sorted notification IDs."""
print("Fetching sitemap …") for sitemap_url in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
xml = fetch(ZIPAIR_SITEMAP) print(f"Fetching {sitemap_url}")
if xml: xml = fetch(sitemap_url)
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)] if xml:
if ids: # sitemap index may reference sub-sitemaps — fetch those too
ids = sorted(set(ids)) sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
print(f" Found {len(ids)} notification IDs in sitemap (max={ids[-1]})") for sub in sub_sitemaps:
return ids if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
print(f" Fetching sub-sitemap {sub}")
xml += fetch(sub)
time.sleep(0.5)
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
if ids:
ids = sorted(set(ids))
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
return ids
# Fallback: scrape the notification listing page return []
print(" Sitemap unavailable, trying notification listing page …")
html = fetch(ZIPAIR_NOTIF_LIST)
if not html: def probe_for_new_ids(last_seen: int) -> list[int]:
print(" Notification listing page also unavailable.", file=sys.stderr) """When sitemap is unavailable, probe notification pages above last_seen."""
return [] print(f" Sitemap unavailable — probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD}")
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)] found = []
ids = sorted(set(ids)) for nid in range(last_seen + 1, last_seen + PROBE_AHEAD + 1):
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1] if ids else 'n/a'})") url = ZIPAIR_NOTIF.format(id=nid)
return ids html = fetch(url)
time.sleep(0.5)
if html and f"/notification/{nid}" in html:
print(f" ID {nid} exists.")
found.append(nid)
else:
print(f" ID {nid} not found, stopping probe.")
break
return found
def read_last_seen() -> int: def read_last_seen() -> int:
@@ -136,7 +150,6 @@ def send_ntfy(notif_id: int, snippet: str):
}], }],
}).encode() }).encode()
# Build the POST request to the ntfy server base URL
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0] base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
req = urllib.request.Request( req = urllib.request.Request(
f"{base_url}/", f"{base_url}/",
@@ -166,15 +179,19 @@ def main():
print(f" Last seen notification ID: {last_seen}") print(f" Last seen notification ID: {last_seen}")
ids = get_notification_ids_from_sitemap() ids = get_notification_ids_from_sitemap()
if not ids:
print("No notification IDs found; exiting.")
sys.exit(0)
new_ids = [i for i in ids if i > last_seen] if not ids:
if not new_ids: # Sitemap completely blocked — probe directly
print("No new notifications since last check. All good.") new_ids = probe_for_new_ids(last_seen)
write_last_seen(max(ids)) if not new_ids:
sys.exit(0) print("No new notifications found via probe either.")
sys.exit(0)
else:
new_ids = [i for i in ids if i > last_seen]
if not new_ids:
print("No new notifications since last check. All good.")
write_last_seen(max(ids))
sys.exit(0)
print(f" {len(new_ids)} new notification(s) to check: {new_ids}") print(f" {len(new_ids)} new notification(s) to check: {new_ids}")
found_match = None found_match = None
@@ -183,7 +200,7 @@ def main():
url = ZIPAIR_NOTIF.format(id=nid) url = ZIPAIR_NOTIF.format(id=nid)
print(f" Fetching notification #{nid}") print(f" Fetching notification #{nid}")
text = fetch(url) text = fetch(url)
time.sleep(1) # be polite time.sleep(1)
if not text: if not text:
print(f" Could not fetch #{nid}, skipping.") print(f" Could not fetch #{nid}, skipping.")
@@ -191,7 +208,6 @@ def main():
if matches_keywords(text): if matches_keywords(text):
print(f" ✅ MATCH in notification #{nid}!") print(f" ✅ MATCH in notification #{nid}!")
# Grab a short snippet for context
lower = text.lower() lower = text.lower()
pos = lower.find("singapore") pos = lower.find("singapore")
snippet = text[max(0, pos - 50): pos + 200].strip() snippet = text[max(0, pos - 50): pos + 200].strip()
@@ -200,8 +216,11 @@ def main():
else: else:
print(f" No match in #{nid}.") print(f" No match in #{nid}.")
# Always advance the state to the latest ID we've seen # Advance state to highest ID we've confirmed exists
write_last_seen(max(ids)) if ids:
write_last_seen(max(ids))
elif new_ids:
write_last_seen(max(new_ids))
if found_match: if found_match:
nid, snippet = found_match nid, snippet = found_match