fix: ntfy URL fallback, drop Sec-Fetch headers, probe IDs when sitemap blocked
- Use `or` so an empty NTFY_URL env var falls back to the hardcoded default
- Remove Sec-Fetch-* headers that can trigger Cloudflare bot detection
- Try sitemap_index.xml as a second sitemap attempt
- When both sitemaps are blocked, probe notification IDs sequentially above last_seen as a last resort

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
101
check_zipair.py
101
check_zipair.py
@@ -17,13 +17,16 @@ import urllib.error
|
||||
from datetime import datetime
|
||||
|
||||
# ── Config (set via environment variables / Gitea secrets) ──────────────────
|
||||
NTFY_URL = os.environ.get("NTFY_URL", "https://ntfy.isky-homelab.com/zipair")
|
||||
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
|
||||
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth
|
||||
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
|
||||
|
||||
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
|
||||
ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification"
|
||||
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
|
||||
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
|
||||
ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
|
||||
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
|
||||
|
||||
# How many IDs above last_seen to probe when sitemap is unavailable
|
||||
PROBE_AHEAD = 20
|
||||
|
||||
# Keywords that must ALL appear (case-insensitive) in a notification page
|
||||
# to trigger an alert. Tune these as needed.
|
||||
@@ -40,10 +43,6 @@ HEADERS = {
|
||||
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "https://www.zipair.net/",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
}
|
||||
|
||||
@@ -55,7 +54,6 @@ def fetch(url: str, timeout: int = 15) -> str:
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
raw = resp.read()
|
||||
# handle gzip transparently (urlopen usually does, but just in case)
|
||||
try:
|
||||
import gzip
|
||||
return gzip.decompress(raw).decode("utf-8", errors="replace")
|
||||
@@ -70,26 +68,42 @@ def fetch(url: str, timeout: int = 15) -> str:
|
||||
|
||||
|
||||
def get_notification_ids_from_sitemap() -> list[int]:
|
||||
"""Parse the ZIPAIR sitemap and return all notification IDs found."""
|
||||
print("Fetching sitemap …")
|
||||
xml = fetch(ZIPAIR_SITEMAP)
|
||||
if xml:
|
||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
||||
if ids:
|
||||
ids = sorted(set(ids))
|
||||
print(f" Found {len(ids)} notification IDs in sitemap (max={ids[-1]})")
|
||||
return ids
|
||||
"""Try sitemap.xml then sitemap_index.xml; return sorted notification IDs."""
|
||||
for sitemap_url in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||
print(f"Fetching {sitemap_url} …")
|
||||
xml = fetch(sitemap_url)
|
||||
if xml:
|
||||
# sitemap index may reference sub-sitemaps — fetch those too
|
||||
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
||||
for sub in sub_sitemaps:
|
||||
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||
print(f" Fetching sub-sitemap {sub} …")
|
||||
xml += fetch(sub)
|
||||
time.sleep(0.5)
|
||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
||||
if ids:
|
||||
ids = sorted(set(ids))
|
||||
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
||||
return ids
|
||||
|
||||
# Fallback: scrape the notification listing page
|
||||
print(" Sitemap unavailable, trying notification listing page …")
|
||||
html = fetch(ZIPAIR_NOTIF_LIST)
|
||||
if not html:
|
||||
print(" Notification listing page also unavailable.", file=sys.stderr)
|
||||
return []
|
||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
|
||||
ids = sorted(set(ids))
|
||||
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1] if ids else 'n/a'})")
|
||||
return ids
|
||||
return []
|
||||
|
||||
|
||||
def probe_for_new_ids(last_seen: int) -> list[int]:
    """Probe notification pages directly, starting just above ``last_seen``.

    Used as a last resort when no notification IDs could be obtained from
    the sitemaps. Candidate IDs ``last_seen + 1`` through
    ``last_seen + PROBE_AHEAD`` are fetched one at a time, in ascending order.

    Args:
        last_seen: Highest notification ID already processed.

    Returns:
        The consecutive run of IDs above ``last_seen`` whose pages were
        fetched and contained their own ``/notification/<id>`` path. The
        list is empty if the very first candidate fails that check.
    """
    print(f" Sitemap unavailable — probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD} …")

    existing: list[int] = []
    candidate = last_seen + 1
    upper = last_seen + PROBE_AHEAD

    while candidate <= upper:
        page = fetch(ZIPAIR_NOTIF.format(id=candidate))
        # Pause after every request, including a failed one, to stay polite.
        time.sleep(0.5)

        # A page counts as existing only if it came back non-empty and
        # mentions its own notification path.
        if not page or f"/notification/{candidate}" not in page:
            # The first miss ends the probe; IDs beyond a gap are not tried.
            print(f" ID {candidate} not found, stopping probe.")
            break

        print(f" ID {candidate} exists.")
        existing.append(candidate)
        candidate += 1

    return existing
|
||||
|
||||
|
||||
def read_last_seen() -> int:
|
||||
@@ -136,7 +150,6 @@ def send_ntfy(notif_id: int, snippet: str):
|
||||
}],
|
||||
}).encode()
|
||||
|
||||
# Build the POST request to the ntfy server base URL
|
||||
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
|
||||
req = urllib.request.Request(
|
||||
f"{base_url}/",
|
||||
@@ -166,15 +179,19 @@ def main():
|
||||
print(f" Last seen notification ID: {last_seen}")
|
||||
|
||||
ids = get_notification_ids_from_sitemap()
|
||||
if not ids:
|
||||
print("No notification IDs found; exiting.")
|
||||
sys.exit(0)
|
||||
|
||||
new_ids = [i for i in ids if i > last_seen]
|
||||
if not new_ids:
|
||||
print("No new notifications since last check. All good.")
|
||||
write_last_seen(max(ids))
|
||||
sys.exit(0)
|
||||
if not ids:
|
||||
# Sitemap completely blocked — probe directly
|
||||
new_ids = probe_for_new_ids(last_seen)
|
||||
if not new_ids:
|
||||
print("No new notifications found via probe either.")
|
||||
sys.exit(0)
|
||||
else:
|
||||
new_ids = [i for i in ids if i > last_seen]
|
||||
if not new_ids:
|
||||
print("No new notifications since last check. All good.")
|
||||
write_last_seen(max(ids))
|
||||
sys.exit(0)
|
||||
|
||||
print(f" {len(new_ids)} new notification(s) to check: {new_ids}")
|
||||
found_match = None
|
||||
@@ -183,7 +200,7 @@ def main():
|
||||
url = ZIPAIR_NOTIF.format(id=nid)
|
||||
print(f" Fetching notification #{nid} …")
|
||||
text = fetch(url)
|
||||
time.sleep(1) # be polite
|
||||
time.sleep(1)
|
||||
|
||||
if not text:
|
||||
print(f" Could not fetch #{nid}, skipping.")
|
||||
@@ -191,7 +208,6 @@ def main():
|
||||
|
||||
if matches_keywords(text):
|
||||
print(f" ✅ MATCH in notification #{nid}!")
|
||||
# Grab a short snippet for context
|
||||
lower = text.lower()
|
||||
pos = lower.find("singapore")
|
||||
snippet = text[max(0, pos - 50): pos + 200].strip()
|
||||
@@ -200,8 +216,11 @@ def main():
|
||||
else:
|
||||
print(f" No match in #{nid}.")
|
||||
|
||||
# Always advance the state to the latest ID we've seen
|
||||
write_last_seen(max(ids))
|
||||
# Advance state to highest ID we've confirmed exists
|
||||
if ids:
|
||||
write_last_seen(max(ids))
|
||||
elif new_ids:
|
||||
write_last_seen(max(new_ids))
|
||||
|
||||
if found_match:
|
||||
nid, snippet = found_match
|
||||
|
||||
Reference in New Issue
Block a user