fix: ntfy URL fallback, drop Sec-Fetch headers, probe IDs when sitemap blocked
- Use `or` so an empty NTFY_URL env var falls back to the hardcoded default
- Remove Sec-Fetch-* headers that can trigger Cloudflare bot detection
- Try sitemap_index.xml as a second sitemap attempt
- When both sitemaps are blocked, probe notification IDs sequentially above last_seen as a last resort

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,14 +17,17 @@ import urllib.error
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
# ── Config (set via environment variables / Gitea secrets) ──────────────────
|
# ── Config (set via environment variables / Gitea secrets) ──────────────────
|
||||||
NTFY_URL = os.environ.get("NTFY_URL", "https://ntfy.isky-homelab.com/zipair")
|
NTFY_URL = os.environ.get("NTFY_URL") or "https://ntfy.isky-homelab.com/zipair"
|
||||||
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth
|
NTFY_TOKEN = os.environ.get("NTFY_TOKEN", "") # optional, if your ntfy requires auth
|
||||||
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
|
STATE_FILE = os.environ.get("STATE_FILE", "last_seen.txt")
|
||||||
|
|
||||||
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
|
ZIPAIR_SITEMAP = "https://www.zipair.net/sitemap.xml"
|
||||||
ZIPAIR_NOTIF_LIST = "https://www.zipair.net/en/notification"
|
ZIPAIR_SITEMAP_INDEX = "https://www.zipair.net/sitemap_index.xml"
|
||||||
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
|
ZIPAIR_NOTIF = "https://www.zipair.net/en/notification/{id}"
|
||||||
|
|
||||||
|
# How many IDs above last_seen to probe when sitemap is unavailable
|
||||||
|
PROBE_AHEAD = 20
|
||||||
|
|
||||||
# Keywords that must ALL appear (case-insensitive) in a notification page
|
# Keywords that must ALL appear (case-insensitive) in a notification page
|
||||||
# to trigger an alert. Tune these as needed.
|
# to trigger an alert. Tune these as needed.
|
||||||
TRIGGER_KEYWORDS = ["singapore", "winter"]
|
TRIGGER_KEYWORDS = ["singapore", "winter"]
|
||||||
@@ -40,10 +43,6 @@ HEADERS = {
|
|||||||
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
|
"Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"Referer": "https://www.zipair.net/",
|
|
||||||
"Sec-Fetch-Dest": "document",
|
|
||||||
"Sec-Fetch-Mode": "navigate",
|
|
||||||
"Sec-Fetch-Site": "same-origin",
|
|
||||||
"Upgrade-Insecure-Requests": "1",
|
"Upgrade-Insecure-Requests": "1",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -55,7 +54,6 @@ def fetch(url: str, timeout: int = 15) -> str:
|
|||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||||
raw = resp.read()
|
raw = resp.read()
|
||||||
# handle gzip transparently (urlopen usually does, but just in case)
|
|
||||||
try:
|
try:
|
||||||
import gzip
|
import gzip
|
||||||
return gzip.decompress(raw).decode("utf-8", errors="replace")
|
return gzip.decompress(raw).decode("utf-8", errors="replace")
|
||||||
@@ -70,26 +68,42 @@ def fetch(url: str, timeout: int = 15) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def get_notification_ids_from_sitemap() -> list[int]:
|
def get_notification_ids_from_sitemap() -> list[int]:
|
||||||
"""Parse the ZIPAIR sitemap and return all notification IDs found."""
|
"""Try sitemap.xml then sitemap_index.xml; return sorted notification IDs."""
|
||||||
print("Fetching sitemap …")
|
for sitemap_url in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||||
xml = fetch(ZIPAIR_SITEMAP)
|
print(f"Fetching {sitemap_url} …")
|
||||||
|
xml = fetch(sitemap_url)
|
||||||
if xml:
|
if xml:
|
||||||
|
# sitemap index may reference sub-sitemaps — fetch those too
|
||||||
|
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
||||||
|
for sub in sub_sitemaps:
|
||||||
|
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||||
|
print(f" Fetching sub-sitemap {sub} …")
|
||||||
|
xml += fetch(sub)
|
||||||
|
time.sleep(0.5)
|
||||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
||||||
if ids:
|
if ids:
|
||||||
ids = sorted(set(ids))
|
ids = sorted(set(ids))
|
||||||
print(f" Found {len(ids)} notification IDs in sitemap (max={ids[-1]})")
|
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
# Fallback: scrape the notification listing page
|
|
||||||
print(" Sitemap unavailable, trying notification listing page …")
|
|
||||||
html = fetch(ZIPAIR_NOTIF_LIST)
|
|
||||||
if not html:
|
|
||||||
print(" Notification listing page also unavailable.", file=sys.stderr)
|
|
||||||
return []
|
return []
|
||||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
|
|
||||||
ids = sorted(set(ids))
|
|
||||||
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1] if ids else 'n/a'})")
|
def probe_for_new_ids(last_seen: int) -> list[int]:
|
||||||
return ids
|
"""When sitemap is unavailable, probe notification pages above last_seen."""
|
||||||
|
print(f" Sitemap unavailable — probing IDs {last_seen+1} to {last_seen+PROBE_AHEAD} …")
|
||||||
|
found = []
|
||||||
|
for nid in range(last_seen + 1, last_seen + PROBE_AHEAD + 1):
|
||||||
|
url = ZIPAIR_NOTIF.format(id=nid)
|
||||||
|
html = fetch(url)
|
||||||
|
time.sleep(0.5)
|
||||||
|
if html and f"/notification/{nid}" in html:
|
||||||
|
print(f" ID {nid} exists.")
|
||||||
|
found.append(nid)
|
||||||
|
else:
|
||||||
|
print(f" ID {nid} not found, stopping probe.")
|
||||||
|
break
|
||||||
|
return found
|
||||||
|
|
||||||
|
|
||||||
def read_last_seen() -> int:
|
def read_last_seen() -> int:
|
||||||
@@ -136,7 +150,6 @@ def send_ntfy(notif_id: int, snippet: str):
|
|||||||
}],
|
}],
|
||||||
}).encode()
|
}).encode()
|
||||||
|
|
||||||
# Build the POST request to the ntfy server base URL
|
|
||||||
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
|
base_url = NTFY_URL.rstrip("/").rsplit("/", 1)[0]
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{base_url}/",
|
f"{base_url}/",
|
||||||
@@ -166,10 +179,14 @@ def main():
|
|||||||
print(f" Last seen notification ID: {last_seen}")
|
print(f" Last seen notification ID: {last_seen}")
|
||||||
|
|
||||||
ids = get_notification_ids_from_sitemap()
|
ids = get_notification_ids_from_sitemap()
|
||||||
if not ids:
|
|
||||||
print("No notification IDs found; exiting.")
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
if not ids:
|
||||||
|
# Sitemap completely blocked — probe directly
|
||||||
|
new_ids = probe_for_new_ids(last_seen)
|
||||||
|
if not new_ids:
|
||||||
|
print("No new notifications found via probe either.")
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
new_ids = [i for i in ids if i > last_seen]
|
new_ids = [i for i in ids if i > last_seen]
|
||||||
if not new_ids:
|
if not new_ids:
|
||||||
print("No new notifications since last check. All good.")
|
print("No new notifications since last check. All good.")
|
||||||
@@ -183,7 +200,7 @@ def main():
|
|||||||
url = ZIPAIR_NOTIF.format(id=nid)
|
url = ZIPAIR_NOTIF.format(id=nid)
|
||||||
print(f" Fetching notification #{nid} …")
|
print(f" Fetching notification #{nid} …")
|
||||||
text = fetch(url)
|
text = fetch(url)
|
||||||
time.sleep(1) # be polite
|
time.sleep(1)
|
||||||
|
|
||||||
if not text:
|
if not text:
|
||||||
print(f" Could not fetch #{nid}, skipping.")
|
print(f" Could not fetch #{nid}, skipping.")
|
||||||
@@ -191,7 +208,6 @@ def main():
|
|||||||
|
|
||||||
if matches_keywords(text):
|
if matches_keywords(text):
|
||||||
print(f" ✅ MATCH in notification #{nid}!")
|
print(f" ✅ MATCH in notification #{nid}!")
|
||||||
# Grab a short snippet for context
|
|
||||||
lower = text.lower()
|
lower = text.lower()
|
||||||
pos = lower.find("singapore")
|
pos = lower.find("singapore")
|
||||||
snippet = text[max(0, pos - 50): pos + 200].strip()
|
snippet = text[max(0, pos - 50): pos + 200].strip()
|
||||||
@@ -200,8 +216,11 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print(f" No match in #{nid}.")
|
print(f" No match in #{nid}.")
|
||||||
|
|
||||||
# Always advance the state to the latest ID we've seen
|
# Advance state to highest ID we've confirmed exists
|
||||||
|
if ids:
|
||||||
write_last_seen(max(ids))
|
write_last_seen(max(ids))
|
||||||
|
elif new_ids:
|
||||||
|
write_last_seen(max(new_ids))
|
||||||
|
|
||||||
if found_match:
|
if found_match:
|
||||||
nid, snippet = found_match
|
nid, snippet = found_match
|
||||||
|
|||||||
Reference in New Issue
Block a user