From 751fd361479b56ad28727c9454336b6a4d9b1d9e Mon Sep 17 00:00:00 2001 From: isky Date: Sun, 17 May 2026 16:19:45 +0800 Subject: [PATCH] fix: log sub-sitemap byte counts, add EN notification listing fallback Co-Authored-By: Claude Sonnet 4.6 --- check_zipair.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/check_zipair.py b/check_zipair.py index f6ff849..67bb347 100644 --- a/check_zipair.py +++ b/check_zipair.py @@ -89,18 +89,33 @@ def get_notification_ids_from_sitemap() -> list[int]: print(f"Fetching {sitemap_url} …") xml = fetch(sitemap_url) if xml: - # sitemap index may reference sub-sitemaps — fetch those too sub_sitemaps = re.findall(r"(https?://[^<]*sitemap[^<]*)", xml) for sub in sub_sitemaps: if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX): print(f" Fetching sub-sitemap {sub} …") - xml += fetch(sub) + chunk = fetch(sub) + print(f" → {len(chunk)} bytes") + xml += chunk time.sleep(0.5) ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)] if ids: ids = sorted(set(ids)) print(f" Found {len(ids)} notification IDs (max={ids[-1]})") return ids + print(f" No notification IDs found in sitemap(s).") + + # Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered) + print("Trying EN notification listing page …") + html = fetch("https://www.zipair.net/en/notification/") + print(f" → {len(html)} bytes") + ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)] + if not ids: + # also try bare /notification/NNN pattern + ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)] + if ids: + ids = sorted(set(ids)) + print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})") + return ids return []