fix: log sub-sitemap byte counts, add EN notification listing fallback
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 26s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 16:19:45 +08:00
parent 5df8c9f1ac
commit 751fd36147

View File

@@ -89,18 +89,33 @@ def get_notification_ids_from_sitemap() -> list[int]:
print(f"Fetching {sitemap_url}")
xml = fetch(sitemap_url)
if xml:
# sitemap index may reference sub-sitemaps — fetch those too
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
for sub in sub_sitemaps:
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
print(f" Fetching sub-sitemap {sub}")
xml += fetch(sub)
chunk = fetch(sub)
print(f"{len(chunk)} bytes")
xml += chunk
time.sleep(0.5)
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
if ids:
ids = sorted(set(ids))
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
return ids
print(f" No notification IDs found in sitemap(s).")
# Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered)
print("Trying EN notification listing page …")
html = fetch("https://www.zipair.net/en/notification/")
print(f"{len(html)} bytes")
ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)]
if not ids:
# also try bare /notification/NNN pattern
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
if ids:
ids = sorted(set(ids))
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})")
return ids
return []