fix: log sub-sitemap byte counts, add EN notification listing fallback
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 26s
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 26s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -89,18 +89,33 @@ def get_notification_ids_from_sitemap() -> list[int]:
|
||||
print(f"Fetching {sitemap_url} …")
|
||||
xml = fetch(sitemap_url)
|
||||
if xml:
|
||||
# sitemap index may reference sub-sitemaps — fetch those too
|
||||
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
||||
for sub in sub_sitemaps:
|
||||
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||
print(f" Fetching sub-sitemap {sub} …")
|
||||
xml += fetch(sub)
|
||||
chunk = fetch(sub)
|
||||
print(f" → {len(chunk)} bytes")
|
||||
xml += chunk
|
||||
time.sleep(0.5)
|
||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
||||
if ids:
|
||||
ids = sorted(set(ids))
|
||||
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
||||
return ids
|
||||
print(f" No notification IDs found in sitemap(s).")
|
||||
|
||||
# Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered)
|
||||
print("Trying EN notification listing page …")
|
||||
html = fetch("https://www.zipair.net/en/notification/")
|
||||
print(f" → {len(html)} bytes")
|
||||
ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)]
|
||||
if not ids:
|
||||
# also try bare /notification/NNN pattern
|
||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
|
||||
if ids:
|
||||
ids = sorted(set(ids))
|
||||
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})")
|
||||
return ids
|
||||
|
||||
return []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user