fix: log sub-sitemap byte counts, add EN notification listing fallback
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 26s
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 26s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -89,18 +89,33 @@ def get_notification_ids_from_sitemap() -> list[int]:
|
|||||||
print(f"Fetching {sitemap_url} …")
|
print(f"Fetching {sitemap_url} …")
|
||||||
xml = fetch(sitemap_url)
|
xml = fetch(sitemap_url)
|
||||||
if xml:
|
if xml:
|
||||||
# sitemap index may reference sub-sitemaps — fetch those too
|
|
||||||
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
sub_sitemaps = re.findall(r"<loc>(https?://[^<]*sitemap[^<]*)</loc>", xml)
|
||||||
for sub in sub_sitemaps:
|
for sub in sub_sitemaps:
|
||||||
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
if sub not in (ZIPAIR_SITEMAP, ZIPAIR_SITEMAP_INDEX):
|
||||||
print(f" Fetching sub-sitemap {sub} …")
|
print(f" Fetching sub-sitemap {sub} …")
|
||||||
xml += fetch(sub)
|
chunk = fetch(sub)
|
||||||
|
print(f" → {len(chunk)} bytes")
|
||||||
|
xml += chunk
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
ids = [int(m) for m in re.findall(r"/notification/(\d+)", xml)]
|
||||||
if ids:
|
if ids:
|
||||||
ids = sorted(set(ids))
|
ids = sorted(set(ids))
|
||||||
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
|
||||||
return ids
|
return ids
|
||||||
|
print(f" No notification IDs found in sitemap(s).")
|
||||||
|
|
||||||
|
# Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered)
|
||||||
|
print("Trying EN notification listing page …")
|
||||||
|
html = fetch("https://www.zipair.net/en/notification/")
|
||||||
|
print(f" → {len(html)} bytes")
|
||||||
|
ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)]
|
||||||
|
if not ids:
|
||||||
|
# also try bare /notification/NNN pattern
|
||||||
|
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
|
||||||
|
if ids:
|
||||||
|
ids = sorted(set(ids))
|
||||||
|
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})")
|
||||||
|
return ids
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user