debug: print sample sitemap URLs and notification HTML snippet
All checks were successful
ZIPAIR Singapore Sale Monitor / check (push) Successful in 23s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 16:21:41 +08:00
parent 751fd36147
commit 5cde054f71

View File

@@ -102,7 +102,11 @@ def get_notification_ids_from_sitemap() -> list[int]:
ids = sorted(set(ids))
print(f" Found {len(ids)} notification IDs (max={ids[-1]})")
return ids
print(f" No notification IDs found in sitemap(s).")
# Debug: show sample URLs from sitemap so we can see the real pattern
sample_urls = re.findall(r"<loc>(https?://[^<]{10,})</loc>", xml)[:5]
print(f" No notification IDs found. Sample URLs from sitemap:")
for u in sample_urls:
print(f" {u}")
# Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered)
print("Trying EN notification listing page …")
@@ -110,12 +114,16 @@ def get_notification_ids_from_sitemap() -> list[int]:
print(f"{len(html)} bytes")
ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)]
if not ids:
# also try bare /notification/NNN pattern
ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)]
if ids:
ids = sorted(set(ids))
print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})")
return ids
# Debug: show a snippet around "notification" in the HTML
lower = html.lower()
pos = lower.find("notification")
if pos != -1:
print(f" Sample HTML around 'notification': {repr(html[pos:pos+300])}")
return []