diff --git a/check_zipair.py b/check_zipair.py index 67bb347..94df330 100644 --- a/check_zipair.py +++ b/check_zipair.py @@ -102,7 +102,11 @@ def get_notification_ids_from_sitemap() -> list[int]: ids = sorted(set(ids)) print(f" Found {len(ids)} notification IDs (max={ids[-1]})") return ids - print(f" No notification IDs found in sitemap(s).") + # Debug: show sample URLs from sitemap so we can see the real pattern + sample_urls = re.findall(r"(https?://[^<]{10,})", xml)[:5] + print(f" No notification IDs found. Sample URLs from sitemap:") + for u in sample_urls: + print(f" {u}") # Fallback: EN notification listing page (IDs may be in HTML even if JS-rendered) print("Trying EN notification listing page …") @@ -110,12 +114,16 @@ def get_notification_ids_from_sitemap() -> list[int]: print(f" → {len(html)} bytes") ids = [int(m) for m in re.findall(r"/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/(\d+)", html)] if not ids: - # also try bare /notification/NNN pattern ids = [int(m) for m in re.findall(r"/notification/(\d+)", html)] if ids: ids = sorted(set(ids)) print(f" Found {len(ids)} notification IDs from listing page (max={ids[-1]})") return ids + # Debug: show a snippet around "notification" in the HTML + lower = html.lower() + pos = lower.find("notification") + if pos != -1: + print(f" Sample HTML around 'notification': {repr(html[pos:pos+300])}") return []