From 2801757e342c2b9feb4c3ea0155217feebe3c420 Mon Sep 17 00:00:00 2001
From: isky <iskandershah_ss43@hotmail.com>
Date: Sun, 17 May 2026 16:47:08 +0800
Subject: [PATCH] fix: parse __NEXT_DATA__ JSON for notification slugs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 check_zipair.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/check_zipair.py b/check_zipair.py
index 63df00d..d2b17d5 100644
--- a/check_zipair.py
+++ b/check_zipair.py
@@ -81,17 +81,31 @@ def get_notification_slugs() -> list[str]:
     if not html:
         return []
 
-    # Match /en/notification/some-slug or /en/notification/123
-    # Try both double and single quotes, and also JSON-style escaped URLs
-    slugs = re.findall(r'["\'](/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/([^"\'?#/]+))["\']', html)
+    # Strategy 1: parse __NEXT_DATA__ JSON (Next.js SSR)
+    paths = []
+    m = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
+    if m:
+        try:
+            nd = json.loads(m.group(1))
+            # re-serialize the parsed data to one JSON string and regex-scan it for /notification/ paths
+            raw_json = json.dumps(nd)
+            paths = re.findall(r'(?:\\?/(?:en|ja|ko|th|zh-tw|zh-cn)\\?/notification\\?/([^"\\/?#]+))', raw_json)
+        except Exception as e:
+            print(f"  __NEXT_DATA__ parse error: {e}", file=sys.stderr)
 
-    # dedupe by slug
+    # Strategy 2: any href / quoted path in raw HTML
+    if not paths:
+        paths = [s for _, s in re.findall(
+            r'["\'](/(?:en|ja|ko|th|zh-tw|zh-cn)/notification/([^"\'?#/]+))["\']', html
+        )]
+
+    # dedupe, preserving order
     seen = set()
     result = []
-    for path, slug in slugs:
+    for slug in paths:
         if slug and slug not in seen:
             seen.add(slug)
-            result.append((slug, ZIPAIR_NOTIF_BASE + path))
+            result.append((slug, f"{ZIPAIR_NOTIF_BASE}/en/notification/{slug}"))
     print(f"  Found {len(result)} notification(s) on listing page.")
     return result  # list of (slug, full_url)