From 77e76077d8bea450c70ca8650d252cd051aeedb6 Mon Sep 17 00:00:00 2001
From: wangzhuc <wangzhuc@outlook.com>
Date: Wed, 1 Apr 2026 11:40:14 +0800
Subject: [PATCH] fix(learn-theme): HTTP error handling, DRY title extraction,
 text_light fix

- fetch_article: catch RequestException, add raise_for_status()
- Extract _attach_title() shared by fetch_article and _load_from_file
- text_light: only search foreground colors, not background values

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 scripts/learn_theme.py | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/scripts/learn_theme.py b/scripts/learn_theme.py
index 40df14a..26a3595 100644
--- a/scripts/learn_theme.py
+++ b/scripts/learn_theme.py
@@ -158,32 +158,40 @@ _BROWSER_UA = (
 )
 
 
+def _attach_title(soup, content) -> None:
+    """Find the article title in *soup* and stash it on *content*."""
+    title_tag = soup.find("h1", class_="rich_media_title") or soup.find(
+        "h1", id="activity-name"
+    )
+    content._wewrite_title = title_tag.get_text(strip=True) if title_tag else ""
+
+
 def fetch_article(url: str, timeout: int = 20) -> "BeautifulSoup tag":
     """Fetch a WeChat article, return the ``#js_content`` element.
 
     The article title is attached as ``content._wewrite_title`` (empty string
-    if not found).  Exits with code 1 if ``#js_content`` is absent.
+    if not found).  Exits with code 1 on network/HTTP errors or missing ``#js_content``.
 
     Parameters
     ----------
     url:     WeChat article URL (mp.weixin.qq.com/…)
     timeout: HTTP request timeout in seconds (default 20).
     """
-    resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=timeout)
+    try:
+        resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=timeout)
+        resp.raise_for_status()
+    except requests.exceptions.RequestException as exc:
+        print(f"Error: failed to fetch URL: {exc}", file=sys.stderr)
+        sys.exit(1)
     resp.encoding = "utf-8"
     soup = BeautifulSoup(resp.text, "html.parser")
 
     content = soup.find(id="js_content")
     if content is None:
-        print("Error: #js_content not found in the fetched page.", file=sys.stderr)
+        print("Error: #js_content not found — the page may require verification.", file=sys.stderr)
         sys.exit(1)
 
-    title_tag = soup.find("h1", class_="rich_media_title") or soup.find(
-        "h1", id="activity-name"
-    )
-    content._wewrite_title = (
-        title_tag.get_text(strip=True) if title_tag else ""
-    )
+    _attach_title(soup, content)
     return content
 
 
@@ -270,14 +278,13 @@ def analyze_styles(grouped: dict) -> dict:
         result["text"] = rgb_to_hex(raw_text)
 
     # --- text_light ------------------------------------------------------------
-    # Collect ALL colours from every element, look for grays in lightness 0.15-0.85
+    # Collect foreground colours only (not backgrounds) for text_light candidates
     all_colors = []
     for tag_styles in grouped.values():
         for d in tag_styles:
-            for prop in ("color", "background-color", "background"):
-                val = d.get(prop)
-                if val:
-                    all_colors.append(rgb_to_hex(val))
+            val = d.get("color")
+            if val:
+                all_colors.append(rgb_to_hex(val))
 
     text_light_candidates = [
         c for c in all_colors
@@ -445,10 +452,7 @@ def _load_from_file(path: str):
     if content is None:
         print(f"Error: #js_content not found in {path}", file=sys.stderr)
         sys.exit(1)
-    title_tag = soup.find("h1", class_="rich_media_title") or soup.find(
-        "h1", id="activity-name"
-    )
-    content._wewrite_title = title_tag.get_text(strip=True) if title_tag else ""
+    _attach_title(soup, content)
     return content