feat: learn_theme — add HTML fetch/extract layer (fetch_article, extract_styles, parse_inline_style)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 01:55:48 +08:00 · 2026-04-01 01:55:48 +08:00 · e457b4463b
commit e457b4463b
parent 1168768618
1 changed file with 7 additions and 2 deletions
--- a/scripts/learn_theme.py
+++ b/scripts/learn_theme.py
@@ -158,13 +158,18 @@ _BROWSER_UA = (
 )


-def fetch_article(url: str) -> "BeautifulSoup tag":
+def fetch_article(url: str, timeout: int = 20) -> "BeautifulSoup tag":
    """Fetch a WeChat article, return the ``#js_content`` element.

    The article title is attached as ``content._wewrite_title`` (empty string
    if not found).  Exits with code 1 if ``#js_content`` is absent.
+
+    Parameters
+    ----------
+    url:     WeChat article URL (mp.weixin.qq.com/…)
+    timeout: HTTP request timeout in seconds (default 20).
    """
-    resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=20)
+    resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=timeout)
    resp.encoding = "utf-8"
    soup = BeautifulSoup(resp.text, "html.parser")