From e457b4463baf3d033e8dad4c8d1771a6bb16cf58 Mon Sep 17 00:00:00 2001
From: wangzhuc <wangzhuc@outlook.com>
Date: Wed, 1 Apr 2026 01:55:48 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20learn=5Ftheme=20=E2=80=94=20add=20HTML?=
 =?UTF-8?q?=20fetch/extract=20layer=20(fetch=5Farticle,=20extract=5Fstyles?=
 =?UTF-8?q?,=20parse=5Finline=5Fstyle)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 scripts/learn_theme.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/learn_theme.py b/scripts/learn_theme.py
index fc5de0a..93bc77a 100644
--- a/scripts/learn_theme.py
+++ b/scripts/learn_theme.py
@@ -158,13 +158,18 @@ _BROWSER_UA = (
 )
 
 
-def fetch_article(url: str) -> "BeautifulSoup tag":
+def fetch_article(url: str, timeout: int = 20) -> "BeautifulSoup tag":
     """Fetch a WeChat article, return the ``#js_content`` element.
 
     The article title is attached as ``content._wewrite_title`` (empty string
     if not found).  Exits with code 1 if ``#js_content`` is absent.
+
+    Parameters
+    ----------
+    url:     WeChat article URL (mp.weixin.qq.com/…)
+    timeout: HTTP request timeout in seconds (default 20).
     """
-    resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=20)
+    resp = requests.get(url, headers={"User-Agent": _BROWSER_UA}, timeout=timeout)
     resp.encoding = "utf-8"
     soup = BeautifulSoup(resp.text, "html.parser")