feat: sync edits from WeChat draft box for learn-edits

- publisher.py: add get_draft() to fetch draft content by media_id, add html_to_plaintext() for HTML→text conversion - learn_edits.py: add --from-wechat flag that auto-fetches latest draft from WeChat, converts both sides to plaintext, and diffs - learn_edits.py: add markdown_to_plaintext() for local file conversion - SKILL.md: update edit workflow — both local and WeChat edits supported Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 15:46:51 +08:00 · 2026-03-31 15:46:51 +08:00 · 2773a8bb9b
commit 2773a8bb9b
parent 973aad5951
3 changed files with 164 additions and 10 deletions
--- a/SKILL.md
+++ b/SKILL.md
@ -45,7 +45,9 @@ allowed-tools:

 **辅助功能**（按需加载，不在主管道内）：
 - 用户说"重新设置风格" → `读取: {skill_dir}/references/onboard.md`
- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`
+- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`。支持两种来源：
+  - **本地修改**（默认）：用户在 `output/` 的 markdown 文件中修改
+  - **微信草稿箱同步**：`python3 {skill_dir}/scripts/learn_edits.py --from-wechat`，自动从草稿箱拉回最新内容，与本地原文做纯文本 diff
 - 用户说"看看文章数据" → `读取: {skill_dir}/references/effect-review.md`
 - 用户说"检查一下"/"自检"/"这篇文章怎么样" → 对最近一篇生成的文章（或用户指定的文章）执行自检，输出生成报告：

@ -448,14 +450,10 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
  stats: null
 ```

-**8.2 编辑建议**：
-
-如果用户想让 WeWrite 学习自己的修改风格，需要在 WeWrite 输出的 markdown 文件（`output/` 目录）中直接修改，然后说"学习我的修改"。微信草稿箱内的修改无法同步回 WeWrite，因此建议用户：先在本地 markdown 改完 → 说"学习我的修改" → 再重新推送。
-
-**8.3 回复用户**：
+**8.2 回复用户**：

 - 最终标题 + 2 备选 + 摘要 + 5 标签 + media_id
- 编辑建议："文章有 2-3 个编辑锚点，建议在 `output/` 目录的 markdown 文件里加入你自己的话，改完后说**'学习我的修改'**再重新推送，效果更好。"
+- 编辑建议："文章有 2-3 个编辑锚点，建议加入你自己的话。你可以在本地 markdown 里改，也可以直接在微信草稿箱改——改完后说**'学习我的修改'**，WeWrite 都能学到你的风格。"

 **8.3 后续操作**：

@ -468,7 +466,7 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
 | 看看有什么主题 | `python3 {skill_dir}/toolkit/cli.py gallery` |
 | 换成 XX 主题 | 重新渲染 |
 | 看看文章数据 | `读取: {skill_dir}/references/effect-review.md` |
-| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。需要在 `output/` 目录的 markdown 文件中修改，不支持微信草稿箱内的修改同步学习 |
+| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。支持本地 markdown 修改和微信草稿箱同步（`--from-wechat`） |
 | 做一个小绿书/图片帖 | `python3 {skill_dir}/toolkit/cli.py image-post img1.jpg img2.jpg -t "标题"` |
 | 检查一下 / 自检 / 这篇文章怎么样 | 生成报告（生成档案 + 质量检查，见辅助功能） |
 | 导入范文 / 建范文库 | `python3 {skill_dir}/scripts/extract_exemplar.py article.md` |
--- a/scripts/learn_edits.py
+++ b/scripts/learn_edits.py
@ -16,7 +16,8 @@ The Agent uses this to write structured playbook.md rules.

 Usage:
    python3 learn_edits.py --draft path/to/draft.md --final path/to/final.md
-    python3 learn_edits.py --summarize          # all lessons with confidence
+    python3 learn_edits.py --from-wechat         # auto-sync from WeChat draft box
+    python3 learn_edits.py --summarize           # all lessons with confidence
    python3 learn_edits.py --summarize --json    # JSON output for agent
 """

@ -48,6 +49,102 @@ def load_text(path: str) -> str:
    return Path(path).read_text(encoding="utf-8")


+def markdown_to_plaintext(md: str) -> str:
+    """Strip markdown formatting to plain text for diff comparison.
+
+    Args:
+        md: Markdown source text.
+
+    Returns:
+        The text with HTML comments, header markers, emphasis markers,
+        inline-code backticks and link/image syntax removed (link text and
+        image alt text are kept), runs of spaces/tabs collapsed, spaces
+        next to line breaks dropped, and 3+ newlines reduced to 2.
+    """
+    text = md
+    # Remove HTML comments (editing anchors etc.)
+    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
+    # Remove markdown headers markers
+    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
+    # Remove bold/italic markers
+    text = re.sub(r"\*{1,3}(.*?)\*{1,3}", r"\1", text)
+    # Remove inline code
+    text = re.sub(r"`([^`]+)`", r"\1", text)
+    # Images must be handled BEFORE links: the link pattern also matches the
+    # "[alt](url)" tail of "![alt](url)" and would leave a stray "!" behind.
+    text = re.sub(r"!\[([^\]]*)\]\([^)]+\)", r"\1", text)
+    # Remove link syntax [text](url) → text
+    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
+    # Collapse whitespace
+    text = re.sub(r"[ \t]+", " ", text)
+    # Drop spaces hugging a line break (markdown hard breaks, space-only
+    # lines) so they cannot block the blank-line collapse below.
+    text = re.sub(r" ?\n ?", "\n", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
+def fetch_wechat_draft() -> tuple[str, str, str]:
+    """
+    Fetch the latest draft from WeChat and find the corresponding local file.
+
+    Reads WeChat credentials from config.yaml, picks the last history.yaml
+    entry that has a media_id, locates the local markdown in output/ by that
+    entry's date, downloads the draft HTML and converts both sides to plain
+    text.
+
+    Returns:
+        (local_plaintext, wechat_plaintext, draft_path). The local file is
+        the "draft" side of the comparison, the WeChat content the "final".
+
+    Raises:
+        FileNotFoundError: config.yaml, history.yaml or the local draft is
+            missing.
+        ValueError: credentials are missing, or no history entry has a
+            media_id.
+    """
+    # Load config
+    config_path = SKILL_DIR / "config.yaml"
+    if not config_path.exists():
+        raise FileNotFoundError("config.yaml not found — need WeChat API credentials")
+
+    # Explicit UTF-8, matching load_text(); the platform default encoding
+    # may not be UTF-8.
+    with open(config_path, encoding="utf-8") as f:
+        # An empty YAML file parses to None; fall back to an empty mapping.
+        config = yaml.safe_load(f) or {}
+
+    wechat = config.get("wechat") or {}
+    appid = wechat.get("appid", "")
+    secret = wechat.get("secret", "")
+    if not appid or not secret:
+        raise ValueError("config.yaml missing wechat.appid or wechat.secret")
+
+    # Load history to find latest article with media_id
+    history_path = SKILL_DIR / "history.yaml"
+    if not history_path.exists():
+        raise FileNotFoundError("history.yaml not found — no articles to compare")
+
+    with open(history_path, encoding="utf-8") as f:
+        history = yaml.safe_load(f) or []
+
+    # Find most recent article with media_id (history is scanned from the end)
+    latest = next(
+        (article for article in reversed(history) if article.get("media_id")),
+        None,
+    )
+    if not latest:
+        raise ValueError("No article with media_id found in history.yaml")
+
+    media_id = latest["media_id"]
+    title = latest.get("title", "")
+
+    # Find the local draft file
+    date = latest.get("date", "")
+    output_dir = SKILL_DIR / "output"
+    draft_path = None
+    if date:
+        # glob() yields files in arbitrary order; sort so the same file is
+        # chosen on every run.
+        # NOTE(review): if several articles share a date, the first match may
+        # not be the one that was pushed — confirm the naming scheme.
+        candidates = sorted(output_dir.glob(f"{date}-*.md"))
+        if candidates:
+            draft_path = candidates[0]
+
+    if not draft_path or not draft_path.exists():
+        raise FileNotFoundError(
+            f"Cannot find local draft for '{title}' (date={date}) in output/"
+        )
+
+    # Get access token and fetch draft from WeChat
+    toolkit_dir = str(SKILL_DIR / "toolkit")
+    if toolkit_dir not in sys.path:
+        # Avoid stacking duplicate entries when called more than once.
+        sys.path.insert(0, toolkit_dir)
+    from wechat_api import get_access_token
+    from publisher import get_draft, html_to_plaintext
+
+    token = get_access_token(appid, secret)
+    html = get_draft(token, media_id)
+    wechat_text = html_to_plaintext(html)
+
+    # Convert local draft to plaintext
+    local_md = load_text(str(draft_path))
+    local_text = markdown_to_plaintext(local_md)
+
+    print(f"本地文件: {draft_path}")
+    print(f"微信草稿: media_id={media_id}")
+    print(f"文章标题: {title}")
+    print(f"本地字数: {len(local_text)}, 微信字数: {len(wechat_text)}")
+
+    return local_text, wechat_text, str(draft_path)
+
+
 def split_sections(text: str) -> list[dict]:
    """Split markdown into sections by H2 headers."""
    sections = []
@ -276,6 +373,8 @@ def main():
    parser = argparse.ArgumentParser(description="Learn from human edits")
    parser.add_argument("--draft", help="Path to AI draft")
    parser.add_argument("--final", help="Path to human-edited final")
+    parser.add_argument("--from-wechat", action="store_true",
+                        help="Auto-fetch edited version from WeChat draft box")
    parser.add_argument("--summarize", action="store_true", help="Summarize all lessons")
    parser.add_argument("--json", action="store_true", help="JSON output (with --summarize)")
    args = parser.parse_args()
@ -284,8 +383,22 @@ def main():
        summarize_lessons(as_json=args.json)
        return

+    if args.from_wechat:
+        local_text, wechat_text, draft_path = fetch_wechat_draft()
+        if local_text == wechat_text:
+            print("\n微信草稿与本地文件内容一致，没有修改。")
+            return
+        diff_result = compute_diff(local_text, wechat_text)
+        # Save with special marker for wechat source
+        lesson_file = save_lesson(diff_result, draft_path, f"wechat:{draft_path}")
+        print(f"\nLesson saved to: {lesson_file}")
+        print(f"\n检测到 {diff_result['lines_added']} 处新增, {diff_result['lines_deleted']} 处删除")
+        print(f"字数变化: {diff_result['char_diff']:+d}")
+        print(f"\nAgent 接下来读取 {draft_path} 和微信草稿内容，分析修改模式并写入 {lesson_file}")
+        return
+
    if not args.draft or not args.final:
-        print("Error: --draft and --final required", file=sys.stderr)
+        print("Error: --draft and --final required (or use --from-wechat)", file=sys.stderr)
        sys.exit(1)

    draft = load_text(args.draft)
--- a/toolkit/publisher.py
+++ b/toolkit/publisher.py
@ -68,6 +68,49 @@ def create_draft(
    return DraftResult(media_id=data["media_id"])


+def get_draft(access_token: str, media_id: str) -> str:
+    """
+    Get draft content from WeChat by media_id.
+    API: POST https://api.weixin.qq.com/cgi-bin/draft/get
+
+    Args:
+        access_token: WeChat API access token, sent as a query parameter.
+        media_id: ID of the draft to fetch, sent in the JSON body.
+
+    Returns:
+        The HTML content of the first article ("" if it has no "content").
+
+    Raises:
+        ValueError: the API reports a non-zero errcode, or the draft has no
+            articles in "news_item".
+    """
+    resp = requests.post(
+        "https://api.weixin.qq.com/cgi-bin/draft/get",
+        params={"access_token": access_token},
+        json={"media_id": media_id},
+        # requests waits forever by default; bound the call so a stalled
+        # connection cannot hang the CLI.
+        timeout=30,
+    )
+    # Decode as UTF-8 explicitly rather than trusting the Content-Type header.
+    # NOTE(review): assumes the API always answers in UTF-8 — confirm.
+    resp.encoding = "utf-8"
+    data = resp.json()
+
+    errcode = data.get("errcode", 0)
+    if errcode != 0:
+        errmsg = data.get("errmsg", "unknown error")
+        raise ValueError(f"WeChat get_draft error: errcode={errcode}, errmsg={errmsg}")
+
+    articles = data.get("news_item", [])
+    if not articles:
+        raise ValueError(f"WeChat get_draft: no articles in draft {media_id}")
+
+    return articles[0].get("content", "")
+
+
+def html_to_plaintext(html: str) -> str:
+    """Extract plain text from WeChat HTML, stripping all tags and styles.
+
+    Args:
+        html: HTML fragment (e.g. the "content" field of a draft article).
+
+    Returns:
+        Text with script/style blocks and all tags removed, entities
+        decoded, a line break at each block-level opening tag, runs of
+        spaces/tabs collapsed, spaces next to line breaks dropped, and 3+
+        newlines reduced to 2.
+    """
+    import re
+    # Aliased because the `html` parameter shadows the stdlib module name.
+    import html as html_module
+
+    # Remove script/style blocks
+    text = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
+    # Replace block-level tags with newlines. `\b` stops "<p" from matching
+    # unrelated tags such as <path>; li/tr/blockquote/pre are included so
+    # list items and table rows do not run together.
+    text = re.sub(
+        r"<(br|p|div|section|h[1-6]|li|tr|blockquote|pre)\b[^>]*>",
+        "\n",
+        text,
+        flags=re.IGNORECASE,
+    )
+    # Remove all remaining tags
+    text = re.sub(r"<[^>]+>", "", text)
+    # Decode HTML entities
+    text = html_module.unescape(text)
+    # &nbsp; decodes to U+00A0, which the space/tab collapse below would miss.
+    text = text.replace("\xa0", " ")
+    # Collapse whitespace
+    text = re.sub(r"[ \t]+", " ", text)
+    # Drop spaces hugging a line break so space-only lines (e.g. "<p> </p>")
+    # cannot block the blank-line collapse.
+    text = re.sub(r" ?\n ?", "\n", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
 def create_image_post(
    access_token: str,
    title: str,