diff --git a/SKILL.md b/SKILL.md index 409d20b..5aa5286 100644 --- a/SKILL.md +++ b/SKILL.md @@ -45,7 +45,9 @@ allowed-tools: **辅助功能**(按需加载,不在主管道内): - 用户说"重新设置风格" → `读取: {skill_dir}/references/onboard.md` -- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md` +- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`。支持两种来源: + - **本地修改**(默认):用户在 `output/` 的 markdown 文件中修改 + - **微信草稿箱同步**:`python3 {skill_dir}/scripts/learn_edits.py --from-wechat`,自动从草稿箱拉回最新内容,与本地原文做纯文本 diff - 用户说"看看文章数据" → `读取: {skill_dir}/references/effect-review.md` - 用户说"检查一下"/"自检"/"这篇文章怎么样" → 对最近一篇生成的文章(或用户指定的文章)执行自检,输出生成报告: @@ -448,14 +450,10 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open stats: null ``` -**8.2 编辑建议**: - -如果用户想让 WeWrite 学习自己的修改风格,需要在 WeWrite 输出的 markdown 文件(`output/` 目录)中直接修改,然后说"学习我的修改"。微信草稿箱内的修改无法同步回 WeWrite,因此建议用户:先在本地 markdown 改完 → 说"学习我的修改" → 再重新推送。 - -**8.3 回复用户**: +**8.2 回复用户**: - 最终标题 + 2 备选 + 摘要 + 5 标签 + media_id -- 编辑建议:"文章有 2-3 个编辑锚点,建议在 `output/` 目录的 markdown 文件里加入你自己的话,改完后说**'学习我的修改'**再重新推送,效果更好。" +- 编辑建议:"文章有 2-3 个编辑锚点,建议加入你自己的话。你可以在本地 markdown 里改,也可以直接在微信草稿箱改——改完后说**'学习我的修改'**,WeWrite 都能学到你的风格。" **8.3 后续操作**: @@ -468,7 +466,7 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open | 看看有什么主题 | `python3 {skill_dir}/toolkit/cli.py gallery` | | 换成 XX 主题 | 重新渲染 | | 看看文章数据 | `读取: {skill_dir}/references/effect-review.md` | -| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。需要在 `output/` 目录的 markdown 文件中修改,不支持微信草稿箱内的修改同步学习 | +| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。支持本地 markdown 修改和微信草稿箱同步(`--from-wechat`) | | 做一个小绿书/图片帖 | `python3 {skill_dir}/toolkit/cli.py image-post img1.jpg img2.jpg -t "标题"` | | 检查一下 / 自检 / 这篇文章怎么样 | 生成报告(生成档案 + 质量检查,见辅助功能) | | 导入范文 / 建范文库 | `python3 {skill_dir}/scripts/extract_exemplar.py article.md` | diff --git a/scripts/learn_edits.py b/scripts/learn_edits.py index 1b6ac67..54119a3 100644 --- a/scripts/learn_edits.py +++ b/scripts/learn_edits.py @@ -16,7 
def markdown_to_plaintext(md: str) -> str:
    """Strip markdown formatting so the text can be diffed as plain text.

    Removes HTML comments (e.g. editing anchors), header markers,
    bold/italic markers, inline-code backticks, and image/link syntax
    (keeping the alt text / link text), then collapses runs of spaces,
    tabs and blank lines.

    Args:
        md: Markdown source text.

    Returns:
        The plain-text rendering with leading/trailing whitespace removed.
    """
    # Rules are applied in order. Images must be handled before links:
    # "![alt](url)" contains "[alt](url)", so running the link rule first
    # would leave a stray "!" behind and the image rule would never match.
    rules = (
        # HTML comments, possibly spanning several lines
        (r"<!--.*?-->", "", re.DOTALL),
        # Header markers at the start of a line
        (r"^#{1,6}\s+", "", re.MULTILINE),
        # Bold / italic markers
        (r"\*{1,3}(.*?)\*{1,3}", r"\1", 0),
        # Inline code
        (r"`([^`]+)`", r"\1", 0),
        # Image syntax ![alt](url) -> alt
        (r"!\[([^\]]*)\]\([^)]+\)", r"\1", 0),
        # Link syntax [text](url) -> text
        (r"\[([^\]]+)\]\([^)]+\)", r"\1", 0),
        # Collapse horizontal whitespace, then runs of blank lines
        (r"[ \t]+", " ", 0),
        (r"\n{3,}", "\n\n", 0),
    )
    text = md
    for pattern, replacement, flags in rules:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
def fetch_wechat_draft() -> tuple[str, str, str]:
    """
    Fetch the latest pushed draft from WeChat and pair it with its local file.

    Reads WeChat credentials from ``config.yaml`` and the article list from
    ``history.yaml`` (both under ``SKILL_DIR``), picks the last history entry
    that has a ``media_id``, locates the matching ``output/{date}-*.md`` file,
    downloads the draft HTML and converts both sides to plain text.

    Returns:
        ``(local_text, wechat_text, draft_path)``: plain text of the local
        markdown file, plain text of the WeChat draft, and the local file
        path as a string.

    Raises:
        FileNotFoundError: ``config.yaml``, ``history.yaml`` or the local
            draft file is missing.
        ValueError: credentials are missing, or no history entry has a
            ``media_id``.
    """
    # Load config
    config_path = SKILL_DIR / "config.yaml"
    if not config_path.exists():
        raise FileNotFoundError("config.yaml not found — need WeChat API credentials")

    # Explicit UTF-8 (same as load_text) so non-ASCII YAML content does not
    # depend on the platform's default encoding.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}

    # An empty file or a bare "wechat:" key parses to None; treat as missing.
    wechat = config.get("wechat") or {}
    appid = wechat.get("appid", "")
    secret = wechat.get("secret", "")
    if not appid or not secret:
        raise ValueError("config.yaml missing wechat.appid or wechat.secret")

    # Load history to find latest article with media_id
    history_path = SKILL_DIR / "history.yaml"
    if not history_path.exists():
        raise FileNotFoundError("history.yaml not found — no articles to compare")

    with open(history_path, encoding="utf-8") as f:
        history = yaml.safe_load(f) or []

    # Walk backwards: the last entry carrying a media_id is the most recent
    # article that was pushed to the draft box.
    latest = next(
        (
            article
            for article in reversed(history)
            if isinstance(article, dict) and article.get("media_id")
        ),
        None,
    )
    if not latest:
        raise ValueError("No article with media_id found in history.yaml")

    media_id = latest["media_id"]
    title = latest.get("title", "")

    # Find the local draft file
    date = latest.get("date", "")
    output_dir = SKILL_DIR / "output"
    draft_path = None
    if date:
        candidates = list(output_dir.glob(f"{date}-*.md"))
        if candidates:
            # glob order is arbitrary; when several drafts share a date pick
            # the most recently modified one so the choice is deterministic.
            # NOTE(review): matching on title would be more precise if the
            # filename scheme is known — confirm.
            draft_path = max(candidates, key=lambda p: p.stat().st_mtime)

    if not draft_path or not draft_path.exists():
        raise FileNotFoundError(
            f"Cannot find local draft for '{title}' (date={date}) in output/"
        )

    # Get access token and fetch draft from WeChat. The toolkit modules are
    # imported lazily so the other CLI modes work without them.
    toolkit_dir = str(SKILL_DIR / "toolkit")
    if toolkit_dir not in sys.path:
        sys.path.insert(0, toolkit_dir)
    from wechat_api import get_access_token
    from publisher import get_draft, html_to_plaintext

    token = get_access_token(appid, secret)
    html = get_draft(token, media_id)
    wechat_text = html_to_plaintext(html)

    # Convert local draft to plaintext
    local_md = load_text(str(draft_path))
    local_text = markdown_to_plaintext(local_md)

    print(f"本地文件: {draft_path}")
    print(f"微信草稿: media_id={media_id}")
    print(f"文章标题: {title}")
    print(f"本地字数: {len(local_text)}, 微信字数: {len(wechat_text)}")

    return local_text, wechat_text, str(draft_path)
def get_draft(access_token: str, media_id: str) -> str:
    """
    Get draft content from WeChat by media_id.

    API: POST https://api.weixin.qq.com/cgi-bin/draft/get

    Args:
        access_token: WeChat API access token.
        media_id: ID of the draft to fetch.

    Returns:
        The HTML ``content`` of the first article in the draft, or ``""``
        when that field is absent or null.

    Raises:
        ValueError: WeChat returned a non-zero ``errcode``, or the draft
            contains no articles.
        requests.RequestException: the request failed or timed out.
    """
    resp = requests.post(
        "https://api.weixin.qq.com/cgi-bin/draft/get",
        params={"access_token": access_token},
        json={"media_id": media_id},
        # Without a timeout a stalled connection would block the CLI forever.
        timeout=30,
    )
    # NOTE(review): the payload is expected to be UTF-8 JSON; pin the encoding
    # so decoding does not depend on the Content-Type header — confirm against
    # the WeChat API docs.
    resp.encoding = "utf-8"
    data = resp.json()

    errcode = data.get("errcode", 0)
    if errcode != 0:
        errmsg = data.get("errmsg", "unknown error")
        raise ValueError(f"WeChat get_draft error: errcode={errcode}, errmsg={errmsg}")

    articles = data.get("news_item", [])
    if not articles:
        raise ValueError(f"WeChat get_draft: no articles in draft {media_id}")

    # Normalise a missing or null content field to an empty string.
    return articles[0].get("content", "") or ""
def html_to_plaintext(html: str) -> str:
    """Extract plain text from WeChat HTML, stripping all tags and styles.

    Script/style blocks are dropped together with their contents, opening
    block-level tags become line breaks, every other tag is removed, HTML
    entities are decoded, and whitespace is collapsed.

    Args:
        html: HTML fragment (e.g. the ``content`` of a WeChat draft).

    Returns:
        Plain text with leading/trailing whitespace removed.
    """
    # Local imports: the parameter name shadows the stdlib ``html`` module,
    # so it is imported under an alias.
    import html as html_module
    import re

    # Remove script/style blocks including their contents. The closing tag
    # must be part of the match, otherwise only the opening tag is removed
    # and the CSS/JS source leaks into the text.
    text = re.sub(
        r"<(script|style)\b[^>]*>.*?</\1\s*>",
        "",
        html,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # Replace opening block-level tags with newlines. ``\b`` keeps "<p" from
    # matching unrelated tags such as <path> or <picture>; list items, table
    # rows and quotes are included so their text does not run together.
    text = re.sub(
        r"<(br|p|div|section|h[1-6]|li|tr|blockquote|pre)\b[^>]*>",
        "\n",
        text,
        flags=re.IGNORECASE,
    )
    # Remove all remaining tags
    text = re.sub(r"<[^>]+>", "", text)
    # Decode HTML entities; &nbsp; decodes to U+00A0, which would otherwise
    # show up as a spurious difference against a regular space.
    text = html_module.unescape(text).replace("\xa0", " ")
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()