feat: sync edits from WeChat draft box for learn-edits

- publisher.py: add get_draft() to fetch draft content by media_id,
  add html_to_plaintext() for HTML→text conversion
- learn_edits.py: add --from-wechat flag that auto-fetches latest draft
  from WeChat, converts both sides to plaintext, and diffs
- learn_edits.py: add markdown_to_plaintext() for local file conversion
- SKILL.md: update edit workflow — both local and WeChat edits supported

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
wangzhuc 2026-03-31 15:46:51 +08:00
parent 973aad5951
commit 2773a8bb9b
3 changed files with 164 additions and 10 deletions

View file

@ -45,7 +45,9 @@ allowed-tools:
**辅助功能**(按需加载,不在主管道内):
- 用户说"重新设置风格" → `读取: {skill_dir}/references/onboard.md`
- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`
- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`。支持两种来源:
- **本地修改**(默认):用户在 `output/` 的 markdown 文件中修改
- **微信草稿箱同步**:`python3 {skill_dir}/scripts/learn_edits.py --from-wechat`,自动从草稿箱拉回最新内容,与本地原文做纯文本 diff
- 用户说"看看文章数据" → `读取: {skill_dir}/references/effect-review.md`
- 用户说"检查一下"/"自检"/"这篇文章怎么样" → 对最近一篇生成的文章(或用户指定的文章)执行自检,输出生成报告:
@ -448,14 +450,10 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
stats: null
```
**8.2 编辑建议**
如果用户想让 WeWrite 学习自己的修改风格,需要在 WeWrite 输出的 markdown 文件(`output/` 目录)中直接修改,然后说"学习我的修改"。微信草稿箱内的修改无法同步回 WeWrite,因此建议用户先在本地 markdown 改完 → 说"学习我的修改" → 再重新推送。
**8.3 回复用户**
**8.2 回复用户**
- 最终标题 + 2 备选 + 摘要 + 5 标签 + media_id
- 编辑建议:"文章有 2-3 个编辑锚点,建议在 `output/` 目录的 markdown 文件里加入你自己的话,改完后说**'学习我的修改'**再重新推送,效果更好。"
- 编辑建议:"文章有 2-3 个编辑锚点,建议加入你自己的话。你可以在本地 markdown 里改,也可以直接在微信草稿箱改——改完后说**'学习我的修改'**,WeWrite 都能学到你的风格。"
**8.3 后续操作**
@ -468,7 +466,7 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
| 看看有什么主题 | `python3 {skill_dir}/toolkit/cli.py gallery` |
| 换成 XX 主题 | 重新渲染 |
| 看看文章数据 | `读取: {skill_dir}/references/effect-review.md` |
| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`(需要在 `output/` 目录的 markdown 文件中修改,不支持微信草稿箱内的修改同步学习) |
| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`(支持本地 markdown 修改和微信草稿箱同步(`--from-wechat`)) |
| 做一个小绿书/图片帖 | `python3 {skill_dir}/toolkit/cli.py image-post img1.jpg img2.jpg -t "标题"` |
| 检查一下 / 自检 / 这篇文章怎么样 | 生成报告(生成档案 + 质量检查,见辅助功能) |
| 导入范文 / 建范文库 | `python3 {skill_dir}/scripts/extract_exemplar.py article.md` |

View file

@ -16,7 +16,8 @@ The Agent uses this to write structured playbook.md rules.
Usage:
python3 learn_edits.py --draft path/to/draft.md --final path/to/final.md
python3 learn_edits.py --summarize # all lessons with confidence
python3 learn_edits.py --from-wechat # auto-sync from WeChat draft box
python3 learn_edits.py --summarize # all lessons with confidence
python3 learn_edits.py --summarize --json # JSON output for agent
"""
@ -48,6 +49,102 @@ def load_text(path: str) -> str:
return Path(path).read_text(encoding="utf-8")
def markdown_to_plaintext(md: str) -> str:
    """Strip markdown formatting to plain text for diff comparison.

    Removes HTML comments, header markers, bold/italic markers, inline-code
    backticks, and image/link syntax (keeping the alt text / link text),
    then collapses runs of spaces/tabs and excess blank lines.

    Args:
        md: Markdown source text.

    Returns:
        The stripped text with leading/trailing whitespace removed.
    """
    text = md
    # Remove HTML comments (editing anchors etc.)
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    # Remove markdown header markers
    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
    # Remove bold/italic markers
    text = re.sub(r"\*{1,3}(.*?)\*{1,3}", r"\1", text)
    # Remove inline code
    text = re.sub(r"`([^`]+)`", r"\1", text)
    # Remove image syntax ![alt](url) → alt.
    # This must run BEFORE the link rule: the link pattern also matches the
    # "[alt](url)" tail of an image and would leave a stray "!" behind.
    text = re.sub(r"!\[([^\]]*)\]\([^)]+\)", r"\1", text)
    # Remove link syntax [text](url) → text
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def fetch_wechat_draft() -> tuple[str, str, str]:
    """
    Fetch the latest draft from WeChat and find the corresponding local file.

    Reads credentials from ``config.yaml`` and the article list from
    ``history.yaml`` (both under ``SKILL_DIR``), picks the last history entry
    that has a ``media_id``, locates a local ``output/{date}-*.md`` file for
    it, downloads the draft HTML and converts both sides to plain text.

    Returns:
        (local_plaintext, wechat_plaintext, draft_path) — the local markdown
        as plain text, the WeChat draft as plain text, and the path of the
        local markdown file.

    Raises:
        FileNotFoundError: config.yaml, history.yaml or the local draft file
            is missing.
        ValueError: credentials are missing, or no history entry has a
            media_id.
    """
    # Load config
    config_path = SKILL_DIR / "config.yaml"
    if not config_path.exists():
        raise FileNotFoundError("config.yaml not found — need WeChat API credentials")
    # Explicit UTF-8: the platform default encoding may not be UTF-8.
    with open(config_path, encoding="utf-8") as f:
        # An empty file parses to None; fall back to {} so the credential
        # check below reports the problem instead of an AttributeError.
        config = yaml.safe_load(f) or {}

    wechat = config.get("wechat") or {}
    appid = wechat.get("appid", "")
    secret = wechat.get("secret", "")
    if not appid or not secret:
        raise ValueError("config.yaml missing wechat.appid or wechat.secret")

    # Load history to find latest article with media_id
    history_path = SKILL_DIR / "history.yaml"
    if not history_path.exists():
        raise FileNotFoundError("history.yaml not found — no articles to compare")
    with open(history_path, encoding="utf-8") as f:
        history = yaml.safe_load(f) or []

    # Find most recent article with media_id
    # (assumes history.yaml is a list ordered oldest → newest — TODO confirm)
    latest = next(
        (article for article in reversed(history) if article.get("media_id")),
        None,
    )
    if not latest:
        raise ValueError("No article with media_id found in history.yaml")

    media_id = latest["media_id"]
    title = latest.get("title", "")

    # Find the local draft file
    date = latest.get("date", "")
    output_dir = SKILL_DIR / "output"
    draft_path = None
    if date:
        # glob() order is arbitrary; sort so the choice is deterministic when
        # several files share the same date prefix.
        # NOTE(review): with multiple same-date articles this may still not
        # be the file that matches `title` — confirm the naming scheme.
        candidates = sorted(output_dir.glob(f"{date}-*.md"))
        if candidates:
            draft_path = candidates[0]

    if not draft_path or not draft_path.exists():
        raise FileNotFoundError(
            f"Cannot find local draft for '{title}' (date={date}) in output/"
        )

    # Get access token and fetch draft from WeChat.
    # The toolkit modules are imported lazily after extending sys.path.
    sys.path.insert(0, str(SKILL_DIR / "toolkit"))
    from wechat_api import get_access_token
    from publisher import get_draft, html_to_plaintext

    token = get_access_token(appid, secret)
    html = get_draft(token, media_id)
    wechat_text = html_to_plaintext(html)

    # Convert local draft to plaintext
    local_md = load_text(str(draft_path))
    local_text = markdown_to_plaintext(local_md)

    print(f"本地文件: {draft_path}")
    print(f"微信草稿: media_id={media_id}")
    print(f"文章标题: {title}")
    print(f"本地字数: {len(local_text)}, 微信字数: {len(wechat_text)}")

    return local_text, wechat_text, str(draft_path)
def split_sections(text: str) -> list[dict]:
"""Split markdown into sections by H2 headers."""
sections = []
@ -276,6 +373,8 @@ def main():
parser = argparse.ArgumentParser(description="Learn from human edits")
parser.add_argument("--draft", help="Path to AI draft")
parser.add_argument("--final", help="Path to human-edited final")
parser.add_argument("--from-wechat", action="store_true",
help="Auto-fetch edited version from WeChat draft box")
parser.add_argument("--summarize", action="store_true", help="Summarize all lessons")
parser.add_argument("--json", action="store_true", help="JSON output (with --summarize)")
args = parser.parse_args()
@ -284,8 +383,22 @@ def main():
summarize_lessons(as_json=args.json)
return
if args.from_wechat:
local_text, wechat_text, draft_path = fetch_wechat_draft()
if local_text == wechat_text:
print("\n微信草稿与本地文件内容一致,没有修改。")
return
diff_result = compute_diff(local_text, wechat_text)
# Save with special marker for wechat source
lesson_file = save_lesson(diff_result, draft_path, f"wechat:{draft_path}")
print(f"\nLesson saved to: {lesson_file}")
print(f"\n检测到 {diff_result['lines_added']} 处新增, {diff_result['lines_deleted']} 处删除")
print(f"字数变化: {diff_result['char_diff']:+d}")
print(f"\nAgent 接下来读取 {draft_path} 和微信草稿内容,分析修改模式并写入 {lesson_file}")
return
if not args.draft or not args.final:
print("Error: --draft and --final required", file=sys.stderr)
print("Error: --draft and --final required (or use --from-wechat)", file=sys.stderr)
sys.exit(1)
draft = load_text(args.draft)

View file

@ -68,6 +68,49 @@ def create_draft(
return DraftResult(media_id=data["media_id"])
def get_draft(access_token: str, media_id: str, timeout: float = 30.0) -> str:
    """
    Get draft content from WeChat by media_id.

    API: POST https://api.weixin.qq.com/cgi-bin/draft/get

    Args:
        access_token: WeChat API access token, sent as a query parameter.
        media_id: ID of the draft to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely.

    Returns:
        The HTML content of the first article in the draft ("" if the
        article has no ``content`` field).

    Raises:
        ValueError: the response carries a non-zero ``errcode`` or contains
            no articles.
    """
    resp = requests.post(
        "https://api.weixin.qq.com/cgi-bin/draft/get",
        params={"access_token": access_token},
        json={"media_id": media_id},
        timeout=timeout,
    )
    # NOTE(review): if Chinese text comes back garbled, check which encoding
    # requests picked for this response — not verifiable from here.
    data = resp.json()

    # A missing errcode is treated as success.
    errcode = data.get("errcode", 0)
    if errcode != 0:
        errmsg = data.get("errmsg", "unknown error")
        raise ValueError(f"WeChat get_draft error: errcode={errcode}, errmsg={errmsg}")

    articles = data.get("news_item", [])
    if not articles:
        raise ValueError(f"WeChat get_draft: no articles in draft {media_id}")

    return articles[0].get("content", "")
def html_to_plaintext(html: str) -> str:
    """Extract plain text from WeChat HTML, stripping all tags and styles.

    Script/style blocks are dropped, block-level opening tags become line
    breaks, all remaining tags are removed, HTML entities are decoded, and
    whitespace is collapsed.

    Args:
        html: HTML markup.

    Returns:
        The plain text with leading/trailing whitespace removed.
    """
    import re
    # The parameter shadows the stdlib module name, hence the alias.
    import html as html_module

    # Remove script/style blocks
    text = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
    # Replace block-level tags with newlines. The \b keeps e.g. "<p" from
    # matching unrelated tags such as <path>; li/tr/blockquote/pre are
    # included so list items and table rows do not run together.
    text = re.sub(
        r"<(br|p|div|section|h[1-6]|li|tr|blockquote|pre)\b[^>]*>",
        "\n",
        text,
        flags=re.IGNORECASE,
    )
    # Remove all remaining tags
    text = re.sub(r"<[^>]+>", "", text)
    # Decode HTML entities
    text = html_module.unescape(text)
    # &nbsp; decodes to U+00A0, which the space/tab collapse below would not
    # touch; normalise it so it does not show up as a spurious difference.
    text = text.replace("\xa0", " ")
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def create_image_post(
access_token: str,
title: str,