feat: sync edits from WeChat draft box for learn-edits
- publisher.py: add get_draft() to fetch draft content by media_id, and html_to_plaintext() for HTML→text conversion
- learn_edits.py: add --from-wechat flag that auto-fetches the latest draft from WeChat, converts both sides to plaintext, and diffs them
- learn_edits.py: add markdown_to_plaintext() for local file conversion
- SKILL.md: update edit workflow — both local and WeChat edits are supported

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
973aad5951
commit
2773a8bb9b
3 changed files with 164 additions and 10 deletions
14
SKILL.md
14
SKILL.md
|
|
@ -45,7 +45,9 @@ allowed-tools:
|
|||
|
||||
**辅助功能**(按需加载,不在主管道内):
|
||||
- 用户说"重新设置风格" → `读取: {skill_dir}/references/onboard.md`
|
||||
- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`
|
||||
- 用户说"学习我的修改" → `读取: {skill_dir}/references/learn-edits.md`。支持两种来源:
|
||||
- **本地修改**(默认):用户在 `output/` 的 markdown 文件中修改
|
||||
- **微信草稿箱同步**:`python3 {skill_dir}/scripts/learn_edits.py --from-wechat`,自动从草稿箱拉回最新内容,与本地原文做纯文本 diff
|
||||
- 用户说"看看文章数据" → `读取: {skill_dir}/references/effect-review.md`
|
||||
- 用户说"检查一下"/"自检"/"这篇文章怎么样" → 对最近一篇生成的文章(或用户指定的文章)执行自检,输出生成报告:
|
||||
|
||||
|
|
@ -448,14 +450,10 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
|
|||
stats: null
|
||||
```
|
||||
|
||||
**8.2 编辑建议**:
|
||||
|
||||
如果用户想让 WeWrite 学习自己的修改风格,需要在 WeWrite 输出的 markdown 文件(`output/` 目录)中直接修改,然后说"学习我的修改"。微信草稿箱内的修改无法同步回 WeWrite,因此建议用户:先在本地 markdown 改完 → 说"学习我的修改" → 再重新推送。
|
||||
|
||||
**8.3 回复用户**:
|
||||
**8.2 回复用户**:
|
||||
|
||||
- 最终标题 + 2 备选 + 摘要 + 5 标签 + media_id
|
||||
- 编辑建议:"文章有 2-3 个编辑锚点,建议在 `output/` 目录的 markdown 文件里加入你自己的话,改完后说**'学习我的修改'**再重新推送,效果更好。"
|
||||
- 编辑建议:"文章有 2-3 个编辑锚点,建议加入你自己的话。你可以在本地 markdown 里改,也可以直接在微信草稿箱改——改完后说**'学习我的修改'**,WeWrite 都能学到你的风格。"
|
||||
|
||||
**8.3 后续操作**:
|
||||
|
||||
|
|
@ -468,7 +466,7 @@ python3 {skill_dir}/toolkit/cli.py preview {markdown} --theme {theme} --no-open
|
|||
| 看看有什么主题 | `python3 {skill_dir}/toolkit/cli.py gallery` |
|
||||
| 换成 XX 主题 | 重新渲染 |
|
||||
| 看看文章数据 | `读取: {skill_dir}/references/effect-review.md` |
|
||||
| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。需要在 `output/` 目录的 markdown 文件中修改,不支持微信草稿箱内的修改同步学习 |
|
||||
| 学习我的修改 | `读取: {skill_dir}/references/learn-edits.md`。支持本地 markdown 修改和微信草稿箱同步(`--from-wechat`) |
|
||||
| 做一个小绿书/图片帖 | `python3 {skill_dir}/toolkit/cli.py image-post img1.jpg img2.jpg -t "标题"` |
|
||||
| 检查一下 / 自检 / 这篇文章怎么样 | 生成报告(生成档案 + 质量检查,见辅助功能) |
|
||||
| 导入范文 / 建范文库 | `python3 {skill_dir}/scripts/extract_exemplar.py article.md` |
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ The Agent uses this to write structured playbook.md rules.
|
|||
|
||||
Usage:
|
||||
python3 learn_edits.py --draft path/to/draft.md --final path/to/final.md
|
||||
python3 learn_edits.py --summarize # all lessons with confidence
|
||||
python3 learn_edits.py --from-wechat # auto-sync from WeChat draft box
|
||||
python3 learn_edits.py --summarize # all lessons with confidence
|
||||
python3 learn_edits.py --summarize --json # JSON output for agent
|
||||
"""
|
||||
|
||||
|
|
@ -48,6 +49,102 @@ def load_text(path: str) -> str:
|
|||
return Path(path).read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def markdown_to_plaintext(md: str) -> str:
    """Strip markdown formatting to plain text for diff comparison.

    Removes HTML comments, header markers, bold/italic markers, inline-code
    backticks, and image/link syntax (keeping the alt text / link text),
    then collapses runs of spaces/tabs and runs of 3+ newlines.

    Args:
        md: Markdown source text.

    Returns:
        The text with the formatting listed above removed, stripped of
        leading and trailing whitespace.
    """
    text = md
    # Remove HTML comments (editing anchors etc.)
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    # Remove markdown header markers
    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
    # Remove bold/italic markers
    text = re.sub(r"\*{1,3}(.*?)\*{1,3}", r"\1", text)
    # Remove inline code backticks
    text = re.sub(r"`([^`]+)`", r"\1", text)
    # Image syntax ![alt](url) → alt.  This must run BEFORE the link rule:
    # the link pattern also matches the "[alt](url)" tail of an image and
    # would otherwise leave a stray "!" in front of the alt text.
    text = re.sub(r"!\[([^\]]*)\]\([^)]+\)", r"\1", text)
    # Link syntax [text](url) → text
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
||||
|
||||
|
||||
def fetch_wechat_draft() -> tuple[str, str, str]:
    """
    Fetch the latest draft from WeChat and find the corresponding local file.

    Reads WeChat credentials from ``config.yaml`` and the article list from
    ``history.yaml`` (both under SKILL_DIR), picks the last history entry
    that has a ``media_id``, locates ``output/{date}-*.md`` for it, downloads
    the draft HTML from WeChat, and converts both sides to plain text.

    Returns:
        (local_text, wechat_text, draft_path) — plaintext of the local
        markdown file, plaintext of the WeChat draft, and the local file
        path as a string.

    Raises:
        FileNotFoundError: config.yaml, history.yaml or the local markdown
            file is missing.
        ValueError: credentials are missing, or no history entry has a
            media_id.
    """
    # Load config
    config_path = SKILL_DIR / "config.yaml"
    if not config_path.exists():
        raise FileNotFoundError("config.yaml not found — need WeChat API credentials")

    # Explicit UTF-8: the platform default encoding is locale-dependent.
    with open(config_path, encoding="utf-8") as f:
        # safe_load returns None for an empty file; treat that as "no keys".
        config = yaml.safe_load(f) or {}

    wechat = config.get("wechat") or {}
    appid = wechat.get("appid", "")
    secret = wechat.get("secret", "")
    if not appid or not secret:
        raise ValueError("config.yaml missing wechat.appid or wechat.secret")

    # Load history to find latest article with media_id
    history_path = SKILL_DIR / "history.yaml"
    if not history_path.exists():
        raise FileNotFoundError("history.yaml not found — no articles to compare")

    with open(history_path, encoding="utf-8") as f:
        history = yaml.safe_load(f) or []

    # Most recent article with a media_id.
    # NOTE(review): assumes history.yaml is a list ordered oldest → newest.
    latest = next(
        (article for article in reversed(history) if article.get("media_id")),
        None,
    )
    if not latest:
        raise ValueError("No article with media_id found in history.yaml")

    media_id = latest["media_id"]
    title = latest.get("title", "")

    # Find the local draft file
    date = latest.get("date", "")
    output_dir = SKILL_DIR / "output"
    # Sort so the choice is deterministic when several files share a date
    # (glob order is otherwise filesystem-dependent).
    candidates = sorted(output_dir.glob(f"{date}-*.md")) if date else []
    if not candidates:
        raise FileNotFoundError(
            f"Cannot find local draft for '{title}' (date={date}) in output/"
        )
    draft_path = candidates[0]

    # Get access token and fetch draft from WeChat.  The toolkit modules are
    # imported lazily so the rest of the script works without them.
    toolkit_dir = str(SKILL_DIR / "toolkit")
    if toolkit_dir not in sys.path:
        sys.path.insert(0, toolkit_dir)
    from wechat_api import get_access_token
    from publisher import get_draft, html_to_plaintext

    token = get_access_token(appid, secret)
    html = get_draft(token, media_id)
    wechat_text = html_to_plaintext(html)

    # Convert local draft to plaintext
    local_md = load_text(str(draft_path))
    local_text = markdown_to_plaintext(local_md)

    print(f"本地文件: {draft_path}")
    print(f"微信草稿: media_id={media_id}")
    print(f"文章标题: {title}")
    print(f"本地字数: {len(local_text)}, 微信字数: {len(wechat_text)}")

    return local_text, wechat_text, str(draft_path)
|
||||
|
||||
|
||||
def split_sections(text: str) -> list[dict]:
|
||||
"""Split markdown into sections by H2 headers."""
|
||||
sections = []
|
||||
|
|
@ -276,6 +373,8 @@ def main():
|
|||
parser = argparse.ArgumentParser(description="Learn from human edits")
|
||||
parser.add_argument("--draft", help="Path to AI draft")
|
||||
parser.add_argument("--final", help="Path to human-edited final")
|
||||
parser.add_argument("--from-wechat", action="store_true",
|
||||
help="Auto-fetch edited version from WeChat draft box")
|
||||
parser.add_argument("--summarize", action="store_true", help="Summarize all lessons")
|
||||
parser.add_argument("--json", action="store_true", help="JSON output (with --summarize)")
|
||||
args = parser.parse_args()
|
||||
|
|
@ -284,8 +383,22 @@ def main():
|
|||
summarize_lessons(as_json=args.json)
|
||||
return
|
||||
|
||||
if args.from_wechat:
|
||||
local_text, wechat_text, draft_path = fetch_wechat_draft()
|
||||
if local_text == wechat_text:
|
||||
print("\n微信草稿与本地文件内容一致,没有修改。")
|
||||
return
|
||||
diff_result = compute_diff(local_text, wechat_text)
|
||||
# Save with special marker for wechat source
|
||||
lesson_file = save_lesson(diff_result, draft_path, f"wechat:{draft_path}")
|
||||
print(f"\nLesson saved to: {lesson_file}")
|
||||
print(f"\n检测到 {diff_result['lines_added']} 处新增, {diff_result['lines_deleted']} 处删除")
|
||||
print(f"字数变化: {diff_result['char_diff']:+d}")
|
||||
print(f"\nAgent 接下来读取 {draft_path} 和微信草稿内容,分析修改模式并写入 {lesson_file}")
|
||||
return
|
||||
|
||||
if not args.draft or not args.final:
|
||||
print("Error: --draft and --final required", file=sys.stderr)
|
||||
print("Error: --draft and --final required (or use --from-wechat)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
draft = load_text(args.draft)
|
||||
|
|
|
|||
|
|
@ -68,6 +68,49 @@ def create_draft(
|
|||
return DraftResult(media_id=data["media_id"])
|
||||
|
||||
|
||||
def get_draft(access_token: str, media_id: str, timeout: float = 30.0) -> str:
    """
    Get draft content from WeChat by media_id.

    API: POST https://api.weixin.qq.com/cgi-bin/draft/get

    Args:
        access_token: WeChat API access token.
        media_id: media_id of the draft to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The HTML content of the first article in the draft ("" if the
        article has no ``content`` field).

    Raises:
        ValueError: WeChat returned a non-zero errcode, or the draft
            contains no articles.
    """
    resp = requests.post(
        "https://api.weixin.qq.com/cgi-bin/draft/get",
        params={"access_token": access_token},
        json={"media_id": media_id},
        # Without a timeout requests waits indefinitely on a stalled socket.
        timeout=timeout,
    )
    # JSON is UTF-8; pin the decoding so it does not depend on the charset
    # (or lack of one) in the response's Content-Type header.
    resp.encoding = "utf-8"
    data = resp.json()

    errcode = data.get("errcode", 0)
    if errcode != 0:
        errmsg = data.get("errmsg", "unknown error")
        raise ValueError(f"WeChat get_draft error: errcode={errcode}, errmsg={errmsg}")

    articles = data.get("news_item", [])
    if not articles:
        raise ValueError(f"WeChat get_draft: no articles in draft {media_id}")

    return articles[0].get("content", "")
|
||||
|
||||
|
||||
def html_to_plaintext(html: str) -> str:
    """Extract plain text from WeChat HTML, stripping all tags and styles.

    Script/style blocks are dropped, opening block-level tags become line
    breaks, remaining tags are removed, HTML entities are decoded, and
    whitespace is normalised so the result can be diffed against plain text.

    Args:
        html: HTML fragment.

    Returns:
        Plain text, stripped of leading and trailing whitespace.
    """
    # Local imports: the parameter name shadows the stdlib ``html`` module,
    # so it is imported under an alias.
    import re
    import html as html_module

    # Remove script/style blocks
    text = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
    # Replace block-level tags with newlines.  ``\b`` stops ``p`` from also
    # matching tags such as <pre> or <path>; <li> keeps list items apart.
    text = re.sub(r"<(br|p|div|section|li|h[1-6])\b[^>]*>", "\n", text, flags=re.IGNORECASE)
    # Remove all remaining tags
    text = re.sub(r"<[^>]+>", "", text)
    # Decode HTML entities (after tag removal, so an escaped "&lt;b&gt;"
    # survives as literal text)
    text = html_module.unescape(text)
    # &nbsp; decodes to U+00A0, which "[ \t]" does not match; normalise it
    # so it does not show up as a spurious difference.
    text = text.replace("\xa0", " ")
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    # Drop spaces hugging a newline so whitespace-only lines count as blank
    text = re.sub(r" ?\n ?", "\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
||||
|
||||
|
||||
def create_image_post(
|
||||
access_token: str,
|
||||
title: str,
|
||||
|
|
|
|||
Loading…
Reference in a new issue