wewrite/scripts/fetch_hotspots.py
wangzhuc 1ab34fa450 Initial release — 公众号文章全流程 AI Skill
热点抓取 → 选题 → 框架 → 写作 → SEO → 视觉AI → 排版 → 微信草稿箱,
一句话触发完整流程。适用于 Claude Code skill 格式。

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 22:16:18 +08:00

167 lines
5 KiB
Python

#!/usr/bin/env python3
"""
Fetch trending topics from multiple Chinese platforms.
Sources (all attempted in parallel, results merged and deduplicated):
1. Weibo hot search (weibo.com/ajax/side/hotSearch)
2. Toutiao hot board (toutiao.com/hot-event/hot-board)
3. Baidu hot search (top.baidu.com/api/board)
Usage:
python3 fetch_hotspots.py --limit 20
"""
import argparse
import json
import sys
from datetime import datetime, timezone, timedelta
import requests
# Timeout handed to every requests.get() call in this file.
TIMEOUT = 10

# Desktop-Chrome-style User-Agent, assembled from its three product tokens.
_USER_AGENT = " ".join((
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/120.0.0.0 Safari/537.36",
))

# Base headers shared by all fetchers; individual fetchers may extend them.
HEADERS = {
    "User-Agent": _USER_AGENT,
    "Accept": "application/json, text/plain, */*",
}
def fetch_weibo() -> list[dict]:
    """Fetch the Weibo hot-search list.

    Returns:
        One dict per trending entry with the keys ``title``, ``source``,
        ``hot``, ``url`` and ``description``. Entries without a ``note``
        are skipped. On any request or parsing failure a warning is
        written to stderr and an empty list is returned, so this source
        is best-effort and never raises to the caller.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    try:
        resp = requests.get(
            "https://weibo.com/ajax/side/hotSearch",
            headers={**HEADERS, "Referer": "https://weibo.com/"},
            timeout=TIMEOUT,
        )
        # Report HTTP errors as such rather than as an opaque JSON decode error.
        resp.raise_for_status()
        data = resp.json()
        items = []
        for entry in data.get("data", {}).get("realtime", []):
            # One malformed element must not discard the whole source.
            if not isinstance(entry, dict):
                continue
            note = entry.get("note", "")
            if not note:
                continue
            # Coerce the heat value here so a non-numeric payload cannot
            # break numeric sorting later on.
            try:
                hot = int(entry.get("num", 0) or 0)
            except (TypeError, ValueError):
                hot = 0
            # Percent-encode the title: spaces, '&', '#' or '%' inside it
            # would otherwise corrupt the query string. The literal %23
            # on both sides is the encoded '#' of the "#topic#" syntax.
            topic = quote(str(note), safe="")
            items.append({
                "title": note,
                "source": "微博",
                "hot": hot,
                "url": f"https://s.weibo.com/weibo?q=%23{topic}%23",
                "description": entry.get("label_name", ""),
            })
        return items
    except Exception as e:
        print(f"[warn] weibo failed: {e}", file=sys.stderr)
        return []
def fetch_toutiao() -> list[dict]:
    """Fetch the Toutiao hot board.

    Returns:
        One dict per trending entry with the keys ``title``, ``source``,
        ``hot``, ``url`` and ``description`` (always empty for this
        source). Entries without a ``Title`` are skipped. On any request
        or parsing failure a warning is written to stderr and an empty
        list is returned, so this source never raises to the caller.
    """
    try:
        resp = requests.get(
            "https://www.toutiao.com/hot-event/hot-board/?origin=toutiao_pc",
            headers=HEADERS,
            timeout=TIMEOUT,
        )
        # Report HTTP errors as such rather than as an opaque JSON decode error.
        resp.raise_for_status()
        data = resp.json()
        items = []
        for entry in data.get("data", []):
            # One malformed element must not discard the whole source.
            if not isinstance(entry, dict):
                continue
            title = entry.get("Title", "")
            if not title:
                continue
            # A single unparseable heat value falls back to 0 instead of
            # raising and throwing away every item collected so far.
            try:
                hot = int(entry.get("HotValue", 0) or 0)
            except (TypeError, ValueError):
                hot = 0
            items.append({
                "title": title,
                "source": "今日头条",
                "hot": hot,
                "url": entry.get("Url", ""),
                "description": "",
            })
        return items
    except Exception as e:
        print(f"[warn] toutiao failed: {e}", file=sys.stderr)
        return []
def fetch_baidu() -> list[dict]:
    """Fetch the Baidu realtime hot-search board.

    Returns:
        One dict per trending entry with the keys ``title``, ``source``,
        ``hot``, ``url`` and ``description`` (always empty for this
        source). Entries without a ``word`` are skipped. On any request
        or parsing failure a warning is written to stderr and an empty
        list is returned, so this source never raises to the caller.
    """
    try:
        resp = requests.get(
            "https://top.baidu.com/api/board?platform=wise&tab=realtime",
            headers=HEADERS,
            timeout=TIMEOUT,
        )
        # Report HTTP errors as such rather than as an opaque JSON decode error.
        resp.raise_for_status()
        data = resp.json()
        items = []
        for card in data.get("data", {}).get("cards", []):
            top_content = card.get("content", [])
            if not top_content:
                continue
            # Expected layout: card["content"][0]["content"] is the entry list.
            # When the first element carries no nested list, fall back to
            # treating card["content"] itself as the entry list. The previous
            # check (dict vs. non-dict) could never take that fallback for
            # dict entries and silently produced nothing.
            first = top_content[0]
            if isinstance(first, dict) and isinstance(first.get("content"), list):
                entries = first["content"]
            else:
                entries = top_content
            for entry in entries:
                # One malformed element must not discard the whole source.
                if not isinstance(entry, dict):
                    continue
                word = entry.get("word", "")
                if not word:
                    continue
                # A single unparseable heat value falls back to 0 instead
                # of raising and throwing away every item collected so far.
                try:
                    hot = int(entry.get("hotScore", 0) or 0)
                except (TypeError, ValueError):
                    hot = 0
                items.append({
                    "title": word,
                    "source": "百度",
                    "hot": hot,
                    "url": entry.get("url", ""),
                    "description": "",
                })
        return items
    except Exception as e:
        print(f"[warn] baidu failed: {e}", file=sys.stderr)
        return []
def deduplicate(items: list[dict]) -> list[dict]:
    """Keep the first item for each distinct title, preserving input order.

    Titles are compared after stripping surrounding whitespace; items whose
    stripped title is empty are dropped. The returned items are the original
    dict objects, unmodified.
    """
    first_by_title: dict[str, dict] = {}
    for candidate in items:
        key = candidate["title"].strip()
        if not key:
            continue
        # setdefault keeps the earliest item; dicts preserve insertion order.
        first_by_title.setdefault(key, candidate)
    return list(first_by_title.values())
def main():
    """Fetch every source, merge the results, and print them as JSON on stdout.

    Command line:
        --limit N   maximum number of items to emit (default 20); negative
                    values are treated as 0.

    The emitted object has the keys ``timestamp`` (ISO 8601, UTC+8),
    ``sources`` (names that returned items), ``sources_failed`` (names that
    returned nothing), ``count`` and ``items``. An ``error`` key is added
    when no source produced any usable item.
    """
    # Local import keeps this entry point self-contained.
    from concurrent.futures import ThreadPoolExecutor

    parser = argparse.ArgumentParser(description="Fetch trending topics")
    parser.add_argument("--limit", type=int, default=20, help="Max items to return")
    args = parser.parse_args()
    # A negative slice bound would drop items from the tail instead of
    # capping the list, so clamp it.
    limit = max(args.limit, 0)

    fetchers = [("weibo", fetch_weibo), ("toutiao", fetch_toutiao), ("baidu", fetch_baidu)]
    # The fetchers are independent network calls; running them concurrently
    # bounds the worst case by one timeout instead of the sum of all of them.
    with ThreadPoolExecutor(max_workers=len(fetchers)) as pool:
        futures = [(name, pool.submit(fetcher)) for name, fetcher in fetchers]
        # Collect in submission order so the merged list is deterministic.
        results = [(name, future.result()) for name, future in futures]

    all_items = []
    sources_ok = []
    sources_fail = []
    for name, items in results:
        if items:
            sources_ok.append(name)
            all_items.extend(items)
        else:
            sources_fail.append(name)

    all_items = deduplicate(all_items)

    def hot_value(item: dict) -> int:
        """Return the item's heat as an int, or 0 when it is missing or invalid."""
        try:
            return int(item.get("hot", 0) or 0)
        except (TypeError, ValueError):
            return 0

    # Raw heat values are compared as-is. The sources use different scales,
    # so the resulting order is only an approximate cross-source ranking.
    all_items.sort(key=hot_value, reverse=True)
    # Decide "nothing usable" before truncating, so --limit 0 is not
    # misreported as a total failure.
    nothing_fetched = not all_items
    all_items = all_items[:limit]

    tz = timezone(timedelta(hours=8))
    output = {
        "timestamp": datetime.now(tz).isoformat(),
        "sources": sources_ok,
        "sources_failed": sources_fail,
        "count": len(all_items),
        "items": all_items,
    }
    if nothing_fetched:
        output["error"] = "All sources failed. SKILL.md should fall back to WebSearch."
    json.dump(output, sys.stdout, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()