fix: expand negative markers and vocabulary temperature word lists

NEGATIVE_MARKERS: 26 → 51 words
  Added: despair (绝望/迷茫/心累), deception (骗/忽悠/割韭菜/套路),
  failure (白费/黄了/凉了), self-deprecation (傻/天真/自嗨),
  sarcasm (呵呵/行吧/真服了), complaint (受够了/苦哈哈)

COLD_WORDS: 7 → 26 (技术栈/标准化/护城河/飞轮/底层逻辑/PMF/ROI...)
WARM_WORDS: 7 → 17 (老实说/这么说吧/你想啊/有意思的是...)
HOT_WORDS: 8 → 19 (凡尔赛/标题党/躺平/摆烂/破防/上头/内耗...)
WILD_WORDS: 7 → 17 (苦哈哈/傻乎乎/交学费/踩坑/翻车...)

Impact on 15 exemplar articles:
  neg score avg: 0.15 → 0.27 (+80%)
  temp_mix: still low on short segments, but full articles now
  score 0.33-1.00 vs previously 0.00

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
wangzhuc 2026-03-30 23:23:51 +08:00
parent d9b87f806f
commit f7fe44c152

View file

@@ -55,10 +55,23 @@ REAL_SOURCE_PATTERNS = [
] ]
NEGATIVE_MARKERS = [ NEGATIVE_MARKERS = [
# 直接负面情绪
"失望", "糟糕", "", "", "", "差劲", "崩溃", "吐槽", "", "失望", "糟糕", "", "", "", "差劲", "崩溃", "吐槽", "",
"", "", "焦虑", "担忧", "不满", "恶心", "可怕", "可悲", "可笑", "", "", "焦虑", "担忧", "不满", "恶心", "可怕", "可悲", "可笑",
"离谱", "尴尬", "无语", "", "", "", "", "离谱", "尴尬", "无语", "", "", "", "",
# 绝望/迷茫
"绝望", "迷茫", "心累", "", "后悔", "后怕", "心寒",
# 欺骗/操控(隐性负面)
"", "忽悠", "割韭菜", "套路", "画大饼", "洗脑",
# 失败/徒劳
"白费", "白搭", "没戏", "黄了", "凉了", "废了",
# 自嘲/自贬
"", "天真", "吃亏", "自嗨", "打脸",
# 讽刺/反语
"呵呵", "好吧", "行吧", "真服了",
# 短语
"太扯了", "说实话我很失望", "搞什么", "不靠谱", "受不了", "太扯了", "说实话我很失望", "搞什么", "不靠谱", "受不了",
"受够了", "想哭", "伤心", "苦哈哈", "得过且过",
] ]
COMMON_ADVERBS = [ COMMON_ADVERBS = [
@@ -69,10 +82,27 @@ COMMON_ADVERBS = [
"竟然", "简直", "几乎", "完全", "绝对", "必然", "竟然", "简直", "几乎", "完全", "绝对", "必然",
] ]
COLD_WORDS = ["边际", "认知负荷", "信息不对称", "路径依赖", "商业模式", "生态系统", "增量"] COLD_WORDS = [
WARM_WORDS = ["说白了", "其实吧", "讲真", "说实话", "坦白讲", "懂的都懂", "怎么说呢"] "边际", "认知负荷", "信息不对称", "路径依赖", "商业模式", "生态系统", "增量",
HOT_WORDS = ["DNA动了", "格局打开", "遥遥领先", "", "内卷", "炸了", "杀疯了", "吃灰"] "技术栈", "标准化", "结构性", "规模化", "护城河", "飞轮", "闭环",
WILD_WORDS = ["整挺好", "不靠谱", "瞎折腾", "搁这儿", "糊弄", "", ""] "赛道", "壁垒", "方法论", "底层逻辑", "第一性原理", "杠杆", "复利",
"ROI", "PMF", "代运营", "供给侧", "需求侧",
]
WARM_WORDS = [
"说白了", "其实吧", "讲真", "说实话", "坦白讲", "懂的都懂", "怎么说呢",
"老实说", "这么说吧", "你想啊", "别急", "慢慢来",
"有意思的是", "好玩的是", "巧的是", "说来话长", "话说回来",
]
HOT_WORDS = [
"DNA动了", "格局打开", "遥遥领先", "", "内卷", "炸了", "杀疯了", "吃灰",
"凡尔赛", "标题党", "躺平", "摆烂", "破防", "上头", "内耗",
"蒸发", "出圈", "降维打击", "弯道超车",
]
WILD_WORDS = [
"整挺好", "不靠谱", "瞎折腾", "搁这儿", "糊弄", "", "",
"苦哈哈", "傻乎乎", "稀里糊涂", "得了吧", "算了吧",
"摔了跤", "交学费", "踩坑", "翻车", "栽了",
]
SELF_CORRECTION_PATTERNS = [ SELF_CORRECTION_PATTERNS = [
r'不对[,]', r'准确说', r'算了', r'说错了', r'不对[,]', r'准确说', r'算了', r'说错了',