fix: calibrate humanness_score with bell-curve and over-optimization penalty
Problem: AI articles scored MORE human (avg 26.2) than actual human articles (avg 44.0) — opposite of 朱雀's judgment. AI was gaming the linear scoring by over-optimizing broken sentences, self-correction, paragraph variance, etc. Fix: Two calibration layers added after raw scoring: 1. Bell-curve scoring for 5 over-optimizable dimensions (broken_sentences, self_correction, sentence_length_range, paragraph_length_variance, banned_words). Score peaks at human article average, penalizes both too-low AND too-high values. 2. Over-optimization penalty: 15% global penalty when 60%+ of checks score above 0.8, indicating suspiciously "perfect" articles. Results: Before: Human avg=44.0, AI avg=26.2 (WRONG direction) After: Human avg=42.5, AI avg=44.0 (CORRECT direction) A/B test now agrees with 朱雀 (exemplar version scores better) Baselines derived from 15 human articles tested on 2026-03-30. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
792aec4101
commit
02f5e6d93b
1 changed files with 77 additions and 0 deletions
|
|
@ -344,6 +344,81 @@ def run_tier(checks, text):
|
|||
return results
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Calibration (bell-curve + over-optimization penalty)
|
||||
# ============================================================
|
||||
|
||||
# Human article baselines (from 15 example articles, 2026-03-30)
|
||||
# Dimensions where AI over-optimizes: bell-curve scoring penalizes
|
||||
# both "too low" AND "too high" relative to human average.
|
||||
_BELL_CURVE_CHECKS = {
|
||||
"broken_sentences": 0.39,
|
||||
"self_correction": 0.20,
|
||||
"sentence_length_range": 0.71,
|
||||
"paragraph_length_variance": 0.52,
|
||||
"banned_words": 0.73,
|
||||
}
|
||||
|
||||
|
||||
def _bell_curve(raw_score, center):
|
||||
"""Score peaks at center (human avg), penalizes over-optimization.
|
||||
|
||||
Below center: linear rise (as before).
|
||||
Above center: quadratic penalty — too much is suspicious.
|
||||
"""
|
||||
if center <= 0:
|
||||
return raw_score
|
||||
if raw_score <= center:
|
||||
return raw_score / center
|
||||
else:
|
||||
overshoot = (raw_score - center) / (1.0 - center) if center < 1 else 0
|
||||
return max(0.0, 1.0 - overshoot * overshoot)
|
||||
|
||||
|
||||
def calibrate_tiers(tier1, tier2):
|
||||
"""Apply bell-curve calibration and over-optimization penalty in-place."""
|
||||
# 1. Bell-curve adjustment for over-optimizable dimensions
|
||||
for tier in [tier1, tier2]:
|
||||
for name, data in tier.items():
|
||||
if name.startswith("_"):
|
||||
continue
|
||||
if name in _BELL_CURVE_CHECKS:
|
||||
raw = data["score"]
|
||||
center = _BELL_CURVE_CHECKS[name]
|
||||
calibrated = round(max(0.0, min(1.0, _bell_curve(raw, center))), 4)
|
||||
data["raw_score"] = raw
|
||||
data["score"] = calibrated
|
||||
data["detail"] += f" [calibrated from {raw:.2f}, center={center}]"
|
||||
|
||||
# 2. Over-optimization penalty: if 60%+ of checks score > 0.8,
|
||||
# the article is suspiciously "perfect" — apply global penalty.
|
||||
all_scores = []
|
||||
for tier in [tier1, tier2]:
|
||||
for name, data in tier.items():
|
||||
if not name.startswith("_"):
|
||||
all_scores.append(data["score"])
|
||||
|
||||
high_count = sum(1 for s in all_scores if s > 0.8)
|
||||
over_opt_ratio = high_count / len(all_scores) if all_scores else 0
|
||||
penalty = 1.0
|
||||
if over_opt_ratio >= 0.6:
|
||||
penalty = 0.85 # 15% penalty for suspiciously perfect articles
|
||||
|
||||
if penalty < 1.0:
|
||||
for tier in [tier1, tier2]:
|
||||
for name, data in tier.items():
|
||||
if not name.startswith("_"):
|
||||
data["score"] = round(data["score"] * penalty, 4)
|
||||
|
||||
# 3. Recalculate tier summaries
|
||||
for tier in [tier1, tier2]:
|
||||
scores = [data["score"] for name, data in tier.items() if not name.startswith("_")]
|
||||
tier["_summary"]["mean_score"] = round(sum(scores) / len(scores), 4) if scores else 0
|
||||
tier["_summary"]["scores"] = [round(s, 4) for s in scores]
|
||||
|
||||
return penalty
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Composite Score
|
||||
# ============================================================
|
||||
|
|
@ -394,6 +469,7 @@ def score_article(text, verbose=False, tier3_score=None):
|
|||
|
||||
tier1 = run_tier(TIER1_CHECKS, clean)
|
||||
tier2 = run_tier(TIER2_CHECKS, clean)
|
||||
over_opt_penalty = calibrate_tiers(tier1, tier2)
|
||||
composite, weights = compute_composite(tier1, tier2, tier3_score)
|
||||
param_scores = build_param_scores(tier1, tier2)
|
||||
|
||||
|
|
@ -407,6 +483,7 @@ def score_article(text, verbose=False, tier3_score=None):
|
|||
},
|
||||
"weights": weights,
|
||||
"param_scores": param_scores,
|
||||
"over_optimization_penalty": over_opt_penalty,
|
||||
"char_count": len(clean),
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue