From 83c963527c90e797ae4e7ec5a47f0592c3c54beb Mon Sep 17 00:00:00 2001
From: wangzhuc
Date: Mon, 30 Mar 2026 22:42:23 +0800
Subject: [PATCH] fix: use filename as fallback source when article has no H1 title

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/extract_exemplar.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/extract_exemplar.py b/scripts/extract_exemplar.py
index e05097d..710d1fa 100644
--- a/scripts/extract_exemplar.py
+++ b/scripts/extract_exemplar.py
@@ -178,7 +178,7 @@ def extract_exemplar(text, category=None, source=None):
     paragraphs = hs._split_paragraphs(text)
     sentences = hs._split_sentences(clean)
     headings = extract_headings(text)
-    title = extract_title(text) or source or "untitled"
+    title = extract_title(text) or source or ""
 
     if not category:
         category = detect_category(clean, paragraphs, headings)
@@ -349,7 +349,8 @@ def main():
             continue
 
         text = path.read_text(encoding="utf-8")
-        exemplar = extract_exemplar(text, category=args.category, source=args.source)
+        source = args.source or path.stem  # fallback to filename without extension
+        exemplar = extract_exemplar(text, category=args.category, source=source)
         filepath = save_exemplar(exemplar)
 
         if args.json: