From 53d86e2e16d63df323a146293fab1a93bfec8aae Mon Sep 17 00:00:00 2001
From: LUIS NOVO
Date: Thu, 24 Oct 2024 16:45:12 -0300
Subject: [PATCH] configurable language files

---
 open_notebook/graphs/content_process.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/open_notebook/graphs/content_process.py b/open_notebook/graphs/content_process.py
index 7d1eea9..6075c42 100644
--- a/open_notebook/graphs/content_process.py
+++ b/open_notebook/graphs/content_process.py
@@ -9,6 +9,8 @@ from typing_extensions import TypedDict
 from youtube_transcript_api import YouTubeTranscriptApi  # type: ignore
 from youtube_transcript_api.formatters import TextFormatter  # type: ignore
 
+from open_notebook.config import CONFIG
+
 
 class SourceState(TypedDict):
     content: str
@@ -183,9 +185,12 @@ def extract_youtube_transcript(state: SourceState):
     Parse the text file and print its content.
     """
 
-    transcript = YouTubeTranscriptApi.get_transcript(
-        _extract_youtube_id(state.get("url")), languages=["pt", "en"]
+    languages = CONFIG.get("youtube_transcripts", {}).get(
+        "preferred_languages", ["pt", "en"]
     )
+
+    video_id = _extract_youtube_id(state.get("url"))
+    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
     formatter = TextFormatter()
     title = _get_title(state.get("url"))
     return {"content": formatter.format_transcript(transcript), "title": title}