From dbe362f95a8148280dfe25fb1751aaccb754f1b2 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 19 Nov 2024 00:01:18 -0300 Subject: [PATCH] enable podcast longform --- open_notebook/plugins/podcasts.py | 72 +++++++------------------------ pages/5_🎙️_Podcasts.py | 11 ++--- pages/stream_app/chat.py | 29 +++++++------ 3 files changed, 35 insertions(+), 77 deletions(-) diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py index 7f2a23a..523ca5a 100644 --- a/open_notebook/plugins/podcasts.py +++ b/open_notebook/plugins/podcasts.py @@ -58,11 +58,15 @@ class PodcastConfig(ObjectModel): text: str, instructions: str = "", longform: bool = False, + chunks: int = 8, + min_chunk_size=600, ): self.user_instructions = ( instructions if instructions else self.user_instructions ) conversation_config = { + "max_num_chunks": chunks, + "min_chunk_size": min_chunk_size, "conversation_style": self.conversation_style, "roles_person1": self.person1_role, "roles_person2": self.person2_role, @@ -94,6 +98,8 @@ class PodcastConfig(ObjectModel): api_key_label = None llm_model_name = None + tts_model = None + if self.transcript_model_provider: if self.transcript_model_provider == "openai": api_key_label = "OPENAI_API_KEY" @@ -105,14 +111,21 @@ class PodcastConfig(ObjectModel): api_key_label = "GEMINI_API_KEY" llm_model_name = self.transcript_model + if self.provider == "gemini": + tts_model = "geminimulti" + elif self.provider == "openai": + tts_model = "openai" + elif self.provider == "anthropic": + tts_model = "anthropic" + logger.debug( - f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}" + f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}, tts model {tts_model}" ) audio_file = generate_podcast( conversation_config=conversation_config, text=text, - tts_model=self.provider, + tts_model=tts_model, llm_model_name=llm_model_name, api_key_label=api_key_label, longform=longform, @@ -242,31 +255,6 @@ participant_roles = [ "Advocate", "Debater", "Explorer", - "Opponent", - "Proponent", - "Philosopher", - "Engineer", - "Doctor", - "Psychologist", - "Economist", - "Politician", - "Scientist", - "Entrepreneur", - "Artist", - "Author", - "Journalist", - "Activist", - "Panelist", - "Data Analyst", - "Myth Buster", - "Trend Analyst", - "Futurist", - "Voice of Reason", - "Pragmatist", - "Idealist", - "Realist", - "Satirist", - "Field Reporter", ] # Engagement Techniques @@ -278,50 +266,20 @@ engagement_techniques = [ "Metaphors", "Storytelling", "Quizzes", - "Polls", - "Contests/Giveaways", - "Guest Appearances", - "Sound Effects", - "Music Interludes", - "Shout-outs", - "Interactive Challenges", "Personal Testimonials", "Quotes", "Jokes", - "Surprise Elements", "Emotional Appeals", "Provocative Statements", - "Irony", "Sarcasm", - "Alliteration", - "Repetition", - "Foreshadowing", - "Cliffhangers", - "Audience Participation", - "Sensory Descriptions", - "Visual Aids (if applicable)", - "Callbacks to Earlier Points", "Pop Culture References", - "Hyperbole", - "Parables", "Thought Experiments", "Puzzles and Riddles", "Role-playing", - "Mock Scenarios", "Debates", - "Sound Bites", "Catchphrases", - "Voice Modulation", - "Interactive Games", - "Live Demos", - "Behind-the-Scenes Insights", - "Vivid Imagery", "Statistics and Facts", "Open-ended Questions", "Challenges to Assumptions", "Evoking Curiosity", - "Memes (if visual components are included)", - "Surveys", - "Testimonials", - "Provocations", ] diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py index ade6641..4e152f4 100644 --- a/pages/5_🎙️_Podcasts.py +++ b/pages/5_🎙️_Podcasts.py @@ -133,10 +133,7 @@ with templates_tab: st.caption("Voice names are case sensitive. Be sure to add the exact name.") st.markdown( - "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)" - ) - st.markdown( - "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)" + "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)" ) pd_cfg["voice2"] = st.text_input( @@ -285,11 +282,9 @@ with templates_tab: ) st.caption("Voice names are case sensitive. Be sure to add the exact name.") st.markdown( - "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)" - ) - st.markdown( - "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)" + "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)" ) + pd_config.voice2 = st.text_input( "Voice 2", value=pd_config.voice2, diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index ecf2f7a..0baff8c 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -84,18 +84,21 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): instructions = st.text_area( "Instructions", value=selected_template.user_instructions ) - # if selected_template.provider == "gemini": - # st.warning( - # "Gemini models are not available for long podcast generation yet. So, this will be a short podcast. Coming soon. Pinky promise. If you want to try long podcasts, please change your text to speech model to Open AI." - # ) - # longform = False - # else: - # podcast_length = st.radio( - # "Podcast Length", - # ["Short (5-10 min)", "Long (20-30 min)"], - # ) - # longform = podcast_length == "Long (20-30 min)" - longform = False + podcast_length = st.radio( + "Podcast Length", + ["Short (5-10 min)", "Medium (10-20 min)", "Longer (20+ min)"], + ) + if podcast_length == "Short (5-10 min)": + longform = False + elif podcast_length == "Medium (10-20 min)": + longform = True + chunks = 4 + min_chunk_size = 600 + else: + longform = True + chunks = 8 + min_chunk_size = 600 + if len(context.get("note", [])) + len(context.get("source", [])) == 0: st.warning( "No notes or sources found in context. You don't want a boring podcast, right? So, add some context first." @@ -108,6 +111,8 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): episode_name=episode_name, text=str(context), longform=longform, + chunks=chunks, + min_chunk_size=min_chunk_size, instructions=instructions, ) st.success("Episode generated successfully")