From dbe362f95a8148280dfe25fb1751aaccb754f1b2 Mon Sep 17 00:00:00 2001
From: LUIS NOVO <lfnovo@gmail.com>
Date: Tue, 19 Nov 2024 00:01:18 -0300
Subject: [PATCH] enable podcast longform

---
 open_notebook/plugins/podcasts.py | 72 +++++++------------------------
 pages/5_🎙️_Podcasts.py             | 11 ++---
 pages/stream_app/chat.py          | 29 +++++++------
 3 files changed, 35 insertions(+), 77 deletions(-)

diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py
index 7f2a23a..523ca5a 100644
--- a/open_notebook/plugins/podcasts.py
+++ b/open_notebook/plugins/podcasts.py
@@ -58,11 +58,15 @@ class PodcastConfig(ObjectModel):
         text: str,
         instructions: str = "",
         longform: bool = False,
+        chunks: int = 8,
+        min_chunk_size: int = 600,
     ):
         self.user_instructions = (
             instructions if instructions else self.user_instructions
         )
         conversation_config = {
+            "max_num_chunks": chunks,
+            "min_chunk_size": min_chunk_size,
             "conversation_style": self.conversation_style,
             "roles_person1": self.person1_role,
             "roles_person2": self.person2_role,
@@ -94,6 +98,8 @@ class PodcastConfig(ObjectModel):
 
         api_key_label = None
         llm_model_name = None
+        tts_model = None
+
         if self.transcript_model_provider:
             if self.transcript_model_provider == "openai":
                 api_key_label = "OPENAI_API_KEY"
@@ -105,14 +111,21 @@ class PodcastConfig(ObjectModel):
                 api_key_label = "GEMINI_API_KEY"
                 llm_model_name = self.transcript_model
 
+        # "gemini" is renamed to "geminimulti"; any other provider name is
+        # passed through unchanged instead of silently becoming None.
+        if self.provider == "gemini":
+            tts_model = "geminimulti"
+        else:
+            tts_model = self.provider
+
         logger.debug(
-            f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}"
+            f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}, tts model {tts_model}"
         )
 
         audio_file = generate_podcast(
             conversation_config=conversation_config,
             text=text,
-            tts_model=self.provider,
+            tts_model=tts_model,
             llm_model_name=llm_model_name,
             api_key_label=api_key_label,
             longform=longform,
@@ -242,31 +255,6 @@ participant_roles = [
     "Advocate",
     "Debater",
     "Explorer",
-    "Opponent",
-    "Proponent",
-    "Philosopher",
-    "Engineer",
-    "Doctor",
-    "Psychologist",
-    "Economist",
-    "Politician",
-    "Scientist",
-    "Entrepreneur",
-    "Artist",
-    "Author",
-    "Journalist",
-    "Activist",
-    "Panelist",
-    "Data Analyst",
-    "Myth Buster",
-    "Trend Analyst",
-    "Futurist",
-    "Voice of Reason",
-    "Pragmatist",
-    "Idealist",
-    "Realist",
-    "Satirist",
-    "Field Reporter",
 ]
 
 # Engagement Techniques
@@ -278,50 +266,20 @@ engagement_techniques = [
     "Metaphors",
     "Storytelling",
     "Quizzes",
-    "Polls",
-    "Contests/Giveaways",
-    "Guest Appearances",
-    "Sound Effects",
-    "Music Interludes",
-    "Shout-outs",
-    "Interactive Challenges",
     "Personal Testimonials",
     "Quotes",
     "Jokes",
-    "Surprise Elements",
     "Emotional Appeals",
     "Provocative Statements",
-    "Irony",
     "Sarcasm",
-    "Alliteration",
-    "Repetition",
-    "Foreshadowing",
-    "Cliffhangers",
-    "Audience Participation",
-    "Sensory Descriptions",
-    "Visual Aids (if applicable)",
-    "Callbacks to Earlier Points",
     "Pop Culture References",
-    "Hyperbole",
-    "Parables",
     "Thought Experiments",
     "Puzzles and Riddles",
     "Role-playing",
-    "Mock Scenarios",
     "Debates",
-    "Sound Bites",
     "Catchphrases",
-    "Voice Modulation",
-    "Interactive Games",
-    "Live Demos",
-    "Behind-the-Scenes Insights",
-    "Vivid Imagery",
     "Statistics and Facts",
     "Open-ended Questions",
     "Challenges to Assumptions",
     "Evoking Curiosity",
-    "Memes (if visual components are included)",
-    "Surveys",
-    "Testimonials",
-    "Provocations",
 ]
diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py
index ade6641..4e152f4 100644
--- a/pages/5_🎙️_Podcasts.py
+++ b/pages/5_🎙️_Podcasts.py
@@ -133,10 +133,7 @@ with templates_tab:
         st.caption("Voice names are case sensitive. Be sure to add the exact name.")
 
         st.markdown(
-            "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
-        )
-        st.markdown(
-            "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
+            "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
         )
 
         pd_cfg["voice2"] = st.text_input(
@@ -285,11 +282,9 @@ with templates_tab:
             )
             st.caption("Voice names are case sensitive. Be sure to add the exact name.")
             st.markdown(
-                "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
-            )
-            st.markdown(
-                "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
+                "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
             )
+
             pd_config.voice2 = st.text_input(
                 "Voice 2",
                 value=pd_config.voice2,
diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py
index ecf2f7a..0baff8c 100644
--- a/pages/stream_app/chat.py
+++ b/pages/stream_app/chat.py
@@ -84,18 +84,21 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
                 instructions = st.text_area(
                     "Instructions", value=selected_template.user_instructions
                 )
-                # if selected_template.provider == "gemini":
-                #     st.warning(
-                #         "Gemini models are not available for long podcast generation yet. So, this will be a short podcast. Coming soon. Pinky promise. If you want to try long podcasts, please change your text to speech model to Open AI."
-                #     )
-                #     longform = False
-                # else:
-                #     podcast_length = st.radio(
-                #         "Podcast Length",
-                #         ["Short (5-10 min)", "Long (20-30 min)"],
-                #     )
-                #     longform = podcast_length == "Long (20-30 min)"
-                longform = False
+                podcast_length = st.radio(
+                    "Podcast Length",
+                    ["Short (5-10 min)", "Medium (10-20 min)", "Longer (20+ min)"],
+                )
+                # Bind chunk settings up front so the "Short" branch has them too.
+                chunks = 8
+                min_chunk_size = 600
+                if podcast_length == "Short (5-10 min)":
+                    longform = False
+                elif podcast_length == "Medium (10-20 min)":
+                    longform = True
+                    chunks = 4
+                else:
+                    longform = True
+
                 if len(context.get("note", [])) + len(context.get("source", [])) == 0:
                     st.warning(
                         "No notes or sources found in context. You don't want a boring podcast, right? So, add some context first."
@@ -108,6 +111,8 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
                                     episode_name=episode_name,
                                     text=str(context),
                                     longform=longform,
+                                    chunks=chunks,
+                                    min_chunk_size=min_chunk_size,
                                     instructions=instructions,
                                 )
                             st.success("Episode generated successfully")