enable podcast longform

2024-11-19 00:01:18 -03:00 · 2024-11-19 00:01:18 -03:00 · dbe362f95a
commit dbe362f95a
parent 7f79f8224f
3 changed files with 35 additions and 77 deletions
--- a/open_notebook/plugins/podcasts.py
+++ b/open_notebook/plugins/podcasts.py
@@ -58,11 +58,15 @@ class PodcastConfig(ObjectModel):
        text: str,
        instructions: str = "",
        longform: bool = False,
+        chunks: int = 8,
+        min_chunk_size=600,
    ):
        self.user_instructions = (
            instructions if instructions else self.user_instructions
        )
        conversation_config = {
+            "max_num_chunks": chunks,
+            "min_chunk_size": min_chunk_size,
            "conversation_style": self.conversation_style,
            "roles_person1": self.person1_role,
            "roles_person2": self.person2_role,
@@ -94,6 +98,8 @@ class PodcastConfig(ObjectModel):

        api_key_label = None
        llm_model_name = None
+        tts_model = None
+
        if self.transcript_model_provider:
            if self.transcript_model_provider == "openai":
                api_key_label = "OPENAI_API_KEY"
@@ -105,14 +111,21 @@ class PodcastConfig(ObjectModel):
                api_key_label = "GEMINI_API_KEY"
                llm_model_name = self.transcript_model

+        if self.provider == "gemini":
+            tts_model = "geminimulti"
+        elif self.provider == "openai":
+            tts_model = "openai"
+        elif self.provider == "anthropic":
+            tts_model = "anthropic"
+
        logger.debug(
-            f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}"
+            f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}, tts model {tts_model}"
        )

        audio_file = generate_podcast(
            conversation_config=conversation_config,
            text=text,
-            tts_model=self.provider,
+            tts_model=tts_model,
            llm_model_name=llm_model_name,
            api_key_label=api_key_label,
            longform=longform,
@@ -242,31 +255,6 @@ participant_roles = [
    "Advocate",
    "Debater",
    "Explorer",
-    "Opponent",
-    "Proponent",
-    "Philosopher",
-    "Engineer",
-    "Doctor",
-    "Psychologist",
-    "Economist",
-    "Politician",
-    "Scientist",
-    "Entrepreneur",
-    "Artist",
-    "Author",
-    "Journalist",
-    "Activist",
-    "Panelist",
-    "Data Analyst",
-    "Myth Buster",
-    "Trend Analyst",
-    "Futurist",
-    "Voice of Reason",
-    "Pragmatist",
-    "Idealist",
-    "Realist",
-    "Satirist",
-    "Field Reporter",
 ]

 # Engagement Techniques
@@ -278,50 +266,20 @@ engagement_techniques = [
    "Metaphors",
    "Storytelling",
    "Quizzes",
-    "Polls",
-    "Contests/Giveaways",
-    "Guest Appearances",
-    "Sound Effects",
-    "Music Interludes",
-    "Shout-outs",
-    "Interactive Challenges",
    "Personal Testimonials",
    "Quotes",
    "Jokes",
-    "Surprise Elements",
    "Emotional Appeals",
    "Provocative Statements",
-    "Irony",
    "Sarcasm",
-    "Alliteration",
-    "Repetition",
-    "Foreshadowing",
-    "Cliffhangers",
-    "Audience Participation",
-    "Sensory Descriptions",
-    "Visual Aids (if applicable)",
-    "Callbacks to Earlier Points",
    "Pop Culture References",
-    "Hyperbole",
-    "Parables",
    "Thought Experiments",
    "Puzzles and Riddles",
    "Role-playing",
-    "Mock Scenarios",
    "Debates",
-    "Sound Bites",
    "Catchphrases",
-    "Voice Modulation",
-    "Interactive Games",
-    "Live Demos",
-    "Behind-the-Scenes Insights",
-    "Vivid Imagery",
    "Statistics and Facts",
    "Open-ended Questions",
    "Challenges to Assumptions",
    "Evoking Curiosity",
-    "Memes (if visual components are included)",
-    "Surveys",
-    "Testimonials",
-    "Provocations",
 ]
--- a/pages/5_🎙️_Podcasts.py
+++ b/pages/5_🎙️_Podcasts.py
@@ -133,10 +133,7 @@ with templates_tab:
        st.caption("Voice names are case sensitive. Be sure to add the exact name.")

        st.markdown(
-            "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
-        )
-        st.markdown(
-            "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
+            "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
        )

        pd_cfg["voice2"] = st.text_input(
@@ -285,11 +282,9 @@ with templates_tab:
            )
            st.caption("Voice names are case sensitive. Be sure to add the exact name.")
            st.markdown(
-                "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
-            )
-            st.markdown(
-                "[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
+                "Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
            )
+
            pd_config.voice2 = st.text_input(
                "Voice 2",
                value=pd_config.voice2,
--- a/pages/stream_app/chat.py
+++ b/pages/stream_app/chat.py
@@ -84,18 +84,21 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
                instructions = st.text_area(
                    "Instructions", value=selected_template.user_instructions
                )
-                # if selected_template.provider == "gemini":
-                #     st.warning(
-                #         "Gemini models are not available for long podcast generation yet. So, this will be a short podcast. Coming soon. Pinky promise. If you want to try long podcasts, please change your text to speech model to Open AI."
-                #     )
-                #     longform = False
-                # else:
-                #     podcast_length = st.radio(
-                #         "Podcast Length",
-                #         ["Short (5-10 min)", "Long (20-30 min)"],
-                #     )
-                #     longform = podcast_length == "Long (20-30 min)"
-                longform = False
+                podcast_length = st.radio(
+                    "Podcast Length",
+                    ["Short (5-10 min)", "Medium (10-20 min)", "Longer (20+ min)"],
+                )
+                if podcast_length == "Short (5-10 min)":
+                    longform = False
+                elif podcast_length == "Medium (10-20 min)":
+                    longform = True
+                    chunks = 4
+                    min_chunk_size = 600
+                else:
+                    longform = True
+                    chunks = 8
+                    min_chunk_size = 600
+
                if len(context.get("note", [])) + len(context.get("source", [])) == 0:
                    st.warning(
                        "No notes or sources found in context. You don't want a boring podcast, right? So, add some context first."
@@ -108,6 +111,8 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
                                    episode_name=episode_name,
                                    text=str(context),
                                    longform=longform,
+                                    chunks=chunks,
+                                    min_chunk_size=min_chunk_size,
                                    instructions=instructions,
                                )
                            st.success("Episode generated successfully")