enable podcast longform

This commit is contained in:
LUIS NOVO 2024-11-19 00:01:18 -03:00
parent 7f79f8224f
commit dbe362f95a
3 changed files with 35 additions and 77 deletions

View file

@ -58,11 +58,15 @@ class PodcastConfig(ObjectModel):
text: str,
instructions: str = "",
longform: bool = False,
chunks: int = 8,
min_chunk_size=600,
):
self.user_instructions = (
instructions if instructions else self.user_instructions
)
conversation_config = {
"max_num_chunks": chunks,
"min_chunk_size": min_chunk_size,
"conversation_style": self.conversation_style,
"roles_person1": self.person1_role,
"roles_person2": self.person2_role,
@ -94,6 +98,8 @@ class PodcastConfig(ObjectModel):
api_key_label = None
llm_model_name = None
tts_model = None
if self.transcript_model_provider:
if self.transcript_model_provider == "openai":
api_key_label = "OPENAI_API_KEY"
@ -105,14 +111,21 @@ class PodcastConfig(ObjectModel):
api_key_label = "GEMINI_API_KEY"
llm_model_name = self.transcript_model
if self.provider == "gemini":
tts_model = "geminimulti"
elif self.provider == "openai":
tts_model = "openai"
elif self.provider == "anthropic":
tts_model = "anthropic"
logger.debug(
f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}"
f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}, tts model {tts_model}"
)
audio_file = generate_podcast(
conversation_config=conversation_config,
text=text,
tts_model=self.provider,
tts_model=tts_model,
llm_model_name=llm_model_name,
api_key_label=api_key_label,
longform=longform,
@ -242,31 +255,6 @@ participant_roles = [
"Advocate",
"Debater",
"Explorer",
"Opponent",
"Proponent",
"Philosopher",
"Engineer",
"Doctor",
"Psychologist",
"Economist",
"Politician",
"Scientist",
"Entrepreneur",
"Artist",
"Author",
"Journalist",
"Activist",
"Panelist",
"Data Analyst",
"Myth Buster",
"Trend Analyst",
"Futurist",
"Voice of Reason",
"Pragmatist",
"Idealist",
"Realist",
"Satirist",
"Field Reporter",
]
# Engagement Techniques
@ -278,50 +266,20 @@ engagement_techniques = [
"Metaphors",
"Storytelling",
"Quizzes",
"Polls",
"Contests/Giveaways",
"Guest Appearances",
"Sound Effects",
"Music Interludes",
"Shout-outs",
"Interactive Challenges",
"Personal Testimonials",
"Quotes",
"Jokes",
"Surprise Elements",
"Emotional Appeals",
"Provocative Statements",
"Irony",
"Sarcasm",
"Alliteration",
"Repetition",
"Foreshadowing",
"Cliffhangers",
"Audience Participation",
"Sensory Descriptions",
"Visual Aids (if applicable)",
"Callbacks to Earlier Points",
"Pop Culture References",
"Hyperbole",
"Parables",
"Thought Experiments",
"Puzzles and Riddles",
"Role-playing",
"Mock Scenarios",
"Debates",
"Sound Bites",
"Catchphrases",
"Voice Modulation",
"Interactive Games",
"Live Demos",
"Behind-the-Scenes Insights",
"Vivid Imagery",
"Statistics and Facts",
"Open-ended Questions",
"Challenges to Assumptions",
"Evoking Curiosity",
"Memes (if visual components are included)",
"Surveys",
"Testimonials",
"Provocations",
]

View file

@ -133,10 +133,7 @@ with templates_tab:
st.caption("Voice names are case sensitive. Be sure to add the exact name.")
st.markdown(
"[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
)
st.markdown(
"[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
"Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
)
pd_cfg["voice2"] = st.text_input(
@ -285,11 +282,9 @@ with templates_tab:
)
st.caption("Voice names are case sensitive. Be sure to add the exact name.")
st.markdown(
"[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)"
)
st.markdown(
"[Gemini voices](https://cloud.google.com/text-to-speech/docs/voices)"
"Sample voices from: [Open AI](https://platform.openai.com/docs/guides/text-to-speech), [Gemini](https://cloud.google.com/text-to-speech/docs/voices), [Elevenlabs](https://elevenlabs.io/text-to-speech)"
)
pd_config.voice2 = st.text_input(
"Voice 2",
value=pd_config.voice2,

View file

@ -84,18 +84,21 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
instructions = st.text_area(
"Instructions", value=selected_template.user_instructions
)
# if selected_template.provider == "gemini":
# st.warning(
# "Gemini models are not available for long podcast generation yet. So, this will be a short podcast. Coming soon. Pinky promise. If you want to try long podcasts, please change your text to speech model to Open AI."
# )
# longform = False
# else:
# podcast_length = st.radio(
# "Podcast Length",
# ["Short (5-10 min)", "Long (20-30 min)"],
# )
# longform = podcast_length == "Long (20-30 min)"
longform = False
podcast_length = st.radio(
"Podcast Length",
["Short (5-10 min)", "Medium (10-20 min)", "Longer (20+ min)"],
)
# Bind the chunking parameters up front so every branch -- including
# "Short", which only toggles longform off -- leaves both names defined
# for the generate_episode(...) call below. The values match the defaults
# declared in the episode-generation signature (chunks=8, min_chunk_size=600).
chunks = 8
min_chunk_size = 600
if podcast_length == "Short (5-10 min)":
    longform = False
elif podcast_length == "Medium (10-20 min)":
    # Medium: long-form generation with half as many chunks.
    longform = True
    chunks = 4
else:
    # "Longer (20+ min)": long-form generation with the full chunk budget.
    longform = True
if len(context.get("note", [])) + len(context.get("source", [])) == 0:
st.warning(
"No notes or sources found in context. You don't want a boring podcast, right? So, add some context first."
@ -108,6 +111,8 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
episode_name=episode_name,
text=str(context),
longform=longform,
chunks=chunks,
min_chunk_size=min_chunk_size,
instructions=instructions,
)
st.success("Episode generated successfully")