Enable different text models for podcast transcript generation
This commit is contained in:
parent
183149014e
commit
dc76d6b8d8
2 changed files with 114 additions and 23 deletions
|
|
@ -4,6 +4,7 @@ from loguru import logger
|
|||
from podcastfy.client import generate_podcast
|
||||
from pydantic import Field, field_validator
|
||||
|
||||
from open_notebook.config import DATA_FOLDER
|
||||
from open_notebook.domain.notebook import ObjectModel
|
||||
|
||||
|
||||
|
|
@ -27,6 +28,8 @@ class PodcastConfig(ObjectModel):
|
|||
conversation_style: List[str]
|
||||
engagement_technique: List[str]
|
||||
dialogue_structure: List[str]
|
||||
transcript_model: Optional[str] = None
|
||||
transcript_model_provider: Optional[str] = None
|
||||
user_instructions: Optional[str] = None
|
||||
ending_message: Optional[str] = None
|
||||
wordcount: int = Field(ge=400, le=10000)
|
||||
|
|
@ -53,7 +56,11 @@ class PodcastConfig(ObjectModel):
|
|||
"engagement_techniques": self.engagement_technique,
|
||||
"creativity": self.creativity,
|
||||
"text_to_speech": {
|
||||
# "temp_audio_dir": f"{PODCASTS_FOLDER}/tmp",
|
||||
"output_directories": {
|
||||
"transcripts": f"{DATA_FOLDER}/podcasts/transcripts",
|
||||
"audio": f"{DATA_FOLDER}/podcasts/audio",
|
||||
},
|
||||
"temp_audio_dir": f"{DATA_FOLDER}/podcasts/audio/tmp",
|
||||
"ending_message": "Thank you for listening to this episode. Don't forget to subscribe to our podcast for more interesting conversations.",
|
||||
"default_tts_model": self.provider,
|
||||
self.provider: {
|
||||
|
|
@ -71,8 +78,25 @@ class PodcastConfig(ObjectModel):
|
|||
f"Generating episode {episode_name} with config {conversation_config}"
|
||||
)
|
||||
|
||||
api_key_label = None
|
||||
llm_model_name = None
|
||||
if self.transcript_model_provider:
|
||||
if self.transcript_model_provider == "openai":
|
||||
api_key_label = "OPENAI_API_KEY"
|
||||
llm_model_name = self.transcript_model
|
||||
elif self.transcript_model_provider == "anthropic":
|
||||
api_key_label = "ANTHROPIC_API_KEY"
|
||||
llm_model_name = self.transcript_model
|
||||
elif self.transcript_model_provider == "gemini":
|
||||
api_key_label = "GEMINI_API_KEY"
|
||||
llm_model_name = self.transcript_model
|
||||
|
||||
audio_file = generate_podcast(
|
||||
conversation_config=conversation_config, text=text, tts_model=self.provider
|
||||
conversation_config=conversation_config,
|
||||
text=text,
|
||||
tts_model=self.provider,
|
||||
llm_model_name=llm_model_name,
|
||||
api_key_label=api_key_label,
|
||||
)
|
||||
episode = PodcastEpisode(
|
||||
name=episode_name,
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ setup_page("🎙️ Podcasts")
|
|||
|
||||
text_to_speech_models = Model.get_models_by_type("text_to_speech")
|
||||
|
||||
|
||||
provider_models: Dict[str, List[str]] = {}
|
||||
|
||||
for model in text_to_speech_models:
|
||||
|
|
@ -26,11 +25,28 @@ for model in text_to_speech_models:
|
|||
provider_models[model.provider] = []
|
||||
provider_models[model.provider].append(model.name)
|
||||
|
||||
# Group the configured language models by provider, keeping only the
# providers that the page accepts for transcript generation.
text_models = Model.get_models_by_type("language")

transcript_provider_models: Dict[str, List[str]] = {}

for model in text_models:
    if model.provider in ("gemini", "openai", "anthropic"):
        # setdefault creates the provider's list on first sight, then appends.
        transcript_provider_models.setdefault(model.provider, []).append(
            model.name
        )
|
||||
|
||||
|
||||
if len(text_to_speech_models) == 0:
|
||||
st.error("No text to speech models found. Please set one up in the Settings page.")
|
||||
st.stop()
|
||||
|
||||
# Gate on the filtered provider map rather than on text_models: language
# models that belong only to unsupported providers would otherwise pass this
# check and leave the transcript provider/model selectboxes with no options.
if not transcript_provider_models:
    st.error(
        "No language models found. Please set one up in the Settings page. Only Gemini, Open AI and Anthropic models supported for transcript generation."
    )
    st.stop()
|
||||
|
||||
episodes_tab, templates_tab = st.tabs(["Episodes", "Templates"])
|
||||
|
||||
with episodes_tab:
|
||||
|
|
@ -90,7 +106,23 @@ with templates_tab:
|
|||
pd_cfg["ending_message"] = st.text_input(
|
||||
"Ending Message", placeholder="Thank you for listening!"
|
||||
)
|
||||
pd_cfg["provider"] = st.selectbox("Provider", provider_models.keys())
|
||||
pd_cfg["transcript_model_provider"] = st.selectbox(
|
||||
"Transcript Model Provider", transcript_provider_models.keys()
|
||||
)
|
||||
pd_cfg["transcript_model"] = st.selectbox(
|
||||
"Transcript Model",
|
||||
transcript_provider_models[pd_cfg["transcript_model_provider"]],
|
||||
)
|
||||
|
||||
pd_cfg["provider"] = st.selectbox(
|
||||
"Audio Model Provider", provider_models.keys()
|
||||
)
|
||||
pd_cfg["model"] = st.selectbox(
|
||||
"Audio Model", provider_models[pd_cfg["provider"]]
|
||||
)
|
||||
st.caption(
|
||||
"OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5"
|
||||
)
|
||||
pd_cfg["voice1"] = st.text_input(
|
||||
"Voice 1", help="You can use Elevenlabs voice ID"
|
||||
)
|
||||
|
|
@ -105,10 +137,6 @@ with templates_tab:
|
|||
"Voice 2", help="You can use Elevenlabs voice ID"
|
||||
)
|
||||
|
||||
pd_cfg["model"] = st.selectbox("Model", provider_models[pd_cfg["provider"]])
|
||||
st.caption(
|
||||
"OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5"
|
||||
)
|
||||
if st.button("Save"):
|
||||
try:
|
||||
pd = PodcastConfig(**pd_cfg)
|
||||
|
|
@ -200,12 +228,65 @@ with templates_tab:
|
|||
placeholder="Thank you for listening!",
|
||||
key=f"ending_message_{pd_config.id}",
|
||||
)
|
||||
|
||||
if pd_config.transcript_model_provider not in transcript_provider_models:
|
||||
st.warning(
|
||||
f"Transcript Model Provider {pd_config.transcript_model_provider} not setup. Changing to default."
|
||||
)
|
||||
index = 0
|
||||
else:
|
||||
index = list(transcript_provider_models.keys()).index(
|
||||
pd_config.transcript_model_provider
|
||||
)
|
||||
|
||||
pd_config.transcript_model_provider = st.selectbox(
|
||||
"Transcript Model Provider",
|
||||
list(transcript_provider_models.keys()),
|
||||
index=index,
|
||||
key=f"transcript_provider_{pd_config.id}",
|
||||
)
|
||||
if (
|
||||
not pd_config.transcript_model
|
||||
or pd_config.transcript_model
|
||||
not in transcript_provider_models[pd_config.transcript_model_provider]
|
||||
):
|
||||
st.warning(
|
||||
f"Transcript Model {pd_config.transcript_model} not setup. Changing to default."
|
||||
)
|
||||
index = 0
|
||||
else:
|
||||
index = transcript_provider_models[
|
||||
pd_config.transcript_model_provider
|
||||
].index(pd_config.transcript_model)
|
||||
pd_config.transcript_model = st.selectbox(
|
||||
"Transcript Model",
|
||||
transcript_provider_models[pd_config.transcript_model_provider],
|
||||
index=index,
|
||||
key=f"transcript_model_{pd_config.id}",
|
||||
)
|
||||
|
||||
pd_config.provider = st.selectbox(
|
||||
"Provider",
|
||||
"Audio Model Provider",
|
||||
list(provider_models.keys()),
|
||||
index=list(provider_models.keys()).index(pd_config.provider),
|
||||
key=f"provider_{pd_config.id}",
|
||||
)
|
||||
if pd_config.model not in provider_models[pd_config.provider]:
|
||||
st.warning(
|
||||
f"Audio Model {pd_config.model} not setup. Changing to default."
|
||||
)
|
||||
index = 0
|
||||
else:
|
||||
index = provider_models[pd_config.provider].index(pd_config.model)
|
||||
pd_config.model = st.selectbox(
|
||||
"Model",
|
||||
provider_models[pd_config.provider],
|
||||
index=index,
|
||||
key=f"model_{pd_config.id}",
|
||||
)
|
||||
st.caption(
|
||||
"OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5"
|
||||
)
|
||||
pd_config.voice1 = st.text_input(
|
||||
"Voice 1",
|
||||
value=pd_config.voice1,
|
||||
|
|
@ -224,20 +305,6 @@ with templates_tab:
|
|||
key=f"voice2_{pd_config.id}",
|
||||
help="You can use Elevenlabs voice ID",
|
||||
)
|
||||
if pd_config.model not in provider_models[pd_config.provider]:
|
||||
st.warning(f"Model {pd_config.model} not setup. Changing to default.")
|
||||
index = 0
|
||||
else:
|
||||
index = provider_models[pd_config.provider].index(pd_config.model)
|
||||
pd_config.model = st.selectbox(
|
||||
"Model",
|
||||
provider_models[pd_config.provider],
|
||||
index=index,
|
||||
key=f"model_{pd_config.id}",
|
||||
)
|
||||
st.caption(
|
||||
"OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5"
|
||||
)
|
||||
|
||||
if st.button("Save Config", key=f"btn_save{pd_config.id}"):
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in a new issue