diff --git a/CLAUDE.md b/CLAUDE.md index a8a1f18..a2878e6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -218,4 +218,4 @@ See dedicated CLAUDE.md files for detailed guidance: --- -**Last Updated**: February 2026 | **Project Version**: 1.7.4 +**Last Updated**: February 2026 | **Project Version**: 1.8.0 diff --git a/api/CLAUDE.md b/api/CLAUDE.md index 8a69458..8e17a5d 100644 --- a/api/CLAUDE.md +++ b/api/CLAUDE.md @@ -17,6 +17,7 @@ FastAPI application serving three architectural layers: routes (HTTP endpoints), - Load .env environment variables - Initialize CORS middleware + password auth middleware - Run database migrations via AsyncMigrationManager on lifespan startup +- Run podcast profile data migration (legacy string to model registry conversion) - Register all routers **Key services**: @@ -62,6 +63,7 @@ FastAPI application serving three architectural layers: routes (HTTP endpoints), - **routers/transformations.py**: POST /transformations - **routers/insights.py**: GET /sources/{source_id}/insights - **routers/auth.py**: POST /auth/password (password-based auth) +- **routers/languages.py**: GET /languages (available podcast languages via pycountry+babel) - **routers/commands.py**: GET /commands/{command_id} (job status tracking) ## Common Patterns diff --git a/api/main.py b/api/main.py index cf159d4..661637e 100644 --- a/api/main.py +++ b/api/main.py @@ -32,6 +32,7 @@ from api.routers import ( embedding_rebuild, episode_profiles, insights, + languages, models, notebooks, notes, @@ -97,6 +98,15 @@ async def lifespan(app: FastAPI): # Fail fast - don't start the API with an outdated database schema raise RuntimeError(f"Failed to run database migrations: {str(e)}") from e + # Run podcast profile data migration (legacy strings -> Model registry) + try: + from open_notebook.podcasts.migration import migrate_podcast_profiles + + await migrate_podcast_profiles() + except Exception as e: + logger.warning(f"Podcast profile migration encountered errors: {e}") + # Non-fatal: 
profiles can be migrated manually via UI + logger.success("API initialization completed successfully") # Yield control to the application @@ -269,6 +279,7 @@ app.include_router(speaker_profiles.router, prefix="/api", tags=["speaker-profil app.include_router(chat.router, prefix="/api", tags=["chat"]) app.include_router(source_chat.router, prefix="/api", tags=["source-chat"]) app.include_router(credentials.router, prefix="/api", tags=["credentials"]) +app.include_router(languages.router, prefix="/api", tags=["languages"]) @app.get("/") diff --git a/api/routers/episode_profiles.py b/api/routers/episode_profiles.py index da0baff..cc5e070 100644 --- a/api/routers/episode_profiles.py +++ b/api/routers/episode_profiles.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from fastapi import APIRouter, HTTPException from loguru import logger @@ -14,12 +14,34 @@ class EpisodeProfileResponse(BaseModel): name: str description: str speaker_config: str - outline_provider: str - outline_model: str - transcript_provider: str - transcript_model: str + outline_llm: Optional[str] = None + transcript_llm: Optional[str] = None + language: Optional[str] = None default_briefing: str num_segments: int + # Legacy fields (for display/migration awareness) + outline_provider: Optional[str] = None + outline_model: Optional[str] = None + transcript_provider: Optional[str] = None + transcript_model: Optional[str] = None + + +def _profile_to_response(profile: EpisodeProfile) -> EpisodeProfileResponse: + return EpisodeProfileResponse( + id=str(profile.id), + name=profile.name, + description=profile.description or "", + speaker_config=profile.speaker_config, + outline_llm=profile.outline_llm, + transcript_llm=profile.transcript_llm, + language=profile.language, + default_briefing=profile.default_briefing, + num_segments=profile.num_segments, + outline_provider=profile.outline_provider, + outline_model=profile.outline_model, + 
transcript_provider=profile.transcript_provider, + transcript_model=profile.transcript_model, + ) @router.get("/episode-profiles", response_model=List[EpisodeProfileResponse]) @@ -27,23 +49,7 @@ async def list_episode_profiles(): """List all available episode profiles""" try: profiles = await EpisodeProfile.get_all(order_by="name asc") - - return [ - EpisodeProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - speaker_config=profile.speaker_config, - outline_provider=profile.outline_provider, - outline_model=profile.outline_model, - transcript_provider=profile.transcript_provider, - transcript_model=profile.transcript_model, - default_briefing=profile.default_briefing, - num_segments=profile.num_segments, - ) - for profile in profiles - ] - + return [_profile_to_response(p) for p in profiles] except Exception as e: logger.error(f"Failed to fetch episode profiles: {e}") raise HTTPException( @@ -62,18 +68,7 @@ async def get_episode_profile(profile_name: str): status_code=404, detail=f"Episode profile '{profile_name}' not found" ) - return EpisodeProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - speaker_config=profile.speaker_config, - outline_provider=profile.outline_provider, - outline_model=profile.outline_model, - transcript_provider=profile.transcript_provider, - transcript_model=profile.transcript_model, - default_briefing=profile.default_briefing, - num_segments=profile.num_segments, - ) + return _profile_to_response(profile) except HTTPException: raise @@ -88,14 +83,18 @@ class EpisodeProfileCreate(BaseModel): name: str = Field(..., description="Unique profile name") description: str = Field("", description="Profile description") speaker_config: str = Field(..., description="Reference to speaker profile name") - outline_provider: str = Field(..., description="AI provider for outline generation") - outline_model: str = Field(..., description="AI model for 
outline generation") - transcript_provider: str = Field( - ..., description="AI provider for transcript generation" + outline_llm: Optional[str] = Field(None, description="Model record ID for outline") + transcript_llm: Optional[str] = Field( + None, description="Model record ID for transcript" ) - transcript_model: str = Field(..., description="AI model for transcript generation") + language: Optional[str] = Field(None, description="Podcast language code") default_briefing: str = Field(..., description="Default briefing template") num_segments: int = Field(default=5, description="Number of podcast segments") + # Legacy fields (accepted but not required) + outline_provider: Optional[str] = None + outline_model: Optional[str] = None + transcript_provider: Optional[str] = None + transcript_model: Optional[str] = None @router.post("/episode-profiles", response_model=EpisodeProfileResponse) @@ -106,28 +105,19 @@ async def create_episode_profile(profile_data: EpisodeProfileCreate): name=profile_data.name, description=profile_data.description, speaker_config=profile_data.speaker_config, + outline_llm=profile_data.outline_llm, + transcript_llm=profile_data.transcript_llm, + language=profile_data.language, + default_briefing=profile_data.default_briefing, + num_segments=profile_data.num_segments, outline_provider=profile_data.outline_provider, outline_model=profile_data.outline_model, transcript_provider=profile_data.transcript_provider, transcript_model=profile_data.transcript_model, - default_briefing=profile_data.default_briefing, - num_segments=profile_data.num_segments, ) await profile.save() - - return EpisodeProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - speaker_config=profile.speaker_config, - outline_provider=profile.outline_provider, - outline_model=profile.outline_model, - transcript_provider=profile.transcript_provider, - transcript_model=profile.transcript_model, - 
default_briefing=profile.default_briefing, - num_segments=profile.num_segments, - ) + return _profile_to_response(profile) except Exception as e: logger.error(f"Failed to create episode profile: {e}") @@ -147,31 +137,21 @@ async def update_episode_profile(profile_id: str, profile_data: EpisodeProfileCr status_code=404, detail=f"Episode profile '{profile_id}' not found" ) - # Update fields profile.name = profile_data.name profile.description = profile_data.description profile.speaker_config = profile_data.speaker_config + profile.outline_llm = profile_data.outline_llm + profile.transcript_llm = profile_data.transcript_llm + profile.language = profile_data.language + profile.default_briefing = profile_data.default_briefing + profile.num_segments = profile_data.num_segments profile.outline_provider = profile_data.outline_provider profile.outline_model = profile_data.outline_model profile.transcript_provider = profile_data.transcript_provider profile.transcript_model = profile_data.transcript_model - profile.default_briefing = profile_data.default_briefing - profile.num_segments = profile_data.num_segments await profile.save() - - return EpisodeProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - speaker_config=profile.speaker_config, - outline_provider=profile.outline_provider, - outline_model=profile.outline_model, - transcript_provider=profile.transcript_provider, - transcript_model=profile.transcript_model, - default_briefing=profile.default_briefing, - num_segments=profile.num_segments, - ) + return _profile_to_response(profile) except HTTPException: raise @@ -219,33 +199,23 @@ async def duplicate_episode_profile(profile_id: str): status_code=404, detail=f"Episode profile '{profile_id}' not found" ) - # Create duplicate with modified name duplicate = EpisodeProfile( name=f"{original.name} - Copy", description=original.description, speaker_config=original.speaker_config, + outline_llm=original.outline_llm, + 
transcript_llm=original.transcript_llm, + language=original.language, + default_briefing=original.default_briefing, + num_segments=original.num_segments, outline_provider=original.outline_provider, outline_model=original.outline_model, transcript_provider=original.transcript_provider, transcript_model=original.transcript_model, - default_briefing=original.default_briefing, - num_segments=original.num_segments, ) await duplicate.save() - - return EpisodeProfileResponse( - id=str(duplicate.id), - name=duplicate.name, - description=duplicate.description or "", - speaker_config=duplicate.speaker_config, - outline_provider=duplicate.outline_provider, - outline_model=duplicate.outline_model, - transcript_provider=duplicate.transcript_provider, - transcript_model=duplicate.transcript_model, - default_briefing=duplicate.default_briefing, - num_segments=duplicate.num_segments, - ) + return _profile_to_response(duplicate) except HTTPException: raise diff --git a/api/routers/languages.py b/api/routers/languages.py new file mode 100644 index 0000000..a5d1db8 --- /dev/null +++ b/api/routers/languages.py @@ -0,0 +1,83 @@ +from typing import List + +import pycountry +from babel import Locale +from babel.core import get_global +from fastapi import APIRouter +from pydantic import BaseModel + +router = APIRouter() + +# Additional regional variants for languages where the distinction matters +# (TTS accent, vocabulary, spelling differences) +_EXTRA_VARIANTS = [ + "pt_PT", + "en_GB", + "en_AU", + "en_IN", + "es_MX", + "es_AR", + "es_CO", + "fr_CA", + "fr_CH", + "zh_TW", + "zh_HK", + "de_AT", + "de_CH", + "ar_SA", + "nl_BE", +] + + +class LanguageResponse(BaseModel): + code: str + name: str + + +@router.get("/languages", response_model=List[LanguageResponse]) +async def list_languages(): + """List available languages as BCP 47 locale codes (e.g. pt-BR, en-US).""" + likely_subtags = get_global("likely_subtags") + languages = [] + seen = set() + + # 1. 
For each language, resolve its default locale via CLDR likely subtags + for lang in pycountry.languages: + if not hasattr(lang, "alpha_2"): + continue + + code = lang.alpha_2 + likely = likely_subtags.get(code) + + if likely: + try: + loc = Locale.parse(likely) + if loc.territory: + bcp47 = f"{loc.language}-{loc.territory}" + display = loc.get_display_name("en") + if bcp47 not in seen: + seen.add(bcp47) + languages.append(LanguageResponse(code=bcp47, name=display)) + continue + except Exception: + pass + + # Fallback: bare language code + if code not in seen: + seen.add(code) + languages.append(LanguageResponse(code=code, name=lang.name)) + + # 2. Add important regional variants + for locale_str in _EXTRA_VARIANTS: + try: + loc = Locale.parse(locale_str) + bcp47 = f"{loc.language}-{loc.territory}" + if bcp47 not in seen: + seen.add(bcp47) + display = loc.get_display_name("en") + languages.append(LanguageResponse(code=bcp47, name=display)) + except Exception: + pass + + languages.sort(key=lambda x: x.name) + return languages diff --git a/api/routers/speaker_profiles.py b/api/routers/speaker_profiles.py index 3ce8886..a440160 100644 --- a/api/routers/speaker_profiles.py +++ b/api/routers/speaker_profiles.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from fastapi import APIRouter, HTTPException from loguru import logger @@ -13,9 +13,23 @@ class SpeakerProfileResponse(BaseModel): id: str name: str description: str - tts_provider: str - tts_model: str + voice_model: Optional[str] = None speakers: List[Dict[str, Any]] + # Legacy fields (for display/migration awareness) + tts_provider: Optional[str] = None + tts_model: Optional[str] = None + + +def _profile_to_response(profile: SpeakerProfile) -> SpeakerProfileResponse: + return SpeakerProfileResponse( + id=str(profile.id), + name=profile.name, + description=profile.description or "", + voice_model=profile.voice_model, + speakers=profile.speakers, + 
tts_provider=profile.tts_provider, + tts_model=profile.tts_model, + ) @router.get("/speaker-profiles", response_model=List[SpeakerProfileResponse]) @@ -23,19 +37,7 @@ async def list_speaker_profiles(): """List all available speaker profiles""" try: profiles = await SpeakerProfile.get_all(order_by="name asc") - - return [ - SpeakerProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - tts_provider=profile.tts_provider, - tts_model=profile.tts_model, - speakers=profile.speakers, - ) - for profile in profiles - ] - + return [_profile_to_response(p) for p in profiles] except Exception as e: logger.error(f"Failed to fetch speaker profiles: {e}") raise HTTPException( @@ -54,14 +56,7 @@ async def get_speaker_profile(profile_name: str): status_code=404, detail=f"Speaker profile '{profile_name}' not found" ) - return SpeakerProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - tts_provider=profile.tts_provider, - tts_model=profile.tts_model, - speakers=profile.speakers, - ) + return _profile_to_response(profile) except HTTPException: raise @@ -75,11 +70,13 @@ async def get_speaker_profile(profile_name: str): class SpeakerProfileCreate(BaseModel): name: str = Field(..., description="Unique profile name") description: str = Field("", description="Profile description") - tts_provider: str = Field(..., description="TTS provider") - tts_model: str = Field(..., description="TTS model name") + voice_model: Optional[str] = Field(None, description="Model record ID for TTS") speakers: List[Dict[str, Any]] = Field( ..., description="Array of speaker configurations" ) + # Legacy fields (accepted but not required) + tts_provider: Optional[str] = None + tts_model: Optional[str] = None @router.post("/speaker-profiles", response_model=SpeakerProfileResponse) @@ -89,21 +86,14 @@ async def create_speaker_profile(profile_data: SpeakerProfileCreate): profile = SpeakerProfile( 
name=profile_data.name, description=profile_data.description, + voice_model=profile_data.voice_model, + speakers=profile_data.speakers, tts_provider=profile_data.tts_provider, tts_model=profile_data.tts_model, - speakers=profile_data.speakers, ) await profile.save() - - return SpeakerProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - tts_provider=profile.tts_provider, - tts_model=profile.tts_model, - speakers=profile.speakers, - ) + return _profile_to_response(profile) except Exception as e: logger.error(f"Failed to create speaker profile: {e}") @@ -123,23 +113,15 @@ async def update_speaker_profile(profile_id: str, profile_data: SpeakerProfileCr status_code=404, detail=f"Speaker profile '{profile_id}' not found" ) - # Update fields profile.name = profile_data.name profile.description = profile_data.description + profile.voice_model = profile_data.voice_model + profile.speakers = profile_data.speakers profile.tts_provider = profile_data.tts_provider profile.tts_model = profile_data.tts_model - profile.speakers = profile_data.speakers await profile.save() - - return SpeakerProfileResponse( - id=str(profile.id), - name=profile.name, - description=profile.description or "", - tts_provider=profile.tts_provider, - tts_model=profile.tts_model, - speakers=profile.speakers, - ) + return _profile_to_response(profile) except HTTPException: raise @@ -187,25 +169,17 @@ async def duplicate_speaker_profile(profile_id: str): status_code=404, detail=f"Speaker profile '{profile_id}' not found" ) - # Create duplicate with modified name duplicate = SpeakerProfile( name=f"{original.name} - Copy", description=original.description, + voice_model=original.voice_model, + speakers=original.speakers, tts_provider=original.tts_provider, tts_model=original.tts_model, - speakers=original.speakers, ) await duplicate.save() - - return SpeakerProfileResponse( - id=str(duplicate.id), - name=duplicate.name, - description=duplicate.description or 
"", - tts_provider=duplicate.tts_provider, - tts_model=duplicate.tts_model, - speakers=duplicate.speakers, - ) + return _profile_to_response(duplicate) except HTTPException: raise diff --git a/commands/CLAUDE.md b/commands/CLAUDE.md index de5ec45..708e843 100644 --- a/commands/CLAUDE.md +++ b/commands/CLAUDE.md @@ -16,7 +16,7 @@ - **`process_source_command`**: Ingests content through `source_graph`, creates embeddings (optional), and generates insights. Retries on transaction conflicts (exp. jitter, max 15×, 1-120s). - **`run_transformation_command`**: Runs a transformation on an existing source to generate an insight. Executes the transformation graph (LLM call) then creates insight via `create_insight_command`. Used by `POST /sources/{id}/insights` API endpoint. Retry: 5 attempts, exponential jitter 1-60s. -- **`generate_podcast_command`**: Creates podcasts via `podcast-creator` library using stored episode/speaker profiles. +- **`generate_podcast_command`**: Creates podcasts via podcast-creator library. Resolves model registry references and credentials for all profiles before invoking podcast-creator. Validates that outline_llm, transcript_llm, and voice_model are configured. - **`process_text_command`** (example): Test fixture for text operations (uppercase, lowercase, reverse, word_count). - **`analyze_data_command`** (example): Test fixture for numeric aggregations. @@ -43,7 +43,7 @@ - **source_commands**: `ensure_record_id()` wraps command IDs for DB storage; transaction conflicts trigger exponential backoff retry. ValueError exceptions are permanent (not retried). - **embedding_commands**: Content type detection uses file extension as primary source, heuristics as fallback. Chunks >1800 chars trigger secondary splitting. Empty/whitespace-only content returns ValueError (not retried). - **rebuild_embeddings_command**: Returns "jobs_submitted" not "processed_items" - embedding is async. Individual commands handle failures with their own retries. 
-- **podcast_commands**: Profiles loaded from SurrealDB by name (must exist); briefing can be extended with suffix. Episode records created mid-execution. +- **podcast_commands**: Profiles loaded from SurrealDB by name; model configs (credentials) resolved for ALL profiles before podcast-creator validation. Validates outline_llm/transcript_llm/voice_model are set. Episode records created mid-execution. - **Example commands**: Accept optional `delay_seconds` for testing async behavior; not for production. ## Code Example diff --git a/commands/podcast_commands.py b/commands/podcast_commands.py index 3ab4e03..e273c4a 100644 --- a/commands/podcast_commands.py +++ b/commands/podcast_commands.py @@ -8,7 +8,12 @@ from surreal_commands import CommandInput, CommandOutput, command from open_notebook.config import DATA_FOLDER from open_notebook.database.repository import ensure_record_id, repo_query -from open_notebook.podcasts.models import EpisodeProfile, PodcastEpisode, SpeakerProfile +from open_notebook.podcasts.models import ( + EpisodeProfile, + PodcastEpisode, + SpeakerProfile, + _resolve_model_config, +) try: from podcast_creator import configure, create_podcast @@ -79,7 +84,41 @@ async def generate_podcast_command( logger.info(f"Loaded episode profile: {episode_profile.name}") logger.info(f"Loaded speaker profile: {speaker_profile.name}") - # 3. Load all profiles and configure podcast-creator + # 2. Validate that model registry fields are populated + if not episode_profile.outline_llm: + raise ValueError( + f"Episode profile '{episode_profile.name}' has no outline model configured. " + "Please update the profile to select an outline model." + ) + if not episode_profile.transcript_llm: + raise ValueError( + f"Episode profile '{episode_profile.name}' has no transcript model configured. " + "Please update the profile to select a transcript model." 
+ ) + if not speaker_profile.voice_model: + raise ValueError( + f"Speaker profile '{speaker_profile.name}' has no voice model configured. " + "Please update the profile to select a voice model." + ) + + # 3. Resolve model configs with credentials + outline_provider, outline_model_name, outline_config = ( + await episode_profile.resolve_outline_config() + ) + transcript_provider, transcript_model_name, transcript_config = ( + await episode_profile.resolve_transcript_config() + ) + tts_provider, tts_model_name, tts_config = ( + await speaker_profile.resolve_tts_config() + ) + + logger.info( + f"Resolved models - outline: {outline_provider}/{outline_model_name}, " + f"transcript: {transcript_provider}/{transcript_model_name}, " + f"tts: {tts_provider}/{tts_model_name}" + ) + + # 4. Load all profiles and configure podcast-creator episode_profiles = await repo_query("SELECT * FROM episode_profile") speaker_profiles = await repo_query("SELECT * FROM speaker_profile") @@ -91,12 +130,74 @@ async def generate_podcast_command( profile["name"]: profile for profile in speaker_profiles } - # 4. Generate briefing + # 5. Inject resolved model configs into profile dicts + # Resolve ALL episode profiles (podcast-creator validates all). + # Remove profiles that fail resolution to prevent validation errors. 
+ for ep_name in list(episode_profiles_dict.keys()): + ep_dict = episode_profiles_dict[ep_name] + try: + if ep_dict.get("outline_llm"): + prov, model, conf = await _resolve_model_config( + str(ep_dict["outline_llm"]) + ) + ep_dict["outline_provider"] = prov + ep_dict["outline_model"] = model + ep_dict["outline_config"] = conf + if ep_dict.get("transcript_llm"): + prov, model, conf = await _resolve_model_config( + str(ep_dict["transcript_llm"]) + ) + ep_dict["transcript_provider"] = prov + ep_dict["transcript_model"] = model + ep_dict["transcript_config"] = conf + except Exception as e: + logger.warning( + f"Failed to resolve models for episode profile '{ep_name}', " + f"removing from config to prevent validation errors: {e}" + ) + del episode_profiles_dict[ep_name] + + # Resolve TTS for ALL speaker profiles (podcast-creator validates all). + # Remove profiles that fail resolution to prevent validation errors. + for sp_name in list(speaker_profiles_dict.keys()): + sp_dict = speaker_profiles_dict[sp_name] + if sp_dict.get("voice_model"): + try: + prov, model, conf = await _resolve_model_config( + str(sp_dict["voice_model"]) + ) + sp_dict["tts_provider"] = prov + sp_dict["tts_model"] = model + sp_dict["tts_config"] = conf + except Exception as e: + logger.warning( + f"Failed to resolve TTS for speaker profile '{sp_name}', " + f"removing from config to prevent validation errors: {e}" + ) + del speaker_profiles_dict[sp_name] + continue + + # Per-speaker TTS overrides + for speaker in sp_dict.get("speakers", []): + if speaker.get("voice_model"): + try: + prov, model, conf = await _resolve_model_config( + str(speaker["voice_model"]) + ) + speaker["tts_provider"] = prov + speaker["tts_model"] = model + speaker["tts_config"] = conf + except Exception as e: + logger.warning( + f"Failed to resolve per-speaker TTS for '{speaker.get('name')}': {e}" + ) + + # 6. 
Generate briefing briefing = episode_profile.default_briefing if input_data.briefing_suffix: briefing += f"\n\nAdditional instructions: {input_data.briefing_suffix}" - # Create the a record for the episose and associate with the ongoing command + # Create the record for the episode and associate with the ongoing command episode = PodcastEpisode( name=input_data.episode_name, episode_profile=full_model_dump(episode_profile.model_dump()), @@ -119,13 +220,13 @@ async def generate_podcast_command( logger.info(f"Generated briefing (length: {len(briefing)} chars)") - # 5. Create output directory + # 7. Create output directory output_dir = Path(f"{DATA_FOLDER}/podcasts/episodes/{input_data.episode_name}") output_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Created output directory: {output_dir}") - # 6. Generate podcast using podcast-creator + # 8. Generate podcast using podcast-creator logger.info("Starting podcast generation with podcast-creator...") result = await create_podcast( diff --git a/docs/2-CORE-CONCEPTS/podcasts-explained.md b/docs/2-CORE-CONCEPTS/podcasts-explained.md index 449551e..d023d28 100644 --- a/docs/2-CORE-CONCEPTS/podcasts-explained.md +++ b/docs/2-CORE-CONCEPTS/podcasts-explained.md @@ -94,13 +94,14 @@ Speaker 1: "Expert Alex" ├─ Expertise: "Deep knowledge of alignment research" ├─ Personality: "Rigorous, academic, patient with explanation" ├─ Accent: (Optional) "British English" -└─ TTS Voice: "OpenAI Onyx" (or ElevenLabs, Google, etc.) +└─ Voice Model: Selected from model registry (e.g., OpenAI TTS) + └─ Optional per-speaker override of the episode's default voice model Speaker 2: "Researcher Sam" ├─ Expertise: "Field observer, pragmatic perspective" ├─ Personality: "Curious, asks clarifying questions" ├─ Accent: "American English" -└─ TTS Voice: "ElevenLabs - thoughtful" +└─ Voice Model: Selected from model registry (e.g., ElevenLabs TTS) ``` ### Stage 4: Outline Generation @@ -147,10 +148,10 @@ Alex: "Exactly. 
And that's where the three approaches come in..." ### Stage 6: Text-to-Speech -System converts dialogue to audio: +System converts dialogue to audio using the voice models configured in the model registry. Credentials are automatically resolved from each model's configuration. ``` -Alex's text → OpenAI TTS → Alex's voice (audio file) -Sam's text → ElevenLabs TTS → Sam's voice (audio file) +Alex's text → Voice model (from registry) → Alex's voice (audio file) +Sam's text → Voice model (from registry) → Sam's voice (audio file) Audio files → Mix together → Final podcast MP3 ``` @@ -181,7 +182,7 @@ When podcast generation fails (e.g., wrong model configured, API key expired, pr | Error | What to Do | |-------|-----------| | Invalid API key | Check Settings -> Credentials for the TTS and language model providers | -| Model not found | Verify the model name in your episode profile exists and is correctly configured | +| Model not found | Verify the model exists in the model registry and has valid credentials configured | | Rate limit exceeded | Wait a few minutes and retry | | Provider unavailable | Check provider status page; retry later | @@ -314,7 +315,7 @@ New team member listens, gets context faster than reading 100 documents 4. Decide on podcast ├─→ Create speaker profiles ├─→ Define episode profile - ├─→ Choose TTS provider + ├─→ Configure voice models (from model registry) └─→ Generate podcast ↓ 5. Listen while commuting/exercising diff --git a/docs/3-USER-GUIDE/creating-podcasts.md b/docs/3-USER-GUIDE/creating-podcasts.md index e91eb54..dbb312a 100644 --- a/docs/3-USER-GUIDE/creating-podcasts.md +++ b/docs/3-USER-GUIDE/creating-podcasts.md @@ -74,7 +74,7 @@ An episode profile defines the structure and tone. **Option A: Use Preset Profile** ``` -Open Notebook provides templates: +Open Notebook provides preset profiles: Academic Presentation (Monologue) ├─ 1 speaker @@ -140,22 +140,22 @@ Speakers are the "voice" of your podcast. 
**Option A: Use Preset Speakers** ``` -Open Notebook provides templates: +Open Notebook provides preset profiles: "Expert Alex" - Expertise: Deep knowledge - Personality: Rigorous, patient -- TTS: OpenAI (clear voice) +- Voice Model: Selected from model registry "Curious Sam" - Expertise: Curious newcomer - Personality: Asks questions -- TTS: Google (natural voice) +- Voice Model: Selected from model registry "Skeptic Jordan" - Expertise: Critical perspective - Personality: Challenges assumptions -- TTS: ElevenLabs (warm voice) +- Voice Model: Selected from model registry For your first podcast: Use presets For custom podcast: Create your own @@ -179,15 +179,17 @@ Personality: explains clearly, asks good questions" Voice Configuration: -- TTS Provider: OpenAI / Google / ElevenLabs / Local -- Voice selection: Choose from available voices -- Accent (optional): British / American / etc. +- Voice Model: Select from model registry (e.g., OpenAI TTS, Google TTS, ElevenLabs) +- Voice: Choose from available voices for the selected model +- Per-speaker override: Each speaker can optionally use a different voice model + +Credentials are automatically resolved from the model configuration. Example: Name: Dr. Research Expert Expertise: AI safety alignment research Personality: Rigorous, academic but accessible -Voice: ElevenLabs - professional male voice +Voice Model: ElevenLabs TTS (from registry), Voice: professional male ``` ### Step 6: Generate Podcast @@ -463,7 +465,7 @@ Rule: 3-5 sources per podcast **Solutions**: ``` -1. Choose different TTS providers (OpenAI + Google) +1. Choose different voice models from the registry for each speaker 2. Choose very different voice options 3. Increase personality differences in profile 4. 
Try different speaker count (2 vs 3 vs 4) diff --git a/docs/3-USER-GUIDE/interface-overview.md b/docs/3-USER-GUIDE/interface-overview.md index 0ca78cf..1ce9238 100644 --- a/docs/3-USER-GUIDE/interface-overview.md +++ b/docs/3-USER-GUIDE/interface-overview.md @@ -200,8 +200,8 @@ Inside a notebook, switch to Podcasts: │ Episode Profile: [Select ▼] │ │ │ │ Speakers: │ -│ ├─ Host: Alex (OpenAI) │ -│ └─ Guest: Sam (Google) │ +│ ├─ Host: Alex (voice model) │ +│ └─ Guest: Sam (voice model) │ │ │ │ Include: │ │ ☑ Paper.pdf │ diff --git a/frontend/src/app/(dashboard)/podcasts/page.tsx b/frontend/src/app/(dashboard)/podcasts/page.tsx index fe55d86..8616464 100644 --- a/frontend/src/app/(dashboard)/podcasts/page.tsx +++ b/frontend/src/app/(dashboard)/podcasts/page.tsx @@ -1,18 +1,29 @@ 'use client' -import { useState } from 'react' +import { useMemo, useState } from 'react' +import { AlertTriangle } from 'lucide-react' import { AppShell } from '@/components/layout/AppShell' import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs' +import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert' import { EpisodesTab } from '@/components/podcasts/EpisodesTab' import { TemplatesTab } from '@/components/podcasts/TemplatesTab' import { Mic, LayoutTemplate } from 'lucide-react' import { useTranslation } from '@/lib/hooks/use-translation' +import { useEpisodeProfiles, useSpeakerProfiles } from '@/lib/hooks/use-podcasts' +import { needsModelSetup } from '@/lib/types/podcasts' export default function PodcastsPage() { const { t } = useTranslation() const [activeTab, setActiveTab] = useState<'episodes' | 'templates'>('episodes') + const { episodeProfiles } = useEpisodeProfiles() + const { speakerProfiles } = useSpeakerProfiles(episodeProfiles) + + const hasUnconfiguredProfiles = useMemo(() => { + return episodeProfiles.some(needsModelSetup) || speakerProfiles.some(needsModelSetup) + }, [episodeProfiles, speakerProfiles]) + return (
@@ -24,6 +35,16 @@ export default function PodcastsPage() {

+ {hasUnconfiguredProfiles ? ( + + + {t.podcasts.setupRequired} + + {t.podcasts.setupRequiredDesc} + + + ) : null} + setActiveTab(value as 'episodes' | 'templates')} diff --git a/frontend/src/components/podcasts/EpisodeProfilesPanel.tsx b/frontend/src/components/podcasts/EpisodeProfilesPanel.tsx index 60dd42b..44064a5 100644 --- a/frontend/src/components/podcasts/EpisodeProfilesPanel.tsx +++ b/frontend/src/components/podcasts/EpisodeProfilesPanel.tsx @@ -1,13 +1,14 @@ 'use client' import { useMemo, useState } from 'react' -import { Copy, Edit3, MoreVertical, Trash2, Users } from 'lucide-react' +import { AlertTriangle, Copy, Edit3, MoreVertical, Trash2, Users } from 'lucide-react' -import { EpisodeProfile, SpeakerProfile } from '@/lib/types/podcasts' +import { EpisodeProfile, SpeakerProfile, needsModelSetup } from '@/lib/types/podcasts' import { useDeleteEpisodeProfile, useDuplicateEpisodeProfile, } from '@/lib/hooks/use-podcasts' +import { useModels } from '@/lib/hooks/use-models' import { EpisodeProfileFormDialog } from '@/components/podcasts/forms/EpisodeProfileFormDialog' import { AlertDialog, @@ -41,7 +42,6 @@ import { useTranslation } from '@/lib/hooks/use-translation' interface EpisodeProfilesPanelProps { episodeProfiles: EpisodeProfile[] speakerProfiles: SpeakerProfile[] - modelOptions: Record } function findSpeakerSummary( @@ -54,7 +54,6 @@ function findSpeakerSummary( export function EpisodeProfilesPanel({ episodeProfiles, speakerProfiles, - modelOptions, }: EpisodeProfilesPanelProps) { const { t } = useTranslation() const [createOpen, setCreateOpen] = useState(false) @@ -62,6 +61,15 @@ export function EpisodeProfilesPanel({ const deleteProfile = useDeleteEpisodeProfile() const duplicateProfile = useDuplicateEpisodeProfile() + const { data: models = [] } = useModels() + + const modelNameMap = useMemo(() => { + const map: Record = {} + for (const m of models) { + map[m.id] = `${m.provider} / ${m.name}` + } + return map + }, [models]) const sortedProfiles = 
useMemo( () => @@ -102,14 +110,23 @@ export function EpisodeProfilesPanel({ speakerProfiles, profile.speaker_config ) + const unconfigured = needsModelSetup(profile) return (
- - {profile.name} - +
+ + {profile.name} + + {unconfigured ? ( + + + {t.podcasts.setupRequired} + + ) : null} +
{profile.description || t.podcasts.noDescription} @@ -183,7 +200,11 @@ export function EpisodeProfilesPanel({ {t.podcasts.outlineModel}

- {profile.outline_provider} / {profile.outline_model} + {profile.outline_llm + ? (modelNameMap[profile.outline_llm] ?? profile.outline_llm) + : (profile.outline_provider && profile.outline_model + ? `${profile.outline_provider} / ${profile.outline_model}` + : t.podcasts.notConfigured)}

@@ -191,7 +212,11 @@ export function EpisodeProfilesPanel({ {t.podcasts.transcriptModel}

- {profile.transcript_provider} / {profile.transcript_model} + {profile.transcript_llm + ? (modelNameMap[profile.transcript_llm] ?? profile.transcript_llm) + : (profile.transcript_provider && profile.transcript_model + ? `${profile.transcript_provider} / ${profile.transcript_model}` + : t.podcasts.notConfigured)}

@@ -200,6 +225,14 @@ export function EpisodeProfilesPanel({

{profile.num_segments}

+ {profile.language ? ( +
+

+ {t.podcasts.language} +

+

{profile.language}

+
+ ) : null}

{t.podcasts.speakerProfile} @@ -207,7 +240,11 @@ export function EpisodeProfilesPanel({

{profile.speaker_config} - {speakerSummary ? ( + {speakerSummary?.voice_model ? ( + + {modelNameMap[speakerSummary.voice_model] ?? speakerSummary.voice_model} + + ) : speakerSummary?.tts_provider ? ( {speakerSummary.tts_provider} / {speakerSummary.tts_model} @@ -238,7 +275,6 @@ export function EpisodeProfilesPanel({ open={createOpen} onOpenChange={setCreateOpen} speakerProfiles={speakerProfiles} - modelOptions={modelOptions} />
diff --git a/frontend/src/components/podcasts/SpeakerProfilesPanel.tsx b/frontend/src/components/podcasts/SpeakerProfilesPanel.tsx index 4480a82..dd5c504 100644 --- a/frontend/src/components/podcasts/SpeakerProfilesPanel.tsx +++ b/frontend/src/components/podcasts/SpeakerProfilesPanel.tsx @@ -1,13 +1,14 @@ 'use client' import { useMemo, useState } from 'react' -import { Copy, Edit3, MoreVertical, Trash2, Volume2 } from 'lucide-react' +import { AlertTriangle, Copy, Edit3, MoreVertical, Trash2, Volume2 } from 'lucide-react' -import { SpeakerProfile } from '@/lib/types/podcasts' +import { SpeakerProfile, needsModelSetup } from '@/lib/types/podcasts' import { useDeleteSpeakerProfile, useDuplicateSpeakerProfile, } from '@/lib/hooks/use-podcasts' +import { useModels } from '@/lib/hooks/use-models' import { SpeakerProfileFormDialog } from '@/components/podcasts/forms/SpeakerProfileFormDialog' import { AlertDialog, @@ -40,13 +41,11 @@ import { useTranslation } from '@/lib/hooks/use-translation' interface SpeakerProfilesPanelProps { speakerProfiles: SpeakerProfile[] - modelOptions: Record usage: Record } export function SpeakerProfilesPanel({ speakerProfiles, - modelOptions, usage, }: SpeakerProfilesPanelProps) { const { t } = useTranslation() @@ -55,10 +54,19 @@ export function SpeakerProfilesPanel({ const deleteProfile = useDeleteSpeakerProfile() const duplicateProfile = useDuplicateSpeakerProfile() + const { data: models = [] } = useModels() + + const modelNameMap = useMemo(() => { + const map: Record = {} + for (const m of models) { + map[m.id] = `${m.provider} / ${m.name}` + } + return map + }, [models]) const sortedProfiles = useMemo( () => - [...speakerProfiles].sort((a, b) => a.name.localeCompare(b.name, 'en')), + [...speakerProfiles].sort((a, b) => a.name.localeCompare(b.name, 'en')), [speakerProfiles] ) @@ -83,21 +91,34 @@ export function SpeakerProfilesPanel({ {sortedProfiles.map((profile) => { const usageCount = usage[profile.name] ?? 
0 const deleteDisabled = usageCount > 0 + const unconfigured = needsModelSetup(profile) return (
- - {profile.name} - +
+ + {profile.name} + + {unconfigured ? ( + + + {t.podcasts.setupRequired} + + ) : null} +
{profile.description || t.podcasts.noDescription}
- {profile.tts_provider} / {profile.tts_model} + {profile.voice_model + ? (modelNameMap[profile.voice_model] ?? profile.voice_model) + : (profile.tts_provider && profile.tts_model /* legacy pair: render only when both halves exist, matching the outline/transcript fallbacks in EpisodeProfilesPanel; otherwise show "not configured" instead of "provider / undefined" */ + ? `${profile.tts_provider} / ${profile.tts_model}` + : t.podcasts.notConfigured)}
@@ -126,9 +147,16 @@ export function SpeakerProfilesPanel({ {speaker.name}
- - {t.podcasts.voiceId}: {speaker.voice_id} - +
+ + {t.podcasts.voiceId}: {speaker.voice_id} + + {speaker.voice_model ? ( + + {modelNameMap[speaker.voice_model] ?? speaker.voice_model} + + ) : null} +

{t.podcasts.backstory}: {speaker.backstory} @@ -219,7 +247,6 @@ export function SpeakerProfilesPanel({ mode="create" open={createOpen} onOpenChange={setCreateOpen} - modelOptions={modelOptions} />

diff --git a/frontend/src/components/podcasts/TemplatesTab.tsx b/frontend/src/components/podcasts/TemplatesTab.tsx index 097afe9..fa1eaea 100644 --- a/frontend/src/components/podcasts/TemplatesTab.tsx +++ b/frontend/src/components/podcasts/TemplatesTab.tsx @@ -1,29 +1,14 @@ 'use client' -import { useMemo } from 'react' import { AlertCircle, Lightbulb, Loader2 } from 'lucide-react' import { EpisodeProfilesPanel } from '@/components/podcasts/EpisodeProfilesPanel' import { SpeakerProfilesPanel } from '@/components/podcasts/SpeakerProfilesPanel' import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert' import { useEpisodeProfiles, useSpeakerProfiles } from '@/lib/hooks/use-podcasts' -import { useModels } from '@/lib/hooks/use-models' -import { Model } from '@/lib/types/models' import { Accordion, AccordionContent, AccordionItem, AccordionTrigger } from '@/components/ui/accordion' import { useTranslation } from '@/lib/hooks/use-translation' -function modelsByProvider(models: Model[], type: Model['type']) { - return models - .filter((model) => model.type === type) - .reduce>((acc, model) => { - if (!acc[model.provider]) { - acc[model.provider] = [] - } - acc[model.provider].push(model.name) - return acc - }, {}) -} - export function TemplatesTab() { const { t } = useTranslation() const { @@ -39,23 +24,8 @@ export function TemplatesTab() { error: speakerProfilesError, } = useSpeakerProfiles(episodeProfiles) - const { - data: models = [], - isLoading: loadingModels, - error: modelsError, - } = useModels() - - const languageModelOptions = useMemo( - () => modelsByProvider(models, 'language'), - [models] - ) - const ttsModelOptions = useMemo( - () => modelsByProvider(models, 'text_to_speech'), - [models] - ) - - const isLoading = loadingEpisodeProfiles || loadingSpeakerProfiles || loadingModels - const hasError = episodeProfilesError || speakerProfilesError || modelsError + const isLoading = loadingEpisodeProfiles || loadingSpeakerProfiles + const hasError 
= episodeProfilesError || speakerProfilesError return (
@@ -67,8 +37,8 @@ export function TemplatesTab() {
- @@ -137,12 +107,10 @@ export function TemplatesTab() { )} diff --git a/frontend/src/components/podcasts/forms/EpisodeProfileFormDialog.tsx b/frontend/src/components/podcasts/forms/EpisodeProfileFormDialog.tsx index 8f4ecd4..8df7143 100644 --- a/frontend/src/components/podcasts/forms/EpisodeProfileFormDialog.tsx +++ b/frontend/src/components/podcasts/forms/EpisodeProfileFormDialog.tsx @@ -1,6 +1,6 @@ 'use client' -import { useCallback, useEffect, useMemo } from 'react' +import { useCallback, useEffect } from 'react' import { Controller, useForm } from 'react-hook-form' import { zodResolver } from '@hookform/resolvers/zod' import { z } from 'zod' @@ -9,6 +9,7 @@ import { EpisodeProfile, SpeakerProfile } from '@/lib/types/podcasts' import { useCreateEpisodeProfile, useUpdateEpisodeProfile, + useLanguages, } from '@/lib/hooks/use-podcasts' import { useTranslation } from '@/lib/hooks/use-translation' import { @@ -31,16 +32,16 @@ import { } from '@/components/ui/select' import { Textarea } from '@/components/ui/textarea' import { Separator } from '@/components/ui/separator' +import { ModelSelector } from '@/components/common/ModelSelector' import { TranslationKeys } from '@/lib/locales' const episodeProfileSchema = (t: TranslationKeys) => z.object({ name: z.string().min(1, t.podcasts.nameRequired || 'Name is required'), description: z.string().optional(), speaker_config: z.string().min(1, t.podcasts.profileRequired || 'Speaker profile is required'), - outline_provider: z.string().min(1, t.podcasts.outlineProviderRequired || 'Outline provider is required'), - outline_model: z.string().min(1, t.podcasts.outlineModelRequired || 'Outline model is required'), - transcript_provider: z.string().min(1, t.podcasts.transcriptProviderRequired || 'Transcript provider is required'), - transcript_model: z.string().min(1, t.podcasts.transcriptModelRequired || 'Transcript model is required'), + outline_llm: z.string().min(1, t.podcasts.outlineModelRequired || 'Outline model is 
required'), + transcript_llm: z.string().min(1, t.podcasts.transcriptModelRequired || 'Transcript model is required'), + language: z.string().nullable().optional(), default_briefing: z.string().min(1, t.podcasts.defaultBriefingRequired || 'Default briefing is required'), num_segments: z.number() .int(t.podcasts.segmentsInteger || 'Must be an integer') @@ -55,7 +56,6 @@ interface EpisodeProfileFormDialogProps { open: boolean onOpenChange: (open: boolean) => void speakerProfiles: SpeakerProfile[] - modelOptions: Record initialData?: EpisodeProfile } @@ -64,29 +64,24 @@ export function EpisodeProfileFormDialog({ open, onOpenChange, speakerProfiles, - modelOptions, initialData, }: EpisodeProfileFormDialogProps) { const { t } = useTranslation() const createProfile = useCreateEpisodeProfile() const updateProfile = useUpdateEpisodeProfile() - - const providers = useMemo(() => Object.keys(modelOptions), [modelOptions]) + const { data: languages = [] } = useLanguages() const getDefaults = useCallback((): EpisodeProfileFormValues => { const firstSpeaker = speakerProfiles[0]?.name ?? '' - const firstProvider = providers[0] ?? '' - const firstModel = firstProvider ? modelOptions[firstProvider]?.[0] ?? '' : '' if (initialData) { return { name: initialData.name, description: initialData.description ?? '', speaker_config: initialData.speaker_config, - outline_provider: initialData.outline_provider, - outline_model: initialData.outline_model, - transcript_provider: initialData.transcript_provider, - transcript_model: initialData.transcript_model, + outline_llm: initialData.outline_llm ?? '', + transcript_llm: initialData.transcript_llm ?? '', + language: initialData.language ?? 
null, default_briefing: initialData.default_briefing, num_segments: initialData.num_segments, } @@ -96,35 +91,25 @@ export function EpisodeProfileFormDialog({ name: '', description: '', speaker_config: firstSpeaker, - outline_provider: firstProvider, - outline_model: firstModel, - transcript_provider: firstProvider, - transcript_model: firstModel, + outline_llm: '', + transcript_llm: '', + language: null, default_briefing: '', num_segments: 5, } - }, [initialData, modelOptions, providers, speakerProfiles]) + }, [initialData, speakerProfiles]) const { control, register, handleSubmit, reset, - setValue, - watch, formState: { errors }, } = useForm({ resolver: zodResolver(episodeProfileSchema(t)), defaultValues: getDefaults(), }) - const outlineProvider = watch('outline_provider') - const outlineModel = watch('outline_model') - const transcriptProvider = watch('transcript_provider') - const transcriptModel = watch('transcript_model') - const availableOutlineModels = modelOptions[outlineProvider] ?? [] - const availableTranscriptModels = modelOptions[transcriptProvider] ?? [] - useEffect(() => { if (!open) { return @@ -132,38 +117,11 @@ export function EpisodeProfileFormDialog({ reset(getDefaults()) }, [open, reset, getDefaults]) - useEffect(() => { - if (!outlineProvider) { - return - } - const models = modelOptions[outlineProvider] ?? [] - if (models.length === 0) { - setValue('outline_model', '') - return - } - if (!models.includes(outlineModel)) { - setValue('outline_model', models[0]) - } - }, [outlineProvider, outlineModel, modelOptions, setValue]) - - useEffect(() => { - if (!transcriptProvider) { - return - } - const models = modelOptions[transcriptProvider] ?? 
[] - if (models.length === 0) { - setValue('transcript_model', '') - return - } - if (!models.includes(transcriptModel)) { - setValue('transcript_model', models[0]) - } - }, [transcriptProvider, transcriptModel, modelOptions, setValue]) - const onSubmit = async (values: EpisodeProfileFormValues) => { const payload = { ...values, description: values.description ?? '', + language: values.language || null, } if (mode === 'create') { @@ -179,8 +137,7 @@ export function EpisodeProfileFormDialog({ } const isSubmitting = createProfile.isPending || updateProfile.isPending - const disableSubmit = - isSubmitting || speakerProfiles.length === 0 || providers.length === 0 + const disableSubmit = isSubmitting || speakerProfiles.length === 0 const isEdit = mode === 'edit' return ( @@ -204,15 +161,6 @@ export function EpisodeProfileFormDialog({ ) : null} - {providers.length === 0 ? ( - - {t.podcasts.noLanguageModelsAvailable} - - {t.podcasts.noLanguageModelsDesc} - - - ) : null} -
@@ -292,61 +240,26 @@ export function EpisodeProfileFormDialog({
-
- ( -
- - - {errors.outline_provider ? ( -

- {errors.outline_provider.message} -

- ) : null} -
- )} - /> - - ( -
- - - {errors.outline_model ? ( -

- {errors.outline_model.message} -

- ) : null} -
- )} - /> -
+ ( +
+ + {errors.outline_llm ? ( +

+ {errors.outline_llm.message} +

+ ) : null} +
+ )} + />
@@ -356,61 +269,59 @@ export function EpisodeProfileFormDialog({
-
- ( -
- - - {errors.transcript_provider ? ( -

- {errors.transcript_provider.message} -

- ) : null} -
- )} - /> + ( +
+ + {errors.transcript_llm ? ( +

+ {errors.transcript_llm.message} +

+ ) : null} +
+ )} + /> +
- ( -
- - - {errors.transcript_model ? ( -

- {errors.transcript_model.message} -

- ) : null} -
- )} - /> +
+
+

+ {t.podcasts.podcastLanguage} +

+
+ ( +
+ + +
+ )} + />
diff --git a/frontend/src/components/podcasts/forms/SpeakerProfileFormDialog.tsx b/frontend/src/components/podcasts/forms/SpeakerProfileFormDialog.tsx index a4ecf80..efe6658 100644 --- a/frontend/src/components/podcasts/forms/SpeakerProfileFormDialog.tsx +++ b/frontend/src/components/podcasts/forms/SpeakerProfileFormDialog.tsx @@ -1,6 +1,6 @@ 'use client' -import { useCallback, useEffect, useMemo } from 'react' +import { useCallback, useEffect } from 'react' import { Controller, useFieldArray, useForm } from 'react-hook-form' import type { FieldErrorsImpl } from 'react-hook-form' import { zodResolver } from '@hookform/resolvers/zod' @@ -19,19 +19,12 @@ import { DialogHeader, DialogTitle, } from '@/components/ui/dialog' -import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert' import { Button } from '@/components/ui/button' import { Input } from '@/components/ui/input' import { Label } from '@/components/ui/label' -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from '@/components/ui/select' import { Textarea } from '@/components/ui/textarea' import { Separator } from '@/components/ui/separator' +import { ModelSelector } from '@/components/common/ModelSelector' import { TranslationKeys } from '@/lib/locales' import { useTranslation } from '@/lib/hooks/use-translation' @@ -41,13 +34,13 @@ const speakerConfigSchema = (t: TranslationKeys) => z.object({ voice_id: z.string().min(1, t.podcasts.voiceIdRequired || 'Voice ID is required'), backstory: z.string().min(1, t.podcasts.backstoryRequired || 'Backstory is required'), personality: z.string().min(1, t.podcasts.personalityRequired || 'Personality is required'), + voice_model: z.string().nullable().optional(), }) const speakerProfileSchema = (t: TranslationKeys) => z.object({ name: z.string().min(1, t.common.nameRequired || 'Name is required'), description: z.string().optional(), - tts_provider: z.string().min(1, t.models.providerRequired || 'Provider is required'), 
- tts_model: z.string().min(1, t.models.modelRequired || 'Model is required'), + voice_model: z.string().min(1, t.podcasts.voiceModelRequired || 'Voice model is required'), speakers: z .array(speakerConfigSchema(t)) .min(1, t.podcasts.speakerCountMin || 'At least one speaker is required') @@ -60,7 +53,6 @@ interface SpeakerProfileFormDialogProps { mode: 'create' | 'edit' open: boolean onOpenChange: (open: boolean) => void - modelOptions: Record initialData?: SpeakerProfile } @@ -69,51 +61,45 @@ const EMPTY_SPEAKER = { voice_id: '', backstory: '', personality: '', + voice_model: null as string | null, } export function SpeakerProfileFormDialog({ mode, open, onOpenChange, - modelOptions, initialData, }: SpeakerProfileFormDialogProps) { const { t } = useTranslation() const createProfile = useCreateSpeakerProfile() const updateProfile = useUpdateSpeakerProfile() - const providers = useMemo(() => Object.keys(modelOptions), [modelOptions]) - const getDefaults = useCallback((): SpeakerProfileFormValues => { - const firstProvider = providers[0] ?? '' - const firstModel = firstProvider ? modelOptions[firstProvider]?.[0] ?? '' : '' - if (initialData) { return { name: initialData.name, description: initialData.description ?? '', - tts_provider: initialData.tts_provider, - tts_model: initialData.tts_model, - speakers: initialData.speakers?.map((speaker) => ({ ...speaker })) ?? [{ ...EMPTY_SPEAKER }], + voice_model: initialData.voice_model ?? '', + speakers: initialData.speakers?.map((speaker) => ({ + ...speaker, + voice_model: speaker.voice_model ?? null, + })) ?? 
[{ ...EMPTY_SPEAKER }], } } return { name: '', description: '', - tts_provider: firstProvider, - tts_model: firstModel, + voice_model: '', speakers: [{ ...EMPTY_SPEAKER }], } - }, [initialData, modelOptions, providers]) + }, [initialData]) const { control, register, handleSubmit, reset, - setValue, - watch, formState: { errors }, } = useForm({ resolver: zodResolver(speakerProfileSchema(t)), @@ -129,13 +115,6 @@ export function SpeakerProfileFormDialog({ name: 'speakers', }) - const provider = watch('tts_provider') - const currentModel = watch('tts_model') - const availableModels = useMemo( - () => modelOptions[provider] ?? [], - [modelOptions, provider] - ) - const speakersArrayError = ( errors.speakers as FieldErrorsImpl<{ root?: { message?: string } }> | undefined )?.root?.message @@ -147,24 +126,14 @@ export function SpeakerProfileFormDialog({ reset(getDefaults()) }, [open, reset, getDefaults]) - useEffect(() => { - if (!provider) { - return - } - const models = modelOptions[provider] ?? [] - if (models.length === 0) { - setValue('tts_model', '') - return - } - if (!models.includes(currentModel)) { - setValue('tts_model', models[0]) - } - }, [provider, currentModel, modelOptions, setValue]) - const onSubmit = async (values: SpeakerProfileFormValues) => { const payload = { ...values, description: values.description ?? '', + speakers: values.speakers.map((s) => ({ + ...s, + voice_model: s.voice_model || null, + })), } if (mode === 'create') { @@ -180,7 +149,7 @@ export function SpeakerProfileFormDialog({ } const isSubmitting = createProfile.isPending || updateProfile.isPending - const disableSubmit = isSubmitting || providers.length === 0 + const disableSubmit = isSubmitting const isEdit = mode === 'edit' return ( @@ -195,15 +164,6 @@ export function SpeakerProfileFormDialog({ - {providers.length === 0 ? ( - - {t.podcasts.noTtsModelsAvailable} - - {t.podcasts.noTtsModelsDesc} - - - ) : null} -
@@ -214,56 +174,6 @@ export function SpeakerProfileFormDialog({ ) : null}
-
- - ( - - )} - /> - {errors.tts_provider ? ( -

{errors.tts_provider.message}

- ) : null} -
- -
- - ( - - )} - /> - {errors.tts_model ? ( -

{errors.tts_model.message}

- ) : null} -
-