diff --git a/.gitignore b/.gitignore index 972de5a..1100153 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,5 @@ specs/ +*.local.yml **/*.local.md \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 019f91a..3b00b4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.6.0] - 2026-01-16 + +### Added +- Content-type aware text chunking with automatic HTML, Markdown, and plain text detection (#350, #142) +- Unified embedding generation with mean pooling for large content that exceeds model context limits +- Dedicated embedding commands: `embed_note`, `embed_insight`, `embed_source` +- New utility modules: `chunking.py` and `embedding.py` in `open_notebook/utils/` + +### Changed +- Embedding is now fire-and-forget: domain models submit embedding commands asynchronously after save +- `rebuild_embeddings_command` now delegates to individual embed_* commands instead of inline processing +- Chunk size reduced to 1500 characters for better compatibility with Ollama embedding models + +### Removed +- Legacy embedding commands: `embed_single_item_command`, `embed_chunk_command`, `vectorize_source_command` +- `needs_embedding()` and `get_embedding_content()` methods from domain models +- `split_text()` function from text_utils (replaced by `chunk_text()` in chunking module) + +### Fixed +- Embedding failures when content exceeds model context limits (#350, #142) + ## [1.5.2] - 2026-01-15 ### Performance diff --git a/CLAUDE.md b/CLAUDE.md index fde20d4..88da333 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -170,7 +170,7 @@ See dedicated CLAUDE.md files for detailed guidance: - **Unit tests**: `tests/test_domain.py`, `test_models_api.py` - **Graph tests**: `tests/test_graphs.py` (workflow integration) 
-- **Utils tests**: `tests/test_utils.py` +- **Utils tests**: `tests/test_utils.py`, `tests/test_chunking.py`, `tests/test_embedding.py` - **Run all**: `uv run pytest tests/` - **Coverage**: Check with `pytest --cov` diff --git a/api/routers/embedding.py b/api/routers/embedding.py index 63b9dd9..0e6f5cb 100644 --- a/api/routers/embedding.py +++ b/api/routers/embedding.py @@ -38,14 +38,21 @@ async def embed_content(embed_request: EmbedRequest): # Import commands to ensure they're registered import commands.embedding_commands # noqa: F401 - # Submit command + # Submit type-specific command + if item_type == "source": + command_name = "embed_source" + command_input = {"source_id": item_id} + else: # note + command_name = "embed_note" + command_input = {"note_id": item_id} + command_id = await CommandService.submit_command_job( - "open_notebook", # app name - "embed_single_item", # command name - {"item_id": item_id, "item_type": item_type}, + "open_notebook", + command_name, + command_input, ) - logger.info(f"Submitted async embedding command: {command_id}") + logger.info(f"Submitted async {command_name} command: {command_id}") return EmbedResponse( success=True, @@ -62,30 +69,30 @@ async def embed_content(embed_request: EmbedRequest): ) else: - # SYNC PATH: Submit job (returns immediately with command_id) - # NOTE: "sync" here means "submit and return command_id" - actual processing - # still happens asynchronously in the worker pool - logger.info(f"Using sync processing for {item_type} {item_id}") + # DOMAIN MODEL PATH: Submit job via domain model convenience methods + # These methods internally call submit_command() - still fire-and-forget + logger.info(f"Using domain model path for {item_type} {item_id}") command_id = None - # Get the item and embed it + # Get the item and submit embedding job if item_type == "source": source_item = await Source.get(item_id) if not source_item: raise HTTPException(status_code=404, detail="Source not found") - # Submit 
vectorization job (returns command_id for tracking) + # Submit embed_source job (returns command_id for tracking) command_id = await source_item.vectorize() - message = "Source vectorization job submitted" + message = "Source embedding job submitted" elif item_type == "note": note_item = await Note.get(item_id) if not note_item: raise HTTPException(status_code=404, detail="Note not found") - await note_item.save() # Auto-embeds via ObjectModel.save() - message = "Note embedded successfully" + # Note.save() internally submits embed_note command and returns command_id + command_id = await note_item.save() + message = "Note embedding job submitted" return EmbedResponse( success=True, diff --git a/commands/CLAUDE.md b/commands/CLAUDE.md index 373d3ad..eb9174e 100644 --- a/commands/CLAUDE.md +++ b/commands/CLAUDE.md @@ -4,9 +4,16 @@ ## Key Components +### Embedding Commands + +- **`embed_note_command`**: Embeds a single note using unified embedding pipeline with content-type aware processing. Uses MARKDOWN content type (passed explicitly, not detected). Retry: 5 attempts, exponential jitter 1-60s. +- **`embed_insight_command`**: Embeds a single source insight. Uses MARKDOWN content type. Retry: 5 attempts, exponential jitter 1-60s. +- **`embed_source_command`**: Embeds a source by chunking full_text with content-type aware splitters (HTML, Markdown, plain), then batch embedding all chunks. Uses single Esperanto API call. Retry: 5 attempts, exponential jitter 1-60s. +- **`rebuild_embeddings_command`**: Submits individual embed_* commands for all sources/notes/insights. Returns immediately; actual embedding happens async. No retry (coordinator only). + +### Other Commands + - **`process_source_command`**: Ingests content through `source_graph`, creates embeddings (optional), and generates insights. Retries on transaction conflicts (exp. jitter, max 5×). -- **`embed_single_item_command`**: Embeds individual sources/notes/insights; splits content into chunks for vector storage. 
-- **`rebuild_embeddings_command`**: Bulk re-embed all/existing items with selective type filtering. - **`generate_podcast_command`**: Creates podcasts via `podcast-creator` library using stored episode/speaker profiles. - **`process_text_command`** (example): Test fixture for text operations (uppercase, lowercase, reverse, word_count). - **`analyze_data_command`** (example): Test fixture for numeric aggregations. @@ -15,23 +22,27 @@ - **Pydantic I/O**: All commands use `CommandInput`/`CommandOutput` subclasses for type safety and serialization. - **Error handling**: Permanent errors return failure output; `RuntimeError` exceptions auto-retry via surreal-commands. -- **Retry configuration**: Aggressive retry settings (15 attempts, 1-120s backoff, DEBUG log level) are a temporary workaround for SurrealDB v2.x transaction conflicts with SEARCH indexes. These can be reduced after migrating to SurrealDB v3. +- **Retry configuration**: Embedding commands use moderate retry settings (5 attempts, 1-60s backoff). Retries handle transient failures (RuntimeError, ConnectionError, TimeoutError). +- **Fire-and-forget embedding**: Domain models submit embed_* commands via `submit_command()` without waiting. Commands process asynchronously. +- **Content-type aware chunking**: `embed_source_command` uses `chunk_text()` with automatic content type detection (HTML, Markdown, plain text) for optimal text splitting. Default: 1500 char chunks with 225 char overlap. +- **Batch embedding**: `embed_source_command` uses `generate_embeddings()` for single API call efficiency instead of per-chunk calls. +- **Mean pooling for large content**: `embed_note_command` and `embed_insight_command` use `generate_embedding()` which handles content larger than chunk size via mean pooling. - **Model dumping**: Recursive `full_model_dump()` utility converts Pydantic models → dicts for DB/API responses. 
-- **Logging**: Uses `loguru.logger` throughout; logs execution start/end and key metrics (processing time, counts). Retry attempts use `retry_log_level: "debug"` to prevent log noise during concurrent chunk processing. +- **Logging**: Uses `loguru.logger` throughout; logs execution start/end and key metrics (processing time, counts). - **Time tracking**: All commands measure `start_time` → `processing_time` for monitoring. ## Dependencies -**External**: `surreal_commands` (command decorator, job queue), `loguru`, `pydantic`, `podcast_creator` -**Internal**: `open_notebook.domain.*` (Source, Note, Transformation), `open_notebook.graphs.source`, `open_notebook.ai.models` +**External**: `surreal_commands` (command decorator, job queue, submit_command), `loguru`, `pydantic`, `podcast_creator` +**Internal**: `open_notebook.domain.notebook` (Source, Note, SourceInsight), `open_notebook.utils.chunking` (chunk_text, detect_content_type), `open_notebook.utils.embedding` (generate_embedding, generate_embeddings), `open_notebook.database.repository` (repo_query, repo_insert) ## Quirks & Edge Cases -- **source_commands**: `ensure_record_id()` wraps command IDs for DB storage; transaction conflicts trigger exponential backoff retry (1-120s, up to 15 attempts). Non-`RuntimeError` exceptions are permanent. Retry logs at DEBUG level via `retry_log_level` config. -- **embedding_commands**: Queries DB directly for item state; chunk index must match source's chunk list. Model availability checked at command start. Aggressive retry settings (15 attempts, 120s max wait, DEBUG logging) handle deep queues from large documents without log spam. +- **source_commands**: `ensure_record_id()` wraps command IDs for DB storage; transaction conflicts trigger exponential backoff retry. Non-`RuntimeError` exceptions are permanent. +- **embedding_commands**: Content type detection uses file extension as primary source, heuristics as fallback. Chunks >1800 chars trigger secondary splitting. 
Empty/whitespace-only content is rejected up front: the command returns `success=False` with an error message and is not retried. +- **rebuild_embeddings_command**: Returns "jobs_submitted" not "processed_items" - embedding is async. Individual commands handle failures with their own retries. - **podcast_commands**: Profiles loaded from SurrealDB by name (must exist); briefing can be extended with suffix. Episode records created mid-execution. - **Example commands**: Accept optional `delay_seconds` for testing async behavior; not for production. -- **Retry logging**: Uses `retry_log_level: "debug"` in decorator config + manual `logger.debug()` in exception handlers for double protection against retry log noise. ## Code Example diff --git a/commands/__init__.py b/commands/__init__.py index cd7fb89..479b952 100644 --- a/commands/__init__.py +++ b/commands/__init__.py @@ -1,15 +1,24 @@ """Surreal-commands integration for Open Notebook""" -from .embedding_commands import embed_single_item_command, rebuild_embeddings_command +from .embedding_commands import ( + embed_insight_command, + embed_note_command, + embed_source_command, + rebuild_embeddings_command, +) from .example_commands import analyze_data_command, process_text_command from .podcast_commands import generate_podcast_command from .source_commands import process_source_command __all__ = [ - "embed_single_item_command", + # Embedding commands + "embed_note_command", + "embed_insight_command", + "embed_source_command", + "rebuild_embeddings_command", + # Other commands "generate_podcast_command", "process_source_command", "process_text_command", "analyze_data_command", - "rebuild_embeddings_command", ] diff --git a/commands/embedding_commands.py b/commands/embedding_commands.py index a44dc79..f5e05ad 100644 --- a/commands/embedding_commands.py +++ b/commands/embedding_commands.py @@ -6,9 +6,10 @@ from pydantic import BaseModel from surreal_commands import CommandInput, CommandOutput, command, submit_command from open_notebook.ai.models import model_manager -from 
open_notebook.database.repository import ensure_record_id, repo_query +from open_notebook.database.repository import ensure_record_id, repo_insert, repo_query from open_notebook.domain.notebook import Note, Source, SourceInsight -from open_notebook.utils.text_utils import split_text +from open_notebook.utils.chunking import ContentType, chunk_text, detect_content_type +from open_notebook.utils.embedding import generate_embedding, generate_embeddings def full_model_dump(model): @@ -22,46 +23,6 @@ def full_model_dump(model): return model -class EmbedSingleItemInput(CommandInput): - item_id: str - item_type: Literal["source", "note", "insight"] - - -class EmbedSingleItemOutput(CommandOutput): - success: bool - item_id: str - item_type: str - chunks_created: int = 0 # For sources - processing_time: float - error_message: Optional[str] = None - - -class EmbedChunkInput(CommandInput): - source_id: str - chunk_index: int - chunk_text: str - - -class EmbedChunkOutput(CommandOutput): - success: bool - source_id: str - chunk_index: int - error_message: Optional[str] = None - - -class VectorizeSourceInput(CommandInput): - source_id: str - - -class VectorizeSourceOutput(CommandOutput): - success: bool - source_id: str - total_chunks: int - jobs_submitted: int - processing_time: float - error_message: Optional[str] = None - - class RebuildEmbeddingsInput(CommandInput): mode: Literal["existing", "all"] include_sources: bool = True @@ -72,335 +33,380 @@ class RebuildEmbeddingsInput(CommandInput): class RebuildEmbeddingsOutput(CommandOutput): success: bool total_items: int - processed_items: int - failed_items: int - sources_processed: int = 0 - notes_processed: int = 0 - insights_processed: int = 0 + jobs_submitted: int # Count of embedding commands submitted + failed_submissions: int # Count of items that failed to submit + sources_submitted: int = 0 + notes_submitted: int = 0 + insights_submitted: int = 0 processing_time: float error_message: Optional[str] = None 
-@command("embed_single_item", app="open_notebook") -async def embed_single_item_command( - input_data: EmbedSingleItemInput, -) -> EmbedSingleItemOutput: +# ============================================================================= +# NEW EMBEDDING COMMANDS (Phase 3) +# ============================================================================= + + +class EmbedNoteInput(CommandInput): + """Input for embedding a single note.""" + + note_id: str + + +class EmbedNoteOutput(CommandOutput): + """Output from note embedding command.""" + + success: bool + note_id: str + processing_time: float + error_message: Optional[str] = None + + +class EmbedInsightInput(CommandInput): + """Input for embedding a single source insight.""" + + insight_id: str + + +class EmbedInsightOutput(CommandOutput): + """Output from insight embedding command.""" + + success: bool + insight_id: str + processing_time: float + error_message: Optional[str] = None + + +class EmbedSourceInput(CommandInput): + """Input for embedding a source (creates multiple chunk embeddings).""" + + source_id: str + + +class EmbedSourceOutput(CommandOutput): + """Output from source embedding command.""" + + success: bool + source_id: str + chunks_created: int + processing_time: float + error_message: Optional[str] = None + + +@command( + "embed_note", + app="open_notebook", + retry={ + "max_attempts": 5, + "wait_strategy": "exponential_jitter", + "wait_min": 1, + "wait_max": 60, + "retry_on": [RuntimeError, ConnectionError, TimeoutError], + "retry_log_level": "debug", + }, +) +async def embed_note_command(input_data: EmbedNoteInput) -> EmbedNoteOutput: """ - Embed a single item (source, note, or insight) + Generate and store embedding for a single note. + + Uses the unified embedding pipeline with automatic chunking and mean pooling + for notes that exceed the chunk size limit. + + Flow: + 1. Load Note by ID + 2. Generate embedding via generate_embedding() (auto-chunks + mean pools if needed) + 3. 
UPSERT note embedding in database + + Retry Strategy: + - Retries up to 5 times for transient failures (RuntimeError, ConnectionError, TimeoutError) + - Uses exponential-jitter backoff (1-60s) + - Does NOT retry permanent failures (ValueError, authentication errors) """ start_time = time.time() try: - logger.info( - f"Starting embedding for {input_data.item_type}: {input_data.item_id}" + logger.info(f"Starting embedding for note: {input_data.note_id}") + + # 1. Load note + note = await Note.get(input_data.note_id) + if not note: + raise ValueError(f"Note '{input_data.note_id}' not found") + + if not note.content or not note.content.strip(): + raise ValueError(f"Note '{input_data.note_id}' has no content to embed") + + # 2. Generate embedding (auto-chunks + mean pools if needed) + # Notes are typically markdown content + embedding = await generate_embedding( + note.content, content_type=ContentType.MARKDOWN ) - # Check if embedding model is available - EMBEDDING_MODEL = await model_manager.get_embedding_model() - if not EMBEDDING_MODEL: - raise ValueError( - "No embedding model configured. Please configure one in the Models section." 
- ) - - chunks_created = 0 - - if input_data.item_type == "source": - # Get source and vectorize - source = await Source.get(input_data.item_id) - if not source: - raise ValueError(f"Source '{input_data.item_id}' not found") - - await source.vectorize() - - # Count chunks created - chunks_result = await repo_query( - "SELECT VALUE count() FROM source_embedding WHERE source = $source_id GROUP ALL", - {"source_id": ensure_record_id(input_data.item_id)}, - ) - if chunks_result and isinstance(chunks_result[0], dict): - chunks_created = chunks_result[0].get("count", 0) - elif chunks_result and isinstance(chunks_result[0], int): - chunks_created = chunks_result[0] - else: - chunks_created = 0 - - logger.info(f"Source vectorized: {chunks_created} chunks created") - - elif input_data.item_type == "note": - # Get note and save (auto-embeds via ObjectModel.save()) - note = await Note.get(input_data.item_id) - if not note: - raise ValueError(f"Note '{input_data.item_id}' not found") - - await note.save() - logger.info(f"Note embedded: {input_data.item_id}") - - elif input_data.item_type == "insight": - # Get insight and re-generate embedding - insight = await SourceInsight.get(input_data.item_id) - if not insight: - raise ValueError(f"Insight '{input_data.item_id}' not found") - - # Generate new embedding - embedding = (await EMBEDDING_MODEL.aembed([insight.content]))[0] - - # Update insight with new embedding - await repo_query( - "UPDATE $insight_id SET embedding = $embedding", - { - "insight_id": ensure_record_id(input_data.item_id), - "embedding": embedding, - }, - ) - logger.info(f"Insight embedded: {input_data.item_id}") - - else: - raise ValueError( - f"Invalid item_type: {input_data.item_type}. Must be 'source', 'note', or 'insight'" - ) + # 3. 
UPSERT embedding into note record + await repo_query( + "UPDATE $note_id SET embedding = $embedding", + { + "note_id": ensure_record_id(input_data.note_id), + "embedding": embedding, + }, + ) processing_time = time.time() - start_time logger.info( - f"Successfully embedded {input_data.item_type} {input_data.item_id} in {processing_time:.2f}s" + f"Successfully embedded note {input_data.note_id} in {processing_time:.2f}s" ) - return EmbedSingleItemOutput( + return EmbedNoteOutput( success=True, - item_id=input_data.item_id, - item_type=input_data.item_type, - chunks_created=chunks_created, + note_id=input_data.note_id, processing_time=processing_time, ) + except RuntimeError: + logger.debug( + f"Transaction conflict for note {input_data.note_id} - will be retried" + ) + raise + except (ConnectionError, TimeoutError) as e: + logger.debug( + f"Network/timeout error for note {input_data.note_id} ({type(e).__name__}: {e}) - will be retried" + ) + raise except Exception as e: processing_time = time.time() - start_time - logger.error( - f"Embedding failed for {input_data.item_type} {input_data.item_id}: {e}" - ) + logger.error(f"Failed to embed note {input_data.note_id}: {e}") logger.exception(e) - return EmbedSingleItemOutput( + return EmbedNoteOutput( success=False, - item_id=input_data.item_id, - item_type=input_data.item_type, + note_id=input_data.note_id, processing_time=processing_time, error_message=str(e), ) @command( - "embed_chunk", + "embed_insight", app="open_notebook", retry={ - "max_attempts": 15, # Increased from 5 to handle deep queues (workaround for SurrealDB v2 transaction conflicts) + "max_attempts": 5, "wait_strategy": "exponential_jitter", "wait_min": 1, - "wait_max": 120, # Increased from 30s to 120s to allow queue to drain + "wait_max": 60, "retry_on": [RuntimeError, ConnectionError, TimeoutError], - "retry_log_level": "debug", # Use debug level to avoid log noise with hundreds of chunks + "retry_log_level": "debug", }, ) -async def 
embed_chunk_command( - input_data: EmbedChunkInput, -) -> EmbedChunkOutput: +async def embed_insight_command(input_data: EmbedInsightInput) -> EmbedInsightOutput: """ - Process a single text chunk for embedding as part of source vectorization. + Generate and store embedding for a single source insight. - This command is designed to be submitted as a background job for each chunk - of a source document, allowing natural concurrency control through the worker pool. + Uses the unified embedding pipeline with automatic chunking and mean pooling + for insights that exceed the chunk size limit. - Retry Strategy (SurrealDB v2 workaround): - - Retries up to 15 times for transient failures (increased from 5): - * RuntimeError: SurrealDB transaction conflicts ("read or write conflict") - * ConnectionError: Network failures when calling embedding provider - * TimeoutError: Request timeouts to embedding provider - - Uses exponential-jitter backoff (1-120s, increased from 30s max) - - Higher retry limits allow deep queues (200+ chunks) to drain during concurrent processing - - Does NOT retry permanent failures (ValueError, authentication errors, invalid input) - - Note: These aggressive retry settings are a temporary workaround for SurrealDB v2.x - transaction conflict issues. Can be reduced once migrated to SurrealDB v3. - - Exception Handling: - - RuntimeError, ConnectionError, TimeoutError: Re-raised to trigger retry mechanism - - ValueError and other exceptions: Caught and returned as permanent failures (no retry) - """ - try: - logger.debug( - f"Processing chunk {input_data.chunk_index} for source {input_data.source_id}" - ) - - # Get embedding model - EMBEDDING_MODEL = await model_manager.get_embedding_model() - if not EMBEDDING_MODEL: - raise ValueError( - "No embedding model configured. Please configure one in the Models section." 
- ) - - # Generate embedding for the chunk - embedding = (await EMBEDDING_MODEL.aembed([input_data.chunk_text]))[0] - - # Insert chunk embedding into database - await repo_query( - """ - CREATE source_embedding CONTENT { - "source": $source_id, - "order": $order, - "content": $content, - "embedding": $embedding, - }; - """, - { - "source_id": ensure_record_id(input_data.source_id), - "order": input_data.chunk_index, - "content": input_data.chunk_text, - "embedding": embedding, - }, - ) - - logger.debug( - f"Successfully embedded chunk {input_data.chunk_index} for source {input_data.source_id}" - ) - - return EmbedChunkOutput( - success=True, - source_id=input_data.source_id, - chunk_index=input_data.chunk_index, - ) - - except RuntimeError: - # Re-raise RuntimeError to allow retry mechanism to handle DB transaction conflicts - logger.debug( - f"Transaction conflict for chunk {input_data.chunk_index} - will be retried by retry mechanism" - ) - raise - except (ConnectionError, TimeoutError) as e: - # Re-raise network/timeout errors to allow retry mechanism to handle transient provider failures - logger.debug( - f"Network/timeout error for chunk {input_data.chunk_index} ({type(e).__name__}: {e}) - will be retried by retry mechanism" - ) - raise - except Exception as e: - # Catch other exceptions (ValueError, etc.) as permanent failures - logger.error( - f"Failed to embed chunk {input_data.chunk_index} for source {input_data.source_id}: {e}" - ) - logger.exception(e) - - return EmbedChunkOutput( - success=False, - source_id=input_data.source_id, - chunk_index=input_data.chunk_index, - error_message=str(e), - ) - - -@command("vectorize_source", app="open_notebook", retry=None) -async def vectorize_source_command( - input_data: VectorizeSourceInput, -) -> VectorizeSourceOutput: - """ - Orchestrate source vectorization by splitting text into chunks and submitting - individual embed_chunk jobs to the worker queue. - - This command: - 1. 
Deletes existing embeddings (idempotency) - 2. Splits source text into chunks - 3. Submits each chunk as a separate embed_chunk job - 4. Returns immediately (jobs run in background) - - Natural concurrency control is provided by the worker pool size. + Flow: + 1. Load SourceInsight by ID + 2. Generate embedding via generate_embedding() (auto-chunks + mean pools if needed) + 3. UPSERT insight embedding in database Retry Strategy: - - Retries disabled (retry=None) - fails fast on job submission errors - - This ensures immediate visibility when orchestration fails - - Individual embed_chunk jobs have their own retry logic for DB conflicts + - Retries up to 5 times for transient failures (RuntimeError, ConnectionError, TimeoutError) + - Uses exponential-jitter backoff (1-60s) + - Does NOT retry permanent failures (ValueError, authentication errors) """ start_time = time.time() try: - logger.info( - f"Starting vectorization orchestration for source {input_data.source_id}" + logger.info(f"Starting embedding for insight: {input_data.insight_id}") + + # 1. Load insight + insight = await SourceInsight.get(input_data.insight_id) + if not insight: + raise ValueError(f"Insight '{input_data.insight_id}' not found") + + if not insight.content or not insight.content.strip(): + raise ValueError( + f"Insight '{input_data.insight_id}' has no content to embed" + ) + + # 2. Generate embedding (auto-chunks + mean pools if needed) + # Insights are typically markdown content (generated by LLM) + embedding = await generate_embedding( + insight.content, content_type=ContentType.MARKDOWN ) + # 3. 
UPSERT embedding into insight record + await repo_query( + "UPDATE $insight_id SET embedding = $embedding", + { + "insight_id": ensure_record_id(input_data.insight_id), + "embedding": embedding, + }, + ) + + processing_time = time.time() - start_time + logger.info( + f"Successfully embedded insight {input_data.insight_id} in {processing_time:.2f}s" + ) + + return EmbedInsightOutput( + success=True, + insight_id=input_data.insight_id, + processing_time=processing_time, + ) + + except RuntimeError: + logger.debug( + f"Transaction conflict for insight {input_data.insight_id} - will be retried" + ) + raise + except (ConnectionError, TimeoutError) as e: + logger.debug( + f"Network/timeout error for insight {input_data.insight_id} ({type(e).__name__}: {e}) - will be retried" + ) + raise + except Exception as e: + processing_time = time.time() - start_time + logger.error(f"Failed to embed insight {input_data.insight_id}: {e}") + logger.exception(e) + + return EmbedInsightOutput( + success=False, + insight_id=input_data.insight_id, + processing_time=processing_time, + error_message=str(e), + ) + + +@command( + "embed_source", + app="open_notebook", + retry={ + "max_attempts": 5, + "wait_strategy": "exponential_jitter", + "wait_min": 1, + "wait_max": 60, + "retry_on": [RuntimeError, ConnectionError, TimeoutError], + "retry_log_level": "debug", + }, +) +async def embed_source_command(input_data: EmbedSourceInput) -> EmbedSourceOutput: + """ + Generate and store embeddings for a source document. + + Creates multiple chunk embeddings stored in the source_embedding table. + Uses content-type aware chunking based on file extension or content heuristics. + + Flow: + 1. Load Source by ID + 2. DELETE existing source_embedding records for this source + 3. Detect content type from file path or content + 4. Chunk text using appropriate splitter + 5. Generate embeddings for all chunks in a single API call + 6. 
Bulk INSERT source_embedding records + + Retry Strategy: + - Retries up to 5 times for transient failures (RuntimeError, ConnectionError, TimeoutError) + - Uses exponential-jitter backoff (1-60s) + - Does NOT retry permanent failures (ValueError, authentication errors) + """ + start_time = time.time() + + try: + logger.info(f"Starting embedding for source: {input_data.source_id}") + # 1. Load source source = await Source.get(input_data.source_id) if not source: raise ValueError(f"Source '{input_data.source_id}' not found") - if not source.full_text: - raise ValueError(f"Source {input_data.source_id} has no text to vectorize") + if not source.full_text or not source.full_text.strip(): + raise ValueError(f"Source '{input_data.source_id}' has no text to embed") - # 2. Delete existing embeddings (idempotency) - logger.info(f"Deleting existing embeddings for source {input_data.source_id}") - delete_result = await repo_query( + # 2. DELETE existing embeddings (idempotency) + logger.debug(f"Deleting existing embeddings for source {input_data.source_id}") + await repo_query( "DELETE source_embedding WHERE source = $source_id", {"source_id": ensure_record_id(input_data.source_id)}, ) - deleted_count = len(delete_result) if delete_result else 0 - if deleted_count > 0: - logger.info(f"Deleted {deleted_count} existing embeddings") - # 3. Split text into chunks - logger.info(f"Splitting text into chunks for source {input_data.source_id}") - chunks = split_text(source.full_text) + # 3. Detect content type from file path if available + file_path = source.asset.file_path if source.asset else None + content_type = detect_content_type(source.full_text, file_path) + logger.debug(f"Detected content type: {content_type.value}") + + # 4. 
Chunk text using appropriate splitter + chunks = chunk_text(source.full_text, content_type=content_type) total_chunks = len(chunks) - logger.info(f"Split into {total_chunks} chunks") + + # Log chunk statistics for debugging + chunk_sizes = [len(c) for c in chunks] + logger.info( + f"Created {total_chunks} chunks for source {input_data.source_id} " + f"(sizes: min={min(chunk_sizes) if chunk_sizes else 0}, " + f"max={max(chunk_sizes) if chunk_sizes else 0}, " + f"avg={sum(chunk_sizes)//len(chunk_sizes) if chunk_sizes else 0} chars)" + ) if total_chunks == 0: raise ValueError("No chunks created after splitting text") - # 4. Submit each chunk as a separate job - logger.info(f"Submitting {total_chunks} chunk jobs to worker queue") - jobs_submitted = 0 + # 5. Generate embeddings for all chunks in single API call + logger.debug(f"Generating embeddings for {total_chunks} chunks") + embeddings = await generate_embeddings(chunks) - for idx, chunk_text in enumerate(chunks): - try: - job_id = submit_command( - "open_notebook", # app name - "embed_chunk", # command name - { - "source_id": input_data.source_id, - "chunk_index": idx, - "chunk_text": chunk_text, - }, - ) - jobs_submitted += 1 + # Verify we got embeddings for all chunks + if len(embeddings) != len(chunks): + raise ValueError( + f"Embedding count mismatch: got {len(embeddings)} embeddings " + f"for {len(chunks)} chunks" + ) - if (idx + 1) % 100 == 0: - logger.info(f" Submitted {idx + 1}/{total_chunks} chunk jobs") + # 6. 
Bulk INSERT source_embedding records + records = [ + { + "source": ensure_record_id(input_data.source_id), + "order": idx, + "content": chunk, + "embedding": embedding, + } + for idx, (chunk, embedding) in enumerate(zip(chunks, embeddings)) + ] - except Exception as e: - logger.error(f"Failed to submit chunk job {idx}: {e}") - # Continue submitting other chunks even if one fails + logger.debug(f"Inserting {len(records)} source_embedding records") + await repo_insert("source_embedding", records) processing_time = time.time() - start_time - logger.info( - f"Vectorization orchestration complete for source {input_data.source_id}: " - f"{jobs_submitted}/{total_chunks} jobs submitted in {processing_time:.2f}s" + f"Successfully embedded source {input_data.source_id}: " + f"{total_chunks} chunks in {processing_time:.2f}s" ) - return VectorizeSourceOutput( + return EmbedSourceOutput( success=True, source_id=input_data.source_id, - total_chunks=total_chunks, - jobs_submitted=jobs_submitted, + chunks_created=total_chunks, processing_time=processing_time, ) + except RuntimeError: + logger.debug( + f"Transaction conflict for source {input_data.source_id} - will be retried" + ) + raise + except (ConnectionError, TimeoutError) as e: + logger.debug( + f"Network/timeout error for source {input_data.source_id} ({type(e).__name__}: {e}) - will be retried" + ) + raise except Exception as e: processing_time = time.time() - start_time - logger.error( - f"Vectorization orchestration failed for source {input_data.source_id}: {e}" - ) + logger.error(f"Failed to embed source {input_data.source_id}: {e}") logger.exception(e) - return VectorizeSourceOutput( + return EmbedSourceOutput( success=False, source_id=input_data.source_id, - total_chunks=0, - jobs_submitted=0, + chunks_created=0, processing_time=processing_time, error_message=str(e), ) @@ -478,12 +484,20 @@ async def rebuild_embeddings_command( input_data: RebuildEmbeddingsInput, ) -> RebuildEmbeddingsOutput: """ - Rebuild embeddings 
for sources, notes, and/or insights + Rebuild embeddings for sources, notes, and/or insights. + + This command submits individual embedding jobs for each item: + - embed_source for sources + - embed_note for notes + - embed_insight for insights + + The command returns after submitting all jobs. Actual embedding + happens asynchronously via the individual commands (which have + their own retry strategies). Retry Strategy: - - Retries disabled (retry=None) - batch failures are immediately reported - - This ensures immediate visibility when batch operations fail - - Allows operators to quickly identify and resolve issues + - Retries disabled (retry=None) for this coordinator command + - Individual embed_* commands handle their own retries """ start_time = time.time() @@ -495,16 +509,16 @@ async def rebuild_embeddings_command( ) logger.info("=" * 60) - # Check embedding model availability + # Check embedding model availability (fail fast) EMBEDDING_MODEL = await model_manager.get_embedding_model() if not EMBEDDING_MODEL: raise ValueError( "No embedding model configured. Please configure one in the Models section." 
) - logger.info(f"Using embedding model: {EMBEDDING_MODEL}") + logger.info(f"Embedding model configured: {EMBEDDING_MODEL}") - # Collect items to process + # Collect items to process (returns IDs only) items = await collect_items_for_rebuild( input_data.mode, input_data.include_sources, @@ -515,121 +529,106 @@ async def rebuild_embeddings_command( total_items = ( len(items["sources"]) + len(items["notes"]) + len(items["insights"]) ) - logger.info(f"Total items to process: {total_items}") + logger.info(f"Total items to rebuild: {total_items}") if total_items == 0: logger.warning("No items found to rebuild") return RebuildEmbeddingsOutput( success=True, total_items=0, - processed_items=0, - failed_items=0, + jobs_submitted=0, + failed_submissions=0, processing_time=time.time() - start_time, ) # Initialize counters - sources_processed = 0 - notes_processed = 0 - insights_processed = 0 - failed_items = 0 + sources_submitted = 0 + notes_submitted = 0 + insights_submitted = 0 + failed_submissions = 0 - # Process sources - logger.info(f"\nProcessing {len(items['sources'])} sources...") + # Submit embed_source commands for sources + logger.info(f"\nSubmitting {len(items['sources'])} source embedding jobs...") for idx, source_id in enumerate(items["sources"], 1): try: - source = await Source.get(source_id) - if not source: - logger.warning(f"Source {source_id} not found, skipping") - failed_items += 1 - continue + submit_command( + "open_notebook", + "embed_source", + {"source_id": source_id}, + ) + sources_submitted += 1 - await source.vectorize() - sources_processed += 1 - - if idx % 10 == 0 or idx == len(items["sources"]): + if idx % 50 == 0 or idx == len(items["sources"]): logger.info( - f" Progress: {idx}/{len(items['sources'])} sources processed" + f" Progress: {idx}/{len(items['sources'])} source jobs submitted" ) except Exception as e: - logger.error(f"Failed to re-embed source {source_id}: {e}") - failed_items += 1 + logger.error(f"Failed to submit embed_source for 
{source_id}: {e}") + failed_submissions += 1 - # Process notes - logger.info(f"\nProcessing {len(items['notes'])} notes...") + # Submit embed_note commands for notes + logger.info(f"\nSubmitting {len(items['notes'])} note embedding jobs...") for idx, note_id in enumerate(items["notes"], 1): try: - note = await Note.get(note_id) - if not note: - logger.warning(f"Note {note_id} not found, skipping") - failed_items += 1 - continue + submit_command( + "open_notebook", + "embed_note", + {"note_id": note_id}, + ) + notes_submitted += 1 - await note.save() # Auto-embeds via ObjectModel.save() - notes_processed += 1 - - if idx % 10 == 0 or idx == len(items["notes"]): + if idx % 50 == 0 or idx == len(items["notes"]): logger.info( - f" Progress: {idx}/{len(items['notes'])} notes processed" + f" Progress: {idx}/{len(items['notes'])} note jobs submitted" ) except Exception as e: - logger.error(f"Failed to re-embed note {note_id}: {e}") - failed_items += 1 + logger.error(f"Failed to submit embed_note for {note_id}: {e}") + failed_submissions += 1 - # Process insights - logger.info(f"\nProcessing {len(items['insights'])} insights...") + # Submit embed_insight commands for insights + logger.info(f"\nSubmitting {len(items['insights'])} insight embedding jobs...") for idx, insight_id in enumerate(items["insights"], 1): try: - insight = await SourceInsight.get(insight_id) - if not insight: - logger.warning(f"Insight {insight_id} not found, skipping") - failed_items += 1 - continue - - # Re-generate embedding - embedding = (await EMBEDDING_MODEL.aembed([insight.content]))[0] - - # Update insight with new embedding - await repo_query( - "UPDATE $insight_id SET embedding = $embedding", - { - "insight_id": ensure_record_id(insight_id), - "embedding": embedding, - }, + submit_command( + "open_notebook", + "embed_insight", + {"insight_id": insight_id}, ) - insights_processed += 1 + insights_submitted += 1 - if idx % 10 == 0 or idx == len(items["insights"]): + if idx % 50 == 0 or idx == 
len(items["insights"]): logger.info( - f" Progress: {idx}/{len(items['insights'])} insights processed" + f" Progress: {idx}/{len(items['insights'])} insight jobs submitted" ) except Exception as e: - logger.error(f"Failed to re-embed insight {insight_id}: {e}") - failed_items += 1 + logger.error(f"Failed to submit embed_insight for {insight_id}: {e}") + failed_submissions += 1 processing_time = time.time() - start_time - processed_items = sources_processed + notes_processed + insights_processed + jobs_submitted = sources_submitted + notes_submitted + insights_submitted logger.info("=" * 60) - logger.info("REBUILD COMPLETE") - logger.info(f" Total processed: {processed_items}/{total_items}") - logger.info(f" Sources: {sources_processed}") - logger.info(f" Notes: {notes_processed}") - logger.info(f" Insights: {insights_processed}") - logger.info(f" Failed: {failed_items}") - logger.info(f" Time: {processing_time:.2f}s") + logger.info("REBUILD JOBS SUBMITTED") + logger.info(f" Total jobs submitted: {jobs_submitted}/{total_items}") + logger.info(f" Sources: {sources_submitted}") + logger.info(f" Notes: {notes_submitted}") + logger.info(f" Insights: {insights_submitted}") + logger.info(f" Failed submissions: {failed_submissions}") + logger.info(f" Submission time: {processing_time:.2f}s") + logger.info(" Note: Actual embedding happens asynchronously") logger.info("=" * 60) return RebuildEmbeddingsOutput( success=True, total_items=total_items, - processed_items=processed_items, - failed_items=failed_items, - sources_processed=sources_processed, - notes_processed=notes_processed, - insights_processed=insights_processed, + jobs_submitted=jobs_submitted, + failed_submissions=failed_submissions, + sources_submitted=sources_submitted, + notes_submitted=notes_submitted, + insights_submitted=insights_submitted, processing_time=processing_time, ) @@ -641,8 +640,8 @@ async def rebuild_embeddings_command( return RebuildEmbeddingsOutput( success=False, total_items=0, - 
processed_items=0, - failed_items=0, + jobs_submitted=0, + failed_submissions=0, processing_time=processing_time, error_message=str(e), ) diff --git a/frontend/src/app/(dashboard)/search/page.tsx b/frontend/src/app/(dashboard)/search/page.tsx index 4ca15ad..ffd3fd3 100644 --- a/frontend/src/app/(dashboard)/search/page.tsx +++ b/frontend/src/app/(dashboard)/search/page.tsx @@ -443,6 +443,11 @@ export default function SearchPage() {
{searchMutation.data.results.map((result, index) => { // Parse type from parent_id (format: "source:id" or "note:id" or "source_insight:id") + // Handle null parent_id gracefully (orphaned records) + if (!result.parent_id) { + console.warn('Search result with null parent_id:', result) + return null + } const [type, id] = result.parent_id.split(':') const modalType = type === 'source_insight' ? 'insight' : type as 'source' | 'note' | 'insight' diff --git a/open_notebook/CLAUDE.md b/open_notebook/CLAUDE.md index 808d3c5..5389c4f 100644 --- a/open_notebook/CLAUDE.md +++ b/open_notebook/CLAUDE.md @@ -163,8 +163,8 @@ Model types (language, embedding, speech_to_text, text_to_speech) drive factory ### Fire-and-Forget Jobs Time-consuming operations (embedding, podcast generation) return command_id immediately. Caller polls surreal-commands for status; no blocking. -### Embedding on Save -Domain models with `needs_embedding()=True` auto-generate embeddings in `save()`. Search functions (text_search, vector_search) use embeddings for semantic matching. +### Fire-and-Forget Embedding +Domain models submit embedding commands after save via `submit_command()` (non-blocking). Note.save() submits `embed_note`, Source.add_insight() submits `embed_insight`, Source.vectorize() submits `embed_source`. Search functions (text_search, vector_search) use embeddings for semantic matching. ### Relationship Management SurrealDB graph edges link entities: Notebook→Source (has), Source→Note (artifact), Note→Source (refers_to). See `relate()` in domain/base.py. @@ -203,7 +203,7 @@ SurrealDB graph edges link entities: Notebook→Source (has), Source→Note (art **New data model**: 1. Create class inheriting from `ObjectModel` with `table_name` ClassVar 2. Define Pydantic fields and validators -3. Override `needs_embedding()` if searchable +3. Override `save()` to submit embedding command if searchable (use `submit_command("embed_*", id)`) 4. Add custom methods for domain logic (get_X, add_to_Y) 5. 
Register in domain/__init__.py exports diff --git a/open_notebook/database/migrations/10.surrealql b/open_notebook/database/migrations/10.surrealql index f1844c5..a6a6f4e 100644 --- a/open_notebook/database/migrations/10.surrealql +++ b/open_notebook/database/migrations/10.surrealql @@ -4,3 +4,10 @@ DEFINE INDEX IF NOT EXISTS idx_source_insight_source ON source_insight FIELDS source CONCURRENTLY; DEFINE INDEX IF NOT EXISTS idx_source_embedding_source ON source_embedding FIELDS source CONCURRENTLY; + +DEFINE FIELD OVERWRITE embedding ON TABLE source_insight TYPE option<array<float>>; +DEFINE FIELD OVERWRITE embedding ON TABLE note TYPE option<array<float>>; + +-- delete orphan records +DELETE from source_embedding WHERE source.id=NONE; +DELETE from source_insight WHERE source.id=NONE; diff --git a/open_notebook/domain/CLAUDE.md b/open_notebook/domain/CLAUDE.md index 81075fd..b2ac4c8 100644 --- a/open_notebook/domain/CLAUDE.md +++ b/open_notebook/domain/CLAUDE.md @@ -33,7 +33,7 @@ Two base classes support different persistence patterns: **ObjectModel** (mutabl - `add_insight()`: Generate and store insights with embeddings - **Note**: Standalone or linked notes - - `needs_embedding()`: Always True (searchable) + - `save()`: Submits `embed_note` command after save (fire-and-forget) - `add_to_notebook()`: Link to notebook - **SourceInsight, SourceEmbedding**: Derived content models @@ -55,7 +55,7 @@ Two base classes support different persistence patterns: **ObjectModel** (mutabl - **Async/await**: All DB operations async; always use await - **Polymorphic get()**: `ObjectModel.get(id)` determines subclass from ID prefix (table:id format) -- **Auto-embedding**: `save()` generates embeddings if `needs_embedding()` returns True +- **Fire-and-forget embedding**: Models submit embed_* commands after save via `submit_command()` (non-blocking) - **Nullable fields**: Declare via `nullable_fields` ClassVar to allow None in database - **Timestamps**: `created` and `updated` auto-managed as ISO strings -
**Fire-and-forget jobs**: `source.vectorize()` returns command_id without waiting @@ -75,14 +75,17 @@ Two base classes support different persistence patterns: **ObjectModel** (mutabl - **RecordModel singleton**: __new__ returns existing instance; call `clear_instance()` in tests - **Source.command field**: Stored as RecordID; auto-parsed from strings via field_validator - **Text truncation**: `Note.get_context(short)` hardcodes 100-char limit -- **Embedding async**: Only Note and SourceInsight embed on save; Source too large (uses async job) +- **Auto-embedding behavior**: + - `Note.save()` → auto-submits `embed_note` command + - `Source.save()` → does NOT auto-submit (must call `vectorize()` explicitly) + - `Source.add_insight()` → auto-submits `embed_insight` command - **Relationship strings**: Must match SurrealDB schema (reference, artifact, refers_to) ## How to Add New Model 1. Inherit from ObjectModel with table_name ClassVar 2. Define Pydantic fields with validators -3. Override `needs_embedding()` if searchable +3. Override `save()` to submit embedding command if searchable (use `submit_command("embed_*", id)`) 4. Add custom methods for domain logic (get_X, add_to_Y) 5. Implement `_prepare_save_data()` if custom serialization needed diff --git a/open_notebook/domain/base.py b/open_notebook/domain/base.py index f1b2d93..08b1f1f 100644 --- a/open_notebook/domain/base.py +++ b/open_notebook/domain/base.py @@ -110,34 +110,19 @@ class ObjectModel(BaseModel): return subclass return None - def needs_embedding(self) -> bool: - return False - - def get_embedding_content(self) -> Optional[str]: - return None - async def save(self) -> None: - from open_notebook.ai.models import model_manager + """ + Save the model to the database. + Note: Embedding is no longer generated inline. Subclasses that need + embedding should override save() to submit the appropriate embed_* + command after calling super().save(). 
+ """ try: self.model_validate(self.model_dump(), strict=True) data = self._prepare_save_data() data["updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - if self.needs_embedding(): - embedding_content = self.get_embedding_content() - if embedding_content: - EMBEDDING_MODEL = await model_manager.get_embedding_model() - if not EMBEDDING_MODEL: - logger.warning( - "No embedding model found. Content will not be searchable." - ) - data["embedding"] = ( - (await EMBEDDING_MODEL.aembed([embedding_content]))[0] - if EMBEDDING_MODEL - else [] - ) - repo_result: Union[List[Dict[str, Any]], Dict[str, Any]] if self.id is None: data["created"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index aacfe9c..fde4ecc 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -8,11 +8,9 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator from surreal_commands import submit_command from surrealdb import RecordID -from open_notebook.ai.models import model_manager from open_notebook.database.repository import ensure_record_id, repo_query from open_notebook.domain.base import ObjectModel from open_notebook.exceptions import DatabaseOperationError, InvalidInputError -from open_notebook.utils import split_text class Notebook(ObjectModel): @@ -268,11 +266,14 @@ class Source(ObjectModel): async def vectorize(self) -> str: """ - Submit vectorization as a background job using the vectorize_source command. + Submit vectorization as a background job using the embed_source command. - This method now leverages the job-based architecture to prevent HTTP connection - pool exhaustion when processing large documents. The actual chunk processing - happens in the background worker pool, with natural concurrency control. + This method leverages the job-based architecture to prevent HTTP connection + pool exhaustion when processing large documents. 
The embed_source command: + 1. Detects content type from file path + 2. Chunks text using content-type aware splitter + 3. Generates all embeddings in a single API call + 4. Bulk inserts source_embedding records Returns: str: The command/job ID that can be used to track progress via the commands API @@ -281,27 +282,22 @@ class Source(ObjectModel): ValueError: If source has no text to vectorize DatabaseOperationError: If job submission fails """ - logger.info(f"Submitting vectorization job for source {self.id}") + logger.info(f"Submitting embed_source job for source {self.id}") try: if not self.full_text: raise ValueError(f"Source {self.id} has no text to vectorize") - # Submit the vectorize_source command which will: - # 1. Delete existing embeddings (idempotency) - # 2. Split text into chunks - # 3. Submit each chunk as an embed_chunk job + # Submit the embed_source command command_id = submit_command( - "open_notebook", # app name - "vectorize_source", # command name - { - "source_id": str(self.id), - }, + "open_notebook", + "embed_source", + {"source_id": str(self.id)}, ) command_id_str = str(command_id) logger.info( - f"Vectorization job submitted for source {self.id}: " + f"Embed source job submitted for source {self.id}: " f"command_id={command_id_str}" ) @@ -309,40 +305,58 @@ class Source(ObjectModel): except Exception as e: logger.error( - f"Failed to submit vectorization job for source {self.id}: {e}" + f"Failed to submit embed_source job for source {self.id}: {e}" ) logger.exception(e) raise DatabaseOperationError(e) async def add_insight(self, insight_type: str, content: str) -> Any: - EMBEDDING_MODEL = await model_manager.get_embedding_model() - if not EMBEDDING_MODEL: - logger.warning("No embedding model found. Insight will not be searchable.") + """ + Add an insight to this source. + Creates the insight record without embedding, then submits an async + embed_insight command to generate the embedding in the background. 
+ + Args: + insight_type: Type/category of the insight + content: The insight content text + + Returns: + The created insight record(s) + """ if not insight_type or not content: raise InvalidInputError("Insight type and content must be provided") try: - embedding = ( - (await EMBEDDING_MODEL.aembed([content]))[0] if EMBEDDING_MODEL else [] - ) - return await repo_query( + # Create insight WITHOUT embedding (fire-and-forget embedding via command) + result = await repo_query( """ CREATE source_insight CONTENT { "source": $source_id, "insight_type": $insight_type, "content": $content, - "embedding": $embedding, };""", { "source_id": ensure_record_id(self.id), "insight_type": insight_type, "content": content, - "embedding": embedding, }, ) + + # Submit embedding command (fire-and-forget) + if result and len(result) > 0: + insight_id = str(result[0].get("id", "")) + if insight_id: + submit_command( + "open_notebook", + "embed_insight", + {"insight_id": insight_id}, + ) + logger.debug(f"Submitted embed_insight command for {insight_id}") + + return result except Exception as e: logger.error(f"Error adding insight to source {self.id}: {str(e)}") - raise # DatabaseOperationError(e) + raise def _prepare_save_data(self) -> dict: """Override to ensure command field is always RecordID format for database""" @@ -355,7 +369,7 @@ class Source(ObjectModel): return data async def delete(self) -> bool: - """Delete source and clean up associated file if it exists.""" + """Delete source and clean up associated file, embeddings, and insights.""" # Clean up uploaded file if it exists if self.asset and self.asset.file_path: file_path = Path(self.asset.file_path) @@ -373,6 +387,24 @@ class Source(ObjectModel): f"File {file_path} not found for source {self.id}, skipping cleanup" ) + # Delete associated embeddings and insights to prevent orphaned records + try: + source_id = ensure_record_id(self.id) + await repo_query( + "DELETE source_embedding WHERE source = $source_id", + {"source_id": 
source_id}, + ) + await repo_query( + "DELETE source_insight WHERE source = $source_id", + {"source_id": source_id}, + ) + logger.debug(f"Deleted embeddings and insights for source {self.id}") + except Exception as e: + logger.warning( + f"Failed to delete embeddings/insights for source {self.id}: {e}. " + "Continuing with source deletion." + ) + # Call parent delete to remove database record return await super().delete() @@ -390,6 +422,31 @@ class Note(ObjectModel): raise InvalidInputError("Note content cannot be empty") return v + async def save(self) -> Optional[str]: + """ + Save the note and submit embedding command. + + Overrides ObjectModel.save() to submit an async embed_note command + after saving, instead of inline embedding. + + Returns: + Optional[str]: The command_id if embedding was submitted, None otherwise + """ + # Call parent save (without embedding) + await super().save() + + # Submit embedding command (fire-and-forget) if note has content + if self.id and self.content and self.content.strip(): + command_id = submit_command( + "open_notebook", + "embed_note", + {"note_id": str(self.id)}, + ) + logger.debug(f"Submitted embed_note command {command_id} for {self.id}") + return command_id + + return None + async def add_to_notebook(self, notebook_id: str) -> Any: if not notebook_id: raise InvalidInputError("Notebook ID must be provided") @@ -407,12 +464,6 @@ class Note(ObjectModel): content=self.content[:100] if self.content else None, ) - def needs_embedding(self) -> bool: - return True - - def get_embedding_content(self) -> Optional[str]: - return self.content - class ChatSession(ObjectModel): table_name: ClassVar[str] = "chat_session" @@ -461,10 +512,10 @@ async def vector_search( if not keyword: raise InvalidInputError("Search keyword cannot be empty") try: - EMBEDDING_MODEL = await model_manager.get_embedding_model() - if EMBEDDING_MODEL is None: - raise ValueError("EMBEDDING_MODEL is not configured") - embed = (await 
EMBEDDING_MODEL.aembed([keyword]))[0] + from open_notebook.utils.embedding import generate_embedding + + # Use unified embedding function (handles chunking if query is very long) + embed = await generate_embedding(keyword) search_results = await repo_query( """ SELECT * FROM fn::vector_search($embed, $results, $source, $note, $minimum_score); diff --git a/open_notebook/graphs/prompt.py b/open_notebook/graphs/prompt.py index b454904..00bc356 100644 --- a/open_notebook/graphs/prompt.py +++ b/open_notebook/graphs/prompt.py @@ -7,6 +7,7 @@ from langgraph.graph import END, START, StateGraph from typing_extensions import TypedDict from open_notebook.ai.provision import provision_langchain_model +from open_notebook.utils.text_utils import clean_thinking_content class PatternChainState(TypedDict): @@ -31,7 +32,9 @@ async def call_model(state: dict, config: RunnableConfig) -> dict: response = await chain.ainvoke(payload) - return {"output": response.content} + # Clean thinking tags from response (handles extended thinking models) + output = clean_thinking_content(str(response.content)) + return {"output": output} agent_state = StateGraph(PatternChainState) diff --git a/open_notebook/utils/CLAUDE.md b/open_notebook/utils/CLAUDE.md index aec3539..8c863f4 100644 --- a/open_notebook/utils/CLAUDE.md +++ b/open_notebook/utils/CLAUDE.md @@ -1,18 +1,20 @@ # Utils Module -Utility functions and helpers for context building, text processing, tokenization, and versioning. +Utility functions and helpers for context building, text processing, chunking, embedding, tokenization, and versioning. ## Purpose -Provides cross-cutting concerns: building LLM context from sources/insights, text utilities (truncation, cleaning), token counting, and version management. +Provides cross-cutting concerns: building LLM context from sources/insights, content-type aware text chunking, unified embedding generation with mean pooling, token counting, and version management. 
## Architecture Overview -**Four core utilities**: +**Six core utilities**: 1. **context_builder.py**: Flexible context assembly from sources, notes, insights with token budgeting -2. **text_utils.py**: Text truncation, whitespace cleaning, formatting helpers -3. **token_utils.py**: Token counting for LLM context windows (wrapper around encoding library) -4. **version_utils.py**: Version parsing, comparison, and schema compatibility checks +2. **chunking.py**: Content-type detection and smart text chunking for embedding operations +3. **embedding.py**: Unified embedding generation with mean pooling for large content +4. **text_utils.py**: Text cleaning and thinking content extraction +5. **token_utils.py**: Token counting for LLM context windows (wrapper around encoding library) +6. **version_utils.py**: Version parsing, comparison, and schema compatibility checks Each utility is stateless and can be imported independently. @@ -35,34 +37,63 @@ Each utility is stateless and can be imported independently. 
- Type-specific fetching: sources → Source.full_text, notes → Note.content, insights → SourceInsight.content - Raises DatabaseOperationError if source/note fetch fails -### text_utils.py -- **truncate_text(text, max_chars, suffix="...")**: Truncates string, adds ellipsis -- **clean_text(text)**: Removes extra whitespace, normalizes newlines -- **extract_sentences(text, max_count)**: Splits text into sentences up to limit -- **normalize_whitespace(text)**: Collapse multiple spaces/newlines into single -- **format_for_llm(text)**: Combines cleaning + normalization for LLM consumption +### chunking.py +- **ContentType**: Enum (HTML, MARKDOWN, PLAIN) +- **CHUNK_SIZE**: 1500 characters (constant) +- **CHUNK_OVERLAP**: 225 characters (15% overlap) +- **detect_content_type_from_extension(file_path)**: Detect type from file extension +- **detect_content_type_from_heuristics(text)**: Detect type from content patterns (returns type + confidence) +- **detect_content_type(text, file_path)**: Combined detection (extension primary, heuristics fallback) +- **chunk_text(text, content_type, file_path)**: Split text using appropriate splitter -**Key behavior**: All functions are pure (no side effects); safe for high-volume processing +**Key behavior**: +- Uses LangChain splitters: HTMLHeaderTextSplitter, MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter +- Extension-based detection is primary; heuristics can override PLAIN extensions with 0.8+ confidence +- Secondary chunking applied when HTML/Markdown splitters produce oversized chunks +- Returns list of strings, each ≤ CHUNK_SIZE characters + +### embedding.py +- **mean_pool_embeddings(embeddings)**: Combine multiple embeddings via normalized mean pooling +- **generate_embeddings(texts)**: Batch embedding via single Esperanto API call +- **generate_embedding(text, content_type, file_path)**: Unified embedding with automatic chunking + mean pooling + +**Key behavior**: +- Uses model_manager.get_model("embedding") for 
embedding model +- Short text (≤ CHUNK_SIZE): direct embedding +- Long text: chunk → embed each → mean pool results +- Mean pooling: normalize each → mean → normalize result (using numpy) +- Raises ValueError for empty/whitespace-only text + +### text_utils.py +- **remove_non_ascii(text)**: Remove non-ASCII characters from text +- **remove_non_printable(text)**: Remove non-printable characters, preserving newlines/tabs +- **parse_thinking_content(content)**: Extract `<think>` tags content from AI responses +- **clean_thinking_content(content)**: Remove `<think>` blocks, return cleaned content only + +**Key behavior**: +- parse_thinking_content handles malformed output (missing opening `<think>` tag) +- Large content (>100KB) bypasses thinking extraction for performance +- Non-string input returns empty thinking and stringified content ### token_utils.py -- **token_count(text)**: Returns estimated token count for string (via encoding library) -- **remaining_tokens(max_tokens, used)**: Returns remaining tokens in budget -- **fits_in_context(text, max_tokens)**: Boolean check if text fits token budget +- **token_count(text)**: Returns estimated token count for string (via tiktoken) +- **token_cost(text, model)**: Calculate cost estimate for text with given model -**Key behavior**: Uses fixed encoding (cl100k_base for GPT models); may differ slightly from actual model tokenization +**Key behavior**: Uses cl100k_base encoding; may differ slightly from actual model tokenization ### version_utils.py -- **parse_version(version_string)**: Parses "1.2.3" format; returns Version namedtuple - **compare_versions(v1, v2)**: Returns -1 (v1 < v2), 0 (equal), 1 (v1 > v2) -- **is_compatible(current, required)**: Checks if current version meets requirement (e.g., current >= required) -- **schema_version_check()**: Validates database schema version on startup +- **get_installed_version(package)**: Get version of installed Python package +- **get_version_from_github(url)**: Fetch latest version from GitHub
releases -**Key behavior**: Assumes semantic versioning (MAJOR.MINOR.PATCH); non-standard formats raise ValueError +**Key behavior**: Uses packaging library for version parsing; supports pre-release tags ## Common Patterns - **Dataclass-driven config**: ContextConfig used by ContextBuilder (immutable after init) - **Token budgeting**: ContextBuilder respects max_tokens constraint; prioritizes high-priority items +- **Content-type aware processing**: Chunking uses appropriate splitter based on detected content type +- **Mean pooling for large content**: Embedding handles arbitrarily large text via chunking + pooling - **Error handling resilience**: token_count() returns estimate; context_builder catches DB errors gracefully - **Pure text functions**: text_utils functions are stateless utilities (no class needed) - **Lazy evaluation**: ContextBuilder doesn't fetch items until build() called @@ -71,32 +102,59 @@ Each utility is stateless and can be imported independently. ## Key Dependencies - `open_notebook.domain.notebook`: Source, Note, SourceInsight models; vector_search function +- `open_notebook.ai.models`: model_manager for embedding model access - `open_notebook.exceptions`: DatabaseOperationError, NotFoundError -- `tiktoken` (via token_utils.py): Token encoding for GPT models -- `loguru`: Logging in context_builder (debug-level) +- `langchain_text_splitters`: HTMLHeaderTextSplitter, MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter +- `numpy`: Mean pooling calculations +- `tiktoken`: Token encoding for GPT models +- `loguru`: Logging throughout ## Important Quirks & Gotchas - **Token count estimation**: Uses cl100k_base encoding; may differ 5-10% from actual model tokens +- **Chunk size for Ollama**: 1500 chars chosen to fit within Ollama embedding model context limits +- **Content type detection order**: Extension checked first, then heuristics; high-confidence heuristics (≥0.8) can override PLAIN extensions +- **Mean pooling normalization**: Each 
embedding normalized before mean, result normalized after - **Priority weights default**: If not specified, ContextConfig uses default weights (source=1, note=0.8, insight=1.2) - **Vector search required**: ContextBuilder assumes vector_search is available on Notebook model; fails if not -- **Source.full_text vs content**: Uses full_text field (may include extracted text + metadata) -- **Type-specific fetch logic**: ContextItem.content stores raw dict; caller must parse (e.g., dict["content"]) - **Circular import risk**: context_builder imports from domain.notebook; avoid domain importing utils - **Max tokens hard limit**: ContextBuilder stops adding items once max_tokens exceeded (not prorated) - **No caching**: Every build() call re-fetches from database (use cache layer if needed) -- **Whitespace normalization lossy**: clean_text() may change intended formatting (code blocks, poetry, etc.) ## How to Extend 1. **Add new context source type**: Create fetch method in ContextBuilder; update ContextConfig.sources dict -2. **Add text preprocessing**: Add new function to text_utils (e.g., remove_urls, extract_keywords) -3. **Change tokenization**: Replace tiktoken with alternative library in token_utils; update all calls -4. **Add context filtering**: Extend ContextConfig with filter_by_date, filter_by_topic fields -5. **Implement caching**: Wrap ContextBuilder.build() with functools.lru_cache (be aware of mutability) +2. **Add content type**: Add to ContentType enum; create splitter getter; update chunk_text() +3. **Change chunk size**: Modify CHUNK_SIZE and CHUNK_OVERLAP constants in chunking.py +4. **Add text preprocessing**: Add new function to text_utils (e.g., remove_urls, extract_keywords) +5. **Change tokenization**: Replace tiktoken with alternative library in token_utils; update all calls +6. 
**Add context filtering**: Extend ContextConfig with filter_by_date, filter_by_topic fields -## Usage Example +## Usage Examples +### Chunking +```python +from open_notebook.utils.chunking import chunk_text, detect_content_type, ContentType + +# Auto-detect content type and chunk +chunks = chunk_text(long_text, file_path="document.md") + +# Explicit content type +chunks = chunk_text(html_content, content_type=ContentType.HTML) +``` + +### Embedding +```python +from open_notebook.utils.embedding import generate_embedding, generate_embeddings + +# Single text (handles chunking + mean pooling automatically) +embedding = await generate_embedding(long_text) + +# Batch embedding (more efficient for multiple texts) +embeddings = await generate_embeddings(["text1", "text2", "text3"]) +``` + +### Context Building ```python from open_notebook.utils.context_builder import ContextBuilder, ContextConfig @@ -107,7 +165,6 @@ config = ContextConfig( builder = ContextBuilder(notebook, config) context_items = await builder.build() -# context_items is List[ContextItem] sorted by priority for item in context_items: print(f"{item.type}:{item.id} ({item.token_count} tokens)") ``` diff --git a/open_notebook/utils/__init__.py b/open_notebook/utils/__init__.py index dd910ef..622288b 100644 --- a/open_notebook/utils/__init__.py +++ b/open_notebook/utils/__init__.py @@ -3,15 +3,29 @@ Utils package for Open Notebook. 
To avoid circular imports, import functions directly: - from open_notebook.utils.context_builder import ContextBuilder -- from open_notebook.utils import split_text, token_count, compare_versions +- from open_notebook.utils import token_count, compare_versions +- from open_notebook.utils.chunking import chunk_text, detect_content_type, ContentType +- from open_notebook.utils.embedding import generate_embedding, generate_embeddings """ +from .chunking import ( + CHUNK_SIZE, + ContentType, + chunk_text, + detect_content_type, + detect_content_type_from_extension, + detect_content_type_from_heuristics, +) +from .embedding import ( + generate_embedding, + generate_embeddings, + mean_pool_embeddings, +) from .text_utils import ( clean_thinking_content, parse_thinking_content, remove_non_ascii, remove_non_printable, - split_text, ) from .token_utils import token_cost, token_count from .version_utils import ( @@ -21,13 +35,26 @@ from .version_utils import ( ) __all__ = [ - "split_text", + # Chunking + "CHUNK_SIZE", + "ContentType", + "chunk_text", + "detect_content_type", + "detect_content_type_from_extension", + "detect_content_type_from_heuristics", + # Embedding + "generate_embedding", + "generate_embeddings", + "mean_pool_embeddings", + # Text utils "remove_non_ascii", "remove_non_printable", "parse_thinking_content", "clean_thinking_content", + # Token utils "token_count", "token_cost", + # Version utils "compare_versions", "get_installed_version", "get_version_from_github", diff --git a/open_notebook/utils/chunking.py b/open_notebook/utils/chunking.py new file mode 100644 index 0000000..3b70831 --- /dev/null +++ b/open_notebook/utils/chunking.py @@ -0,0 +1,374 @@ +""" +Chunking utilities for Open Notebook. + +Provides content-type detection and smart text chunking for embedding operations. +Supports HTML, Markdown, and plain text with appropriate splitters for each type. 
+ +Key functions: +- detect_content_type(): Detects content type from file extension or content heuristics +- chunk_text(): Splits text into chunks using appropriate splitter for content type +""" + +import re +from enum import Enum +from pathlib import Path +from typing import List, Optional, Tuple + +from langchain_text_splitters import ( + HTMLHeaderTextSplitter, + MarkdownHeaderTextSplitter, + RecursiveCharacterTextSplitter, +) +from loguru import logger + +# Constants +CHUNK_SIZE = 1500 # characters +CHUNK_OVERLAP = 225 # 15% of chunk size +HIGH_CONFIDENCE_THRESHOLD = 0.8 # Threshold for heuristics to override extension + + +class ContentType(Enum): + """Content type for chunking strategy selection.""" + + HTML = "html" + MARKDOWN = "markdown" + PLAIN = "plain" + + +# File extension mappings +_EXTENSION_TO_CONTENT_TYPE = { + # HTML + ".html": ContentType.HTML, + ".htm": ContentType.HTML, + ".xhtml": ContentType.HTML, + # Markdown + ".md": ContentType.MARKDOWN, + ".markdown": ContentType.MARKDOWN, + ".mdown": ContentType.MARKDOWN, + ".mkd": ContentType.MARKDOWN, + # Plain text (explicit) + ".txt": ContentType.PLAIN, + ".text": ContentType.PLAIN, + # Code files (treat as plain) + ".py": ContentType.PLAIN, + ".js": ContentType.PLAIN, + ".ts": ContentType.PLAIN, + ".java": ContentType.PLAIN, + ".c": ContentType.PLAIN, + ".cpp": ContentType.PLAIN, + ".go": ContentType.PLAIN, + ".rs": ContentType.PLAIN, + ".rb": ContentType.PLAIN, + ".php": ContentType.PLAIN, + ".sh": ContentType.PLAIN, + ".bash": ContentType.PLAIN, + ".zsh": ContentType.PLAIN, + ".sql": ContentType.PLAIN, + ".json": ContentType.PLAIN, + ".yaml": ContentType.PLAIN, + ".yml": ContentType.PLAIN, + ".xml": ContentType.PLAIN, + ".csv": ContentType.PLAIN, + ".tsv": ContentType.PLAIN, +} + + +def detect_content_type_from_extension(file_path: Optional[str]) -> Optional[ContentType]: + """ + Detect content type from file extension. 
+ + Args: + file_path: Path to the file (can be full path or just filename) + + Returns: + ContentType if extension is recognized, None otherwise + """ + if not file_path: + return None + + try: + extension = Path(file_path).suffix.lower() + return _EXTENSION_TO_CONTENT_TYPE.get(extension) + except Exception: + return None + + +def detect_content_type_from_heuristics(text: str) -> Tuple[ContentType, float]: + """ + Detect content type using content heuristics. + + Args: + text: The text content to analyze + + Returns: + Tuple of (ContentType, confidence_score) where confidence is 0.0-1.0 + """ + if not text or len(text) < 10: + return ContentType.PLAIN, 0.5 + + # Sample first 5000 chars for efficiency + sample = text[:5000] + + # Check HTML first (most specific patterns) + html_score = _calculate_html_score(sample) + if html_score >= HIGH_CONFIDENCE_THRESHOLD: + return ContentType.HTML, html_score + + # Check Markdown + markdown_score = _calculate_markdown_score(sample) + if markdown_score >= HIGH_CONFIDENCE_THRESHOLD: + return ContentType.MARKDOWN, markdown_score + + # Return the higher scoring type, or PLAIN if both are low + if html_score > markdown_score and html_score > 0.3: + return ContentType.HTML, html_score + elif markdown_score > 0.3: + return ContentType.MARKDOWN, markdown_score + else: + return ContentType.PLAIN, 0.6 + + +def _calculate_html_score(text: str) -> float: + """Calculate confidence score for HTML content.""" + score = 0.0 + indicators = 0 + + # Strong indicators + if re.search(r"]", text, re.IGNORECASE): + score += 0.3 + indicators += 1 + + # Structural tags + structural_tags = ["", "= 5: + break + + # Header tags + if re.search(r"]", text, re.IGNORECASE): + score += 0.15 + indicators += 1 + + # Closing tags pattern + if re.search(r"", text): + score += 0.1 + indicators += 1 + + return min(score, 1.0) + + +def _calculate_markdown_score(text: str) -> float: + """Calculate confidence score for Markdown content.""" + score = 0.0 + indicators = 
def detect_content_type(
    text: str, file_path: Optional[str] = None
) -> ContentType:
    """
    Detect content type using file extension (primary) and heuristics (fallback).

    Strategy:
    1. A recognized HTML or Markdown extension is trusted as-is; the content
       is not inspected.
    2. With no recognized extension, the heuristic result is returned.
    3. A plain-text extension (.txt, code and data files) is kept unless the
       heuristics reach HIGH_CONFIDENCE_THRESHOLD, in which case they win.

    Args:
        text: The text content
        file_path: Optional file path for extension-based detection

    Returns:
        Detected ContentType
    """
    extension_type = detect_content_type_from_extension(file_path)

    # Heuristics can never override an HTML/Markdown extension, so skip the
    # regex scan of the content entirely in that case.
    if extension_type is not None and extension_type != ContentType.PLAIN:
        logger.debug(f"Using extension-based content type: {extension_type.value}")
        return extension_type

    heuristic_type, confidence = detect_content_type_from_heuristics(text)

    # Missing or unrecognized extension: heuristics are the only signal
    if extension_type is None:
        logger.debug(
            f"No file extension, using heuristics: {heuristic_type.value} "
            f"(confidence: {confidence:.2f})"
        )
        return heuristic_type

    # Plain-text extension: only a very confident heuristic may override it
    if confidence >= HIGH_CONFIDENCE_THRESHOLD:
        logger.debug(
            f"Extension suggests plain, but heuristics override with "
            f"{heuristic_type.value} (confidence: {confidence:.2f})"
        )
        return heuristic_type

    # Otherwise trust the (plain) extension
    logger.debug(f"Using extension-based content type: {extension_type.value}")
    return extension_type
def _apply_secondary_chunking(chunks: List[str]) -> List[str]:
    """
    Re-split any chunk longer than CHUNK_SIZE with the plain-text splitter.

    The header-based HTML/Markdown splitters cut on headings only, so a long
    section can come back oversized. Chunks already within the limit are
    passed through untouched and the original order is preserved.
    """
    fallback = _get_plain_splitter()
    bounded: List[str] = []

    for piece in chunks:
        if len(piece) <= CHUNK_SIZE:
            bounded.append(piece)
            continue
        # Oversized section: break it down with the recursive splitter
        bounded.extend(fallback.split_text(piece))

    return bounded
async def mean_pool_embeddings(embeddings: List[List[float]]) -> List[float]:
    """
    Combine multiple embeddings into a single embedding using mean pooling.

    Algorithm:
    1. Normalize each embedding to unit length
    2. Compute element-wise mean
    3. Normalize the result to unit length

    Zero vectors are left as zeros instead of being divided by a zero norm.
    A single input embedding is simply normalized and returned.

    Args:
        embeddings: List of embedding vectors (each is a list of floats)

    Returns:
        Single embedding vector (mean pooled and normalized)

    Raises:
        ValueError: If embeddings list is empty, the embeddings have different
            dimensions, or the input does not form a 2D array
    """
    if not embeddings:
        raise ValueError("Cannot mean pool empty list of embeddings")

    if len(embeddings) == 1:
        # Single embedding - just normalize and return
        arr = np.array(embeddings[0], dtype=np.float64)
        norm = np.linalg.norm(arr)
        if norm > 0:
            arr = arr / norm
        return arr.tolist()

    # Ragged input makes numpy raise its own ValueError with a message about
    # "inhomogeneous shape"; surface the documented contract instead.
    try:
        arr = np.array(embeddings, dtype=np.float64)
    except ValueError as exc:
        raise ValueError(
            "Cannot mean pool embeddings: all embeddings must have the same dimension"
        ) from exc

    # Rejects inputs that are not a list of flat vectors (e.g. list of scalars)
    if arr.ndim != 2:
        raise ValueError(f"Expected 2D array, got shape {arr.shape}")

    # Normalize each embedding to unit length, avoiding division by zero
    norms = np.linalg.norm(arr, axis=1, keepdims=True)
    norms = np.where(norms > 0, norms, 1.0)
    normalized = arr / norms

    # Compute mean, then normalize the result
    mean = np.mean(normalized, axis=0)
    mean_norm = np.linalg.norm(mean)
    if mean_norm > 0:
        mean = mean / mean_norm

    return mean.tolist()
async def generate_embedding(
    text: str,
    content_type: Optional[ContentType] = None,
    file_path: Optional[str] = None,
) -> List[float]:
    """
    Generate a single embedding for text, chunking and mean pooling when needed.

    Text is stripped first. If the stripped text fits in CHUNK_SIZE characters
    it is embedded directly. Longer text is split with chunk_text(), all
    chunks are embedded in one generate_embeddings() call, and the vectors
    are merged with mean_pool_embeddings(). When splitting yields exactly
    one chunk, that chunk's embedding is returned without pooling.

    Args:
        text: The text to embed
        content_type: Optional explicit content type for chunking
        file_path: Optional file path for content type detection

    Returns:
        Single embedding vector (list of floats)

    Raises:
        ValueError: If text is empty/whitespace, chunking yields nothing, or
            no embedding model is configured
        RuntimeError: If embedding generation fails
    """
    text = text.strip() if text else ""
    if not text:
        raise ValueError("Cannot generate embedding for empty text")

    # Decide what gets sent to the model: the text itself, or its chunks
    if len(text) > CHUNK_SIZE:
        logger.debug(f"Text exceeds chunk size ({len(text)} chars), chunking...")
        chunks = chunk_text(text, content_type=content_type, file_path=file_path)
        if not chunks:
            raise ValueError("Text chunking produced no chunks")
    else:
        logger.debug(f"Embedding short text ({len(text)} chars) directly")
        chunks = [text]

    needs_pooling = len(chunks) > 1
    if needs_pooling:
        logger.debug(f"Embedding {len(chunks)} chunks and mean pooling")

    # One API call regardless of how many chunks there are
    embeddings = await generate_embeddings(chunks)
    if not needs_pooling:
        return embeddings[0]

    pooled = await mean_pool_embeddings(embeddings)
    logger.debug(f"Mean pooled {len(embeddings)} embeddings into single vector")
    return pooled
import RecursiveCharacterTextSplitter - -from .token_utils import token_count - # Patterns for matching thinking content in AI responses # Standard pattern: ... THINK_PATTERN = re.compile(r"(.*?)", re.DOTALL) @@ -18,39 +14,6 @@ THINK_PATTERN = re.compile(r"(.*?)", re.DOTALL) THINK_PATTERN_NO_OPEN = re.compile(r"^(.*?)", re.DOTALL) -def split_text(txt: str, chunk_size=500): - """ - Split the input text into chunks. - - Args: - txt (str): The input text to be split. - chunk_size (int): The size of each chunk. Default is 500. - - Returns: - list: A list of text chunks. - """ - overlap = int(chunk_size * 0.15) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=chunk_size, - chunk_overlap=overlap, - length_function=token_count, - separators=[ - "\n\n", - "\n", - ".", - ",", - " ", - "\u200b", # Zero-width space - "\uff0c", # Fullwidth comma - "\u3001", # Ideographic comma - "\uff0e", # Fullwidth full stop - "\u3002", # Ideographic full stop - "", - ], - ) - return text_splitter.split_text(txt) - - def remove_non_ascii(text: str) -> str: """Remove non-ASCII characters from text.""" return re.sub(r"[^\x00-\x7F]+", "", text) diff --git a/pyproject.toml b/pyproject.toml index e7d47a4..f54dc96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "open-notebook" -version = "1.5.2" +version = "1.6.0" description = "An open source implementation of a research assistant, inspired by Google Notebook LM" authors = [ {name = "Luis Novo", email = "lfnovo@gmail.com"} @@ -35,10 +35,11 @@ dependencies = [ "httpx[socks]>=0.27.0", "content-core>=1.0.2", "ai-prompter>=0.3", - "esperanto>=2.13", + "esperanto>=2.16", "surrealdb>=1.0.4", "podcast-creator>=0.7.0", "surreal-commands>=1.3.0", + "numpy>=2.4.1", ] [tool.setuptools] diff --git a/tests/test_chunking.py b/tests/test_chunking.py new file mode 100644 index 0000000..5864ded --- /dev/null +++ b/tests/test_chunking.py @@ -0,0 +1,297 @@ +""" +Unit tests for the open_notebook.utils.chunking module. 
+ +Tests content type detection and text chunking functionality. +""" + +import pytest + +from open_notebook.utils.chunking import ( + CHUNK_SIZE, + ContentType, + chunk_text, + detect_content_type, + detect_content_type_from_extension, + detect_content_type_from_heuristics, +) + + +# ============================================================================ +# TEST SUITE 1: Content Type Detection from Extension +# ============================================================================ + + +class TestDetectContentTypeFromExtension: + """Test suite for extension-based content type detection.""" + + def test_html_extensions(self): + """Test HTML file extensions.""" + assert detect_content_type_from_extension("file.html") == ContentType.HTML + assert detect_content_type_from_extension("file.htm") == ContentType.HTML + assert detect_content_type_from_extension("file.xhtml") == ContentType.HTML + assert detect_content_type_from_extension("/path/to/file.HTML") == ContentType.HTML + + def test_markdown_extensions(self): + """Test Markdown file extensions.""" + assert detect_content_type_from_extension("file.md") == ContentType.MARKDOWN + assert detect_content_type_from_extension("file.markdown") == ContentType.MARKDOWN + assert detect_content_type_from_extension("file.mdown") == ContentType.MARKDOWN + assert detect_content_type_from_extension("/path/to/README.MD") == ContentType.MARKDOWN + + def test_plain_text_extensions(self): + """Test plain text file extensions.""" + assert detect_content_type_from_extension("file.txt") == ContentType.PLAIN + assert detect_content_type_from_extension("file.text") == ContentType.PLAIN + + def test_code_extensions_as_plain(self): + """Test code file extensions are treated as plain text.""" + assert detect_content_type_from_extension("file.py") == ContentType.PLAIN + assert detect_content_type_from_extension("file.js") == ContentType.PLAIN + assert detect_content_type_from_extension("file.json") == ContentType.PLAIN + assert 
detect_content_type_from_extension("file.yaml") == ContentType.PLAIN + + def test_unknown_extensions(self): + """Test unknown extensions return None.""" + assert detect_content_type_from_extension("file.xyz") is None + assert detect_content_type_from_extension("file.docx") is None + assert detect_content_type_from_extension("file.pdf") is None + + def test_no_extension(self): + """Test files without extension.""" + assert detect_content_type_from_extension("Makefile") is None + assert detect_content_type_from_extension("README") is None + + def test_none_input(self): + """Test None input.""" + assert detect_content_type_from_extension(None) is None + + def test_empty_string(self): + """Test empty string input.""" + assert detect_content_type_from_extension("") is None + + +# ============================================================================ +# TEST SUITE 2: Content Type Detection from Heuristics +# ============================================================================ + + +class TestDetectContentTypeFromHeuristics: + """Test suite for heuristics-based content type detection.""" + + def test_html_detection_doctype(self): + """Test HTML detection with DOCTYPE.""" + html_text = "Content" + content_type, confidence = detect_content_type_from_heuristics(html_text) + assert content_type == ContentType.HTML + assert confidence >= 0.8 + + def test_html_detection_tags(self): + """Test HTML detection with structural tags.""" + html_text = "Test

Content

" + content_type, confidence = detect_content_type_from_heuristics(html_text) + assert content_type == ContentType.HTML + assert confidence >= 0.5 + + def test_markdown_detection_headers(self): + """Test Markdown detection with headers.""" + md_text = """# Main Title + +## Section 1 + +Some content here. + +## Section 2 + +More content. + +### Subsection + +Details here. +""" + content_type, confidence = detect_content_type_from_heuristics(md_text) + assert content_type == ContentType.MARKDOWN + assert confidence >= 0.3 # 4 headers give ~0.35 confidence + + def test_markdown_detection_links(self): + """Test Markdown detection with links and headers for stronger signal.""" + md_text = """# Documentation + +Check out [this link](https://example.com) and [another one](https://test.com). + +## References + +Here's some more text with [links](url) and `inline code`.""" + content_type, confidence = detect_content_type_from_heuristics(md_text) + assert content_type == ContentType.MARKDOWN + assert confidence >= 0.4 + + def test_markdown_detection_code_blocks(self): + """Test Markdown detection with code blocks.""" + md_text = """# Code Example + +```python +def hello(): + print("Hello, World!") +``` + +Some explanation text. +""" + content_type, confidence = detect_content_type_from_heuristics(md_text) + assert content_type == ContentType.MARKDOWN + assert confidence >= 0.5 + + def test_plain_text_detection(self): + """Test plain text detection.""" + plain_text = """This is just regular plain text. +It has multiple lines but no special formatting. +No headers, no links, no HTML tags. 
+Just regular sentences and paragraphs.""" + content_type, confidence = detect_content_type_from_heuristics(plain_text) + assert content_type == ContentType.PLAIN + + def test_short_text(self): + """Test short text defaults to plain.""" + content_type, confidence = detect_content_type_from_heuristics("Hi") + assert content_type == ContentType.PLAIN + + def test_empty_text(self): + """Test empty text defaults to plain.""" + content_type, confidence = detect_content_type_from_heuristics("") + assert content_type == ContentType.PLAIN + + +# ============================================================================ +# TEST SUITE 3: Combined Content Type Detection +# ============================================================================ + + +class TestDetectContentType: + """Test suite for combined content type detection.""" + + def test_extension_takes_priority(self): + """Test that file extension takes priority over heuristics.""" + # Text looks like markdown but file is .txt + md_text = "# Header\n\nSome [link](url) content" + content_type = detect_content_type(md_text, "file.txt") + # Should use extension (plain) unless heuristics are very high confidence + # In this case, markdown confidence might override + assert content_type in (ContentType.PLAIN, ContentType.MARKDOWN) + + def test_no_extension_uses_heuristics(self): + """Test that heuristics are used when no extension is available.""" + html_text = "Test" + content_type = detect_content_type(html_text, None) + assert content_type == ContentType.HTML + + def test_extension_html(self): + """Test HTML extension detection.""" + content_type = detect_content_type("some text", "file.html") + assert content_type == ContentType.HTML + + def test_extension_markdown(self): + """Test Markdown extension detection.""" + content_type = detect_content_type("some text", "file.md") + assert content_type == ContentType.MARKDOWN + + def test_high_confidence_override(self): + """Test that very high confidence heuristics 
can override plain extension.""" + # Strong HTML indicators in a .txt file + html_text = "Test

Content

" + content_type = detect_content_type(html_text, "file.txt") + # High confidence HTML should override .txt extension + assert content_type == ContentType.HTML + + +# ============================================================================ +# TEST SUITE 4: Text Chunking +# ============================================================================ + + +class TestChunkText: + """Test suite for text chunking functionality.""" + + def test_empty_text(self): + """Test chunking empty text.""" + assert chunk_text("") == [] + assert chunk_text(" ") == [] + + def test_short_text_no_chunking(self): + """Test that short text is not chunked.""" + text = "This is a short text." + chunks = chunk_text(text) + assert len(chunks) == 1 + assert chunks[0] == text + + def test_text_at_chunk_limit(self): + """Test text at exactly chunk size limit.""" + text = "x" * CHUNK_SIZE + chunks = chunk_text(text) + assert len(chunks) == 1 + + def test_long_text_is_chunked(self): + """Test that long text is chunked.""" + # Create text longer than chunk size + text = "This is a sentence. " * 200 # ~4000 chars + chunks = chunk_text(text) + assert len(chunks) > 1 + # Each chunk should be <= CHUNK_SIZE + for chunk in chunks: + assert len(chunk) <= CHUNK_SIZE + 100 # Allow some flexibility for overlap + + def test_explicit_content_type_html(self): + """Test chunking with explicit HTML content type.""" + html_text = """ + +

Main Title

+

First paragraph with lots of content.

+

Section

+

Second paragraph.

+ +""" + chunks = chunk_text(html_text, content_type=ContentType.HTML) + assert len(chunks) >= 1 + + def test_explicit_content_type_markdown(self): + """Test chunking with explicit Markdown content type.""" + md_text = """# Main Title + +Introduction paragraph. + +## Section 1 + +Content for section 1. + +## Section 2 + +Content for section 2. +""" + chunks = chunk_text(md_text, content_type=ContentType.MARKDOWN) + assert len(chunks) >= 1 + + def test_explicit_content_type_plain(self): + """Test chunking with explicit plain content type.""" + plain_text = "Word " * 500 # ~2500 chars + chunks = chunk_text(plain_text, content_type=ContentType.PLAIN) + assert len(chunks) >= 1 + + def test_file_path_detection(self): + """Test chunking with file path for content type detection.""" + text = "Some content here" + chunks = chunk_text(text, file_path="document.md") + assert len(chunks) == 1 + + def test_secondary_chunking_for_large_sections(self): + """Test that large sections from HTML/MD splitters are further chunked.""" + # Create text that would produce a single large section + large_section = "x" * 3000 # Larger than CHUNK_SIZE + md_text = f"# Title\n\n{large_section}" + chunks = chunk_text(md_text, content_type=ContentType.MARKDOWN) + # Should have multiple chunks due to secondary chunking + assert len(chunks) >= 1 + for chunk in chunks: + # Allow some flexibility but chunks should be reasonable size + assert len(chunk) <= CHUNK_SIZE + 300 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_domain.py b/tests/test_domain.py index ac623e1..7fe5167 100644 --- a/tests/test_domain.py +++ b/tests/test_domain.py @@ -228,16 +228,18 @@ class TestNoteDomain: with pytest.raises(InvalidInputError, match="Note content cannot be empty"): Note(title="Test", content=" ") - def test_note_embedding_enabled(self): - """Test notes have embedding enabled by default.""" + def test_note_content_for_embedding(self): + """Test notes can hold content for 
embedding. + + Note: Embedding is now handled via command submission in Note.save(), + not via needs_embedding() method. This test verifies basic content handling. + """ note = Note(title="Test", content="Test content") + assert note.content == "Test content" - assert note.needs_embedding() is True - assert note.get_embedding_content() == "Test content" - - # Test with None content + # Test with None content - valid, no embedding will be submitted note2 = Note(title="Test", content=None) - assert note2.get_embedding_content() is None + assert note2.content is None # ============================================================================ diff --git a/tests/test_embedding.py b/tests/test_embedding.py new file mode 100644 index 0000000..30d30db --- /dev/null +++ b/tests/test_embedding.py @@ -0,0 +1,234 @@ +""" +Unit tests for the open_notebook.utils.embedding module. + +Tests embedding generation and mean pooling functionality. +""" + +import pytest + +from open_notebook.utils.embedding import ( + generate_embedding, + generate_embeddings, + mean_pool_embeddings, +) + + +# ============================================================================ +# TEST SUITE 1: Mean Pooling +# ============================================================================ + + +class TestMeanPoolEmbeddings: + """Test suite for mean pooling functionality.""" + + @pytest.mark.asyncio + async def test_single_embedding(self): + """Test mean pooling with single embedding returns normalized version.""" + embedding = [1.0, 0.0, 0.0] + result = await mean_pool_embeddings([embedding]) + assert len(result) == 3 + # Should be normalized (already unit length) + assert abs(result[0] - 1.0) < 0.001 + assert abs(result[1]) < 0.001 + assert abs(result[2]) < 0.001 + + @pytest.mark.asyncio + async def test_two_embeddings(self): + """Test mean pooling with two embeddings.""" + embeddings = [ + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + ] + result = await mean_pool_embeddings(embeddings) + assert 
len(result) == 3 + # Mean of normalized vectors, then normalized + # Result should be roughly [0.707, 0.707, 0] + assert abs(result[0] - result[1]) < 0.001 # x and y should be equal + assert abs(result[2]) < 0.001 # z should be ~0 + + @pytest.mark.asyncio + async def test_identical_embeddings(self): + """Test mean pooling with identical embeddings.""" + embedding = [0.5, 0.5, 0.5, 0.5] + embeddings = [embedding, embedding, embedding] + result = await mean_pool_embeddings(embeddings) + assert len(result) == 4 + # Result should be same direction, just normalized + # Original is already normalized if we normalize it + import numpy as np + orig_norm = np.linalg.norm(embedding) + expected = [v / orig_norm for v in embedding] + for i in range(4): + assert abs(result[i] - expected[i]) < 0.001 + + @pytest.mark.asyncio + async def test_empty_list_raises(self): + """Test that empty list raises ValueError.""" + with pytest.raises(ValueError, match="empty"): + await mean_pool_embeddings([]) + + @pytest.mark.asyncio + async def test_normalization(self): + """Test that result is normalized to unit length.""" + embeddings = [ + [3.0, 4.0, 0.0], # Not unit length + [0.0, 5.0, 0.0], # Not unit length + ] + result = await mean_pool_embeddings(embeddings) + # Check result is unit length + import numpy as np + norm = np.linalg.norm(result) + assert abs(norm - 1.0) < 0.001 + + @pytest.mark.asyncio + async def test_high_dimensional(self): + """Test mean pooling with high-dimensional embeddings.""" + import numpy as np + # Create random embeddings of dimension 768 (typical embedding size) + np.random.seed(42) + embeddings = [ + np.random.randn(768).tolist(), + np.random.randn(768).tolist(), + np.random.randn(768).tolist(), + ] + result = await mean_pool_embeddings(embeddings) + assert len(result) == 768 + # Check result is normalized + norm = np.linalg.norm(result) + assert abs(norm - 1.0) < 0.001 + + +# ============================================================================ +# TEST 
SUITE 2: Generate Embeddings (requires mocking) +# ============================================================================ + + +class TestGenerateEmbeddings: + """Test suite for batch embedding generation.""" + + @pytest.mark.asyncio + async def test_empty_list(self): + """Test that empty list returns empty list.""" + result = await generate_embeddings([]) + assert result == [] + + @pytest.mark.asyncio + async def test_no_model_raises(self): + """Test that missing model raises ValueError.""" + from unittest.mock import AsyncMock, patch + + with patch( + "open_notebook.utils.embedding.model_manager.get_embedding_model", + new_callable=AsyncMock, + return_value=None, + ): + with pytest.raises(ValueError, match="No embedding model configured"): + await generate_embeddings(["test text"]) + + @pytest.mark.asyncio + async def test_successful_embedding(self): + """Test successful embedding generation with mocked model.""" + from unittest.mock import AsyncMock, MagicMock, patch + + mock_model = MagicMock() + mock_model.aembed = AsyncMock(return_value=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) + + with patch( + "open_notebook.utils.embedding.model_manager.get_embedding_model", + new_callable=AsyncMock, + return_value=mock_model, + ): + result = await generate_embeddings(["text1", "text2"]) + assert len(result) == 2 + assert result[0] == [0.1, 0.2, 0.3] + assert result[1] == [0.4, 0.5, 0.6] + mock_model.aembed.assert_called_once_with(["text1", "text2"]) + + +# ============================================================================ +# TEST SUITE 3: Generate Single Embedding (requires mocking) +# ============================================================================ + + +class TestGenerateEmbedding: + """Test suite for single embedding generation.""" + + @pytest.mark.asyncio + async def test_empty_text_raises(self): + """Test that empty text raises ValueError.""" + with pytest.raises(ValueError, match="empty"): + await generate_embedding("") + + with 
pytest.raises(ValueError, match="empty"): + await generate_embedding(" ") + + @pytest.mark.asyncio + async def test_short_text_direct_embedding(self): + """Test that short text is embedded directly without chunking.""" + from unittest.mock import AsyncMock, MagicMock, patch + + mock_model = MagicMock() + mock_model.aembed = AsyncMock(return_value=[[0.1, 0.2, 0.3]]) + + with patch( + "open_notebook.utils.embedding.model_manager.get_embedding_model", + new_callable=AsyncMock, + return_value=mock_model, + ): + result = await generate_embedding("Short text") + assert result == [0.1, 0.2, 0.3] + # Should be called with single text + mock_model.aembed.assert_called_once_with(["Short text"]) + + @pytest.mark.asyncio + async def test_long_text_chunked_and_pooled(self): + """Test that long text is chunked and mean pooled.""" + from unittest.mock import AsyncMock, MagicMock, patch + + # Create text longer than chunk size + long_text = "This is a sentence. " * 200 # ~4000 chars + + mock_model = MagicMock() + # Return multiple embeddings (one per chunk) + mock_model.aembed = AsyncMock( + return_value=[ + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + ] + ) + + with patch( + "open_notebook.utils.embedding.model_manager.get_embedding_model", + new_callable=AsyncMock, + return_value=mock_model, + ): + result = await generate_embedding(long_text) + # Should return mean pooled result + assert len(result) == 3 + # Model should have been called with multiple chunks + assert mock_model.aembed.called + + @pytest.mark.asyncio + async def test_content_type_parameter(self): + """Test that content type parameter is passed through.""" + from unittest.mock import AsyncMock, MagicMock, patch + + from open_notebook.utils.chunking import ContentType + + mock_model = MagicMock() + mock_model.aembed = AsyncMock(return_value=[[0.1, 0.2, 0.3]]) + + with patch( + "open_notebook.utils.embedding.model_manager.get_embedding_model", + new_callable=AsyncMock, + return_value=mock_model, + ): + result = await 
generate_embedding( + "# Markdown Header\n\nContent", + content_type=ContentType.MARKDOWN, + ) + assert len(result) == 3 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_utils.py b/tests/test_utils.py index d92040b..e686826 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -14,7 +14,6 @@ from open_notebook.utils import ( parse_thinking_content, remove_non_ascii, remove_non_printable, - split_text, token_count, ) from open_notebook.utils.context_builder import ContextBuilder, ContextConfig @@ -27,11 +26,6 @@ from open_notebook.utils.context_builder import ContextBuilder, ContextConfig class TestTextUtilities: """Test suite for text utility functions.""" - def test_split_text_empty_string(self): - """Test splitting empty or very short strings.""" - assert split_text("") == [] - assert split_text("short") == ["short"] - def test_remove_non_ascii(self): """Test removal of non-ASCII characters.""" # Text with various non-ASCII characters diff --git a/uv.lock b/uv.lock index fedf01e..3a10869 100644 --- a/uv.lock +++ b/uv.lock @@ -2,8 +2,9 @@ version = 1 revision = 3 requires-python = ">=3.11, <3.13" resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version < '3.12'", + "sys_platform == 'win32'", + "sys_platform == 'emscripten'", + "sys_platform != 'emscripten' and sys_platform != 'win32'", ] [[package]] @@ -431,7 +432,7 @@ wheels = [ [[package]] name = "content-core" -version = "1.9.0" +version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ai-prompter" }, @@ -460,9 +461,9 @@ dependencies = [ { name = "validators" }, { name = "youtube-transcript-api" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/26/02db2215432048db1745ccce2039c481450dab8725ebec38917b0319e07d/content_core-1.9.0.tar.gz", hash = "sha256:b293b84801eb357efe054bd4988892e8dac4c6e4a2f7fcada6627664b570c903", size = 20705025, upload-time = "2026-01-15T16:09:50.996Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/4a/b5/1322ad0b3d9eb86bbc8efe76e3e569ad32caaf7769973ad166b21468857f/content_core-1.10.0.tar.gz", hash = "sha256:e8f83b5675b24b0b8a38dad0dff5a878b9efe2d5e00b4d71ea3e3073d2eff000", size = 20737121, upload-time = "2026-01-16T20:12:51.308Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/ba/bf35303c656a10b4020233688cc53f97cb49cdd2eabf8af6b2bd34612dbe/content_core-1.9.0-py3-none-any.whl", hash = "sha256:7eec2b633bdc039db074fa0c35d74670cddbec432e071a018d604739c069b59c", size = 180344, upload-time = "2026-01-15T16:09:53.295Z" }, + { url = "https://files.pythonhosted.org/packages/3c/bc/1573354487143af2f3d401454f5801ccbd8a9e426e3d7b5095453ddbb6da/content_core-1.10.0-py3-none-any.whl", hash = "sha256:e8c4ef011224a376b719a73243f9a432e7e4694049e77129eae62370e7e93152", size = 183303, upload-time = "2026-01-16T20:12:49.116Z" }, ] [[package]] @@ -523,7 +524,7 @@ wheels = [ [[package]] name = "cyclopts" -version = "4.4.6" +version = "4.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -531,9 +532,9 @@ dependencies = [ { name = "rich" }, { name = "rich-rst" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dd/6d/0e0f03214f6a5fae79d934d1de8a570bda37235d991d1b6dee9cd99c8e44/cyclopts-4.4.6.tar.gz", hash = "sha256:ffeef064d867b8a567b5efdfc09e3295b2774735ea1df33e5230e818d52d9942", size = 160439, upload-time = "2026-01-15T01:37:51.365Z" } +sdist = { url = "https://files.pythonhosted.org/packages/13/7b/663f3285c1ac0e5d0854bd9db2c87caa6fa3d1a063185e3394a6cdca9151/cyclopts-4.5.0.tar.gz", hash = "sha256:717ac4235548b58d500baf7e688aa4d024caf0ee68f61a012ffd5e29db3099f9", size = 161980, upload-time = "2026-01-16T02:07:16.171Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/f7/3c861e6aa5ba70f61b70ce61e979bc8d60a324149c8974e639020f374ca8/cyclopts-4.4.6-py3-none-any.whl", hash = "sha256:9d06febb128b765b7fd283b55820db282f209a0cb166125f43345b4809dbee62", size 
= 197810, upload-time = "2026-01-15T01:37:52.267Z" }, + { url = "https://files.pythonhosted.org/packages/12/a3/2e00fececc34a99ae3a5d5702a5dd29c5371e4ed016647301a2b9bcc1976/cyclopts-4.5.0-py3-none-any.whl", hash = "sha256:305b9aa90a9cd0916f0a450b43e50ad5df9c252680731a0719edfb9b20381bf5", size = 199772, upload-time = "2026-01-16T02:07:14.707Z" }, ] [[package]] @@ -662,15 +663,15 @@ wheels = [ [[package]] name = "esperanto" -version = "2.13.0" +version = "2.16.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/8c/9f655703422fc895f4c327316a8b4c824ec334374a8f6e2dea61f2512362/esperanto-2.13.0.tar.gz", hash = "sha256:78df58492700d4cdfe9dd715313c48e5e4b816ecb87dc22a56d03610431c640e", size = 742118, upload-time = "2026-01-04T22:14:24.996Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/77/6d527161eee5a562bc80b72b988be35b409ed8d896e701112d513c8511e9/esperanto-2.16.0.tar.gz", hash = "sha256:963034980f6e27e49ecd81ff6193d6b930f74e9fe5570ac189dc8734dc17fbc2", size = 773360, upload-time = "2026-01-22T02:08:14.853Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/44/e1c9aa604f252a351d67398c886bb9b1cd572881289864d3e0d45212e61d/esperanto-2.13.0-py3-none-any.whl", hash = "sha256:5b9d12eb3d03f63acb7a77c0c17ed90b32b761ded1f122bc44bb4e8b6625cec0", size = 152019, upload-time = "2026-01-04T22:14:24.031Z" }, + { url = "https://files.pythonhosted.org/packages/e1/c3/781674895df5355a72aa8259e31624e39ea7cf116320f0db711d65b8d7f1/esperanto-2.16.0-py3-none-any.whl", hash = "sha256:82ca43762673c9738942b7d984f76a754edd085e94ba2ff599fb9f85e5c2c11e", size = 178964, upload-time = "2026-01-22T02:08:16.174Z" }, ] [[package]] @@ -783,7 +784,7 @@ wheels = [ [[package]] name = "firecrawl-py" -version = "4.13.0" +version = "4.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -794,9 +795,9 @@ 
dependencies = [ { name = "requests" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f8/4c/25c1d31cfa80223056af237e031bcfc5c0d7eba0266a96ace267d6880fc5/firecrawl_py-4.13.0.tar.gz", hash = "sha256:4f08088a7dc4abfb6b05dcd2290a9de65ce27ed392c5504e21f0519b783301c7", size = 160762, upload-time = "2026-01-14T15:41:07.531Z" } +sdist = { url = "https://files.pythonhosted.org/packages/22/52/a7aa0b912c1970bc3bf0ec0767c9ea72abc283430451564c9e2772af867a/firecrawl_py-4.13.1.tar.gz", hash = "sha256:ef1d5c6b7c49397a88f33b5c97b4fd605d3f6f47db4f747bf67c97b0becec992", size = 161920, upload-time = "2026-01-20T19:15:16.265Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/a0/e4b4cb71e7e1c3c27e458a40363e28d1912c4d7b92aaddec5495e1f1f3e4/firecrawl_py-4.13.0-py3-none-any.whl", hash = "sha256:abf6e5d9a927824a6a6222e16f95975a57a5d778352241f8a3f3bc5ad81d8bf5", size = 201675, upload-time = "2026-01-14T15:41:06.267Z" }, + { url = "https://files.pythonhosted.org/packages/cc/40/e009871da2ef0e8d3569fd4c30cd4130bfe2f66dc4c1b6566d8d17b71e8e/firecrawl_py-4.13.1-py3-none-any.whl", hash = "sha256:0806e32c82fb8ba8f2d1a27537dec9732cdd2e3c91e79b54acff708229b13e1e", size = 203503, upload-time = "2026-01-20T19:15:14.55Z" }, ] [[package]] @@ -891,7 +892,7 @@ requests = [ [[package]] name = "google-cloud-aiplatform" -version = "1.133.0" +version = "1.134.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docstring-parser" }, @@ -907,9 +908,9 @@ dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/be/31ce7fd658ddebafbe5583977ddee536b2bacc491ad10b5a067388aec66f/google_cloud_aiplatform-1.133.0.tar.gz", hash = "sha256:3a6540711956dd178daaab3c2c05db476e46d94ac25912b8cf4f59b00b058ae0", size = 9921309, upload-time = "2026-01-08T22:11:25.079Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d3/24/de4f21d0728d640b57bf7bbcd7460827a4daf9eaca61cb5b91be608c40bc/google_cloud_aiplatform-1.134.0.tar.gz", hash = "sha256:964cea117ca1ffc71742970e1091985adac72dfe76e1a1614a02a8cda50d6992", size = 9931075, upload-time = "2026-01-20T19:19:58.867Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/01/5b/ef74ff65aebb74eaba51078e33ddd897247ba0d1197fd5a7953126205519/google_cloud_aiplatform-1.133.0-py2.py3-none-any.whl", hash = "sha256:dfc81228e987ca10d1c32c7204e2131b3c8d6b7c8e0b4e23bf7c56816bc4c566", size = 8184595, upload-time = "2026-01-08T22:11:22.067Z" }, + { url = "https://files.pythonhosted.org/packages/85/f4/6863f3951eb07afd790fe6f8f1a5984224f7df836546a34ed29ab0cfe9af/google_cloud_aiplatform-1.134.0-py2.py3-none-any.whl", hash = "sha256:f249ae67d622deca486310e0021093764892ac357fb744b9e79548f490017ddc", size = 8189190, upload-time = "2026-01-20T19:19:55.997Z" }, ] [[package]] @@ -999,7 +1000,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.59.0" +version = "1.60.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1013,9 +1014,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/34/c03bcbc759d67ac3d96077838cdc1eac85417de6ea3b65b313fe53043eee/google_genai-1.59.0.tar.gz", hash = "sha256:0b7a2dc24582850ae57294209d8dfc2c4f5fcfde0a3f11d81dc5aca75fb619e2", size = 487374, upload-time = "2026-01-15T20:29:46.619Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/3f/a753be0dcee352b7d63bc6d1ba14a72591d63b6391dac0cdff7ac168c530/google_genai-1.60.0.tar.gz", hash = "sha256:9768061775fddfaecfefb0d6d7a6cabefb3952ebd246cd5f65247151c07d33d1", size = 487721, upload-time = "2026-01-21T22:17:30.398Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/53/6d00692fe50d73409b3406ae90c71bc4499c8ae7fac377ba16e283da917c/google_genai-1.59.0-py3-none-any.whl", hash = 
"sha256:59fc01a225d074fe9d1e626c3433da292f33249dadce4deb34edea698305a6df", size = 719099, upload-time = "2026-01-15T20:29:44.604Z" }, + { url = "https://files.pythonhosted.org/packages/31/e5/384b1f383917b5f0ae92e28f47bc27b16e3d26cd9bacb25e9f8ecab3c8fe/google_genai-1.60.0-py3-none-any.whl", hash = "sha256:967338378ffecebec19a8ed90cf8797b26818bacbefd7846a9280beb1099f7f3", size = 719431, upload-time = "2026-01-21T22:17:28.086Z" }, ] [[package]] @@ -1615,16 +1616,16 @@ wheels = [ [[package]] name = "langchain" -version = "1.2.4" +version = "1.2.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/3f/371267e88c153500a75c0e9daf9645a69955cfe6f85699955241ac0fa6e2/langchain-1.2.4.tar.gz", hash = "sha256:65119ff1c2ac8cc2410739b0fb2773f8fbfbe83357df9bab8a5fceafb9e04aa1", size = 552340, upload-time = "2026-01-14T19:35:26.556Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/bc/d8f506a525baadee99a65c6cc28c1c35c9eaf1cb2009f048e9861d81a600/langchain-1.2.6.tar.gz", hash = "sha256:7d46cbf719d860a16f6fc182d5d3de17453dda187f3d43e9c40ac352a5094fdd", size = 553127, upload-time = "2026-01-16T19:21:19.611Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/71/667887579bb3cf3c2db88224849f7362a8c3f118666e426a51058ee43d9c/langchain-1.2.4-py3-none-any.whl", hash = "sha256:182ac9f3c4559c5a6477e00d60ff8a56212ec4db6f101a4957492818dc3ce3e9", size = 107949, upload-time = "2026-01-14T19:35:24.7Z" }, + { url = "https://files.pythonhosted.org/packages/3f/28/d5dc4cb06ccb29d62a590d446072964766555e85863f5044c6e644c07d0d/langchain-1.2.6-py3-none-any.whl", hash = "sha256:a9a6c39f03c09b6eb0f1b47e267ad2a2fd04e124dfaa9753bd6c11d2fe7d944e", size = 108458, upload-time = "2026-01-16T19:21:18.085Z" }, ] [[package]] @@ -1859,16 +1860,16 @@ wheels = [ [[package]] name = "langgraph-checkpoint-sqlite" -version = "3.0.2" +version = 
"3.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiosqlite" }, { name = "langgraph-checkpoint" }, { name = "sqlite-vec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/f6/eceda876bbbc15c9d9060c95e392f8a9f9fb9262ace23f30995f865f04c0/langgraph_checkpoint_sqlite-3.0.2.tar.gz", hash = "sha256:a34961c035944af0ee7af416f8f26fec25059b10387a69dcb13fc6cc59c30a25", size = 109772, upload-time = "2026-01-12T20:31:51.951Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/61/40b7f8f29d6de92406e668c35265f409f57064907e31eae84ab3f2a3e3e1/langgraph_checkpoint_sqlite-3.0.3.tar.gz", hash = "sha256:438c234d37dabda979218954c9c6eb1db73bee6492c2f1d3a00552fe23fa34ed", size = 123876, upload-time = "2026-01-19T00:38:44.473Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/e3/8ed603baf04368618faaedabb198684b5d214697e17ddf8d8aced0f0b58f/langgraph_checkpoint_sqlite-3.0.2-py3-none-any.whl", hash = "sha256:c627e4be7b801f7ac3d1dd3ab407f068bb1150d76b302a458c45904d57feca01", size = 33386, upload-time = "2026-01-12T20:31:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/a3/d8/84ef22ee1cc485c4910df450108fd5e246497379522b3c6cfba896f71bf6/langgraph_checkpoint_sqlite-3.0.3-py3-none-any.whl", hash = "sha256:02eb683a79aa6fcda7cd4de43861062a5d160dbbb990ef8a9fd76c979998a952", size = 33593, upload-time = "2026-01-19T00:38:43.288Z" }, ] [[package]] @@ -2375,7 +2376,7 @@ wheels = [ [[package]] name = "open-notebook" -version = "1.5.2" +version = "1.6.0" source = { editable = "." 
} dependencies = [ { name = "ai-prompter" }, @@ -2396,6 +2397,7 @@ dependencies = [ { name = "langgraph" }, { name = "langgraph-checkpoint-sqlite" }, { name = "loguru" }, + { name = "numpy" }, { name = "podcast-creator" }, { name = "pydantic" }, { name = "python-dotenv" }, @@ -2428,7 +2430,7 @@ dev = [ requires-dist = [ { name = "ai-prompter", specifier = ">=0.3" }, { name = "content-core", specifier = ">=1.0.2" }, - { name = "esperanto", specifier = ">=2.13" }, + { name = "esperanto", specifier = ">=2.16" }, { name = "fastapi", specifier = ">=0.104.0" }, { name = "httpx", extras = ["socks"], specifier = ">=0.27.0" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.29.5" }, @@ -2447,6 +2449,7 @@ requires-dist = [ { name = "langgraph-checkpoint-sqlite", specifier = ">=3.0.1" }, { name = "loguru", specifier = ">=0.7.2" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.1" }, + { name = "numpy", specifier = ">=2.4.1" }, { name = "podcast-creator", specifier = ">=0.7.0" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.1" }, { name = "pydantic", specifier = ">=2.9.2" }, @@ -2621,28 +2624,28 @@ wheels = [ [[package]] name = "ormsgpack" -version = "1.12.1" +version = "1.12.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/96/34c40d621996c2f377a18decbd3c59f031dde73c3ba47d1e1e8f29a05aaa/ormsgpack-1.12.1.tar.gz", hash = "sha256:a3877fde1e4f27a39f92681a0aab6385af3a41d0c25375d33590ae20410ea2ac", size = 39476, upload-time = "2025-12-14T07:57:43.248Z" } +sdist = { url = "https://files.pythonhosted.org/packages/12/0c/f1761e21486942ab9bb6feaebc610fa074f7c5e496e6962dea5873348077/ormsgpack-1.12.2.tar.gz", hash = "sha256:944a2233640273bee67521795a73cf1e959538e0dfb7ac635505010455e53b33", size = 39031, upload-time = "2026-01-18T20:55:28.023Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/57/e2/f5b89365c8dc8025c27d31316038f1c103758ddbf87dc0fa8e3f78f66907/ormsgpack-1.12.1-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:4038f59ae0e19dac5e5d9aae4ec17ff84a79e046342ee73ccdecf3547ecf0d34", size = 376180, upload-time = "2025-12-14T07:56:56.521Z" }, - { url = "https://files.pythonhosted.org/packages/ca/87/3f694e06f5e32c6d65066f53b4a025282a5072b6b336c17560b00e04606d/ormsgpack-1.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16c63b0c5a3eec467e4bb33a14dabba076b7d934dff62898297b5c0b5f7c3cb3", size = 202338, upload-time = "2025-12-14T07:56:57.585Z" }, - { url = "https://files.pythonhosted.org/packages/e5/f5/6d95d7b7c11f97a92522082fc7e5d1ab34537929f1e13f4c369f392f19d0/ormsgpack-1.12.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:74fd6a8e037eb310dda865298e8d122540af00fe5658ec18b97a1d34f4012e4d", size = 210720, upload-time = "2025-12-14T07:56:58.968Z" }, - { url = "https://files.pythonhosted.org/packages/2b/9d/9a49a2686f8b7165dcb2342b8554951263c30c0f0825f1fcc2d56e736a6b/ormsgpack-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58ad60308e233dd824a1859eabb5fe092e123e885eafa4ad5789322329c80fb5", size = 211264, upload-time = "2025-12-14T07:57:00.099Z" }, - { url = "https://files.pythonhosted.org/packages/02/31/2fdc36eaeca2182900b96fc7b19755f293283fe681750e3d295733d62f0e/ormsgpack-1.12.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:35127464c941c1219acbe1a220e48d55e7933373d12257202f4042f7044b4c90", size = 386081, upload-time = "2025-12-14T07:57:01.177Z" }, - { url = "https://files.pythonhosted.org/packages/f0/65/0a765432f08ae26b4013c6a9aed97be17a9ef85f1600948a474b518e27dd/ormsgpack-1.12.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:c48d1c50794692d1e6e3f8c3bb65f5c3acfaae9347e506484a65d60b3d91fb50", size = 479572, upload-time = "2025-12-14T07:57:02.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/4f/f2f15ebef786ad71cea420bf8692448fbddf04d1bf3feaa68bd5ee3172e6/ormsgpack-1.12.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b512b2ad6feaaefdc26e05431ed2843e42483041e354e167c53401afaa83d919", size = 387862, upload-time = "2025-12-14T07:57:03.842Z" }, - { url = "https://files.pythonhosted.org/packages/15/eb/86fbef1d605fa91ecef077f93f9d0e34fc39b23475dfe3ffb92f6c8db28d/ormsgpack-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:93f30db95e101a9616323bfc50807ad00e7f6197cea2216d2d24af42afc77d88", size = 115900, upload-time = "2025-12-14T07:57:05.137Z" }, - { url = "https://files.pythonhosted.org/packages/5b/67/7ba1a46e6a6e263fc42a4fafc24afc1ab21a66116553cad670426f0bd9ef/ormsgpack-1.12.1-cp311-cp311-win_arm64.whl", hash = "sha256:d75b5fa14f6abffce2c392ee03b4731199d8a964c81ee8645c4c79af0e80fd50", size = 109868, upload-time = "2025-12-14T07:57:06.834Z" }, - { url = "https://files.pythonhosted.org/packages/17/fe/ab9167ca037406b5703add24049cf3e18021a3b16133ea20615b1f160ea4/ormsgpack-1.12.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:4d7fb0e1b6fbc701d75269f7405a4f79230a6ce0063fb1092e4f6577e312f86d", size = 376725, upload-time = "2025-12-14T07:57:07.894Z" }, - { url = "https://files.pythonhosted.org/packages/c7/ea/2820e65f506894c459b840d1091ae6e327fde3d5a3f3b002a11a1b9bdf7d/ormsgpack-1.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43a9353e2db5b024c91a47d864ef15eaa62d81824cfc7740fed4cef7db738694", size = 202466, upload-time = "2025-12-14T07:57:09.049Z" }, - { url = "https://files.pythonhosted.org/packages/45/8b/def01c13339c5bbec2ee1469ef53e7fadd66c8d775df974ee4def1572515/ormsgpack-1.12.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc8fe866b7706fc25af0adf1f600bc06ece5b15ca44e34641327198b821e5c3c", size = 210748, upload-time = "2025-12-14T07:57:10.074Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/d2/bf350c92f7f067dd9484499705f2d8366d8d9008a670e3d1d0add1908f85/ormsgpack-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:813755b5f598a78242042e05dfd1ada4e769e94b98c9ab82554550f97ff4d641", size = 211510, upload-time = "2025-12-14T07:57:11.165Z" }, - { url = "https://files.pythonhosted.org/packages/74/92/9d689bcb95304a6da26c4d59439c350940c25d1b35f146d402ccc6344c51/ormsgpack-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8eea2a13536fae45d78f93f2cc846c9765c7160c85f19cfefecc20873c137cdd", size = 386237, upload-time = "2025-12-14T07:57:12.306Z" }, - { url = "https://files.pythonhosted.org/packages/17/fe/bd3107547f8b6129265dd957f40b9cd547d2445db2292aacb13335a7ea89/ormsgpack-1.12.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7a02ebda1a863cbc604740e76faca8eee1add322db2dcbe6cf32669fffdff65c", size = 479589, upload-time = "2025-12-14T07:57:13.475Z" }, - { url = "https://files.pythonhosted.org/packages/c1/7c/e8e5cc9edb967d44f6f85e9ebdad440b59af3fae00b137a4327dc5aed9bb/ormsgpack-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3c0bd63897c439931cdf29348e5e6e8c330d529830e848d10767615c0f3d1b82", size = 388077, upload-time = "2025-12-14T07:57:14.551Z" }, - { url = "https://files.pythonhosted.org/packages/35/6b/5031797e43b58506f28a8760b26dc23f2620fb4f2200c4c1b3045603e67e/ormsgpack-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:362f2e812f8d7035dc25a009171e09d7cc97cb30d3c9e75a16aeae00ca3c1dcf", size = 116190, upload-time = "2025-12-14T07:57:15.575Z" }, - { url = "https://files.pythonhosted.org/packages/1e/fd/9f43ea6425e383a6b2dbfafebb06fd60e8d68c700ef715adfbcdb499f75d/ormsgpack-1.12.1-cp312-cp312-win_arm64.whl", hash = "sha256:6190281e381db2ed0045052208f47a995ccf61eed48f1215ae3cce3fbccd59c5", size = 109990, upload-time = "2025-12-14T07:57:16.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/08/8b68f24b18e69d92238aa8f258218e6dfeacf4381d9d07ab8df303f524a9/ormsgpack-1.12.2-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:bd5f4bf04c37888e864f08e740c5a573c4017f6fd6e99fa944c5c935fabf2dd9", size = 378266, upload-time = "2026-01-18T20:55:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/29fc13044ecb7c153523ae0a1972269fcd613650d1fa1a9cec1044c6b666/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34d5b28b3570e9fed9a5a76528fc7230c3c76333bc214798958e58e9b79cc18a", size = 203035, upload-time = "2026-01-18T20:55:30.59Z" }, + { url = "https://files.pythonhosted.org/packages/ad/c2/00169fb25dd8f9213f5e8a549dfb73e4d592009ebc85fbbcd3e1dcac575b/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3708693412c28f3538fb5a65da93787b6bbab3484f6bc6e935bfb77a62400ae5", size = 210539, upload-time = "2026-01-18T20:55:48.569Z" }, + { url = "https://files.pythonhosted.org/packages/1b/33/543627f323ff3c73091f51d6a20db28a1a33531af30873ea90c5ac95a9b5/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43013a3f3e2e902e1d05e72c0f1aeb5bedbb8e09240b51e26792a3c89267e181", size = 212401, upload-time = "2026-01-18T20:56:10.101Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5d/f70e2c3da414f46186659d24745483757bcc9adccb481a6eb93e2b729301/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7c8b1667a72cbba74f0ae7ecf3105a5e01304620ed14528b2cb4320679d2869b", size = 387082, upload-time = "2026-01-18T20:56:12.047Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d6/06e8dc920c7903e051f30934d874d4afccc9bb1c09dcaf0bc03a7de4b343/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:df6961442140193e517303d0b5d7bc2e20e69a879c2d774316125350c4a76b92", size = 482346, upload-time = "2026-01-18T20:56:05.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/c4/f337ac0905eed9c393ef990c54565cd33644918e0a8031fe48c098c71dbf/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6a4c34ddef109647c769d69be65fa1de7a6022b02ad45546a69b3216573eb4a", size = 425181, upload-time = "2026-01-18T20:55:37.83Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/6d5758fabef3babdf4bbbc453738cc7de9cd3334e4c38dd5737e27b85653/ormsgpack-1.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:73670ed0375ecc303858e3613f407628dd1fca18fe6ac57b7b7ce66cc7bb006c", size = 117182, upload-time = "2026-01-18T20:55:31.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/57/17a15549233c37e7fd054c48fe9207492e06b026dbd872b826a0b5f833b6/ormsgpack-1.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:c2be829954434e33601ae5da328cccce3266b098927ca7a30246a0baec2ce7bd", size = 111464, upload-time = "2026-01-18T20:55:38.811Z" }, + { url = "https://files.pythonhosted.org/packages/4c/36/16c4b1921c308a92cef3bf6663226ae283395aa0ff6e154f925c32e91ff5/ormsgpack-1.12.2-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7a29d09b64b9694b588ff2f80e9826bdceb3a2b91523c5beae1fab27d5c940e7", size = 378618, upload-time = "2026-01-18T20:55:50.835Z" }, + { url = "https://files.pythonhosted.org/packages/c0/68/468de634079615abf66ed13bb5c34ff71da237213f29294363beeeca5306/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b39e629fd2e1c5b2f46f99778450b59454d1f901bc507963168985e79f09c5d", size = 203186, upload-time = "2026-01-18T20:56:11.163Z" }, + { url = "https://files.pythonhosted.org/packages/73/a9/d756e01961442688b7939bacd87ce13bfad7d26ce24f910f6028178b2cc8/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:958dcb270d30a7cb633a45ee62b9444433fa571a752d2ca484efdac07480876e", size = 210738, upload-time = "2026-01-18T20:56:09.181Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/ba/795b1036888542c9113269a3f5690ab53dd2258c6fb17676ac4bd44fcf94/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d379d72b6c5e964851c77cfedfb386e474adee4fd39791c2c5d9efb53505cc", size = 212569, upload-time = "2026-01-18T20:56:06.135Z" }, + { url = "https://files.pythonhosted.org/packages/6c/aa/bff73c57497b9e0cba8837c7e4bcab584b1a6dbc91a5dd5526784a5030c8/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8463a3fc5f09832e67bdb0e2fda6d518dc4281b133166146a67f54c08496442e", size = 387166, upload-time = "2026-01-18T20:55:36.738Z" }, + { url = "https://files.pythonhosted.org/packages/d3/cf/f8283cba44bcb7b14f97b6274d449db276b3a86589bdb363169b51bc12de/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:eddffb77eff0bad4e67547d67a130604e7e2dfbb7b0cde0796045be4090f35c6", size = 482498, upload-time = "2026-01-18T20:55:29.626Z" }, + { url = "https://files.pythonhosted.org/packages/05/be/71e37b852d723dfcbe952ad04178c030df60d6b78eba26bfd14c9a40575e/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fcd55e5f6ba0dbce624942adf9f152062135f991a0126064889f68eb850de0dd", size = 425518, upload-time = "2026-01-18T20:55:49.556Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0c/9803aa883d18c7ef197213cd2cbf73ba76472a11fe100fb7dab2884edf48/ormsgpack-1.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:d024b40828f1dde5654faebd0d824f9cc29ad46891f626272dd5bfd7af2333a4", size = 117462, upload-time = "2026-01-18T20:55:47.726Z" }, + { url = "https://files.pythonhosted.org/packages/c8/9e/029e898298b2cc662f10d7a15652a53e3b525b1e7f07e21fef8536a09bb8/ormsgpack-1.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:da538c542bac7d1c8f3f2a937863dba36f013108ce63e55745941dda4b75dbb6", size = 111559, upload-time = "2026-01-18T20:55:54.273Z" }, ] [[package]] @@ -2656,30 +2659,31 @@ wheels = [ [[package]] name = "pandas" -version = "2.3.3" +version = "3.0.0" 
source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } +sdist = { url = "https://files.pythonhosted.org/packages/de/da/b1dc0481ab8d55d0f46e343cfe67d4551a0e14fcee52bd38ca1bd73258d8/pandas-3.0.0.tar.gz", hash = "sha256:0facf7e87d38f721f0af46fe70d97373a37701b1c09f7ed7aeeb292ade5c050f", size = 4633005, upload-time = "2026-01-21T15:52:04.726Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" }, - { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" }, - { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" }, - { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" }, - { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" }, - { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" }, - { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" }, - { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" }, - { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = 
"2025-09-29T23:19:59.765Z" }, - { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload-time = "2025-09-29T23:20:14.098Z" }, - { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload-time = "2025-09-29T23:20:26.76Z" }, - { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload-time = "2025-09-29T23:20:41.344Z" }, - { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" }, + { url = "https://files.pythonhosted.org/packages/46/1e/b184654a856e75e975a6ee95d6577b51c271cd92cb2b020c9378f53e0032/pandas-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d64ce01eb9cdca96a15266aa679ae50212ec52757c79204dbc7701a222401850", size = 10313247, upload-time = "2026-01-21T15:50:15.775Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5e/e04a547ad0f0183bf151fd7c7a477468e3b85ff2ad231c566389e6cc9587/pandas-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:613e13426069793aa1ec53bdcc3b86e8d32071daea138bbcf4fa959c9cdaa2e2", size = 9913131, upload-time = "2026-01-21T15:50:18.611Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/93/bb77bfa9fc2aba9f7204db807d5d3fb69832ed2854c60ba91b4c65ba9219/pandas-3.0.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0192fee1f1a8e743b464a6607858ee4b071deb0b118eb143d71c2a1d170996d5", size = 10741925, upload-time = "2026-01-21T15:50:21.058Z" }, + { url = "https://files.pythonhosted.org/packages/62/fb/89319812eb1d714bfc04b7f177895caeba8ab4a37ef6712db75ed786e2e0/pandas-3.0.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b853319dec8d5e0c8b875374c078ef17f2269986a78168d9bd57e49bf650ae", size = 11245979, upload-time = "2026-01-21T15:50:23.413Z" }, + { url = "https://files.pythonhosted.org/packages/a9/63/684120486f541fc88da3862ed31165b3b3e12b6a1c7b93be4597bc84e26c/pandas-3.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:707a9a877a876c326ae2cb640fbdc4ef63b0a7b9e2ef55c6df9942dcee8e2af9", size = 11756337, upload-time = "2026-01-21T15:50:25.932Z" }, + { url = "https://files.pythonhosted.org/packages/39/92/7eb0ad232312b59aec61550c3c81ad0743898d10af5df7f80bc5e5065416/pandas-3.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:afd0aa3d0b5cda6e0b8ffc10dbcca3b09ef3cbcd3fe2b27364f85fdc04e1989d", size = 12325517, upload-time = "2026-01-21T15:50:27.952Z" }, + { url = "https://files.pythonhosted.org/packages/51/27/bf9436dd0a4fc3130acec0828951c7ef96a0631969613a9a35744baf27f6/pandas-3.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:113b4cca2614ff7e5b9fee9b6f066618fe73c5a83e99d721ffc41217b2bf57dd", size = 9881576, upload-time = "2026-01-21T15:50:30.149Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2b/c618b871fce0159fd107516336e82891b404e3f340821853c2fc28c7830f/pandas-3.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c14837eba8e99a8da1527c0280bba29b0eb842f64aa94982c5e21227966e164b", size = 9140807, upload-time = "2026-01-21T15:50:32.308Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/38/db33686f4b5fa64d7af40d96361f6a4615b8c6c8f1b3d334eee46ae6160e/pandas-3.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9803b31f5039b3c3b10cc858c5e40054adb4b29b4d81cb2fd789f4121c8efbcd", size = 10334013, upload-time = "2026-01-21T15:50:34.771Z" }, + { url = "https://files.pythonhosted.org/packages/a5/7b/9254310594e9774906bacdd4e732415e1f86ab7dbb4b377ef9ede58cd8ec/pandas-3.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:14c2a4099cd38a1d18ff108168ea417909b2dea3bd1ebff2ccf28ddb6a74d740", size = 9874154, upload-time = "2026-01-21T15:50:36.67Z" }, + { url = "https://files.pythonhosted.org/packages/63/d4/726c5a67a13bc66643e66d2e9ff115cead482a44fc56991d0c4014f15aaf/pandas-3.0.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d257699b9a9960e6125686098d5714ac59d05222bef7a5e6af7a7fd87c650801", size = 10384433, upload-time = "2026-01-21T15:50:39.132Z" }, + { url = "https://files.pythonhosted.org/packages/bf/2e/9211f09bedb04f9832122942de8b051804b31a39cfbad199a819bb88d9f3/pandas-3.0.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:69780c98f286076dcafca38d8b8eee1676adf220199c0a39f0ecbf976b68151a", size = 10864519, upload-time = "2026-01-21T15:50:41.043Z" }, + { url = "https://files.pythonhosted.org/packages/00/8d/50858522cdc46ac88b9afdc3015e298959a70a08cd21e008a44e9520180c/pandas-3.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4a66384f017240f3858a4c8a7cf21b0591c3ac885cddb7758a589f0f71e87ebb", size = 11394124, upload-time = "2026-01-21T15:50:43.377Z" }, + { url = "https://files.pythonhosted.org/packages/86/3f/83b2577db02503cd93d8e95b0f794ad9d4be0ba7cb6c8bcdcac964a34a42/pandas-3.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be8c515c9bc33989d97b89db66ea0cececb0f6e3c2a87fcc8b69443a6923e95f", size = 11920444, upload-time = "2026-01-21T15:50:45.932Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/2d/4f8a2f192ed12c90a0aab47f5557ece0e56b0370c49de9454a09de7381b2/pandas-3.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:a453aad8c4f4e9f166436994a33884442ea62aa8b27d007311e87521b97246e1", size = 9730970, upload-time = "2026-01-21T15:50:47.962Z" }, + { url = "https://files.pythonhosted.org/packages/d4/64/ff571be435cf1e643ca98d0945d76732c0b4e9c37191a89c8550b105eed1/pandas-3.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:da768007b5a33057f6d9053563d6b74dd6d029c337d93c6d0d22a763a5c2ecc0", size = 9041950, upload-time = "2026-01-21T15:50:50.422Z" }, ] [[package]] @@ -2723,7 +2727,7 @@ name = "pexpect" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ptyprocess" }, + { name = "ptyprocess", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } wheels = [ @@ -3053,11 +3057,11 @@ wheels = [ [[package]] name = "pycparser" -version = "2.23" +version = "3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] [[package]] @@ -3337,15 +3341,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/d1/006341b929ba9a7ec2660fae1b63c8ac7deb257abf412e41fe5f6bddff63/pytubefix-10.3.6-py3-none-any.whl", hash = "sha256:5475577e632d3bc6a70e5fb336f0c629dc6bbdb2a7bf5cecac0bf7a3fd01f6a1", size = 1516541, upload-time = "2025-12-07T10:55:13.735Z" }, ] -[[package]] -name = "pytz" -version = "2025.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, -] - [[package]] name = "pywin32" version = "311" @@ -3657,8 +3652,8 @@ name = "secretstorage" version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cryptography" }, - { name = "jeepney" }, + { name = "cryptography", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "jeepney", marker = "sys_platform != 
'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } wheels = [ @@ -3712,37 +3707,38 @@ wheels = [ [[package]] name = "soupsieve" -version = "2.8.1" +version = "2.8.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/23/adf3796d740536d63a6fbda113d07e60c734b6ed5d3058d1e47fc0495e47/soupsieve-2.8.1.tar.gz", hash = "sha256:4cf733bc50fa805f5df4b8ef4740fc0e0fa6218cf3006269afd3f9d6d80fd350", size = 117856, upload-time = "2025-12-18T13:50:34.655Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/f3/b67d6ea49ca9154453b6d70b34ea22f3996b9fa55da105a79d8732227adc/soupsieve-2.8.1-py3-none-any.whl", hash = "sha256:a11fe2a6f3d76ab3cf2de04eb339c1be5b506a8a47f2ceb6d139803177f85434", size = 36710, upload-time = "2025-12-18T13:50:33.267Z" }, + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, ] [[package]] name = "sqlalchemy" -version = "2.0.45" +version = "2.0.46" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 
'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/be/f9/5e4491e5ccf42f5d9cfc663741d261b3e6e1683ae7812114e7636409fcc6/sqlalchemy-2.0.45.tar.gz", hash = "sha256:1632a4bda8d2d25703fdad6363058d882541bdaaee0e5e3ddfa0cd3229efce88", size = 9869912, upload-time = "2025-12-09T21:05:16.737Z" } +sdist = { url = "https://files.pythonhosted.org/packages/06/aa/9ce0f3e7a9829ead5c8ce549392f33a12c4555a6c0609bb27d882e9c7ddf/sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7", size = 9865393, upload-time = "2026-01-21T18:03:45.119Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/1c/769552a9d840065137272ebe86ffbb0bc92b0f1e0a68ee5266a225f8cd7b/sqlalchemy-2.0.45-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e90a344c644a4fa871eb01809c32096487928bd2038bf10f3e4515cb688cc56", size = 2153860, upload-time = "2025-12-10T20:03:23.843Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f8/9be54ff620e5b796ca7b44670ef58bc678095d51b0e89d6e3102ea468216/sqlalchemy-2.0.45-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8c8b41b97fba5f62349aa285654230296829672fc9939cd7f35aab246d1c08b", size = 3309379, upload-time = "2025-12-09T22:06:07.461Z" }, - { url = "https://files.pythonhosted.org/packages/f6/2b/60ce3ee7a5ae172bfcd419ce23259bb874d2cddd44f67c5df3760a1e22f9/sqlalchemy-2.0.45-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12c694ed6468333a090d2f60950e4250b928f457e4962389553d6ba5fe9951ac", size = 3309948, upload-time = "2025-12-09T22:09:57.643Z" }, - { url = "https://files.pythonhosted.org/packages/a3/42/bac8d393f5db550e4e466d03d16daaafd2bad1f74e48c12673fb499a7fc1/sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:f7d27a1d977a1cfef38a0e2e1ca86f09c4212666ce34e6ae542f3ed0a33bc606", size = 3261239, upload-time = "2025-12-09T22:06:08.879Z" }, - { url = "https://files.pythonhosted.org/packages/6f/12/43dc70a0528c59842b04ea1c1ed176f072a9b383190eb015384dd102fb19/sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d62e47f5d8a50099b17e2bfc1b0c7d7ecd8ba6b46b1507b58cc4f05eefc3bb1c", size = 3284065, upload-time = "2025-12-09T22:09:59.454Z" }, - { url = "https://files.pythonhosted.org/packages/cf/9c/563049cf761d9a2ec7bc489f7879e9d94e7b590496bea5bbee9ed7b4cc32/sqlalchemy-2.0.45-cp311-cp311-win32.whl", hash = "sha256:3c5f76216e7b85770d5bb5130ddd11ee89f4d52b11783674a662c7dd57018177", size = 2113480, upload-time = "2025-12-09T21:29:57.03Z" }, - { url = "https://files.pythonhosted.org/packages/bc/fa/09d0a11fe9f15c7fa5c7f0dd26be3d235b0c0cbf2f9544f43bc42efc8a24/sqlalchemy-2.0.45-cp311-cp311-win_amd64.whl", hash = "sha256:a15b98adb7f277316f2c276c090259129ee4afca783495e212048daf846654b2", size = 2138407, upload-time = "2025-12-09T21:29:58.556Z" }, - { url = "https://files.pythonhosted.org/packages/2d/c7/1900b56ce19bff1c26f39a4ce427faec7716c81ac792bfac8b6a9f3dca93/sqlalchemy-2.0.45-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3ee2aac15169fb0d45822983631466d60b762085bc4535cd39e66bea362df5f", size = 3333760, upload-time = "2025-12-09T22:11:02.66Z" }, - { url = "https://files.pythonhosted.org/packages/0a/93/3be94d96bb442d0d9a60e55a6bb6e0958dd3457751c6f8502e56ef95fed0/sqlalchemy-2.0.45-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba547ac0b361ab4f1608afbc8432db669bd0819b3e12e29fb5fa9529a8bba81d", size = 3348268, upload-time = "2025-12-09T22:13:49.054Z" }, - { url = "https://files.pythonhosted.org/packages/48/4b/f88ded696e61513595e4a9778f9d3f2bf7332cce4eb0c7cedaabddd6687b/sqlalchemy-2.0.45-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:215f0528b914e5c75ef2559f69dca86878a3beeb0c1be7279d77f18e8d180ed4", size = 3278144, upload-time = "2025-12-09T22:11:04.14Z" }, - { url = "https://files.pythonhosted.org/packages/ed/6a/310ecb5657221f3e1bd5288ed83aa554923fb5da48d760a9f7622afeb065/sqlalchemy-2.0.45-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:107029bf4f43d076d4011f1afb74f7c3e2ea029ec82eb23d8527d5e909e97aa6", size = 3313907, upload-time = "2025-12-09T22:13:50.598Z" }, - { url = "https://files.pythonhosted.org/packages/5c/39/69c0b4051079addd57c84a5bfb34920d87456dd4c90cf7ee0df6efafc8ff/sqlalchemy-2.0.45-cp312-cp312-win32.whl", hash = "sha256:0c9f6ada57b58420a2c0277ff853abe40b9e9449f8d7d231763c6bc30f5c4953", size = 2112182, upload-time = "2025-12-09T21:39:30.824Z" }, - { url = "https://files.pythonhosted.org/packages/f7/4e/510db49dd89fc3a6e994bee51848c94c48c4a00dc905e8d0133c251f41a7/sqlalchemy-2.0.45-cp312-cp312-win_amd64.whl", hash = "sha256:8defe5737c6d2179c7997242d6473587c3beb52e557f5ef0187277009f73e5e1", size = 2139200, upload-time = "2025-12-09T21:39:32.321Z" }, - { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, + { url = "https://files.pythonhosted.org/packages/69/ac/b42ad16800d0885105b59380ad69aad0cce5a65276e269ce2729a2343b6a/sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684", size = 2154851, upload-time = "2026-01-21T18:27:30.54Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/d8710068cb79f64d002ebed62a7263c00c8fd95f4ebd4b5be8f7ca93f2bc/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62", size = 3311241, 
upload-time = "2026-01-21T18:32:33.45Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/20c71487c7219ab3aa7421c7c62d93824c97c1460f2e8bb72404b0192d13/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f", size = 3310741, upload-time = "2026-01-21T18:44:57.887Z" }, + { url = "https://files.pythonhosted.org/packages/65/80/d26d00b3b249ae000eee4db206fcfc564bf6ca5030e4747adf451f4b5108/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01", size = 3263116, upload-time = "2026-01-21T18:32:35.044Z" }, + { url = "https://files.pythonhosted.org/packages/da/ee/74dda7506640923821340541e8e45bd3edd8df78664f1f2e0aae8077192b/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999", size = 3285327, upload-time = "2026-01-21T18:44:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/9f/25/6dcf8abafff1389a21c7185364de145107b7394ecdcb05233815b236330d/sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d", size = 2114564, upload-time = "2026-01-21T18:33:15.85Z" }, + { url = "https://files.pythonhosted.org/packages/93/5f/e081490f8523adc0088f777e4ebad3cac21e498ec8a3d4067074e21447a1/sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597", size = 2139233, upload-time = "2026-01-21T18:33:17.528Z" }, + { url = "https://files.pythonhosted.org/packages/b6/35/d16bfa235c8b7caba3730bba43e20b1e376d2224f407c178fbf59559f23e/sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c", size = 2153405, upload-time = "2026-01-21T19:05:54.143Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/6c/3192e24486749862f495ddc6584ed730c0c994a67550ec395d872a2ad650/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9", size = 3334702, upload-time = "2026-01-21T18:46:45.384Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a2/b9f33c8d68a3747d972a0bb758c6b63691f8fb8a49014bc3379ba15d4274/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b", size = 3347664, upload-time = "2026-01-21T18:40:09.979Z" }, + { url = "https://files.pythonhosted.org/packages/aa/d2/3e59e2a91eaec9db7e8dc6b37b91489b5caeb054f670f32c95bcba98940f/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53", size = 3277372, upload-time = "2026-01-21T18:46:47.168Z" }, + { url = "https://files.pythonhosted.org/packages/dd/dd/67bc2e368b524e2192c3927b423798deda72c003e73a1e94c21e74b20a85/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e", size = 3312425, upload-time = "2026-01-21T18:40:11.548Z" }, + { url = "https://files.pythonhosted.org/packages/43/82/0ecd68e172bfe62247e96cb47867c2d68752566811a4e8c9d8f6e7c38a65/sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb", size = 2113155, upload-time = "2026-01-21T18:42:49.748Z" }, + { url = "https://files.pythonhosted.org/packages/bc/2a/2821a45742073fc0331dc132552b30de68ba9563230853437cac54b2b53e/sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff", size = 2140078, upload-time = "2026-01-21T18:42:51.197Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/a1/9c4efa03300926601c19c18582531b45aededfb961ab3c3585f1e24f120b/sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e", size = 1937882, upload-time = "2026-01-21T18:22:10.456Z" }, ] [[package]] @@ -3759,15 +3755,15 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.1.2" +version = "3.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/34/f5df66cb383efdbf4f2db23cabb27f51b1dcb737efaf8a558f6f1d195134/sse_starlette-3.1.2.tar.gz", hash = "sha256:55eff034207a83a0eb86de9a68099bd0157838f0b8b999a1b742005c71e33618", size = 26303, upload-time = "2025-12-31T08:02:20.023Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/95/8c4b76eec9ae574474e5d2997557cebf764bcd3586458956c30631ae08f4/sse_starlette-3.1.2-py3-none-any.whl", hash = "sha256:cd800dd349f4521b317b9391d3796fa97b71748a4da9b9e00aafab32dda375c8", size = 12484, upload-time = "2025-12-31T08:02:18.894Z" }, + { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, ] [[package]] @@ -4059,31 +4055,31 @@ wheels = [ [[package]] name = "uuid-utils" -version = "0.13.0" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/fe/8a/17b11768dcb473d3a255c02ffdd94fbd1b345c906efea0a39124dcbaed52/uuid_utils-0.13.0.tar.gz", hash = "sha256:4c17df6427a9e23a4cd7fb9ee1efb53b8abb078660b9bdb2524ca8595022dfe1", size = 21921, upload-time = "2026-01-08T15:48:10.841Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/7c/3a926e847516e67bc6838634f2e54e24381105b4e80f9338dc35cca0086b/uuid_utils-0.14.0.tar.gz", hash = "sha256:fc5bac21e9933ea6c590433c11aa54aaca599f690c08069e364eb13a12f670b4", size = 22072, upload-time = "2026-01-20T20:37:15.729Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/b8/d40848ca22781f206c60a1885fc737d2640392bd6b5792d455525accd89c/uuid_utils-0.13.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:83628283e977fb212e756bc055df8fdd2f9f589a2e539ba1abe755b8ce8df7a4", size = 602130, upload-time = "2026-01-08T15:47:34.877Z" }, - { url = "https://files.pythonhosted.org/packages/40/b9/00a944b8096632ea12638181f8e294abcde3e3b8b5e29b777f809896f6ae/uuid_utils-0.13.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c47638ed6334ab19d80f73664f153b04bbb04ab8ce4298d10da6a292d4d21c47", size = 304213, upload-time = "2026-01-08T15:47:36.807Z" }, - { url = "https://files.pythonhosted.org/packages/da/d7/07b36c33aef683b81c9afff3ec178d5eb39d325447a68c3c68a62e4abb32/uuid_utils-0.13.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:b276b538c57733ed406948584912da422a604313c71479654848b84b9e19c9b0", size = 340624, upload-time = "2026-01-08T15:47:38.821Z" }, - { url = "https://files.pythonhosted.org/packages/7d/55/fcff2fff02a27866cb1a6614c9df2b3ace721f0a0aab2b7b8f5a7d4e4221/uuid_utils-0.13.0-cp39-abi3-manylinux_2_24_armv7l.whl", hash = "sha256:bdaf2b77e34b199cf04cde28399495fd1ed951de214a4ece1f3919b2f945bb06", size = 346705, upload-time = "2026-01-08T15:47:40.397Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/48/67438506c2bb8bee1b4b00d7c0b3ff866401b4790849bf591d654d4ea0bc/uuid_utils-0.13.0-cp39-abi3-manylinux_2_24_i686.whl", hash = "sha256:eb2f0baf81e82f9769a7684022dca8f3bf801ca1574a3e94df1876e9d6f9271e", size = 366023, upload-time = "2026-01-08T15:47:42.662Z" }, - { url = "https://files.pythonhosted.org/packages/8b/d7/2d91ce17f62fd764d593430de296b70843cc25229c772453f7261de9e6a8/uuid_utils-0.13.0-cp39-abi3-manylinux_2_24_ppc64le.whl", hash = "sha256:6be6c4d11275f5cc402a4fdba6c2b1ce45fd3d99bb78716cd1cc2cbf6802b2ce", size = 471149, upload-time = "2026-01-08T15:47:44.963Z" }, - { url = "https://files.pythonhosted.org/packages/2e/9a/aa0756186073ba84daf5704c150d41ede10eb3185d510e02532e2071550e/uuid_utils-0.13.0-cp39-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:77621cf6ceca7f42173a642a01c01c216f9eaec3b7b65d093d2d6a433ca0a83d", size = 342130, upload-time = "2026-01-08T15:47:46.331Z" }, - { url = "https://files.pythonhosted.org/packages/74/b4/3191789f4dc3bed59d79cec90559821756297a25d7dc34d1bf7781577a75/uuid_utils-0.13.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9a5a9eb06c2bb86dd876cd7b2fe927fc8543d14c90d971581db6ffda4a02526f", size = 524128, upload-time = "2026-01-08T15:47:47.628Z" }, - { url = "https://files.pythonhosted.org/packages/b2/30/29839210a8fff9fc219bfa7c8d8cd115324e92618cba0cda090d54d3d321/uuid_utils-0.13.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:775347c6110fb71360df17aac74132d8d47c1dbe71233ac98197fc872a791fd2", size = 615872, upload-time = "2026-01-08T15:47:50.61Z" }, - { url = "https://files.pythonhosted.org/packages/99/ed/15000c96a8bd8f5fd8efd622109bf52549ea0b366f8ce71c45580fa55878/uuid_utils-0.13.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:cf95f6370ad1a0910ee7b5ad5228fd19c4ae32fe3627389006adaf519408c41e", size = 581023, upload-time = "2026-01-08T15:47:52.776Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/c8/3f809fa2dc2ca4bd331c792a3c7d3e45ae2b709d85847a12b8b27d1d5f19/uuid_utils-0.13.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5a88e23e0b2f4203fefe2ccbca5736ee06fcad10e61b5e7e39c8d7904bc13300", size = 546715, upload-time = "2026-01-08T15:47:54.415Z" }, - { url = "https://files.pythonhosted.org/packages/f5/80/4f7c7efd734d1494397c781bd3d421688e9c187ae836e3174625b1ddf8b0/uuid_utils-0.13.0-cp39-abi3-win32.whl", hash = "sha256:3e4f2cc54e6a99c0551158100ead528479ad2596847478cbad624977064ffce3", size = 177650, upload-time = "2026-01-08T15:47:55.679Z" }, - { url = "https://files.pythonhosted.org/packages/6c/94/d05ab68622e66ad787a241dfe5ccc649b3af09f30eae977b9ee8f7046aaa/uuid_utils-0.13.0-cp39-abi3-win_amd64.whl", hash = "sha256:046cb2756e1597b3de22d24851b769913e192135830486a0a70bf41327f0360c", size = 183211, upload-time = "2026-01-08T15:47:57.604Z" }, - { url = "https://files.pythonhosted.org/packages/69/37/674b3ce25cd715b831ea8ebbd828b74c40159f04c95d1bb963b2c876fe79/uuid_utils-0.13.0-cp39-abi3-win_arm64.whl", hash = "sha256:5447a680df6ef8a5a353976aaf4c97cc3a3a22b1ee13671c44227b921e3ae2a9", size = 183518, upload-time = "2026-01-08T15:47:59.148Z" }, - { url = "https://files.pythonhosted.org/packages/99/fa/1d92de9538463859228e68db679b766fd300770c9a2db849dcba0c0c5a57/uuid_utils-0.13.0-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e5182e2d95f38e65f2e5bce90648ef56987443da13e145afcd747e584f9bc69c", size = 587641, upload-time = "2026-01-08T15:48:00.433Z" }, - { url = "https://files.pythonhosted.org/packages/ca/07/6bd9e6f5367e38c2ee7178ad882d2bd1b0d17c5393974b09ab027a215eba/uuid_utils-0.13.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e3909a8a1fbd79d7c8bdc874eeb83e23ccb7a7cb0aa821a49596cc96c0cce84b", size = 298273, upload-time = "2026-01-08T15:48:02.063Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/14/7061b868a8a6799c8df83768a23f313d4e22075069f01ee3c28fa82aa2c6/uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_aarch64.whl", hash = "sha256:5dc4c9f749bd2511b8dcbf0891e658d7d86880022963db050722ad7b502b5e22", size = 333618, upload-time = "2026-01-08T15:48:03.503Z" }, - { url = "https://files.pythonhosted.org/packages/bc/f1/f48c3c9c343c9071ade5f355403e344d817412d9cf379a2d04b181282e74/uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_armv7l.whl", hash = "sha256:516adf07f5b2cdb88d50f489c702b5f1a75ae8b2639bfd254f4192d5f7ee261f", size = 339104, upload-time = "2026-01-08T15:48:05.02Z" }, - { url = "https://files.pythonhosted.org/packages/47/22/8e3142b4baffee77ce533fe956446d3699ec42f1d5252911208cbef4501e/uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_i686.whl", hash = "sha256:aeee3bd89e8de6184a3ab778ce19f5ce9ad32849d1be549516e0ddb257562d8d", size = 359503, upload-time = "2026-01-08T15:48:06.347Z" }, - { url = "https://files.pythonhosted.org/packages/bd/1a/756f1f9e31b15019c87cd2becb1c596351c50967cd143443da38df8818d1/uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_ppc64le.whl", hash = "sha256:97985256c2e59b7caa51f5c8515f64d777328562a9c900ec65e9d627baf72737", size = 467480, upload-time = "2026-01-08T15:48:07.681Z" }, - { url = "https://files.pythonhosted.org/packages/0a/20/a6929e98d9a461ca49e96194a82a1cc3fd5420f3a2f53cbb34fca438549e/uuid_utils-0.13.0-pp311-pypy311_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:b7ccaa20e24c5f60f41a69ef571ed820737f9b0ade4cbeef56aaa8f80f5aa475", size = 333610, upload-time = "2026-01-08T15:48:09.375Z" }, + { url = "https://files.pythonhosted.org/packages/a7/42/42d003f4a99ddc901eef2fd41acb3694163835e037fb6dde79ad68a72342/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:f6695c0bed8b18a904321e115afe73b34444bc8451d0ce3244a1ec3b84deb0e5", size = 601786, upload-time = "2026-01-20T20:37:09.843Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/e6/775dfb91f74b18f7207e3201eb31ee666d286579990dc69dd50db2d92813/uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4f0a730bbf2d8bb2c11b93e1005e91769f2f533fa1125ed1f00fd15b6fcc732b", size = 303943, upload-time = "2026-01-20T20:37:18.767Z" }, + { url = "https://files.pythonhosted.org/packages/17/82/ea5f5e85560b08a1f30cdc65f75e76494dc7aba9773f679e7eaa27370229/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40ce3fd1a4fdedae618fc3edc8faf91897012469169d600133470f49fd699ed3", size = 340467, upload-time = "2026-01-20T20:37:11.794Z" }, + { url = "https://files.pythonhosted.org/packages/ca/33/54b06415767f4569882e99b6470c6c8eeb97422686a6d432464f9967fd91/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:09ae4a98416a440e78f7d9543d11b11cae4bab538b7ed94ec5da5221481748f2", size = 346333, upload-time = "2026-01-20T20:37:12.818Z" }, + { url = "https://files.pythonhosted.org/packages/cb/10/a6bce636b8f95e65dc84bf4a58ce8205b8e0a2a300a38cdbc83a3f763d27/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:971e8c26b90d8ae727e7f2ac3ee23e265971d448b3672882f2eb44828b2b8c3e", size = 470859, upload-time = "2026-01-20T20:37:01.512Z" }, + { url = "https://files.pythonhosted.org/packages/8a/27/84121c51ea72f013f0e03d0886bcdfa96b31c9b83c98300a7bd5cc4fa191/uuid_utils-0.14.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5cde1fa82804a8f9d2907b7aec2009d440062c63f04abbdb825fce717a5e860", size = 341988, upload-time = "2026-01-20T20:37:22.881Z" }, + { url = "https://files.pythonhosted.org/packages/90/a4/01c1c7af5e6a44f20b40183e8dac37d6ed83e7dc9e8df85370a15959b804/uuid_utils-0.14.0-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c7343862a2359e0bd48a7f3dfb5105877a1728677818bb694d9f40703264a2db", size = 365784, upload-time = "2026-01-20T20:37:10.808Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/f0/65ee43ec617b8b6b1bf2a5aecd56a069a08cca3d9340c1de86024331bde3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c51e4818fdb08ccec12dc7083a01f49507b4608770a0ab22368001685d59381b", size = 523750, upload-time = "2026-01-20T20:37:06.152Z" }, + { url = "https://files.pythonhosted.org/packages/95/d3/6bf503e3f135a5dfe705a65e6f89f19bccd55ac3fb16cb5d3ec5ba5388b8/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:181bbcccb6f93d80a8504b5bd47b311a1c31395139596edbc47b154b0685b533", size = 615818, upload-time = "2026-01-20T20:37:21.816Z" }, + { url = "https://files.pythonhosted.org/packages/df/6c/99937dd78d07f73bba831c8dc9469dfe4696539eba2fc269ae1b92752f9e/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:5c8ae96101c3524ba8dbf762b6f05e9e9d896544786c503a727c5bf5cb9af1a7", size = 580831, upload-time = "2026-01-20T20:37:19.691Z" }, + { url = "https://files.pythonhosted.org/packages/44/fa/bbc9e2c25abd09a293b9b097a0d8fc16acd6a92854f0ec080f1ea7ad8bb3/uuid_utils-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00ac3c6edfdaff7e1eed041f4800ae09a3361287be780d7610a90fdcde9befdc", size = 546333, upload-time = "2026-01-20T20:37:03.117Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9b/e5e99b324b1b5f0c62882230455786df0bc66f67eff3b452447e703f45d2/uuid_utils-0.14.0-cp39-abi3-win32.whl", hash = "sha256:ec2fd80adf8e0e6589d40699e6f6df94c93edcc16dd999be0438dd007c77b151", size = 177319, upload-time = "2026-01-20T20:37:04.208Z" }, + { url = "https://files.pythonhosted.org/packages/d3/28/2c7d417ea483b6ff7820c948678fdf2ac98899dc7e43bb15852faa95acaf/uuid_utils-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:efe881eb43a5504fad922644cb93d725fd8a6a6d949bd5a4b4b7d1a1587c7fd1", size = 182566, upload-time = "2026-01-20T20:37:16.868Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/86/49e4bdda28e962fbd7266684171ee29b3d92019116971d58783e51770745/uuid_utils-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:32b372b8fd4ebd44d3a219e093fe981af4afdeda2994ee7db208ab065cfcd080", size = 182809, upload-time = "2026-01-20T20:37:05.139Z" }, + { url = "https://files.pythonhosted.org/packages/f1/03/1f1146e32e94d1f260dfabc81e1649102083303fb4ad549775c943425d9a/uuid_utils-0.14.0-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:762e8d67992ac4d2454e24a141a1c82142b5bde10409818c62adbe9924ebc86d", size = 587430, upload-time = "2026-01-20T20:37:24.998Z" }, + { url = "https://files.pythonhosted.org/packages/87/ba/d5a7469362594d885fd9219fe9e851efbe65101d3ef1ef25ea321d7ce841/uuid_utils-0.14.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:40be5bf0b13aa849d9062abc86c198be6a25ff35316ce0b89fc25f3bac6d525e", size = 298106, upload-time = "2026-01-20T20:37:23.896Z" }, + { url = "https://files.pythonhosted.org/packages/8a/11/3dafb2a5502586f59fd49e93f5802cd5face82921b3a0f3abb5f357cb879/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:191a90a6f3940d1b7322b6e6cceff4dd533c943659e0a15f788674407856a515", size = 333423, upload-time = "2026-01-20T20:37:17.828Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f2/c8987663f0cdcf4d717a36d85b5db2a5589df0a4e129aa10f16f4380ef48/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4aa4525f4ad82f9d9c842f9a3703f1539c1808affbaec07bb1b842f6b8b96aa5", size = 338659, upload-time = "2026-01-20T20:37:14.286Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c8/929d81665d83f0b2ffaecb8e66c3091a50f62c7cb5b65e678bd75a96684e/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdbd82ff20147461caefc375551595ecf77ebb384e46267f128aca45a0f2cdfc", size = 467029, upload-time = 
"2026-01-20T20:37:08.277Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a0/27d7daa1bfed7163f4ccaf52d7d2f4ad7bb1002a85b45077938b91ee584f/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff57e8a5d540006ce73cf0841a643d445afe78ba12e75ac53a95ca2924a56be", size = 333298, upload-time = "2026-01-20T20:37:07.271Z" }, + { url = "https://files.pythonhosted.org/packages/63/d4/acad86ce012b42ce18a12f31ee2aa3cbeeb98664f865f05f68c882945913/uuid_utils-0.14.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3fd9112ca96978361201e669729784f26c71fecc9c13a7f8a07162c31bd4d1e2", size = 359217, upload-time = "2026-01-20T20:36:59.687Z" }, ] [[package]] @@ -4124,11 +4120,11 @@ wheels = [ [[package]] name = "wcwidth" -version = "0.2.14" +version = "0.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc723187aeaf24c7f5459922d01e2f794277a3dfb90345/wcwidth-0.2.14.tar.gz", hash = "sha256:4d478375d31bc5395a3c55c40ccdf3354688364cd61c4f6adacaa9215d0b3605", size = 102293, upload-time = "2025-09-22T16:29:53.023Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/75/2144b65e4fba12a2d9868e9a3f99db7fa0760670d064603634bef9ff1709/wcwidth-0.3.0.tar.gz", hash = "sha256:af1a2fb0b83ef4a7fc0682a4c95ca2576e14d0280bca2a9e67b7dc9f2733e123", size = 172238, upload-time = "2026-01-21T17:44:09.508Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, + { url = "https://files.pythonhosted.org/packages/18/0e/a5f0257ab47492b7afb5fb60347d14ba19445e2773fc8352d4be6bd2f6f8/wcwidth-0.3.0-py3-none-any.whl", hash = "sha256:073a1acb250e4add96cfd5ef84e0036605cd6e0d0782c8c15c80e42202348458", 
size = 85520, upload-time = "2026-01-21T17:44:08.002Z" }, ] [[package]]