open-notebook/pyproject.toml
Luis Novo d8006ff5cb
feat: content-type aware chunking and unified embedding (#444)
* feat: content-type aware chunking and unified embedding

- Add chunking.py with HTML, Markdown, and plain text detection
- Add embedding.py with mean pooling for large content
- Create dedicated commands: embed_note, embed_insight, embed_source
- Use fire-and-forget pattern for embedding via submit_command()
- Refactor rebuild_embeddings_command to delegate to individual commands
- Remove legacy commands and needs_embedding() methods
- Reduce chunk size to 1500 chars for Ollama compatibility
- Update CLAUDE.md documentation for new architecture

Fixes #350, #142

* fix: address code review issues

- Note.save() now returns command_id for tracking embedding jobs
- Add length check after generate_embeddings() to fail fast on mismatch
- Add numpy as explicit dependency (was transitive)
- Remove hardcoded chunk sizes from docstrings

* docs: address code review comments

- Rename "SYNC PATH" to "DOMAIN MODEL PATH" in embedding router
- Add test_chunking.py and test_embedding.py to Testing Strategy
- Clarify auto-embedding behavior for each domain model

* fix: clean thinking tags from prompt graph output

Adds clean_thinking_content() to prompt.py to handle extended thinking
models that return <think>...</think> tags. This fixes empty titles
when saving notes from chat.

* chore: remove local docker-compose from git

* fix(frontend): handle null parent_id in search results

Add defensive check for null parent_id in search results to prevent
"Cannot read properties of null (reading 'split')" error. This can
happen with orphaned records in the database.

* fix: cascade delete embeddings and insights when source is deleted

When deleting a Source, now also deletes associated:
- source_embedding records
- source_insight records

This prevents orphaned records that cause null parent_id errors
in vector search results.

* fix: add cleanup for orphan embedding/insight records in migration 10

Deletes source_embedding and source_insight records where the
linked source no longer exists (source.id = NONE).

* chore: bump esperanto to 2.16

Increases ctx_num for Ollama models to accommodate larger notebook
context windows. See: https://github.com/lfnovo/esperanto/pull/69
2026-01-21 23:49:08 -03:00

98 lines
2.4 KiB
TOML

# Package metadata and runtime dependencies.
[project]
name = "open-notebook"
version = "1.6.0"
description = "An open source implementation of a research assistant, inspired by Google Notebook LM"
authors = [
    { name = "Luis Novo", email = "lfnovo@gmail.com" },
]
readme = "README.md"
# Keep the per-version classifiers in step with `requires-python` below:
# every minor version the range admits should be listed here.
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
requires-python = ">=3.11,<3.13"
# Requirement names are written with hyphens throughout; installers treat
# `-` and `_` in project names as equivalent.
dependencies = [
    "fastapi>=0.104.0",
    "uvicorn>=0.24.0",
    "pydantic>=2.9.2",
    "loguru>=0.7.2",
    "langchain>=1.2.0",
    "langgraph>=1.0.5",
    "tiktoken>=0.12.0",
    "langgraph-checkpoint-sqlite>=3.0.1",
    "langchain-community>=0.4.1",
    "langchain-openai>=1.1.6",
    "langchain-anthropic>=1.3.0",
    "langchain-ollama>=1.0.1",
    "langchain-google-genai>=4.1.2",
    "langchain-groq>=1.1.1",
    "langchain-mistralai>=1.1.1",
    "langchain-deepseek>=1.0.0",
    "langchain-google-vertexai>=3.2.0",
    "tomli>=2.0.2",
    "python-dotenv>=1.0.1",
    "httpx[socks]>=0.27.0",
    "content-core>=1.0.2",
    "ai-prompter>=0.3",
    "esperanto>=2.16",
    "surrealdb>=1.0.4",
    "podcast-creator>=0.7.0",
    "surreal-commands>=1.3.0",
    # Declared explicitly rather than relied on as a transitive dependency.
    "numpy>=2.4.1",
]

# setuptools packaging options: the import package `open_notebook` is read
# from the `open_notebook/` directory.
# NOTE(review): no explicit `packages` list is given, so the set of shipped
# packages presumably follows from this mapping — confirm against a built wheel.
[tool.setuptools]
package-dir = { open_notebook = "open_notebook" }

# Optional extras, e.g. `pip install "open-notebook[dev]"`.
[project.optional-dependencies]
# A `dev` group is also declared under [dependency-groups]. The minimum
# versions of the packages listed in both places (pre-commit, types-requests)
# are kept identical so either install route resolves the same floor.
dev = [
    "ipykernel>=6.29.5",
    "ruff>=0.5.5",
    "mypy>=1.11.1",
    "types-requests>=2.32.4.20250913",
    "ipywidgets>=8.1.5",
    "pre-commit>=4.1.0",
    "pytest>=8.0.0",
]

# Build configuration: the package is built with the setuptools backend.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
]

# Development-only dependency groups; these are not part of the published
# package metadata.
[dependency-groups]
# pre-commit and types-requests also appear in the `dev` extra under
# [project.optional-dependencies].
dev = [
    "pre-commit>=4.1.0",
    "pytest-asyncio>=1.2.0",
    "types-requests>=2.32.4.20250913",
]

# isort: Black-compatible profile, with the same 88-column limit that
# [tool.ruff] uses.
[tool.isort]
line_length = 88
profile = "black"

# Ruff base settings.
[tool.ruff]
line-length = 88  # columns; matches `line_length` in [tool.isort]

# Ruff lint rule selection.
[tool.ruff.lint]
# Enabled rule families.
select = [
    "E",
    "F",
    "I",
]
# Individual rules switched off for the whole project.
ignore = [
    "E501",  # line too long
    "E402",  # module level import not at top of file (Streamlit requires this pattern)
    "E722",  # do not use bare except (legacy code pattern)
    "F401",  # imported but unused (may be used in type hints or re-exports)
    "F541",  # f-string without placeholders
    "F841",  # local variable assigned but never used
]

# Per-file exemptions from E402. Streamlit files need nest_asyncio.apply()
# to run before their imports.
# E402 is also in the project-wide `ignore` list of [tool.ruff.lint].
[tool.ruff.lint.per-file-ignores]
"app_home.py" = [
    "E402",
]
"pages/**/*.py" = [
    "E402",
]

# mypy: no global options are set here; the only customization is the
# override below.
[tool.mypy]

# Streamlit UI pages: errors reported for modules matching `pages.*` are
# ignored.
[[tool.mypy.overrides]]
ignore_errors = true
module = "pages.*"