refactor database module and migrations

This commit is contained in:
LUIS NOVO 2024-10-30 16:33:07 -03:00
parent 2aa453f73f
commit 2de8520d0c
10 changed files with 146 additions and 223 deletions

View file

@ -1,29 +1,15 @@
import streamlit as st
from open_notebook.exceptions import InvalidDatabaseSchema, NoSchemaFound
from open_notebook.repository import check_database_version, execute_migration
from open_notebook.database.migrate import MigrationManager
from stream_app.utils import version_sidebar
try:
version_sidebar()
check_database_version()
version_sidebar()
mm = MigrationManager()
if mm.needs_migration:
st.warning("The Open Notebook database needs a migration to run properly.")
if st.button("Run Migration"):
mm.run_migration_up()
st.success("Migration successful")
st.rerun()
else:
st.switch_page("pages/2_📒_Notebooks.py")
except NoSchemaFound as e:
st.warning(e)
if st.button("Create Schema.."):
try:
execute_migration("db_setup.surrealql")
st.success("Schema created successfully")
st.rerun()
except Exception as e:
st.error(e)
except InvalidDatabaseSchema as e:
st.warning(e)
if st.button("Execute Migration.."):
try:
execute_migration("0_0_1_to_0_0_2.surrealql")
st.success("Migration executed successfully")
st.rerun()
except Exception as e:
st.error(e)
st.stop()

View file

@ -1,82 +0,0 @@
-- Schema changes applied before fn::text_search is redefined.
-- Store the complete text on the source record itself.
DEFINE FIELD full_text ON TABLE source TYPE option<string>;
-- Drop the source_chunk table and its full-text index.
REMOVE TABLE IF EXISTS source_chunk;
REMOVE INDEX IF EXISTS idx_source_full ON TABLE source_chunk;
-- Notebooks gain an optional archived flag that defaults to False.
DEFINE FIELD IF NOT EXISTS archived ON TABLE notebook TYPE option<bool> DEFAULT False;
-- NOTE(review): this re-defines idx_source_full on source_chunk right after
-- that table was removed, while fn::text_search matches on source.full_text.
-- Confirm whether the index was meant to be on source(full_text) instead.
DEFINE INDEX idx_source_full ON TABLE source_chunk COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
-- Remove the previous definition so the DEFINE ... IF NOT EXISTS that follows
-- actually installs the new function body.
REMOVE FUNCTION IF EXISTS fn::text_search;
-- Full-text search over sources and notes.
--
-- Parameters:
--   $query_text   text matched with the @1@ full-text operator
--   $match_count  maximum number of rows returned
--   $sources      when true, search source titles, embeddings, full text and insights
--   $show_notes   when true, search note titles and note content
--
-- Returns up to $match_count rows of { item_id, relevance }, one per item,
-- keeping the best relevance found for that item, ordered by relevance DESC.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
    -- Each partial search yields [] when its category is disabled so the
    -- unions below can be applied unconditionally.
    let $source_title_search =
        IF $sources {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM source
            WHERE title @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- Embedding chunks point at their parent through the `source` field.
    let $source_embedding_search =
        IF $sources {(
            SELECT source as item_id, math::max(search::score(1)) AS relevance
            FROM source_embedding
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- This query reads the source table itself, so the item is the record's
    -- own id (the table has no `source` field to select).
    let $source_full_search =
        IF $sources {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM source
            WHERE full_text @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- Insights also point at their parent through the `source` field.
    let $source_insight_search =
        IF $sources {(
            SELECT source as item_id, math::max(search::score(1)) AS relevance
            FROM source_insight
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $note_title_search =
        IF $show_notes {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE title @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $note_content_search =
        IF $show_notes {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- Merge every partial result set into one list.
    let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
    let $source_asset_results = array::union($source_title_search, $source_insight_search);
    let $source_results = array::union($source_chunk_results, $source_asset_results);
    let $note_results = array::union($note_title_search, $note_content_search);
    let $final_results = array::union($source_results, $note_results);

    -- Collapse duplicates per item, keeping the highest relevance.
    RETURN (SELECT item_id, math::max(relevance) as relevance from $final_results
        group by item_id ORDER BY relevance DESC LIMIT $match_count);
};
-- Cascade: the event fires when $after is NONE (the source record no longer
-- exists) and deletes the embeddings and insights that referenced it.
DEFINE EVENT IF NOT EXISTS source_delete ON TABLE source WHEN ($after == NONE) THEN {
delete source_embedding where source == $before.id;
delete source_insight where source == $before.id;
};
-- Schemaless table for podcast configuration records.
DEFINE TABLE IF NOT EXISTS podcast_config SCHEMALESS;
-- Record the schema version this script leaves the database at.
UPDATE open_notebook:database_info SET
version= "0.0.2";

View file

@ -1,92 +1,78 @@
REMOVE table IF EXISTS source;
REMOVE table IF EXISTS reference;
REMOVE table IF EXISTS notebook;
REMOVE table IF EXISTS note;
REMOVE table IF EXISTS artifact;
REMOVE table IF EXISTS source_chunk;
REMOVE table IF EXISTS source_insight;
REMOVE ANALYZER IF EXISTS my_analyzer;
REMOVE FUNCTION IF EXISTS fn::text_search;
REMOVE INDEX IF EXISTS idx_source_full ON TABLE source_chunk;
REMOVE INDEX IF EXISTS idx_source_embed_chunk ON TABLE source_embedding;
REMOVE INDEX IF EXISTS idx_source_insight ON TABLE source_insight;
REMOVE INDEX IF EXISTS idx_note ON TABLE note;
REMOVE INDEX IF EXISTS idx_source_title ON TABLE source;
REMOVE INDEX IF EXISTS idx_note_title ON TABLE note;
DEFINE TABLE IF NOT EXISTS source SCHEMAFULL;
DEFINE FIELD asset
DEFINE FIELD IF NOT EXISTS
asset
ON TABLE source
FLEXIBLE TYPE option<object>;
DEFINE FIELD title ON TABLE source TYPE option<string>;
DEFINE FIELD full_text ON TABLE source TYPE option<string>;
DEFINE FIELD topics ON TABLE source TYPE option<array<string>>;
DEFINE FIELD IF NOT EXISTS title ON TABLE source TYPE option<string>;
DEFINE FIELD IF NOT EXISTS topics ON TABLE source TYPE option<array<string>>;
DEFINE FIELD IF NOT EXISTS full_text ON TABLE source TYPE option<string>;
DEFINE FIELD created ON source DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD updated ON source DEFAULT time::now() VALUE time::now();
DEFINE FIELD IF NOT EXISTS created ON source DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD IF NOT EXISTS updated ON source DEFAULT time::now() VALUE time::now();
DEFINE TABLE IF NOT EXISTS source_embedding SCHEMAFULL;
DEFINE FIELD source ON TABLE source_embedding TYPE record<source>;
DEFINE FIELD order ON TABLE source_embedding TYPE int;
DEFINE FIELD content ON TABLE source_embedding TYPE string;
DEFINE FIELD embedding ON TABLE source_embedding TYPE array<float>;
DEFINE FIELD IF NOT EXISTS source ON TABLE source_embedding TYPE record<source>;
DEFINE FIELD IF NOT EXISTS order ON TABLE source_embedding TYPE int;
DEFINE FIELD IF NOT EXISTS content ON TABLE source_embedding TYPE string;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE source_embedding TYPE array<float>;
DEFINE TABLE IF NOT EXISTS source_insight SCHEMAFULL;
DEFINE FIELD source ON TABLE source_insight TYPE record<source>;
DEFINE FIELD insight_type ON TABLE source_insight TYPE string;
DEFINE FIELD content ON TABLE source_insight TYPE string;
DEFINE FIELD embedding ON TABLE source_insight TYPE array<float>;
DEFINE FIELD IF NOT EXISTS source ON TABLE source_insight TYPE record<source>;
DEFINE FIELD IF NOT EXISTS insight_type ON TABLE source_insight TYPE string;
DEFINE FIELD IF NOT EXISTS content ON TABLE source_insight TYPE string;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE source_insight TYPE array<float>;
DEFINE EVENT source_delete ON TABLE source WHEN ($after == NONE) THEN {
DEFINE EVENT IF NOT EXISTS source_delete ON TABLE source WHEN ($after == NONE) THEN {
delete source_embedding where source == $before.id;
delete source_insight where source == $before.id;
};
DEFINE TABLE IF NOT EXISTS note SCHEMAFULL;
DEFINE FIELD title ON TABLE note TYPE option<string>;
DEFINE FIELD summary ON TABLE note TYPE option<string>;
DEFINE FIELD content ON TABLE note TYPE option<string>;
DEFINE FIELD embedding ON TABLE note TYPE array<float>;
DEFINE FIELD IF NOT EXISTS title ON TABLE note TYPE option<string>;
DEFINE FIELD IF NOT EXISTS summary ON TABLE note TYPE option<string>;
DEFINE FIELD IF NOT EXISTS content ON TABLE note TYPE option<string>;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE note TYPE array<float>;
DEFINE FIELD created ON note DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD updated ON note DEFAULT time::now() VALUE time::now();
DEFINE FIELD IF NOT EXISTS created ON note DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD IF NOT EXISTS updated ON note DEFAULT time::now() VALUE time::now();
DEFINE TABLE IF NOT EXISTS notebook SCHEMAFULL;
DEFINE FIELD name ON TABLE notebook TYPE option<string>;
DEFINE FIELD description ON TABLE notebook TYPE option<string>;
DEFINE FIELD archived ON TABLE notebook TYPE option<bool> DEFAULT False;
DEFINE FIELD IF NOT EXISTS name ON TABLE notebook TYPE option<string>;
DEFINE FIELD IF NOT EXISTS description ON TABLE notebook TYPE option<string>;
DEFINE FIELD IF NOT EXISTS archived ON TABLE notebook TYPE option<bool> DEFAULT False;
DEFINE FIELD created ON notebook DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD updated ON notebook DEFAULT time::now() VALUE time::now();
DEFINE FIELD IF NOT EXISTS created ON notebook DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD IF NOT EXISTS updated ON notebook DEFAULT time::now() VALUE time::now();
DEFINE TABLE reference
DEFINE TABLE IF NOT EXISTS reference
TYPE RELATION
FROM source TO notebook;
DEFINE TABLE artifact
DEFINE TABLE IF NOT EXISTS artifact
TYPE RELATION
FROM note TO notebook;
-- TODO: understand the analyzer
DEFINE ANALYZER my_analyzer TOKENIZERS blank,class,camel,punct FILTERS snowball(english), lowercase;
DEFINE TABLE IF NOT EXISTS podcast_config SCHEMALESS;
DEFINE INDEX idx_source_title ON TABLE source COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_embed_chunk ON TABLE source_embedding COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_insight ON TABLE source_insight COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_note ON TABLE note COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_note_title ON TABLE note COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
-- TODO: understand the analyzer
DEFINE ANALYZER IF NOT EXISTS my_analyzer TOKENIZERS blank,class,camel,punct FILTERS snowball(english), lowercase;
DEFINE INDEX IF NOT EXISTS idx_source_title ON TABLE source COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX IF NOT EXISTS idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX IF NOT EXISTS idx_source_embed_chunk ON TABLE source_embedding COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX IF NOT EXISTS idx_source_insight ON TABLE source_insight COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX IF NOT EXISTS idx_note ON TABLE note COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX IF NOT EXISTS idx_note_title ON TABLE note COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
let $source_title_search =
IF $sources {(
SELECT id as item_id, math::max(search::score(1)) AS relevance
@ -150,8 +136,6 @@ DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count:
};
REMOVE FUNCTION fn::vector_search;
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources:bool, $show_notes:bool) {
let $source_embedding_search =
@ -188,10 +172,7 @@ DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_cou
};
CREATE open_notebook:database_info SET
version= "0.0.2";
UPDATE open_notebook:database_info SET
version= "0.0.2";
IF array::len(select * from open_notebook:default_models) == 0 THEN
CREATE open_notebook:default_models SET
default_chat_model= ""
END;

View file

@ -0,0 +1,24 @@
-- Down migration: removes the objects listed below and the default_models
-- record.
--
-- Dependent objects are removed before the things they depend on: indexes
-- and the event before their tables, the analyzer after the indexes that use
-- it. That way every statement targets an object that can still exist.
-- NOTE(review): whether REMOVE ... IF EXISTS ON TABLE <missing table> is a
-- no-op or an error depends on the SurrealDB version; this order avoids the
-- question.

-- 1. Full-text indexes.
REMOVE INDEX IF EXISTS idx_source_title ON TABLE source;
REMOVE INDEX IF EXISTS idx_source_full_text ON TABLE source;
REMOVE INDEX IF EXISTS idx_source_embed_chunk ON TABLE source_embedding;
REMOVE INDEX IF EXISTS idx_source_insight ON TABLE source_insight;
REMOVE INDEX IF EXISTS idx_note ON TABLE note;
REMOVE INDEX IF EXISTS idx_note_title ON TABLE note;

-- 2. Event defined on the source table.
REMOVE EVENT IF EXISTS source_delete ON TABLE source;

-- 3. Analyzer used by the indexes removed above.
REMOVE ANALYZER IF EXISTS my_analyzer;

-- 4. Search functions.
REMOVE FUNCTION IF EXISTS fn::text_search;
REMOVE FUNCTION IF EXISTS fn::vector_search;

-- 5. Tables: relation tables first, then the tables that point at source,
--    then the remaining tables.
REMOVE TABLE IF EXISTS reference;
REMOVE TABLE IF EXISTS artifact;
REMOVE TABLE IF EXISTS source_embedding;
REMOVE TABLE IF EXISTS source_insight;
REMOVE TABLE IF EXISTS source;
REMOVE TABLE IF EXISTS note;
REMOVE TABLE IF EXISTS notebook;
REMOVE TABLE IF EXISTS podcast_config;

-- 6. Default-models record.
DELETE open_notebook:default_models;

View file

@ -0,0 +1,56 @@
import os
from loguru import logger
from sblpy.connection import SurrealSyncConnection
from sblpy.migrations.db_processes import get_latest_version
from sblpy.migrations.migrations import Migration
from sblpy.migrations.runner import MigrationRunner
class MigrationManager:
    """Run the bundled SurrealQL migrations against the configured database.

    Connection settings are read from the ``SURREAL_ADDRESS``,
    ``SURREAL_PORT``, ``SURREAL_USER``, ``SURREAL_PASS``,
    ``SURREAL_NAMESPACE`` and ``SURREAL_DATABASE`` environment variables when
    the manager is constructed.
    """

    def __init__(self):
        """Create the connection and register the migration scripts.

        Raises:
            KeyError: if one of the ``SURREAL_*`` variables is not set.
            ValueError: if ``SURREAL_PORT`` is not an integer.
        """
        self.connection = SurrealSyncConnection(
            host=os.environ["SURREAL_ADDRESS"],
            port=int(os.environ["SURREAL_PORT"]),
            user=os.environ["SURREAL_USER"],
            password=os.environ["SURREAL_PASS"],
            namespace=os.environ["SURREAL_NAMESPACE"],
            database=os.environ["SURREAL_DATABASE"],
            encrypted=False,  # Set to True if using SSL
        )
        # NOTE(review): these are relative paths; presumably they resolve
        # against the process working directory -- confirm for deployments.
        self.up_migrations = [Migration.from_file("migrations/1.surrealql")]
        self.down_migrations = [Migration.from_file("migrations/1_down.surrealql")]
        self.runner = MigrationRunner(
            up_migrations=self.up_migrations,
            down_migrations=self.down_migrations,
            connection=self.connection,
        )

    def get_current_version(self) -> int:
        """Return the version reported by ``get_latest_version`` for this database."""
        return get_latest_version(
            self.connection.host,
            self.connection.port,
            self.connection.user,
            self.connection.password,
            self.connection.namespace,
            self.connection.database,
        )

    @property
    def needs_migration(self) -> bool:
        """Whether the database version is behind the registered up migrations."""
        return self.get_current_version() < len(self.up_migrations)

    def run_migration_up(self):
        """Apply the pending up migrations, if there are any.

        Does nothing but log when the database is already current.

        Raises:
            Exception: whatever the migration runner raised. The failure is
                logged and then re-raised so the caller cannot mistake a
                failed migration for a successful one.
        """
        current_version = self.get_current_version()
        logger.debug(f"Current version before migration: {current_version}")

        # Reuse the version just fetched instead of querying the database a
        # second time through the needs_migration property.
        if current_version >= len(self.up_migrations):
            logger.debug("Database is already at the latest version")
            return

        try:
            self.runner.run()
        except Exception as e:
            logger.error(f"Migration failed: {str(e)}")
            raise

        new_version = self.get_current_version()
        logger.debug(f"Migration successful. New version: {new_version}")

View file

@ -5,10 +5,6 @@ from typing import Any, Dict, Optional
from loguru import logger
from sblpy.connection import SurrealSyncConnection
from open_notebook.exceptions import InvalidDatabaseSchema, NoSchemaFound
EXPECTED_VERSION = "0.0.2"
@contextmanager
def db_connection():
@ -39,25 +35,6 @@ def repo_query(query_str: str, vars: Optional[Dict[str, Any]] = None):
raise
def check_database_version():
try:
result = repo_query("SELECT * FROM open_notebook:database_info;")
if not result:
raise NoSchemaFound("Database schema not found")
version = result[0]["version"]
logger.info(f"Connected to SurrealDB, using schema version {version}")
if version != EXPECTED_VERSION:
raise InvalidDatabaseSchema(
f"Version mismatch. Expected {EXPECTED_VERSION}, got {version}"
)
except Exception as e:
logger.error(e)
raise e
def repo_create(table: str, data: Dict[str, Any]):
query = f"CREATE {table} CONTENT {data};"
# vars = {"table": table, "data": data}
@ -89,10 +66,3 @@ def repo_relate(source: str, relationship: str, target: str):
result = repo_query(query)
logger.debug(f"RELATE query result: {result}")
return result
def execute_migration(script: str):
with open(f"database/{script}", "r") as file:
content = file.read()
return repo_query(content)

View file

@ -4,18 +4,18 @@ from typing import Any, ClassVar, Dict, List, Optional, Type, TypeVar
from loguru import logger
from pydantic import BaseModel, ValidationError, field_validator
from open_notebook.exceptions import (
DatabaseOperationError,
InvalidInputError,
NotFoundError,
)
from open_notebook.repository import (
from open_notebook.database.repository import (
repo_create,
repo_delete,
repo_query,
repo_relate,
repo_update,
)
from open_notebook.exceptions import (
DatabaseOperationError,
InvalidInputError,
NotFoundError,
)
T = TypeVar("T", bound="ObjectModel")

View file

@ -3,11 +3,11 @@ from typing import ClassVar, Optional
from loguru import logger
from pydantic import BaseModel
from open_notebook.domain.base import ObjectModel
from open_notebook.repository import (
from open_notebook.database.repository import (
repo_query,
repo_update,
)
from open_notebook.domain.base import ObjectModel
class Model(ObjectModel):

View file

@ -6,6 +6,10 @@ from loguru import logger
from pydantic import BaseModel, Field, field_validator
from open_notebook.config import EMBEDDING_MODEL
from open_notebook.database.repository import (
repo_create,
repo_query,
)
from open_notebook.domain.base import ObjectModel
from open_notebook.exceptions import (
DatabaseOperationError,
@ -13,10 +17,6 @@ from open_notebook.exceptions import (
)
from open_notebook.graphs.multipattern import graph as pattern_graph
from open_notebook.graphs.recursive_toc import graph as toc_graph
from open_notebook.repository import (
repo_create,
repo_query,
)
from open_notebook.utils import split_text, surreal_clean

View file

@ -16,12 +16,6 @@ class UnsupportedTypeException(OpenNotebookError):
pass
class NoSchemaFound(OpenNotebookError):
"""Raised when a database schema is not found."""
pass
class InvalidInputError(OpenNotebookError):
"""Raised when invalid input is provided."""
@ -70,12 +64,6 @@ class NetworkError(OpenNotebookError):
pass
class InvalidDatabaseSchema(OpenNotebookError):
"""Raised when the database is not under the expected schema."""
pass
class NoTranscriptFound(OpenNotebookError):
"""Raised when no transcript is found for a video."""