diff --git a/migrations/3.surrealql b/migrations/3.surrealql
new file mode 100644
index 0000000..73b79a7
--- /dev/null
+++ b/migrations/3.surrealql
@@ -0,0 +1,152 @@
+-- Migration 3 (up): replaces fn::vector_search and fn::text_search.
+REMOVE FUNCTION fn::vector_search;
+
+-- Cosine-similarity search over source_embedding, source_insight and note.
+--   $query          vector compared against each row's embedding field
+--   $match_count    LIMIT applied to each per-table query and to the final result
+--   $sources        when true, source_embedding and source_insight are searched
+--   $show_notes     when true, note is searched
+--   $min_similarity rows scoring below this cosine similarity are dropped
+-- Returns id, title, content, parent_id and similarity, highest similarity first.
+DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) {
+    -- parent_id and title are read through the row's source field.
+    let $source_embedding_search =
+        IF $sources {(
+            SELECT
+                id,
+                source.title as title,
+                content,
+                source.id as parent_id,
+                vector::similarity::cosine(embedding, $query) as similarity
+            FROM source_embedding
+            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
+            ORDER BY similarity DESC
+            LIMIT $match_count
+        )}
+        ELSE { [] };
+
+    -- Search source_insight with the same threshold.
+    let $source_insight_search =
+        IF $sources {(
+            SELECT
+                id,
+                insight_type + ' - ' + source.title as title,
+                content,
+                source.id as parent_id,
+                vector::similarity::cosine(embedding, $query) as similarity
+            FROM source_insight
+            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
+            ORDER BY similarity DESC
+            LIMIT $match_count
+        )}
+        ELSE { [] };
+
+    -- Notes use their own id as parent_id.
+    let $note_content_search =
+        IF $show_notes {(
+            SELECT
+                id,
+                title,
+                content,
+                id as parent_id,
+                vector::similarity::cosine(embedding, $query) as similarity
+            FROM note
+            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
+            ORDER BY similarity DESC
+            LIMIT $match_count
+        )}
+        ELSE { [] };
+
+    let $all_results = array::union(
+        array::union($source_embedding_search, $source_insight_search),
+        $note_content_search
+    );
+
+    RETURN (
+        SELECT
+            id, title, content, parent_id,
+            math::max(similarity) as similarity
+        FROM $all_results
+        GROUP BY id
+        ORDER BY similarity DESC
+        LIMIT $match_count
+    );
+};
+
+REMOVE FUNCTION fn::text_search;
+
+-- Full-text search over source titles, source full_text, source_embedding
+-- content, source_insight content, note titles and note content.
+--   $query_text  text matched with the @1@ operator
+--   $match_count LIMIT applied to the final result
+--   $sources     when true, source, source_embedding and source_insight are searched
+--   $show_notes  when true, note is searched
+-- Returns id, title, content (the search::highlight output), parent_id and
+-- relevance, highest relevance first.
+DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
+    let $source_title_search =
+        IF $sources {(
+            SELECT id, title,
+                search::highlight('`', '`', 1) as content,
+                id as parent_id,
+                math::max(search::score(1)) AS relevance
+            FROM source
+            WHERE title @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    let $source_embedding_search =
+        IF $sources {(
+            SELECT id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
+            FROM source_embedding
+            WHERE content @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    -- Rows here come from the source table itself, so the row's own id and
+    -- title are projected. Reading source.id / source.title would go through
+    -- a source field on the source row; assuming the table has no such field
+    -- (TODO confirm), every hit would fail the id filter in the final query.
+    let $source_full_search =
+        IF $sources {(
+            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
+            FROM source
+            WHERE full_text @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    let $source_insight_search =
+        IF $sources {(
+            SELECT id, insight_type + " - " + source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
+            FROM source_insight
+            WHERE content @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    let $note_title_search =
+        IF $show_notes {(
+            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
+            FROM note
+            WHERE title @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    let $note_content_search =
+        IF $show_notes {(
+            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
+            FROM note
+            WHERE content @1@ $query_text
+            GROUP BY id)}
+        ELSE { [] };
+
+    let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
+    let $source_asset_results = array::union($source_title_search, $source_insight_search);
+    let $source_results = array::union($source_chunk_results, $source_asset_results );
+    let $note_results = array::union($note_title_search, $note_content_search );
+    let $final_results = array::union($source_results, $note_results );
+
+    -- Rows without an id are dropped before the final grouping.
+    RETURN (SELECT id, title, content, parent_id, math::max(relevance) as relevance from $final_results
+        where id is not None
+        group by id, title, content, parent_id ORDER BY relevance DESC LIMIT $match_count);
+};
diff --git a/migrations/3_down.surrealql b/migrations/3_down.surrealql
new file mode 100644
index 0000000..aaab4d9
--- /dev/null
+++ b/migrations/3_down.surrealql
@@ -0,0 +1,104 @@
+-- Migration 3 (down): replaces fn::vector_search and fn::text_search with
+-- versions that return item_id plus a score and take no $min_similarity.
+REMOVE FUNCTION fn::vector_search;
+
+-- Cosine-similarity search returning item_id and similarity, highest first.
+-- item_id is the source field for source_embedding and source_insight rows
+-- and the row id for note rows.
+DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, $sources:bool, $show_notes:bool) {
+    let $source_embedding_search =
+        IF $sources {(
+            SELECT source as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
+            FROM source_embedding LIMIT $match_count)}
+        ELSE { [] };
+
+    let $source_insight_search =
+        IF $sources {(
+            SELECT source as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
+            FROM source_insight LIMIT $match_count)}
+        ELSE { [] };
+
+    let $note_content_search =
+        IF $show_notes {(
+            SELECT id as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
+            FROM note LIMIT $match_count)}
+        ELSE { [] };
+
+    let $source_chunk_results = array::union($source_embedding_search, $source_insight_search);
+
+    -- NOTE(review): $source_insight_search is already part of
+    -- $source_chunk_results, so this second union looks redundant. Kept as-is,
+    -- presumably to restore the earlier definition; confirm against migration 2.
+    let $source_results = array::union($source_chunk_results, $source_insight_search);
+
+    let $note_results = $note_content_search;
+    let $final_results = array::union($source_results, $note_results );
+
+    RETURN (SELECT item_id, math::max(similarity) as similarity from $final_results
+        group by item_id ORDER BY similarity DESC LIMIT $match_count);
+};
+
+REMOVE FUNCTION fn::text_search;
+
+-- Full-text search returning item_id and relevance, highest relevance first.
+DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
+    let $source_title_search =
+        IF $sources {(
+            SELECT id as item_id, math::max(search::score(1)) AS relevance
+            FROM source
+            WHERE title @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    let $source_embedding_search =
+        IF $sources {(
+            SELECT source as item_id, math::max(search::score(1)) AS relevance
+            FROM source_embedding
+            WHERE content @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    -- NOTE(review): this selects a source field from the source table itself.
+    -- Kept as-is, presumably to restore the earlier definition; confirm
+    -- against migration 2.
+    let $source_full_search =
+        IF $sources {(
+            SELECT source as item_id, math::max(search::score(1)) AS relevance
+            FROM source
+            WHERE full_text @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    let $source_insight_search =
+        IF $sources {(
+            SELECT source as item_id, math::max(search::score(1)) AS relevance
+            FROM source_insight
+            WHERE content @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    let $note_title_search =
+        IF $show_notes {(
+            SELECT id as item_id, math::max(search::score(1)) AS relevance
+            FROM note
+            WHERE title @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    let $note_content_search =
+        IF $show_notes {(
+            SELECT id as item_id, math::max(search::score(1)) AS relevance
+            FROM note
+            WHERE content @1@ $query_text
+            GROUP BY item_id)}
+        ELSE { [] };
+
+    let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
+    let $source_asset_results = array::union($source_title_search, $source_insight_search);
+    let $source_results = array::union($source_chunk_results, $source_asset_results );
+    let $note_results = array::union($note_title_search, $note_content_search );
+    let $final_results = array::union($source_results, $note_results );
+
+    RETURN (SELECT item_id, math::max(relevance) as relevance from $final_results
+        group by item_id ORDER BY relevance DESC LIMIT $match_count);
+};
diff --git a/open_notebook/database/migrate.py b/open_notebook/database/migrate.py
index f890091..085caf4 100644
--- a/open_notebook/database/migrate.py
+++ b/open_notebook/database/migrate.py
@@ -21,12 +21,14 @@ class MigrationManager:
         self.up_migrations = [
             Migration.from_file("migrations/1.surrealql"),
             Migration.from_file("migrations/2.surrealql"),
+            Migration.from_file("migrations/3.surrealql"),
         ]
         self.down_migrations = [
             Migration.from_file(
                 "migrations/1_down.surrealql",
             ),
             Migration.from_file("migrations/2_down.surrealql"),
+            Migration.from_file("migrations/3_down.surrealql"),
         ]
         self.runner = MigrationRunner(
             up_migrations=self.up_migrations,