improve search functions

This commit is contained in:
LUIS NOVO 2024-11-04 09:53:09 -03:00
parent 8398539df8
commit 56e745d668
3 changed files with 246 additions and 0 deletions

139
migrations/3.surrealql Normal file
View file

@ -0,0 +1,139 @@
REMOVE FUNCTION fn::vector_search;
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) {
let $source_embedding_search =
IF $sources {(
SELECT
id,
source.title as title,
content,
source.id as parent_id,
vector::similarity::cosine(embedding, $query) as similarity
FROM source_embedding
WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
ORDER BY similarity DESC
LIMIT $match_count
)}
ELSE { [] };
-- Busca em source_insight com threshold
let $source_insight_search =
IF $sources {(
SELECT
id,
insight_type + ' - ' + source.title as title,
content,
source.id as parent_id,
vector::similarity::cosine(embedding, $query) as similarity
FROM source_insight
WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
ORDER BY similarity DESC
LIMIT $match_count
)}
ELSE { [] };
let $note_content_search =
IF $show_notes {(
SELECT
id,
title,
content,
id as parent_id,
vector::similarity::cosine(embedding, $query) as similarity
FROM note
WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
ORDER BY similarity DESC
LIMIT $match_count
)}
ELSE { [] };
let $all_results = array::union(
array::union($source_embedding_search, $source_insight_search),
$note_content_search
);
RETURN (
SELECT
id, title, content, parent_id,
math::max(similarity) as similarity
FROM $all_results
GROUP BY id
ORDER BY similarity DESC
LIMIT $match_count
);
};
REMOVE FUNCTION fn::text_search;
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
let $source_title_search =
IF $sources {(
SELECT id, title,
search::highlight('`', '`', 1) as content,
id as parent_id,
math::max(search::score(1)) AS relevance
FROM source
WHERE title @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $source_embedding_search =
IF $sources {(
SELECT id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
FROM source_embedding
WHERE content @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $source_full_search =
IF $sources {(
SELECT source.id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
FROM source
WHERE full_text @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $source_insight_search =
IF $sources {(
SELECT id, insight_type + " - " + source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
FROM source_insight
WHERE content @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $note_title_search =
IF $show_notes {(
SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
FROM note
WHERE title @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $note_content_search =
IF $show_notes {(
SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
FROM note
WHERE content @1@ $query_text
GROUP BY id)}
ELSE { [] };
let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
let $source_asset_results = array::union($source_title_search, $source_insight_search);
let $source_results = array::union($source_chunk_results, $source_asset_results );
let $note_results = array::union($note_title_search, $note_content_search );
let $final_results = array::union($source_results, $note_results );
RETURN (SELECT id, title, content, parent_id, math::max(relevance) as relevance from $final_results
where id is not None
group by id, title, content, parent_id ORDER BY relevance DESC LIMIT $match_count);
};

105
migrations/3_down.surrealql Normal file
View file

@ -0,0 +1,105 @@
REMOVE FUNCTION fn::vector_search;
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources:bool, $show_notes:bool) {
let $source_embedding_search =
IF $sources {(
SELECT source as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
FROM source_embedding LIMIT $match_count)}
ELSE { [] };
let $source_insight_search =
IF $sources {(
SELECT source as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
FROM source_insight LIMIT $match_count)}
ELSE { [] };
let $note_content_search =
IF $show_notes {(
SELECT id as item_id, content, vector::similarity::cosine(embedding, $query) as similarity
FROM note LIMIT $match_count)}
ELSE { [] };
let $source_chunk_results = array::union($source_embedding_search, $source_insight_search);
let $source_results = array::union($source_chunk_results, $source_insight_search);
let $note_results = $note_content_search;
let $final_results = array::union($source_results, $note_results );
RETURN (SELECT item_id, math::max(similarity) as similarity from $final_results
group by item_id ORDER BY similarity DESC LIMIT $match_count);
};
REMOVE FUNCTION fn::text_search;
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
let $source_title_search =
IF $sources {(
SELECT id as item_id, math::max(search::score(1)) AS relevance
FROM source
WHERE title @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $source_embedding_search =
IF $sources {(
SELECT source as item_id, math::max(search::score(1)) AS relevance
FROM source_embedding
WHERE content @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $source_full_search =
IF $sources {(
SELECT source as item_id, math::max(search::score(1)) AS relevance
FROM source
WHERE full_text @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $source_insight_search =
IF $sources {(
SELECT source as item_id, math::max(search::score(1)) AS relevance
FROM source_insight
WHERE content @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $note_title_search =
IF $show_notes {(
SELECT id as item_id, math::max(search::score(1)) AS relevance
FROM note
WHERE title @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $note_content_search =
IF $show_notes {(
SELECT id as item_id, math::max(search::score(1)) AS relevance
FROM note
WHERE content @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
let $source_asset_results = array::union($source_title_search, $source_insight_search);
let $source_results = array::union($source_chunk_results, $source_asset_results );
let $note_results = array::union($note_title_search, $note_content_search );
let $final_results = array::union($source_results, $note_results );
RETURN (SELECT item_id, math::max(relevance) as relevance from $final_results
group by item_id ORDER BY relevance DESC LIMIT $match_count);
};

View file

@ -21,12 +21,14 @@ class MigrationManager:
self.up_migrations = [
Migration.from_file("migrations/1.surrealql"),
Migration.from_file("migrations/2.surrealql"),
Migration.from_file("migrations/3.surrealql"),
]
self.down_migrations = [
Migration.from_file(
"migrations/1_down.surrealql",
),
Migration.from_file("migrations/2_down.surrealql"),
Migration.from_file("migrations/3_down.surrealql"),
]
self.runner = MigrationRunner(
up_migrations=self.up_migrations,