improve search functions
This commit is contained in:
parent
8398539df8
commit
56e745d668
3 changed files with 246 additions and 0 deletions
139
migrations/3.surrealql
Normal file
139
migrations/3.surrealql
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
-- Drop the previous definition so the function can be recreated with the
-- extra $min_similarity parameter and the richer result rows.
REMOVE FUNCTION fn::vector_search;

-- fn::vector_search: cosine-similarity search over stored embeddings.
--
-- Parameters:
--   $query           embedding vector of the search query
--   $match_count     row limit applied to each sub-search and to the merged result
--   $sources         when true, source_embedding and source_insight are searched
--   $show_notes      when true, note is searched
--   $min_similarity  rows scoring below this cosine similarity are discarded
--
-- Returns rows with id, title, content, parent_id and similarity,
-- ordered by similarity from highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) {
    -- Search source_embedding rows, titled and parented by their linked source
    let $source_embedding_search =
        IF $sources {(
            SELECT
                id,
                source.title as title,
                content,
                source.id as parent_id,
                vector::similarity::cosine(embedding, $query) as similarity
            FROM source_embedding
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Search source_insight rows with the same threshold, the title is
    -- prefixed with the insight type
    let $source_insight_search =
        IF $sources {(
            SELECT
                id,
                insight_type + ' - ' + source.title as title,
                content,
                source.id as parent_id,
                vector::similarity::cosine(embedding, $query) as similarity
            FROM source_insight
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Search notes, a note is its own parent
    let $note_content_search =
        IF $show_notes {(
            SELECT
                id,
                title,
                content,
                id as parent_id,
                vector::similarity::cosine(embedding, $query) as similarity
            FROM note
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Merge the three result sets into one array
    let $all_results = array::union(
        array::union($source_embedding_search, $source_insight_search),
        $note_content_search
    );

    -- Every projected field that is not aggregated is listed in GROUP BY,
    -- matching fn::text_search. Each id carries a single title, content and
    -- parent_id, so the groups are the same as grouping by id alone.
    RETURN (
        SELECT
            id, title, content, parent_id,
            math::max(similarity) as similarity
        FROM $all_results
        GROUP BY id, title, content, parent_id
        ORDER BY similarity DESC
        LIMIT $match_count
    );
};
|
||||
|
||||
|
||||
-- Drop the previous definition so the function can be recreated with the
-- richer result rows.
REMOVE FUNCTION fn::text_search;

-- fn::text_search: full-text search over sources, source chunks,
-- source insights and notes using the @1@ match operator.
--
-- Parameters:
--   $query_text   text to match
--   $match_count  maximum number of rows returned
--   $sources      when true, source, source_embedding and source_insight are searched
--   $show_notes   when true, note is searched
--
-- Returns rows with id, title, content, parent_id and relevance, ordered by
-- relevance from highest to lowest. content holds the highlighted match,
-- with matched terms wrapped in backticks.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources: bool, $show_notes: bool) {
    -- Sources whose title matches, a source is its own parent
    let $source_title_search =
        IF $sources {(
            SELECT id, title,
                search::highlight('`', '`', 1) as content,
                id as parent_id,
                math::max(search::score(1)) AS relevance
            FROM source
            WHERE title @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Source chunks whose content matches, parented by the linked source
    let $source_embedding_search =
        IF $sources {(
            SELECT id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
            FROM source_embedding
            WHERE content @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Sources whose full text matches. This query reads from source itself,
    -- so id and title are projected directly, as in $source_title_search.
    -- Going through a source link here produced rows without an id, which
    -- the final filter then discarded.
    let $source_full_search =
        IF $sources {(
            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
            FROM source
            WHERE full_text @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Source insights whose content matches, the title is prefixed with the insight type
    let $source_insight_search =
        IF $sources {(
            SELECT id, insight_type + " - " + source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance
            FROM source_insight
            WHERE content @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Notes whose title matches
    let $note_title_search =
        IF $show_notes {(
            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE title @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Notes whose content matches
    let $note_content_search =
        IF $show_notes {(
            SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE content @1@ $query_text
            GROUP BY id)}
        ELSE { [] };

    -- Merge all partial result sets into one array
    let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
    let $source_asset_results = array::union($source_title_search, $source_insight_search);
    let $source_results = array::union($source_chunk_results, $source_asset_results);
    let $note_results = array::union($note_title_search, $note_content_search);
    let $final_results = array::union($source_results, $note_results);

    -- Rows without an id are still filtered out as a safety net
    RETURN (
        SELECT id, title, content, parent_id, math::max(relevance) as relevance
        FROM $final_results
        WHERE id IS NOT NONE
        GROUP BY id, title, content, parent_id
        ORDER BY relevance DESC
        LIMIT $match_count
    );
};
|
||||
105
migrations/3_down.surrealql
Normal file
105
migrations/3_down.surrealql
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
-- Down migration: drop the five-parameter fn::vector_search and put back
-- the four-parameter definition, which has no $min_similarity argument.
REMOVE FUNCTION fn::vector_search;

-- fn::vector_search (four-parameter form).
--
-- Parameters:
--   $query        embedding vector of the search query
--   $match_count  row limit applied to each sub-search and to the merged result
--   $sources      when true, source_embedding and source_insight are read
--   $show_notes   when true, note is read
--
-- Returns rows with item_id and similarity, ordered by similarity from
-- highest to lowest. The sub-searches apply LIMIT without any ORDER BY or
-- similarity filter.
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources: bool, $show_notes: bool) {
    -- Source chunks, keyed by their source field
    LET $embedding_hits =
        IF $sources {
            (SELECT source AS item_id, content, vector::similarity::cosine(embedding, $query) AS similarity
             FROM source_embedding
             LIMIT $match_count)
        }
        ELSE { [] };

    -- Source insights, keyed by their source field
    LET $insight_hits =
        IF $sources {
            (SELECT source AS item_id, content, vector::similarity::cosine(embedding, $query) AS similarity
             FROM source_insight
             LIMIT $match_count)
        }
        ELSE { [] };

    -- Notes, keyed by their own id
    LET $note_hits =
        IF $show_notes {
            (SELECT id AS item_id, content, vector::similarity::cosine(embedding, $query) AS similarity
             FROM note
             LIMIT $match_count)
        }
        ELSE { [] };

    LET $chunk_and_insight_hits = array::union($embedding_hits, $insight_hits);

    -- NOTE(review): the insight hits are merged a second time here, which
    -- presumably adds nothing new. Kept so the restored definition matches
    -- the earlier one statement for statement.
    LET $source_hits = array::union($chunk_and_insight_hits, $insight_hits);

    LET $merged_hits = array::union($source_hits, $note_hits);

    -- Keep the best similarity per item
    RETURN (
        SELECT item_id, math::max(similarity) AS similarity
        FROM $merged_hits
        GROUP BY item_id
        ORDER BY similarity DESC
        LIMIT $match_count
    );
};
|
||||
|
||||
-- Down migration: drop the current fn::text_search and put back the
-- definition that returns only item_id and relevance.
REMOVE FUNCTION fn::text_search;

-- fn::text_search (item_id and relevance form).
--
-- Parameters:
--   $query_text   text to match with the @1@ operator
--   $match_count  maximum number of rows returned
--   $sources      when true, source, source_embedding and source_insight are searched
--   $show_notes   when true, note is searched
--
-- Returns rows with item_id and relevance, ordered by relevance from
-- highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources: bool, $show_notes: bool) {
    -- Sources matched on title, keyed by their own id
    LET $title_hits =
        IF $sources {
            (SELECT id AS item_id, math::max(search::score(1)) AS relevance
             FROM source
             WHERE title @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Source chunks matched on content, keyed by their source field
    LET $chunk_hits =
        IF $sources {
            (SELECT source AS item_id, math::max(search::score(1)) AS relevance
             FROM source_embedding
             WHERE content @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Sources matched on full_text.
    -- NOTE(review): this reads the source field of the source table itself,
    -- unlike the title query above which uses id. Left untouched because
    -- this file restores the earlier definition. Confirm against the schema.
    LET $full_text_hits =
        IF $sources {
            (SELECT source AS item_id, math::max(search::score(1)) AS relevance
             FROM source
             WHERE full_text @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Source insights matched on content, keyed by their source field
    LET $insight_hits =
        IF $sources {
            (SELECT source AS item_id, math::max(search::score(1)) AS relevance
             FROM source_insight
             WHERE content @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Notes matched on title
    LET $note_title_hits =
        IF $show_notes {
            (SELECT id AS item_id, math::max(search::score(1)) AS relevance
             FROM note
             WHERE title @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Notes matched on content
    LET $note_body_hits =
        IF $show_notes {
            (SELECT id AS item_id, math::max(search::score(1)) AS relevance
             FROM note
             WHERE content @1@ $query_text
             GROUP BY item_id)
        }
        ELSE { [] };

    -- Merge the partial result sets in the same pairing as before
    LET $chunk_level_hits = array::union($chunk_hits, $full_text_hits);
    LET $asset_level_hits = array::union($title_hits, $insight_hits);
    LET $source_hits = array::union($chunk_level_hits, $asset_level_hits);
    LET $note_hits = array::union($note_title_hits, $note_body_hits);
    LET $merged_hits = array::union($source_hits, $note_hits);

    -- Keep the best relevance per item
    RETURN (
        SELECT item_id, math::max(relevance) AS relevance
        FROM $merged_hits
        GROUP BY item_id
        ORDER BY relevance DESC
        LIMIT $match_count
    );
};
|
||||
|
|
@ -21,12 +21,14 @@ class MigrationManager:
|
|||
self.up_migrations = [
|
||||
Migration.from_file("migrations/1.surrealql"),
|
||||
Migration.from_file("migrations/2.surrealql"),
|
||||
Migration.from_file("migrations/3.surrealql"),
|
||||
]
|
||||
self.down_migrations = [
|
||||
Migration.from_file(
|
||||
"migrations/1_down.surrealql",
|
||||
),
|
||||
Migration.from_file("migrations/2_down.surrealql"),
|
||||
Migration.from_file("migrations/3_down.surrealql"),
|
||||
]
|
||||
self.runner = MigrationRunner(
|
||||
up_migrations=self.up_migrations,
|
||||
|
|
|
|||
Loading…
Reference in a new issue