From 994dbe09e79813aba04206aadd4bce7974faa62f Mon Sep 17 00:00:00 2001 From: Diwank Singh Tomer Date: Tue, 15 Oct 2024 14:15:34 -0400 Subject: [PATCH] fix(agents-api): Fix search embeddings query to avoid snippets with embedding == null (#662) Signed-off-by: Diwank Singh Tomer ---- > [!IMPORTANT] > Adds conditions in `search_docs_by_embedding.py` to exclude null embeddings, ensuring only valid embeddings are processed. > > - **Behavior**: > - In `search_docs_by_embedding` in `search_docs_by_embedding.py`, added conditions to exclude snippets with `embedding == null`. > - Ensures only valid embeddings are processed in the search query. > - **Misc**: > - Added `candidate` clause in `search_docs_by_text.py` for consistency. > > This description was created by [Ellipsis](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral) for 7decd8ab31be319e71747546dd85572ec8b3772f. It will automatically update as commits are pushed. --------- Signed-off-by: Diwank Singh Tomer --- .../agents_api/models/docs/search_docs_by_embedding.py | 8 ++++++++ agents-api/agents_api/models/docs/search_docs_by_text.py | 1 + 2 files changed, 9 insertions(+) diff --git a/agents-api/agents_api/models/docs/search_docs_by_embedding.py b/agents-api/agents_api/models/docs/search_docs_by_embedding.py index 83418aa21..90db8ac26 100644 --- a/agents-api/agents_api/models/docs/search_docs_by_embedding.py +++ b/agents-api/agents_api/models/docs/search_docs_by_embedding.py @@ -99,6 +99,7 @@ def search_docs_by_embedding( index1, min(dist) ] := + candidate[doc_id], *snippets {{ doc_id, index: index1, @@ -109,19 +110,24 @@ def search_docs_by_embedding( index: index2, embedding: embedding2 }}, + is_null(embedding1) == false, + is_null(embedding2) == false, index1 < index2, dist = cos_dist(embedding1, embedding2) doclength[doc_id, max(index)] := + candidate[doc_id], *snippets {{ doc_id, index, }} get_intersnippet[doc_id, index, distance] := + candidate[doc_id], intersnippet_distance[doc_id, _, distance] get_intersnippet[doc_id, index, distance] := + candidate[doc_id], not intersnippet_distance[doc_id, _, distance], distance = 0.0 @@ -151,6 +157,7 @@ def search_docs_by_embedding( distance, mmr_score, ] := + candidate[doc_id], search_result[doc_id, content, index, distance], get_intersnippet[doc_id, index, intersnippet_distance], mmr_score = {mmr_lambda} * (distance - (1.0 - {mmr_lambda}) * intersnippet_distance), @@ -165,6 +172,7 @@ def search_docs_by_embedding( mmr_score, title, ] := + candidate[doc_id], *docs {{ owner_type, owner_id, diff --git a/agents-api/agents_api/models/docs/search_docs_by_text.py b/agents-api/agents_api/models/docs/search_docs_by_text.py index bb700a494..bc7063e87 100644 --- a/agents-api/agents_api/models/docs/search_docs_by_text.py +++ b/agents-api/agents_api/models/docs/search_docs_by_text.py @@ -154,6 +154,7 @@ def search_docs_by_text( distance, title, ] := + candidate[id], input[owner_type, owner_id], m[ id,