From 4edae23b8e12b8ceb19c75652b215621128fb4fa Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Wed, 30 Nov 2022 09:42:09 +1100 Subject: [PATCH] Make quoted queries behave as described in the API documentation (return exact matches only) (#1012) * Fix quoted audio search example escaping * Make quoted queries behave as described in API documentation * Undo change breaking title match boosting * Fix and future proof tests against additional test data --- .../api/controllers/search_controller.py | 17 ++++++++++++----- api/catalog/api/examples/audio_requests.py | 2 +- api/catalog/api/examples/image_requests.py | 2 +- api/test/audio_integration_test.py | 6 ++++++ api/test/image_integration_test.py | 6 ++++++ api/test/media_integration.py | 19 +++++++++++++++++++ 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/api/catalog/api/controllers/search_controller.py b/api/catalog/api/controllers/search_controller.py index 99fb71aaf..e02170ffa 100644 --- a/api/catalog/api/controllers/search_controller.py +++ b/api/catalog/api/controllers/search_controller.py @@ -340,18 +340,25 @@ def search( search_fields = ["tags.name", "title", "description"] if "q" in search_params.data: query = _quote_escape(search_params.data["q"]) + base_query_kwargs = { + "query": query, + "fields": search_fields, + "default_operator": "AND", + } + + if '"' in query: + base_query_kwargs["quote_field_suffix"] = ".exact" + s = s.query( "simple_query_string", - query=query, - fields=search_fields, - default_operator="AND", + **base_query_kwargs, ) - # Boost exact matches + # Boost exact matches on the title quotes_stripped = query.replace('"', "") exact_match_boost = Q( "simple_query_string", fields=["title"], - query=f'"{quotes_stripped}"', + query=f"{quotes_stripped}", boost=10000, ) s = search_client.query(Q("bool", must=s.query, should=exact_match_boost)) diff --git a/api/catalog/api/examples/audio_requests.py b/api/catalog/api/examples/audio_requests.py index 5fab43653..b817ede4f 100644 --- a/api/catalog/api/examples/audio_requests.py +++ b/api/catalog/api/examples/audio_requests.py @@ -10,7 +10,7 @@ syntax_examples = { "using single query parameter": "test", "using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501 - "that is an exact match of Giacomo Puccini": '"Giacomo Puccini"', + "that is an exact match of Giacomo Puccini": r"%22Giacomo%20Puccini%22", "related to both dog and cat": "dog+cat", "related to dog or cat, but not necessarily both": "dog|cat", "related to dog but won't include results related to 'pug'": "dog -pug", diff --git a/api/catalog/api/examples/image_requests.py b/api/catalog/api/examples/image_requests.py index 4f846a036..3524e4745 100644 --- a/api/catalog/api/examples/image_requests.py +++ b/api/catalog/api/examples/image_requests.py @@ -10,7 +10,7 @@ syntax_examples = { "using single query parameter": "test", "using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501 - "that are an exact match of Claude Monet": '"Claude Monet"', + "that are an exact match of Claude Monet": "%22Claude%20Monet%22", "related to both dog and cat": "dog+cat", "related to dog or cat, but not necessarily both": "dog|cat", "related to dog but won't include results related to 'pug'": "dog -pug", diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index 4e0bf6e28..69ad8b8e6 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -15,6 +15,7 @@ search_by_category, search_consistency, search_quotes, + search_quotes_exact, search_source_and_excluded, search_special_chars, stats, @@ -101,6 +102,11 @@ def test_search_quotes(): search_quotes("audio", "love") +def test_search_quotes_exact(): + # ``water running`` returns different results when quoted vs unquoted + search_quotes_exact("audio", "water running") + + def test_search_with_special_characters(): search_special_chars("audio", "love") diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index 8c8dc63e5..acb5d0d44 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -15,6 +15,7 @@ search_all_excluded, search_consistency, search_quotes, + search_quotes_exact, search_source_and_excluded, search_special_chars, stats, @@ -53,6 +54,11 @@ def test_search_quotes(): search_quotes("images", "dog") +def test_search_quotes_exact(): + # ``bird perched`` returns different results when quoted vs unquoted + search_quotes_exact("images", "bird perched") + + def test_search_with_special_characters(): search_special_chars("images", "dog") diff --git a/api/test/media_integration.py b/api/test/media_integration.py index 99d30e474..f90acb059 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -45,6 +45,25 @@ def search_quotes(media_path, q="test"): assert response.status_code == 200 +def search_quotes_exact(media_path, q): + """Only returns exact matches for the given query""" + unquoted_response = requests.get(f"{API_URL}/v1/{media_path}?q={q}", verify=False) + assert unquoted_response.status_code == 200 + unquoted_result_count = unquoted_response.json()["result_count"] + assert unquoted_result_count > 0 + + quoted_response = requests.get(f'{API_URL}/v1/{media_path}?q="{q}"', verify=False) + assert quoted_response.status_code == 200 + quoted_result_count = quoted_response.json()["result_count"] + assert quoted_result_count > 0 + + # The rationale here is that the unquoted results will match more records due + # to the query being overall less strict. Quoting the query will make it more + # strict causing it to return fewer results. + # Above we check that the results are not 0 to confirm that we do still get results back. + assert quoted_result_count < unquoted_result_count + + def search_special_chars(media_path, q="test"): """Returns a response when query includes special characters.""" response = requests.get(f"{API_URL}/v1/{media_path}?q={q}!", verify=False)