From 2c6eca9756a43b8420f37d718b3d613b56f4d304 Mon Sep 17 00:00:00 2001
From: Gohar Anwar
Date: Wed, 29 Apr 2026 18:47:28 +0500
Subject: [PATCH] fix: poll for search-index visibility after indexing in three flaky tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The post-index queries in three tests asserted len(search_results) > 0
right after wait_for(get_document().success), but document storage and
search index visibility are eventually consistent on staging —
get_document returning 200 only proves the document is stored, not that
it is searchable. When the index lagged, the first /v2/query returned 0
results and the test failed.

Replace the immediate query + assertion with a wait_for(...) poll that
retries until the query returns results (timeout 30s, interval 2s),
mirroring the existing _krakatoa_gone pattern already used on the
delete side of the lifecycle test.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../corpus/test_filter_attributes_types.py    | 62 +++++++++++--------
 .../indexing/test_document_lifecycle.py       | 16 ++++-
 tests/services/query/test_query_filters.py    | 29 ++++++---
 3 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/tests/services/corpus/test_filter_attributes_types.py b/tests/services/corpus/test_filter_attributes_types.py
index 8f7974c..e64b71f 100644
--- a/tests/services/corpus/test_filter_attributes_types.py
+++ b/tests/services/corpus/test_filter_attributes_types.py
@@ -60,15 +60,31 @@ def test_text_integer_boolean_filters(self, client, unique_id):
             description="both documents indexed",
         )
 
-        text_query = client.post(
-            "/v2/query",
-            data={
-                "query": "research and data",
-                "search": {
-                    "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.category = 'tech'"}],
-                    "limit": 10,
-                },
-            },
+        def _filter_query_returns_results(metadata_filter):
+            def _check():
+                resp = client.post(
+                    "/v2/query",
+                    data={
+                        "query": "research and data",
+                        "search": {
+                            "corpora": [{"corpus_key": corpus_key, "metadata_filter": metadata_filter}],
+                            "limit": 10,
+                        },
+                    },
+                )
+                if not resp.success:
+                    return None
+                if not resp.data.get("search_results"):
+                    return None
+                return resp
+
+            return _check
+
+        text_query = wait_for(
+            _filter_query_returns_results("part.category = 'tech'"),
+            timeout=30,
+            interval=2,
+            description="text filter query to return results",
         )
         assert text_query.success, f"Text filter query failed: {text_query.status_code}"
         text_results = text_query.data.get("search_results", [])
@@ -77,15 +93,11 @@ def test_text_integer_boolean_filters(self, client, unique_id):
             "quantum" in r.get("text", "").lower() for r in text_results
         ), f"Text filter for 'tech' should only return tech doc: {[r.get('text', '')[:50] for r in text_results]}"
 
-        int_query = client.post(
-            "/v2/query",
-            data={
-                "query": "research and data",
-                "search": {
-                    "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.priority >= 3"}],
-                    "limit": 10,
-                },
-            },
+        int_query = wait_for(
+            _filter_query_returns_results("part.priority >= 3"),
+            timeout=30,
+            interval=2,
+            description="integer filter query to return results",
         )
         assert int_query.success, f"Integer filter query failed: {int_query.status_code}"
         int_results = int_query.data.get("search_results", [])
@@ -94,15 +106,11 @@ def test_text_integer_boolean_filters(self, client, unique_id):
             "climate" in r.get("text", "").lower() for r in int_results
         ), f"Integer filter >= 3 should only return science doc: {[r.get('text', '')[:50] for r in int_results]}"
 
-        bool_query = client.post(
-            "/v2/query",
-            data={
-                "query": "research and data",
-                "search": {
-                    "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.is_public = true"}],
-                    "limit": 10,
-                },
-            },
+        bool_query = wait_for(
+            _filter_query_returns_results("part.is_public = true"),
+            timeout=30,
+            interval=2,
+            description="boolean filter query to return results",
         )
         assert bool_query.success, f"Boolean filter query failed: {bool_query.status_code}"
         bool_results = bool_query.data.get("search_results", [])
diff --git a/tests/services/indexing/test_document_lifecycle.py b/tests/services/indexing/test_document_lifecycle.py
index d248079..f9c6a89 100644
--- a/tests/services/indexing/test_document_lifecycle.py
+++ b/tests/services/indexing/test_document_lifecycle.py
@@ -28,7 +28,21 @@ def test_index_query_delete_query_cycle(self, client, test_corpus, unique_id):
             description="document to be indexed",
         )
 
-        query_resp = client.query(test_corpus, "Krakatoa volcano eruption", limit=10)
+        def _krakatoa_in_results():
+            qr = client.query(test_corpus, "Krakatoa volcano eruption", limit=10)
+            if not qr.success:
+                return None
+            hits = qr.data.get("search_results", [])
+            if any("krakatoa" in r.get("text", "").lower() for r in hits):
+                return qr
+            return None
+
+        query_resp = wait_for(
+            _krakatoa_in_results,
+            timeout=30,
+            interval=2,
+            description="Krakatoa to appear in search",
+        )
         assert query_resp.success, f"Query failed: {query_resp.status_code}"
         results = query_resp.data.get("search_results", [])
         found = any("krakatoa" in r.get("text", "").lower() for r in results)
diff --git a/tests/services/query/test_query_filters.py b/tests/services/query/test_query_filters.py
index 5afbb77..91e2e04 100644
--- a/tests/services/query/test_query_filters.py
+++ b/tests/services/query/test_query_filters.py
@@ -53,15 +53,28 @@ def test_query_with_valid_metadata_filter(self, client, unique_id):
             description="document to be indexed",
         )
 
-        query_resp = client.post(
-            "/v2/query",
-            data={
-                "query": "artificial intelligence",
-                "search": {
-                    "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.topic = 'ai'"}],
-                    "limit": 10,
+        def _query_returns_results():
+            resp = client.post(
+                "/v2/query",
+                data={
+                    "query": "artificial intelligence",
+                    "search": {
+                        "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.topic = 'ai'"}],
+                        "limit": 10,
+                    },
                 },
-            },
+            )
+            if not resp.success:
+                return None
+            if not resp.data.get("search_results"):
+                return None
+            return resp
+
+        query_resp = wait_for(
+            _query_returns_results,
+            timeout=30,
+            interval=2,
+            description="filter query to return results",
         )
         assert query_resp.success, f"Query failed: {query_resp.status_code} - {query_resp.data}"
         results = query_resp.data.get("search_results", [])