From 4229b9473629b0c14d796dc573078c8f238c548f Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Thu, 4 Apr 2024 21:51:06 -0400
Subject: [PATCH] Track failed QA runs and include in list endpoint (#1650)

Fixes #1648

- Tracks failed QA runs in the database, not only successful ones
- Includes failed QA runs in the list endpoint by default
- Adds `skipFailed` param to the list endpoint to return only successful runs

---------

Co-authored-by: Ilya Kreymer
---
 backend/btrixcloud/crawls.py | 41 ++++++++++++---
 backend/test/test_qa.py      | 96 +++++++++++++++++++++++++++++++++++-
 2 files changed, 127 insertions(+), 10 deletions(-)

diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 661eb5608b..92426d84f8 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -40,6 +40,7 @@
     PaginatedResponse,
     RUNNING_AND_STARTING_STATES,
     SUCCESSFUL_STATES,
+    NON_RUNNING_STATES,
     ALL_CRAWL_STATES,
     TYPE_ALL_CRAWL_STATES,
 )
@@ -726,7 +727,9 @@ async def start_crawl_qa_run(self, crawl_id: str, org: Organization, user: User)
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}")
 
-    async def stop_crawl_qa_run(self, crawl_id: str, org: Organization):
+    async def stop_crawl_qa_run(
+        self, crawl_id: str, org: Organization, graceful: bool = True
+    ):
         """Stop crawl QA run, QA run removed when actually finished"""
         crawl = await self.get_crawl(crawl_id, org)
 
@@ -734,7 +737,9 @@ async def stop_crawl_qa_run(self, crawl_id: str, org: Organization):
             raise HTTPException(status_code=400, detail="qa_not_running")
 
         try:
-            result = await self.crawl_manager.shutdown_crawl(crawl.qa.id, graceful=True)
+            result = await self.crawl_manager.shutdown_crawl(
+                crawl.qa.id, graceful=graceful
+            )
 
             if result.get("error") == "Not Found":
                 # treat as success, qa crawl no longer exists, so mark as no qa
@@ -775,7 +780,7 @@ async def qa_run_finished(self, crawl_id: str):
 
         query: Dict[str, Any] = {"qa": None}
 
-        if crawl.qa.finished and crawl.qa.state in SUCCESSFUL_STATES:
+        if crawl.qa.finished and crawl.qa.state in NON_RUNNING_STATES:
             query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict()
 
         if await self.crawls.find_one_and_update(
@@ -786,17 +791,28 @@ async def qa_run_finished(self, crawl_id: str):
         return False
 
     async def get_qa_runs(
-        self, crawl_id: str, org: Optional[Organization] = None
+        self,
+        crawl_id: str,
+        skip_failed: bool = False,
+        org: Optional[Organization] = None,
     ) -> List[QARunOut]:
         """Return list of QA runs"""
         crawl_data = await self.get_crawl_raw(
             crawl_id, org, "crawl", project={"qaFinished": True, "qa": True}
         )
         qa_finished = crawl_data.get("qaFinished") or {}
-        all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
+        if skip_failed:
+            all_qa = [
+                QARunOut(**qa_run_data)
+                for qa_run_data in qa_finished.values()
+                if qa_run_data.get("state") in SUCCESSFUL_STATES
+            ]
+        else:
+            all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
         all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True)
         qa = crawl_data.get("qa")
-        if qa:
+        # ensure current QA run didn't just fail, just in case
+        if qa and (not skip_failed or qa.get("state") in SUCCESSFUL_STATES):
             all_qa.insert(0, QARunOut(**qa))
         return all_qa
 
@@ -1061,6 +1077,13 @@ async def stop_crawl_qa_run(
         # pylint: disable=unused-argument
         return await ops.stop_crawl_qa_run(crawl_id, org)
 
+    @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/cancel", tags=["qa"])
+    async def cancel_crawl_qa_run(
+        crawl_id: str, org: Organization = Depends(org_crawl_dep)
+    ):
+        # pylint: disable=unused-argument
+        return await ops.stop_crawl_qa_run(crawl_id, org, graceful=False)
+
     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"])
     async def delete_crawl_qa_runs(
         crawl_id: str,
@@ -1075,8 +1098,10 @@
         tags=["qa"],
         response_model=List[QARunOut],
     )
-    async def get_qa_runs(crawl_id, org: Organization = Depends(org_viewer_dep)):
-        return await ops.get_qa_runs(crawl_id, org)
+    async def get_qa_runs(
+        crawl_id, org: Organization = Depends(org_viewer_dep), skipFailed: bool = False
+    ):
+        return await ops.get_qa_runs(crawl_id, skip_failed=skipFailed, org=org)
 
     @app.get(
         "/orgs/{oid}/crawls/{crawl_id}/qa/activeQA",
diff --git a/backend/test/test_qa.py b/backend/test/test_qa.py
index b8f15bb99c..f02109f00b 100644
--- a/backend/test/test_qa.py
+++ b/backend/test/test_qa.py
@@ -3,7 +3,9 @@
 import time
 from datetime import datetime
 
+
 qa_run_id = None
+failed_qa_run_id = None
 
 
 def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
@@ -182,15 +184,105 @@ def test_run_qa_not_running(crawler_crawl_id, crawler_auth_headers, default_org_id):
     assert r.json()["detail"] == "qa_not_running"
 
 
+def test_fail_running_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/start",
+        headers=crawler_auth_headers,
+    )
+
+    assert r.status_code == 200
+
+    data = r.json()
+    assert data["started"]
+    global failed_qa_run_id
+    failed_qa_run_id = data["started"]
+
+    # Wait until it's properly running
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "running":
+            break
+
+        time.sleep(5)
+        count += 1
+
+    # Cancel crawl
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+
+    # Wait until it stops with canceled state
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "canceled":
+            break
+
+        time.sleep(5)
+        count += 1
+
+    # Ensure failed QA run is included in list endpoint
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa",
+        headers=crawler_auth_headers,
+    )
+
+    data = r.json()
+
+    assert len(data) == 2
+
+    failed_run = [qa_run for qa_run in data if qa_run.get("id") == failed_qa_run_id][0]
+    assert failed_run
+    assert failed_run["state"] == "canceled"
+    assert failed_run["started"]
+    assert failed_run["finished"]
+    assert failed_run["stats"]
+    assert failed_run["crawlExecSeconds"] >= 0
+
+    # Ensure failed QA run not included in list endpoint with skipFailed param
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa?skipFailed=true",
+        headers=crawler_auth_headers,
+    )
+
+    data = r.json()
+
+    assert len(data) == 1
+
+    qa = data[0]
+    assert qa
+    assert qa["state"] == "complete"
+    assert qa["started"]
+    assert qa["finished"]
+    assert qa["stats"]["found"] == 1
+    assert qa["stats"]["done"] == 1
+    assert qa["crawlExecSeconds"] > 0
+
+
 def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
-        json={"qa_run_ids": [qa_run_id]},
json={"qa_run_ids": [qa_run_id]}, + json={"qa_run_ids": [qa_run_id, failed_qa_run_id]}, headers=crawler_auth_headers, ) assert r.status_code == 200 - assert r.json()["deleted"] == True + assert r.json()["deleted"] == 2 + + time.sleep(5) # deleted from finished qa list r = requests.get(