Skip to content

Commit

Permalink
Track failed QA runs and include in list endpoint (#1650)
Browse files Browse the repository at this point in the history
Fixes #1648 

- Tracks failed QA runs in database, not only successful ones
- Includes failed QA runs in list endpoint by default
- Adds `skipFailed` param to list endpoint to return only successful
runs

---------
Co-authored-by: Ilya Kreymer <[email protected]>
  • Loading branch information
tw4l authored Apr 5, 2024
1 parent 5c08c96 commit 4229b94
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 10 deletions.
41 changes: 33 additions & 8 deletions backend/btrixcloud/crawls.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
PaginatedResponse,
RUNNING_AND_STARTING_STATES,
SUCCESSFUL_STATES,
NON_RUNNING_STATES,
ALL_CRAWL_STATES,
TYPE_ALL_CRAWL_STATES,
)
Expand Down Expand Up @@ -726,15 +727,19 @@ async def start_crawl_qa_run(self, crawl_id: str, org: Organization, user: User)
# pylint: disable=raise-missing-from
raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}")

async def stop_crawl_qa_run(self, crawl_id: str, org: Organization):
async def stop_crawl_qa_run(
self, crawl_id: str, org: Organization, graceful: bool = True
):
"""Stop crawl QA run, QA run removed when actually finished"""
crawl = await self.get_crawl(crawl_id, org)

if not crawl.qa:
raise HTTPException(status_code=400, detail="qa_not_running")

try:
result = await self.crawl_manager.shutdown_crawl(crawl.qa.id, graceful=True)
result = await self.crawl_manager.shutdown_crawl(
crawl.qa.id, graceful=graceful
)

if result.get("error") == "Not Found":
# treat as success, qa crawl no longer exists, so mark as no qa
Expand Down Expand Up @@ -775,7 +780,7 @@ async def qa_run_finished(self, crawl_id: str):

query: Dict[str, Any] = {"qa": None}

if crawl.qa.finished and crawl.qa.state in SUCCESSFUL_STATES:
if crawl.qa.finished and crawl.qa.state in NON_RUNNING_STATES:
query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict()

if await self.crawls.find_one_and_update(
Expand All @@ -786,17 +791,28 @@ async def qa_run_finished(self, crawl_id: str):
return False

async def get_qa_runs(
self, crawl_id: str, org: Optional[Organization] = None
self,
crawl_id: str,
skip_failed: bool = False,
org: Optional[Organization] = None,
) -> List[QARunOut]:
"""Return list of QA runs"""
crawl_data = await self.get_crawl_raw(
crawl_id, org, "crawl", project={"qaFinished": True, "qa": True}
)
qa_finished = crawl_data.get("qaFinished") or {}
all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
if skip_failed:
all_qa = [
QARunOut(**qa_run_data)
for qa_run_data in qa_finished.values()
if qa_run_data.get("state") in SUCCESSFUL_STATES
]
else:
all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True)
qa = crawl_data.get("qa")
if qa:
# ensure current QA run didn't just fail, just in case
if qa and (not skip_failed or qa.get("state") in SUCCESSFUL_STATES):
all_qa.insert(0, QARunOut(**qa))
return all_qa

Expand Down Expand Up @@ -1061,6 +1077,13 @@ async def stop_crawl_qa_run(
# pylint: disable=unused-argument
return await ops.stop_crawl_qa_run(crawl_id, org)

@app.post("/orgs/{oid}/crawls/{crawl_id}/qa/cancel", tags=["qa"])
async def cancel_crawl_qa_run(
crawl_id: str, org: Organization = Depends(org_crawl_dep)
):
# pylint: disable=unused-argument
return await ops.stop_crawl_qa_run(crawl_id, org, graceful=False)

@app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"])
async def delete_crawl_qa_runs(
crawl_id: str,
Expand All @@ -1075,8 +1098,10 @@ async def delete_crawl_qa_runs(
tags=["qa"],
response_model=List[QARunOut],
)
async def get_qa_runs(crawl_id, org: Organization = Depends(org_viewer_dep)):
return await ops.get_qa_runs(crawl_id, org)
async def get_qa_runs(
crawl_id, org: Organization = Depends(org_viewer_dep), skipFailed: bool = False
):
return await ops.get_qa_runs(crawl_id, skip_failed=skipFailed, org=org)

@app.get(
"/orgs/{oid}/crawls/{crawl_id}/qa/activeQA",
Expand Down
96 changes: 94 additions & 2 deletions backend/test/test_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import time
from datetime import datetime


qa_run_id = None
failed_qa_run_id = None


def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
Expand Down Expand Up @@ -182,15 +184,105 @@ def test_run_qa_not_running(crawler_crawl_id, crawler_auth_headers, default_org_
assert r.json()["detail"] == "qa_not_running"


def test_fail_running_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/start",
headers=crawler_auth_headers,
)

assert r.status_code == 200

data = r.json()
assert data["started"]
global failed_qa_run_id
failed_qa_run_id = data["started"]

# Wait until it's properly running
count = 0
while count < 24:
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
headers=crawler_auth_headers,
)

data = r.json()
if data.get("qa") and data["qa"].get("state") == "running":
break

time.sleep(5)
count += 1

# Cancel crawl
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
headers=crawler_auth_headers,
)
assert r.status_code == 200

# Wait until it stops with canceled state
count = 0
while count < 24:
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
headers=crawler_auth_headers,
)

data = r.json()
if data.get("qa") and data["qa"].get("state") == "canceled":
break

time.sleep(5)
count += 1

# Ensure failed QA run is included in list endpoint
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa",
headers=crawler_auth_headers,
)

data = r.json()

assert len(data) == 2

failed_run = [qa_run for qa_run in data if qa_run.get("id") == failed_qa_run_id][0]
assert failed_run
assert failed_run["state"] == "canceled"
assert failed_run["started"]
assert failed_run["finished"]
assert failed_run["stats"]
assert failed_run["crawlExecSeconds"] >= 0

# Ensure failed QA run not included in list endpoint with skipFailed param
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa?skipFailed=true",
headers=crawler_auth_headers,
)

data = r.json()

assert len(data) == 1

qa = data[0]
assert qa
assert qa["state"] == "complete"
assert qa["started"]
assert qa["finished"]
assert qa["stats"]["found"] == 1
assert qa["stats"]["done"] == 1
assert qa["crawlExecSeconds"] > 0


def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
json={"qa_run_ids": [qa_run_id]},
json={"qa_run_ids": [qa_run_id, failed_qa_run_id]},
headers=crawler_auth_headers,
)

assert r.status_code == 200
assert r.json()["deleted"] == True
assert r.json()["deleted"] == 2

time.sleep(5)

# deleted from finished qa list
r = requests.get(
Expand Down

0 comments on commit 4229b94

Please sign in to comment.