From 598d831cfd23ac59425a6b24e23b364ad4c232b2 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Mon, 26 Feb 2024 23:28:29 +0000 Subject: [PATCH] refactor: update logic for odk form creation & media upload --- src/backend/app/central/central_crud.py | 117 +++++++++++------------ src/backend/app/projects/project_crud.py | 82 ++++++++-------- 2 files changed, 100 insertions(+), 99 deletions(-) diff --git a/src/backend/app/central/central_crud.py b/src/backend/app/central/central_crud.py index 4c462d51c0..77586f0088 100644 --- a/src/backend/app/central/central_crud.py +++ b/src/backend/app/central/central_crud.py @@ -212,25 +212,18 @@ def upload_xform_media( def create_odk_xform( - project_id: int, - xform_name: str, - filespec: str, + odk_id: int, + xform_path: Path, + xform_category: str, feature_geojson: BytesIO, - odk_credentials: Optional[project_schemas.ODKCentralDecrypted] = None, + odk_credentials: project_schemas.ODKCentralDecrypted, create_draft: bool = False, convert_to_draft_when_publishing=True, ): """Create an XForm on a remote ODK Central server.""" - title = os.path.basename(os.path.splitext(filespec)[0]) # result = xform.createForm(project_id, title, filespec, True) # Pass odk credentials of project in xform - if not odk_credentials: - odk_credentials = project_schemas.ODKCentralDecrypted( - odk_central_url=settings.ODK_CENTRAL_URL, - odk_central_user=settings.ODK_CENTRAL_USER, - odk_central_password=settings.ODK_CENTRAL_PASSWD, - ) try: xform = get_odk_form(odk_credentials) except Exception as e: @@ -239,28 +232,29 @@ def create_odk_xform( status_code=500, detail={"message": "Connection failed to odk central"} ) from e - result = xform.createForm(project_id, xform_name, filespec, create_draft) + result = xform.createForm(odk_id, xform_path.stem, str(xform_path), create_draft) if result != 200 and result != 409: return result # TODO refactor osm_fieldwork.OdkCentral.OdkForm.uploadMedia # to accept passing a bytesio object and update - geojson_file = Path(f"/tmp/{title}.geojson") - with open(geojson_file, "w") as f: - f.write(feature_geojson.getvalue().decode("utf-8")) + geojson_path = Path(f"/tmp/fmtm/odk/{odk_id}/{xform_category}.geojson") + geojson_path.parents[0].mkdir(parents=True, exist_ok=True) + with open(geojson_path, "w") as geojson_file: + geojson_file.write(feature_geojson.getvalue().decode("utf-8")) # This modifies an existing published XForm to be in draft mode. # An XForm must be in draft mode to upload an attachment. # Upload the geojson of features to be modified xform.uploadMedia( - project_id, title, str(geojson_file), convert_to_draft_when_publishing + odk_id, xform_path.stem, str(geojson_path), convert_to_draft_when_publishing ) # Delete temp geojson file - geojson_file.unlink(missing_ok=True) + geojson_path.unlink(missing_ok=True) - result = xform.publishForm(project_id, title) + result = xform.publishForm(odk_id, xform_path.stem) return result @@ -272,7 +266,7 @@ def delete_odk_xform( ): """Delete an XForm from a remote ODK Central server.""" xform = get_odk_form(odk_central) - result = xform.deleteForm(project_id, xform_id, filespec, True) + result = xform.deleteForm(project_id, xform_id) # FIXME: make sure it's a valid project id return result @@ -410,39 +404,36 @@ async def test_form_validity(xform_content: str, form_type: str): def generate_updated_xform( - xlsform: str, - xform: str, - form_type: str, -): + input_path: str, + xform_path: Path, + form_file_extension: str, + form_category: str, +) -> str: """Update the version in an XForm so it's unique.""" - name = os.path.basename(xform).replace(".xml", "") - outfile = xform - - log.debug(f"Reading xlsform: {xlsform}") - if form_type != "xml": + if form_file_extension != "xml": try: - xls2xform_convert(xlsform_path=xlsform, xform_path=outfile, validate=False) + log.debug(f"Reading & converting xlsform -> xform: {input_path}") + xls2xform_convert( + xlsform_path=input_path, xform_path=str(xform_path), validate=False + ) except Exception as e: - log.error(f"Couldn't convert {xlsform} to an XForm!", str(e)) - raise HTTPException(status_code=400, detail=str(e)) from e - - if os.path.getsize(outfile) <= 0: - log.warning(f"{outfile} is empty!") - raise HTTPException(status=400, detail=f"{outfile} is empty!") from None - - xls = open(outfile, "r") - data = xls.read() - xls.close() + log.error(e) + msg = f"Couldn't convert {input_path} to an XForm!" + log.error(msg) + raise HTTPException(status_code=400, detail=msg) from e + + if xform_path.stat().st_size <= 0: + log.warning(f"{str(xform_path)} is empty!") + raise HTTPException( + status_code=400, detail=f"{str(xform_path)} is empty!" + ) from None + + with open(xform_path, "r") as xform: + data = xform.read() else: - xls = open(xlsform, "r") - data = xls.read() - xls.close() - - tmp = name.split("_") - tmp[0] - tmp[1] - id = tmp[2].split(".")[0] - extract = f"jr://file/{name}.geojson" + with open(input_path, "r") as xlsform: + log.debug(f"Reading XForm directly: {str(input_path)}") + data = xlsform.read() # # Parse the XML to geojson # xml = xmltodict.parse(str(data)) @@ -479,41 +470,45 @@ def generate_updated_xform( # index += 1 # xml["h:html"]["h:head"]["h:title"] = name + log.debug("Updating XML keys in XForm with data extract file & form id") namespaces = { "h": "http://www.w3.org/1999/xhtml", "odk": "http://www.opendatakit.org/xforms", "xforms": "http://www.w3.org/2002/xforms", } + instances = [] root = ElementTree.fromstring(data) head = root.find("h:head", namespaces) - model = head.find("xforms:model", namespaces) - instances = model.findall("xforms:instance", namespaces) + if head: + model = head.find("xforms:model", namespaces) + if model: + instances = model.findall("xforms:instance", namespaces) - index = 0 for inst in instances: try: if "src" in inst.attrib: - if (inst.attrib["src"].split("."))[1] == "geojson": - (inst.attrib)["src"] = extract + src_value = inst.attrib.get("src", "") + if src_value.endswith(".geojson"): + inst.attrib["src"] = f"jr://file/{form_category}.geojson" # Looking for data tags data_tags = inst.findall("xforms:data", namespaces) if data_tags: for dt in data_tags: - dt.attrib["id"] = id - except Exception: + if "id" in dt.attrib: + dt.attrib["id"] = str(xform_path.stem) + except Exception as e: + log.debug(e) + log.warning(f"Exception parsing XForm XML: {str(xform_path)}") continue - index += 1 # Save the modified XML newxml = ElementTree.tostring(root) # write the updated XML file - outxml = open(outfile, "w") - # newxml = xmltodict.unparse(xml) - outxml.write(newxml.decode()) - outxml.close() + with open(xform_path, "wb") as outxml: + outxml.write(newxml) # insert the new version # forms = table( @@ -526,7 +521,7 @@ def generate_updated_xform( # db.execute(sql) # db.commit() - return outfile + return str(xform_path) def upload_media( diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 11927f7983..2ab7e5ad31 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -24,6 +24,7 @@ from concurrent.futures import ThreadPoolExecutor, wait from importlib.resources import files as pkg_files from io import BytesIO +from pathlib import Path from typing import List, Optional, Union import geoalchemy2 @@ -93,7 +94,7 @@ async def get_projects( if search: filters.append( - db_models.DbProject.project_name_prefix.ilike( # type: ignore + db_models.DbProject.project_info.name.ilike( # type: ignore f"%{search}%" ) ) @@ -1112,8 +1113,8 @@ def generate_task_files( project: db_models.DbProject, task_id: int, data_extract: FeatureCollection, - xlsform: str, - form_type: str, + xlsform_path: str, + form_file_extension: str, odk_credentials: project_schemas.ODKCentralDecrypted, ): """Generate all files for a task.""" @@ -1154,11 +1155,10 @@ def generate_task_files( f"{odk_url}/v1/key/{appuser_token}/projects/{odk_id}" ) - # This file will store xml contents of an xls form. - xform = f"/tmp/{appuser_name}.xml" - - # xform_id_format - xform_id = f"{appuser_name}".split("_")[2] + # This file will store xml contents of an xls form + # NOTE there must be two underscores, for [2] index in OdkCentral.py + xform_path = Path(f"/tmp/fmtm/{project_name}_{task_id}_{category}.xml") + xform_path.parents[0].mkdir(parents=True, exist_ok=True) # Create memory object from split data extract geojson_string = geojson.dumps(data_extract) @@ -1167,31 +1167,40 @@ def generate_task_files( project_log.info( f"Generating xform for task: {task_id} | " - f"using xform: {xform} | form_type: {form_type}" + f"xform: {xform_path} | form_type: {form_file_extension}" + ) + central_crud.generate_updated_xform( + xlsform_path, xform_path, form_file_extension, category ) - xform_path = central_crud.generate_updated_xform(xlsform, xform, form_type) # Create an odk xform project_log.info(f"Uploading data extract media to task ({task_id})") central_crud.create_odk_xform( odk_id, - str(task_id), xform_path, + category, geojson_bytesio, odk_credentials, - False, + create_draft=False, ) + + # Update db with feature count after form uploaded with media task.feature_count = len(data_extract.get("features", [])) + log.debug(f"({task.feature_count}) features added for task ({task_id})") project_log.info(f"Updating role for app user in task {task_id}") # Update the user role for the created xform. try: + log.debug("Updating appuser role to access XForm") appuser.updateRole( - projectId=odk_id, xform=xform_id, actorId=appuser_json.get("id") + projectId=odk_id, xform=xform_path.stem, actorId=appuser_json.get("id") ) except Exception as e: log.exception(e) + # Remove xform temp files + xform_path.unlink(missing_ok=True) + project.extract_completed_count += 1 # Commit db transaction @@ -1243,18 +1252,18 @@ def generate_project_files( # TODO uncomment after refactor to use BytesIO # xlsform = custom_form - xlsform = f"/tmp/{form_category}.{form_format}" - with open(xlsform, "wb") as f: + xlsform_path = f"/tmp/{form_category}.{form_format}" + with open(xlsform_path, "wb") as f: f.write(custom_form.getvalue()) else: - log.debug(f"Using default XLSForm for category {form_category}") + log.debug(f"Using default XLSForm for category: '{form_category}'") # TODO uncomment after refactor to use BytesIO # xlsform_path = f"{xlsforms_path}/{form_category}.xls" # with open(xlsform_path, "rb") as f: # xlsform = BytesIO(f.read()) - xlsform = f"{xlsforms_path}/{form_category}.xls" + xlsform_path = f"{xlsforms_path}/{form_category}.xls" # filter = FilterData(xlsform) # updated_data_extract = {"type": "FeatureCollection", "features": []} @@ -1273,9 +1282,9 @@ def generate_project_files( # Split extract by task area split_geojson_sync = async_to_sync(split_geojson_by_task_areas) - split_extract_dict = split_geojson_sync(db, feature_collection, project_id) + task_extract_dict = split_geojson_sync(db, feature_collection, project_id) - if not split_extract_dict: + if not task_extract_dict: log.warning("Project ({project_id}) failed splitting tasks") raise HTTPException( status_code=HTTPStatus.UNPROCESSABLE_ENTITY, @@ -1295,8 +1304,8 @@ def wrap_generate_task_files(task_id): next(get_db()), project, task_id, - split_extract_dict[task_id], - xlsform, + task_extract_dict[task_id], + xlsform_path, form_format, odk_credentials, ) @@ -1308,7 +1317,7 @@ def wrap_generate_task_files(task_id): # Submit tasks to the thread pool futures = [ executor.submit(wrap_generate_task_files, task_id) - for task_id in split_extract_dict.keys() + for task_id in task_extract_dict.keys() ] # Wait for all tasks to complete wait(futures) @@ -1669,7 +1678,6 @@ async def update_project_form( """Upload a new custom XLSForm for a project.""" project = await get_project(db, project_id) category = project.xform_title - project_title = project.project_name_prefix odk_id = project.odkid # ODK Credentials @@ -1685,9 +1693,7 @@ async def update_project_form( # TODO fix this to use correct data extract generation pg = PostgresClient("underpass") - outfile = ( - f"/tmp/{project_title}_{category}.geojson" # This file will store osm extracts - ) + outfile = f"/tmp/{category}.geojson" # This file will store osm extracts # FIXME test this works # FIXME PostgresClient.getFeatures does not exist... @@ -1716,25 +1722,25 @@ async def update_project_form( tasks_list = await tasks_crud.get_task_id_list(db, project_id) - for task in tasks_list: + for task_id in tasks_list: # This file will store xml contents of an xls form. - xform = f"/tmp/{project_title}_{category}_{task}.xml" - extracts = f"/tmp/{project_title}_{category}_{task}.geojson" - - # Update outfile containing osm extracts with the new geojson contents - # containing title in the properties. - with open(extracts, "w") as jsonfile: - jsonfile.truncate(0) # clear the contents of the file - geojson.dump(feature_geojson, jsonfile) + xform_path = Path(f"/tmp/{category}_{task_id}.xml") - outfile = central_crud.generate_updated_xform(xlsform, xform, form_type) + outfile = central_crud.generate_updated_xform( + xlsform, xform_path, form_type, category + ) # Create an odk xform # TODO include data extract geojson correctly result = central_crud.create_odk_xform( - odk_id, str(task), xform, feature_geojson, odk_credentials, True, False + odk_id, + xform_path, + category, + feature_geojson, + odk_credentials, + create_draft=True, + convert_to_draft_when_publishing=False, ) - return True