Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Endpoint to conflate the submission with osm data #1594

Merged
merged 6 commits into from
Jul 11, 2024
24 changes: 18 additions & 6 deletions src/backend/app/auth/roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ async def wrap_check_access(
if not db_user:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="User is not a project manager",
detail="User do not have permission to access the project.",
)

return {
Expand Down Expand Up @@ -321,17 +321,29 @@ async def mapper(
project: DbProject = Depends(get_project_by_id),
db: Session = Depends(get_db),
user_data: AuthUser = Depends(login_required),
) -> AuthUser:
) -> ProjectUserDict:
"""A mapper for a specific project."""
# If project is public, skip permission check
if project.visibility == ProjectVisibility.PUBLIC:
return user_data
user_id = user_data.id
sql = text("SELECT * FROM users WHERE id = :user_id;")
result = db.execute(sql, {"user_id": user_id})
db_user = result.first()

if not db_user:
raise HTTPException(
status_code=HTTPStatus.NOT_FOUND,
detail=f"User ({user_id}) does not exist in database",
)

return {
"user": DbUser(**db_user._asdict()),
"project": project,
}

await wrap_check_access(
return await wrap_check_access(
project,
db,
user_data,
ProjectRole.MAPPER,
)

return user_data
15 changes: 8 additions & 7 deletions src/backend/app/central/central_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import os
import uuid
from io import BytesIO, StringIO
from typing import Optional
from typing import Optional, Union
from xml.etree.ElementTree import Element, SubElement

import geojson
Expand Down Expand Up @@ -624,8 +624,8 @@ def flatten_json(data: dict, target: dict):


async def convert_odk_submission_json_to_geojson(
input_json: BytesIO,
) -> BytesIO:
input_json: Union[BytesIO, list],
) -> geojson.FeatureCollection:
"""Convert ODK submission JSON file to GeoJSON.

Used for loading into QGIS.
Expand All @@ -636,7 +636,10 @@ async def convert_odk_submission_json_to_geojson(
Returns:
geojson (geojson.FeatureCollection): GeoJSON format ODK submission.
"""
submission_json = json.loads(input_json.getvalue())
if isinstance(input_json, list):
submission_json = input_json
else:
submission_json = json.loads(input_json.getvalue())

if not submission_json:
raise HTTPException(
Expand All @@ -660,9 +663,7 @@ async def convert_odk_submission_json_to_geojson(
feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)

featcol = geojson.FeatureCollection(features=all_features)

return BytesIO(json.dumps(featcol).encode("utf-8"))
return geojson.FeatureCollection(features=all_features)


async def get_entities_geojson(
Expand Down
122 changes: 122 additions & 0 deletions src/backend/app/db/postgis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import logging
from asyncio import gather
from datetime import datetime, timezone
from io import BytesIO
from random import getrandbits
from typing import Optional, Union

Expand All @@ -32,15 +33,20 @@
from geoalchemy2.shape import from_shape, to_shape
from geojson_pydantic import Feature, MultiPolygon, Polygon
from geojson_pydantic import FeatureCollection as FeatCol
from osm_fieldwork.data_models import data_models_path
from osm_rawdata.postgres import PostgresClient
from shapely.geometry import mapping, shape
from shapely.geometry.base import BaseGeometry
from shapely.ops import unary_union
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

from app.config import settings
from app.models.enums import XLSFormType

log = logging.getLogger(__name__)
API_URL = settings.RAW_DATA_API_URL


def timestamp():
Expand Down Expand Up @@ -785,3 +791,119 @@ def parse_featcol(features: Union[Feature, FeatCol, MultiPolygon, Polygon]):
elif isinstance(features, Feature):
feat_col = geojson.FeatureCollection([feat_col])
return feat_col


def get_osm_geometries(form_category, geometry):
    """Query OSM data for a geometry using the category's data model.

    The form category is mapped to its ``XLSFormType`` member name, which
    selects the YAML data model file passed to the ``PostgresClient`` as the
    extract configuration.

    Args:
        form_category(str): feature category type (eg: buildings).
        geometry (str): The geometry data in JSON format.

    Returns:
        dict: The JSON response containing the snapshot data.
    """
    category_name = XLSFormType(form_category).name
    yaml_path = f"{data_models_path}/{category_name}.yaml"

    # Load the whole data model into memory so the client gets a file-like
    # object that outlives the file handle.
    with open(yaml_path, "rb") as yaml_file:
        extract_config = BytesIO(yaml_file.read())

    # Only forward a token when one is actually configured.
    token = settings.RAW_DATA_API_AUTH_TOKEN or None

    # Per the review discussion, geojson output and bind_zip=True are believed
    # to be the raw data API defaults already; they are set explicitly so the
    # request does not depend on upstream defaults.
    query_params = {
        "outputType": "geojson",
        "bind_zip": True,
        "useStWithin": False,
    }

    client = PostgresClient("underpass", extract_config, auth_token=token)
    return client.execQuery(geometry, extra_params=query_params)


def geometries_almost_equal(
    geom1: BaseGeometry, geom2: BaseGeometry, tolerance: float = 1e-6
) -> bool:
    """Check whether two geometries match within a given tolerance.

    Thin wrapper that delegates to ``geom1.equals_exact(geom2, tolerance)``.

    Args:
        geom1 (BaseGeometry): First geometry.
        geom2 (BaseGeometry): Second geometry.
        tolerance (float): Tolerance passed through to ``equals_exact``.

    Returns:
        bool: True if the geometries are almost equal, otherwise False.
    """
    is_match = geom1.equals_exact(geom2, tolerance)
    return is_match


def check_partial_overlap(geom1: BaseGeometry, geom2: BaseGeometry) -> bool:
    """Report whether two geometries overlap only partially.

    The overlap is partial when the intersection is non-empty and its area is
    strictly greater than zero and strictly smaller than the area of each of
    the two geometries.

    Args:
        geom1 (BaseGeometry): First geometry.
        geom2 (BaseGeometry): Second geometry.

    Returns:
        bool: True if geometries have a partial overlap, else False.
    """
    overlap = geom1.intersection(geom2)

    # No shared region at all: cannot be a partial overlap.
    if overlap.is_empty:
        return False

    overlap_area = overlap.area
    return 0 < overlap_area < geom1.area and 0 < overlap_area < geom2.area


def conflate_features(
    input_features: list,
    osm_features: list,
    remove_conflated: bool = False,
    tolerance: float = 1e-6,
) -> list:
    """Conflate input features with OSM features to identify overlaps.

    Each input feature is compared against the OSM features in order and the
    first match wins:

    - almost-equal geometry: the input feature is flagged ``is_duplicate`` and
      the OSM properties are merged into its properties;
    - partial overlap: the input feature is flagged ``is_partial_overlap`` and
      the matching OSM feature is added to the output.

    Input features are modified in place (flags and merged properties).

    Args:
        input_features (list): A list of input features with geometries.
        osm_features (list): A list of OSM features with geometries.
        remove_conflated (bool): Flag to remove conflated features.
        tolerance (float): Tolerance level for almost equality.

    Returns:
        list: A list of features after conflation with OSM features.
    """
    osm_geometries = [shape(feature["geometry"]) for feature in osm_features]
    return_features = []

    for input_feature in input_features:
        input_geometry = shape(input_feature["geometry"])
        # GeoJSON allows null properties; normalise so flags can be attached.
        if input_feature.get("properties") is None:
            input_feature["properties"] = {}

        is_duplicate = False
        is_partial_overlap = False

        for osm_feature, osm_geometry in zip(
            osm_features, osm_geometries, strict=False
        ):
            osm_properties = osm_feature.get("properties") or {}

            if geometries_almost_equal(input_geometry, osm_geometry, tolerance):
                is_duplicate = True
                input_feature["properties"].update(osm_properties)
                break

            if check_partial_overlap(input_geometry, osm_geometry):
                is_partial_overlap = True
                # Serialise the shapely geometry that was already built:
                # mapping() needs an object exposing __geo_interface__, which
                # a plain GeoJSON dict geometry does not provide.
                return_features.append(
                    {
                        "type": "Feature",
                        "geometry": mapping(osm_geometry),
                        "properties": osm_properties,
                    }
                )
                break

        input_feature["properties"]["is_duplicate"] = is_duplicate
        input_feature["properties"]["is_partial_overlap"] = is_partial_overlap

        # Strict identity check kept on purpose: only a real boolean True
        # drops conflated features, never a merely truthy value.
        if (is_duplicate or is_partial_overlap) and remove_conflated is True:
            continue

        return_features.append(input_feature)

    return return_features
3 changes: 2 additions & 1 deletion src/backend/app/helpers/helper_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,10 @@ async def convert_odk_submission_json_to_geojson_wrapper(

contents = await json_file.read()
submission_geojson = await convert_odk_submission_json_to_geojson(BytesIO(contents))
submission_data = BytesIO(json.dumps(submission_geojson).encode("utf-8"))

headers = {"Content-Disposition": f"attachment; filename={filename.stem}.geojson"}
return Response(submission_geojson.getvalue(), headers=headers)
return Response(submission_data.getvalue(), headers=headers)


@router.get("/view-raw-data-api-token")
Expand Down
48 changes: 47 additions & 1 deletion src/backend/app/submissions/submission_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@
# from osm_fieldwork.json2osm import json2osm
from sqlalchemy.orm import Session

from app.central.central_crud import get_odk_form, get_odk_project, list_odk_xforms
from app.central.central_crud import (
get_odk_form,
get_odk_project,
list_odk_xforms,
)
from app.config import settings
from app.db import db_models
from app.models.enums import HTTPStatus
Expand Down Expand Up @@ -524,3 +528,45 @@ async def get_submission_detail(
odk_form.getSubmissions(project.odkid, db_xform.odk_form_id, submission_id)
)
return submission.get("value", [])[0]


# FIXME: might not be needed
# async def get_submission_geojson(
# project_id: int,
# db: Session,
# ):
# """Retrieve GeoJSON data for a submission associated with a project.

# Args:
# project_id (int): The ID of the project.
# db (Session): The database session.

# Returns:
# FeatCol: A GeoJSON FeatCol containing the submission features.
# """
# data = await get_submission_by_project(project_id, {}, db)
# submission_json = data.get("value", [])

# if not submission_json:
# raise HTTPException(
# status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
# detail="Loading JSON submission failed",
# )

# all_features = []
# for submission in submission_json:
# keys_to_remove = ["meta", "__id", "__system"]
# for key in keys_to_remove:
# submission.pop(key)

# data = {}
# flatten_json(submission, data)

# geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
# data.pop("xlocation", {}), geom_type="Polygon"
# )

# feature = geojson.Feature(geometry=geojson_geom, properties=data)
# all_features.append(feature)

# return geojson.FeatureCollection(features=all_features)
71 changes: 48 additions & 23 deletions src/backend/app/submissions/submission_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@
from app.auth.roles import mapper, project_manager
from app.central import central_crud
from app.db import database, db_models, postgis_utils
from app.models.enums import HTTPStatus, ReviewStateEnum
from app.models.enums import ReviewStateEnum
from app.projects import project_crud, project_deps
from app.submissions import submission_crud, submission_schemas
from app.tasks.task_deps import get_task_by_id

router = APIRouter(
prefix="/submission",
Expand Down Expand Up @@ -558,32 +559,56 @@ async def download_submission_geojson(
data = await submission_crud.get_submission_by_project(project_id, {}, db)
submission_json = data.get("value", [])

if not submission_json:
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Loading JSON submission failed",
)
submission_geojson = await central_crud.convert_odk_submission_json_to_geojson(
submission_json
)
submission_data = BytesIO(json.dumps(submission_geojson).encode("utf-8"))
filename = project.project_name_prefix

all_features = []
for submission in submission_json:
keys_to_remove = ["meta", "__id", "__system"]
for key in keys_to_remove:
submission.pop(key)
headers = {"Content-Disposition": f"attachment; filename={filename}.geojson"}

data = {}
central_crud.flatten_json(submission, data)
return Response(submission_data.getvalue(), headers=headers)

geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
data.pop("xlocation", {}), geom_type="Polygon"
)

feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)
@router.get("/conflate_submission_geojson/")
async def conflate_geojson(
    task_id: int,
    current_user: dict = Depends(mapper),  # FIXME change this validator
    remove_conflated: bool = False,
    db: Session = Depends(database.get_db),
):
    """Conflates the input GeoJSON with OpenStreetMap data.

    Args:
        task_id(int): task index of project.
        current_user(dict): Result of the mapper check; its "project" entry
            is the project whose submissions are conflated.
        remove_conflated(bool): returns geojson which are not overlapped with
            osm data. Annotated as bool so the query-string value is parsed
            into a real boolean rather than passed through as text.
        db (Session): The database session.

    Returns:
        FeatureCollection: Submission GeoJSON whose features have been
            replaced by the conflated feature list.

    Raises:
        HTTPException: Re-raised unchanged when a helper raises one; any
            other failure is reported with status 500.
    """
    try:
        project = current_user["project"]
        db_task = await get_task_by_id(project.id, task_id, db)
        task_aoi = postgis_utils.geometry_to_geojson(db_task.outline)
        task_geojson = geojson.dumps(task_aoi, indent=2)

        data = await submission_crud.get_submission_by_project(project.id, {}, db)
        submission_json = data.get("value", [])

        submission_geojson = await central_crud.convert_odk_submission_json_to_geojson(
            submission_json
        )
        form_category = project.xform_category
        input_features = submission_geojson["features"]
        osm_features = postgis_utils.get_osm_geometries(form_category, task_geojson)
        submission_geojson["features"] = postgis_utils.conflate_features(
            input_features, osm_features.get("features", []), remove_conflated
        )

        return submission_geojson
    except HTTPException:
        # Helpers such as the submission conversion raise HTTPException with
        # their own status code; do not mask it as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to process conflation: {str(e)}"
        ) from e
Loading
Loading