Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Endpoint to conflate the submission with osm data #1594

Merged
merged 6 commits into from
Jul 11, 2024
165 changes: 165 additions & 0 deletions src/backend/app/db/postgis_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@

import json
import logging
import time
import zipfile
from asyncio import gather
from datetime import datetime, timezone
from io import BytesIO
from random import getrandbits
from typing import Optional, Union

Expand All @@ -33,12 +36,17 @@
from geojson_pydantic import Feature, MultiPolygon, Polygon
from geojson_pydantic import FeatureCollection as FeatCol
from shapely.geometry import mapping, shape
from shapely.geometry.base import BaseGeometry
from shapely.ops import unary_union
from shapely.validation import make_valid
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session

from app.config import settings

log = logging.getLogger(__name__)
API_URL = settings.RAW_DATA_API_URL


def timestamp():
Expand Down Expand Up @@ -777,3 +785,160 @@ def parse_featcol(features: Union[Feature, FeatCol, MultiPolygon, Polygon]):
elif isinstance(features, Feature):
feat_col = geojson.FeatureCollection([feat_col])
return feat_col


def request_snapshot(geometry):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function and the poll_task_status function can be replaced with osm_fieldwork.PostgresClient.
There is already functionality in place to get a data extract using provided config πŸ‘
Example:

pg = PostgresClient(

"""Request a snapshot based on the provided geometry.

Args:
geometry (str): The geometry data in JSON format.

Returns:
dict: The JSON response containing the snapshot data.
"""
headers = {"accept": "application/json", "Content-Type": "application/json"}

payload = {
"geometry": json.loads(geometry),
"filters": {"tags": {"all_geometry": {"join_or": {"building": []}}}},
"geometryType": ["polygon"],
}
response = requests.post(
f"{API_URL}/snapshot/", data=json.dumps(payload), headers=headers
)
response.raise_for_status()
return response.json()


def poll_task_status(task_link):
"""Poll the status of a task until it reaches state (SUCCESS or FAILED).

Args:
task_link (str): The link to the task status endpoint.

Returns:
dict: The final status of the task as a JSON response.
"""
stop_loop = False
while not stop_loop:
check_result = requests.get(url=f"{API_URL}{task_link}")
check_result.raise_for_status()
res = check_result.json()
if res["status"] in ["SUCCESS", "FAILED"]:
stop_loop = True
time.sleep(1)
return res


def download_snapshot(download_url):
"""Download a snapshot from the provided URL and extract the GeoJSON.

Args:
download_url (str): The URL to download the snapshot from.

Returns:
dict: The extracted GeoJSON data from the downloaded snapshot.
"""
response = requests.get(download_url)
response.raise_for_status()
with zipfile.ZipFile(BytesIO(response.content), "r") as zip_ref:
with zip_ref.open("Export.geojson") as file:
return json.load(file)


def calculate_bbox(features):
"""Calculate the bounding box of a collection of features(geojson).

Args:
features (list): A list of features with geometries.

Returns:
tuple: the bounding box coordinates (minx, miny, maxx, maxy).
"""
geometries = [make_valid(shape(feature["geometry"])) for feature in features]
union = unary_union(geometries)
bbox = union.bounds
return bbox


def geometries_almost_equal(
geom1: BaseGeometry, geom2: BaseGeometry, tolerance: float = 1e-6
) -> bool:
"""Determine if two geometries are almost equal within a tolerance.

Args:
geom1 (BaseGeometry): First geometry.
geom2 (BaseGeometry): Second geometry.
tolerance (float): Tolerance level for almost equality.

Returns:
bool: True if geometries are almost equal else False.
"""
return geom1.equals_exact(geom2, tolerance)


def check_partial_overlap(geom1: BaseGeometry, geom2: BaseGeometry) -> bool:
"""Determine if two geometries have a partial overlap.

Args:
geom1 (BaseGeometry): First geometry.
geom2 (BaseGeometry): Second geometry.

Returns:
bool: True if geometries have a partial overlap, else False.
"""
intersection = geom1.intersection(geom2)
return not intersection.is_empty and (
0 < intersection.area < geom1.area and 0 < intersection.area < geom2.area
)


def conflate_features(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

input_features: list, osm_features: list, remove_conflated=False, tolerance=1e-6
):
"""Conflate input features with OSM features to identify overlaps.

Args:
input_features (list): A list of input features with geometries.
osm_features (list): A list of OSM features with geometries.
remove_conflated (bool): Flag to remove conflated features.
tolerance (float): Tolerance level for almost equality.

Returns:
list: A list of features after conflation with OSM features.
"""
osm_geometries = [shape(feature["geometry"]) for feature in osm_features]
return_features = []

for input_feature in input_features:
input_geometry = shape(input_feature["geometry"])
is_duplicate = False
is_partial_overlap = False

for osm_feature, osm_geometry in zip(
osm_features, osm_geometries, strict=False
):
if geometries_almost_equal(input_geometry, osm_geometry, tolerance):
is_duplicate = True
input_feature["properties"].update(osm_feature["properties"])
break

if check_partial_overlap(input_geometry, osm_geometry):
is_partial_overlap = True
new_feature = {
"type": "Feature",
"geometry": mapping(osm_feature["geometry"]),
"properties": osm_feature["properties"],
}
return_features.append(new_feature)
break

input_feature["properties"]["is_duplicate"] = is_duplicate
input_feature["properties"]["is_partial_overlap"] = is_partial_overlap

if (is_duplicate or is_partial_overlap) and remove_conflated is True:
continue

return_features.append(input_feature)

return return_features
52 changes: 50 additions & 2 deletions src/backend/app/submissions/submission_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from io import BytesIO
from typing import Optional

import geojson
import sozipfile.sozipfile as zipfile
from asgiref.sync import async_to_sync
from fastapi import HTTPException, Response
Expand All @@ -35,9 +36,14 @@
# from osm_fieldwork.json2osm import json2osm
from sqlalchemy.orm import Session

from app.central.central_crud import get_odk_form, get_odk_project, list_odk_xforms
from app.central.central_crud import (
flatten_json,
get_odk_form,
get_odk_project,
list_odk_xforms,
)
from app.config import settings
from app.db import db_models
from app.db import db_models, postgis_utils
from app.models.enums import HTTPStatus
from app.projects import project_crud, project_deps
from app.s3 import add_obj_to_bucket, get_obj_from_bucket
Expand Down Expand Up @@ -524,3 +530,45 @@ async def get_submission_detail(
odk_form.getSubmissions(project.odkid, db_xform.odk_form_id, submission_id)
)
return submission.get("value", [])[0]


async def get_submission_geojson(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed we already have central_crud.convert_odk_submission_json_to_geojson and submission_routes.download_submission_geojson that all basically do the same thing.

Could we refactor to only have the logic once, then reuse the code?

Copy link
Collaborator Author

@Sujanadh Sujanadh Jul 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if i want geojson only for the conflation, then this function won't work since it returns submission in bytes. like you said, we can refactor the download_submission endpoint to use convert_odk_submission_json_to_geojson. Or we can update this function to return geojson and convert it into bytes wherever necessary instead of returning bytes from this function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The second option sounds best in the spirit of DRY code.

A single function to return a geojson, then encode as bytes where specifically needed.

If absolutely necessary it's also possible to decode bytes back to geojson using .decode("utf--8") πŸ‘

json.load would probably also do that

project_id: int,
db: Session,
):
"""Retrieve GeoJSON data for a submission associated with a project.

Args:
project_id (int): The ID of the project.
db (Session): The database session.

Returns:
FeatCol: A GeoJSON FeatCol containing the submission features.
"""
data = await get_submission_by_project(project_id, {}, db)
submission_json = data.get("value", [])

if not submission_json:
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Loading JSON submission failed",
)

all_features = []
for submission in submission_json:
keys_to_remove = ["meta", "__id", "__system"]
for key in keys_to_remove:
submission.pop(key)

data = {}
flatten_json(submission, data)

geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
data.pop("xlocation", {}), geom_type="Polygon"
)

feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)

featcol = geojson.FeatureCollection(features=all_features)
return featcol
66 changes: 40 additions & 26 deletions src/backend/app/submissions/submission_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
from io import BytesIO
from typing import Optional

import geojson
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse
from shapely.geometry import box
from sqlalchemy.orm import Session

from app.auth.osm import AuthUser, login_required
from app.auth.roles import mapper, project_admin
from app.central import central_crud
from app.db import database, db_models, postgis_utils
from app.models.enums import HTTPStatus, ReviewStateEnum
from app.models.enums import ReviewStateEnum
from app.projects import project_crud, project_deps
from app.submissions import submission_crud, submission_schemas

Expand Down Expand Up @@ -554,35 +554,49 @@ async def download_submission_geojson(
HTTPException: If loading JSON submission fails.
"""
project = await project_deps.get_project_by_id(db, project_id)
data = await submission_crud.get_submission_by_project(project_id, {}, db)
submission_json = data.get("value", [])
submissions = await submission_crud.get_submission_geojson(project_id, db)
submission_geojson = BytesIO(json.dumps(submissions).encode("utf-8"))
filename = project.project_name_prefix

if not submission_json:
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Loading JSON submission failed",
)
headers = {"Content-Disposition": f"attachment; filename={filename}.geojson"}

all_features = []
for submission in submission_json:
keys_to_remove = ["meta", "__id", "__system"]
for key in keys_to_remove:
submission.pop(key)
return Response(submission_geojson.getvalue(), headers=headers)

data = {}
central_crud.flatten_json(submission, data)

geojson_geom = await postgis_utils.javarosa_to_geojson_geom(
data.pop("xlocation", {}), geom_type="Polygon"
)
@router.get("/conflate_submission_geojson/{project_id}")
async def conflate_geojson(
project_id: int, remove_conflated=False, db: Session = Depends(database.get_db)
):
"""Conflates the input GeoJSON with OpenStreetMap data.

feature = geojson.Feature(geometry=geojson_geom, properties=data)
all_features.append(feature)
Args:
project_id(int): id of project in FMTM.
remove_conflated(bool): returns geojson which are not overlapped with osm data.
db (Session): The database session.

featcol = geojson.FeatureCollection(features=all_features)
submission_geojson = BytesIO(json.dumps(featcol).encode("utf-8"))
filename = project.project_name_prefix
Returns:
str: Updated GeoJSON string with conflated features.
"""
geojsonn = await submission_crud.get_submission_geojson(project_id, db)

headers = {"Content-Disposition": f"attachment; filename={filename}.geojson"}
input_features = geojsonn["features"]
input_bbox = postgis_utils.calculate_bbox(input_features)

return Response(submission_geojson.getvalue(), headers=headers)
bbox_geometry = box(*input_bbox)
bbox_geojson_str = json.dumps(bbox_geometry.__geo_interface__)

snapshot_data = postgis_utils.request_snapshot(bbox_geojson_str)
task_link = snapshot_data["track_link"]
task_result = postgis_utils.poll_task_status(task_link)
if task_result["status"] != "SUCCESS":
raise RuntimeError(
"Raw Data API did not respond correctly. Please try again later."
)

snapshot_url = task_result["result"]["download_url"]
osm_features = postgis_utils.download_snapshot(snapshot_url)
geojsonn["features"] = postgis_utils.conflate_features(
input_features, osm_features["features"], remove_conflated
)

return geojsonn
Loading