-
-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Endpoint to conflate the submission with osm data #1594
Changes from 1 commit
479c4b6
29a342c
ffc9a32
4c7c55e
c4b73f5
1aceae4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,8 +19,11 @@ | |
|
||
import json | ||
import logging | ||
import time | ||
import zipfile | ||
from asyncio import gather | ||
from datetime import datetime, timezone | ||
from io import BytesIO | ||
from random import getrandbits | ||
from typing import Optional, Union | ||
|
||
|
@@ -33,12 +36,17 @@ | |
from geojson_pydantic import Feature, MultiPolygon, Polygon | ||
from geojson_pydantic import FeatureCollection as FeatCol | ||
from shapely.geometry import mapping, shape | ||
from shapely.geometry.base import BaseGeometry | ||
from shapely.ops import unary_union | ||
from shapely.validation import make_valid | ||
from sqlalchemy import text | ||
from sqlalchemy.exc import ProgrammingError | ||
from sqlalchemy.orm import Session | ||
|
||
from app.config import settings | ||
|
||
log = logging.getLogger(__name__) | ||
API_URL = settings.RAW_DATA_API_URL | ||
|
||
|
||
def timestamp(): | ||
|
@@ -777,3 +785,160 @@ def parse_featcol(features: Union[Feature, FeatCol, MultiPolygon, Polygon]): | |
elif isinstance(features, Feature): | ||
feat_col = geojson.FeatureCollection([feat_col]) | ||
return feat_col | ||
|
||
|
||
def request_snapshot(geometry): | ||
"""Request a snapshot based on the provided geometry. | ||
|
||
Args: | ||
geometry (str): The geometry data in JSON format. | ||
|
||
Returns: | ||
dict: The JSON response containing the snapshot data. | ||
""" | ||
headers = {"accept": "application/json", "Content-Type": "application/json"} | ||
|
||
payload = { | ||
"geometry": json.loads(geometry), | ||
"filters": {"tags": {"all_geometry": {"join_or": {"building": []}}}}, | ||
"geometryType": ["polygon"], | ||
} | ||
response = requests.post( | ||
f"{API_URL}/snapshot/", data=json.dumps(payload), headers=headers | ||
) | ||
response.raise_for_status() | ||
return response.json() | ||
|
||
|
||
def poll_task_status(task_link): | ||
"""Poll the status of a task until it reaches state (SUCCESS or FAILED). | ||
|
||
Args: | ||
task_link (str): The link to the task status endpoint. | ||
|
||
Returns: | ||
dict: The final status of the task as a JSON response. | ||
""" | ||
stop_loop = False | ||
while not stop_loop: | ||
check_result = requests.get(url=f"{API_URL}{task_link}") | ||
check_result.raise_for_status() | ||
res = check_result.json() | ||
if res["status"] in ["SUCCESS", "FAILED"]: | ||
stop_loop = True | ||
time.sleep(1) | ||
return res | ||
|
||
|
||
def download_snapshot(download_url): | ||
"""Download a snapshot from the provided URL and extract the GeoJSON. | ||
|
||
Args: | ||
download_url (str): The URL to download the snapshot from. | ||
|
||
Returns: | ||
dict: The extracted GeoJSON data from the downloaded snapshot. | ||
""" | ||
response = requests.get(download_url) | ||
response.raise_for_status() | ||
with zipfile.ZipFile(BytesIO(response.content), "r") as zip_ref: | ||
with zip_ref.open("Export.geojson") as file: | ||
return json.load(file) | ||
|
||
|
||
def calculate_bbox(features): | ||
"""Calculate the bounding box of a collection of features(geojson). | ||
|
||
Args: | ||
features (list): A list of features with geometries. | ||
|
||
Returns: | ||
tuple: the bounding box coordinates (minx, miny, maxx, maxy). | ||
""" | ||
geometries = [make_valid(shape(feature["geometry"])) for feature in features] | ||
union = unary_union(geometries) | ||
bbox = union.bounds | ||
return bbox | ||
|
||
|
||
def geometries_almost_equal( | ||
geom1: BaseGeometry, geom2: BaseGeometry, tolerance: float = 1e-6 | ||
) -> bool: | ||
"""Determine if two geometries are almost equal within a tolerance. | ||
|
||
Args: | ||
geom1 (BaseGeometry): First geometry. | ||
geom2 (BaseGeometry): Second geometry. | ||
tolerance (float): Tolerance level for almost equality. | ||
|
||
Returns: | ||
bool: True if geometries are almost equal else False. | ||
""" | ||
return geom1.equals_exact(geom2, tolerance) | ||
|
||
|
||
def check_partial_overlap(geom1: BaseGeometry, geom2: BaseGeometry) -> bool: | ||
"""Determine if two geometries have a partial overlap. | ||
|
||
Args: | ||
geom1 (BaseGeometry): First geometry. | ||
geom2 (BaseGeometry): Second geometry. | ||
|
||
Returns: | ||
bool: True if geometries have a partial overlap, else False. | ||
""" | ||
intersection = geom1.intersection(geom2) | ||
return not intersection.is_empty and ( | ||
0 < intersection.area < geom1.area and 0 < intersection.area < geom2.area | ||
) | ||
|
||
|
||
def conflate_features( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
input_features: list, osm_features: list, remove_conflated=False, tolerance=1e-6 | ||
): | ||
"""Conflate input features with OSM features to identify overlaps. | ||
|
||
Args: | ||
input_features (list): A list of input features with geometries. | ||
osm_features (list): A list of OSM features with geometries. | ||
remove_conflated (bool): Flag to remove conflated features. | ||
tolerance (float): Tolerance level for almost equality. | ||
|
||
Returns: | ||
list: A list of features after conflation with OSM features. | ||
""" | ||
osm_geometries = [shape(feature["geometry"]) for feature in osm_features] | ||
return_features = [] | ||
|
||
for input_feature in input_features: | ||
input_geometry = shape(input_feature["geometry"]) | ||
is_duplicate = False | ||
is_partial_overlap = False | ||
|
||
for osm_feature, osm_geometry in zip( | ||
osm_features, osm_geometries, strict=False | ||
): | ||
if geometries_almost_equal(input_geometry, osm_geometry, tolerance): | ||
is_duplicate = True | ||
input_feature["properties"].update(osm_feature["properties"]) | ||
break | ||
|
||
if check_partial_overlap(input_geometry, osm_geometry): | ||
is_partial_overlap = True | ||
new_feature = { | ||
"type": "Feature", | ||
"geometry": mapping(osm_feature["geometry"]), | ||
"properties": osm_feature["properties"], | ||
} | ||
return_features.append(new_feature) | ||
break | ||
|
||
input_feature["properties"]["is_duplicate"] = is_duplicate | ||
input_feature["properties"]["is_partial_overlap"] = is_partial_overlap | ||
|
||
if (is_duplicate or is_partial_overlap) and remove_conflated is True: | ||
continue | ||
|
||
return_features.append(input_feature) | ||
|
||
return return_features |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
from io import BytesIO | ||
from typing import Optional | ||
|
||
import geojson | ||
import sozipfile.sozipfile as zipfile | ||
from asgiref.sync import async_to_sync | ||
from fastapi import HTTPException, Response | ||
|
@@ -35,9 +36,14 @@ | |
# from osm_fieldwork.json2osm import json2osm | ||
from sqlalchemy.orm import Session | ||
|
||
from app.central.central_crud import get_odk_form, get_odk_project, list_odk_xforms | ||
from app.central.central_crud import ( | ||
flatten_json, | ||
get_odk_form, | ||
get_odk_project, | ||
list_odk_xforms, | ||
) | ||
from app.config import settings | ||
from app.db import db_models | ||
from app.db import db_models, postgis_utils | ||
from app.models.enums import HTTPStatus | ||
from app.projects import project_crud, project_deps | ||
from app.s3 import add_obj_to_bucket, get_obj_from_bucket | ||
|
@@ -524,3 +530,45 @@ async def get_submission_detail( | |
odk_form.getSubmissions(project.odkid, db_xform.odk_form_id, submission_id) | ||
) | ||
return submission.get("value", [])[0] | ||
|
||
|
||
async def get_submission_geojson( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed we already have central_crud.convert_odk_submission_json_to_geojson and submission_routes.download_submission_geojson that all basically do the same thing. Could we refactor to only have the logic once, then reuse the code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if i want geojson only for the conflation, then this function won't work since it returns submission in bytes. like you said, we can refactor the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The second option sounds best in the spirit of DRY code. A single function to return a geojson, then encode as bytes where specifically needed. If absolutely necessary it's also possible to decode bytes back to geojson using json.load would probably also do that |
||
project_id: int, | ||
db: Session, | ||
): | ||
"""Retrieve GeoJSON data for a submission associated with a project. | ||
|
||
Args: | ||
project_id (int): The ID of the project. | ||
db (Session): The database session. | ||
|
||
Returns: | ||
FeatCol: A GeoJSON FeatCol containing the submission features. | ||
""" | ||
data = await get_submission_by_project(project_id, {}, db) | ||
submission_json = data.get("value", []) | ||
|
||
if not submission_json: | ||
raise HTTPException( | ||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY, | ||
detail="Loading JSON submission failed", | ||
) | ||
|
||
all_features = [] | ||
for submission in submission_json: | ||
keys_to_remove = ["meta", "__id", "__system"] | ||
for key in keys_to_remove: | ||
submission.pop(key) | ||
|
||
data = {} | ||
flatten_json(submission, data) | ||
|
||
geojson_geom = await postgis_utils.javarosa_to_geojson_geom( | ||
data.pop("xlocation", {}), geom_type="Polygon" | ||
) | ||
|
||
feature = geojson.Feature(geometry=geojson_geom, properties=data) | ||
all_features.append(feature) | ||
|
||
featcol = geojson.FeatureCollection(features=all_features) | ||
return featcol |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function and the
poll_task_status
function can be replaced with osm_fieldwork.PostgresClient.There is already functionality in place to get a data extract using provided config π
Example:
fmtm/src/backend/app/projects/project_crud.py
Line 449 in 319f65e