diff --git a/README.md b/README.md index 4006591..d111626 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Geopackages are a data format that have a deliberately broad application, so many of the requirements are dependend on your use. -The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is part of the Dutch government. This geopackage validator is used to validate a [set of requirements](#what-does-it-do) to make sure geopackages adhere to our standardized ETL pipeline. It is possible to use this for your own purposes as described [here](https://github.com/PDOK/geopackage-validator/issues/115#issuecomment-1529488733). The validations will not change (except for bugfixes); **new validations are always added to the list**. In case you are looking for a more generic validator. These do exist and can be found: +The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is part of the Dutch government. This geopackage validator is used to validate a [set of requirements](#what-does-it-do) to make sure geopackages adhere to our standardized ETL pipeline. It is possible to use this for your own purposes as described [here](https://github.com/PDOK/geopackage-validator/issues/115#issuecomment-1529488733). The validations will not change (except for bugfixes); **new validations are always added to the list**. In case you are looking for a more generic validator. These do exist and can be found: - [teamengine](https://cite.opengeospatial.org/teamengine) (official OGC, Java) - [teamengine Github](https://github.com/opengeospatial/teamengine) @@ -13,18 +13,18 @@ The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). 
PDOK is p ## Table of Contents -- [geopackage-validator](#geopackage-validator) +- [geopackage-validator](#pdok-geopackage-validator) - [Table of Contents](#table-of-contents) - [What does it do](#what-does-it-do) - [Geopackage versions](#geopackage-versions) - [Installation](#installation) - [Docker](#docker-installation) - [Usage](#usage) - - [RQ8 Validation](#local-rq8-validation) - - [Show validations](#local-show-validations) - - [Generate table definitions](#local-generate-table-definitions) + - [RQ8 Validation](#rq8-validation) + - [Show validations](#show-validations) + - [Generate table definitions](#generate-table-definitions) - [Local development](#local-development) - - [Usage](#usage-1) + - [Docker run](#docker-run) - [Python console](#python-console) - [Code style](#code-style) - [Tests](#tests) @@ -32,14 +32,15 @@ The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is p ## TL;DR Commands -Either run through [docker](#docker) or [locally](#local). +Either run through [docker](#docker) or [locally](#local). 
### Docker + Validate a GeoPackage with the default set of validation rules: ```sh gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" ``` Validate a GeoPackage with the default set of validation rules including a schema: @@ -47,15 +48,15 @@ Validate a GeoPackage with the default set of validation rules including a schem ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" ``` -Generate a schema: +Generate a schema: ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" ``` ### Local @@ -64,7 +65,7 @@ For a local setup we require/tested against python > 3.6 and gdal = 3.4. 
```sh gpkg_path=relative/path/to/the.gpkg -geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" +geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" ``` Validate a GeoPackage with the default set of validation rules including a schema: @@ -72,15 +73,15 @@ Validate a GeoPackage with the default set of validation rules including a schem ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" +geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" ``` -Generate a schema: +Generate a schema: ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" +geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" ``` ## What does it do @@ -111,20 +112,22 @@ The current checks are (see also the 'show-validations' command): | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | | RQ23 | Geometry should be valid and simple. | +| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | -| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. 
| +| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. | | UNKNOWN_WARNINGS | It is recommended that the unexpected (GDAL) warnings are looked into. | \* Legacy requirements are only executed with the validate command when explicitly requested in the validation set. -\** Since version 0.8.0 the recommendations are part of the same sequence as the requirements. From now on a check will always maintain the integer part of the code. Even if at a later time the validation type can shift between requirement and recommendation. +\** Since version 0.8.0 the recommendations are part of the same sequence as the requirements. From now on a check will always maintain the integer part of the code. Even if at a later time the validation type can shift between requirement and recommendation. An explanation in Dutch with a reason for each rule can be found [here](https://www.pdok.nl/voor-data-aanbieders#:~:text=Regels%20in%20detail). ## Geopackage versions The Geopackage validator support the following Geopackage versions: + - 1.4 - 1.3.1 - 1.3 @@ -133,11 +136,12 @@ The Geopackage validator support the following Geopackage versions: ## Installation This package requires: + - [GDAL](https://gdal.org/) version >= 3.2.1. - [Spatialite](https://www.gaia-gis.it/fossil/libspatialite/index) version >= 5.0.0 - And python >= 3.8 to run. -We recommend using the docker image. When above requirements are met the package can be installed using pip (`pip install pdok-geopackage-validator`). +We recommend using the docker image. When above requirements are met the package can be installed using pip (`pip install pdok-geopackage-validator`). ### Docker Installation @@ -167,7 +171,7 @@ To validate RQ8 you have to generate definitions first. 
```bash docker run -v ${PWD}:/gpkg --rm pdok/geopackage-validator geopackage-validator generate-definitions --gpkg-path /path/to/file.gpkg -```` +``` ### Validate @@ -402,14 +406,14 @@ Options: ## Local development -We advise using docker-compose for local development. This allows live editing and testing code with the correct gdal/ogr version with spatialite 5.0.0. -First build the local image with your machines user id and group id: +We advise using docker-compose for local development. This allows live editing and testing code with the correct gdal/ogr version with spatialite 5.0.0. +First build the local image with your machines user id and group id: ```bash docker-compose build --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` ``` -### Usage +### Docker run There will be a script you can run like this: @@ -422,7 +426,7 @@ to point the docker-compose to other files, you can add or edit the volumes in t ### Python console -Ipython is available in the docker: +Ipython is available in the docker: ```bash docker-compose run --rm validator ipython @@ -435,7 +439,7 @@ work on it, run the following command periodically: ```bash docker-compose run --rm validator black . 
# Counts the rows of one table whose geometry column is empty according to
# ST_IsEmpty. The outer aggregate has no GROUP BY, so the query produces a
# single summary row per table; callers filter on ``count > 0`` to drop the
# "nothing found" row. ``row_id`` is one example rowid, not a complete list.
SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id
FROM(
    SELECT
        cast(rowid AS INTEGER) AS row_id
    FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1
);"""


def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]:
    """Run *sql_template* against every geometry table of *dataset*.

    Args:
        dataset: Opened dataset object; it must provide ``ExecuteSQL`` and
            ``ReleaseResultSet``.
        sql_template: SQL text containing ``{table_name}`` and
            ``{column_name}`` placeholders (for example ``SQL_EMPTY_TEMPLATE``).

    Yields:
        ``(table_name, column_name, count, row_id)`` for each row the query
        returns for that table/column pair.
    """
    columns = utils.dataset_geometry_tables(dataset)

    for table_name, column_name, _ in columns:
        validations = dataset.ExecuteSQL(
            sql_template.format(table_name=table_name, column_name=column_name)
        )
        try:
            for count, row_id in validations:
                yield table_name, column_name, count, row_id
        finally:
            # This is a generator: the consumer may stop early (which raises
            # GeneratorExit at the yield) or a row may fail to unpack. Release
            # the result set on every exit path so it is never leaked.
            if validations is not None:
                dataset.ReleaseResultSet(validations)


class EmptyGeometryValidator(validator.Validator):
    """Geometries should not be empty."""

    # NOTE(review): the class docstring above is kept verbatim; it may be
    # surfaced to users as the rule description - confirm before rewording.

    # Numeric part of the rule identifier (listed as RQ24 in the README table).
    code = 24
    level = validator.ValidationLevel.ERROR
    message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}"

    def check(self) -> Iterable[str]:
        """Return one formatted message per table that has empty geometries.

        Tables whose summary row reports a count of zero are left out, so an
        empty list means no empty geometries were found.
        """
        result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE)

        return [
            self.message.format(
                table_name=table_name,
                column_name=column_name,
                count=count,
                # Singular/plural label so the message reads "1 time" / "45 times".
                count_label=("time" if count == 1 else "times"),
                row_id=row_id,
            )
            for table_name, column_name, count, row_id in result
            if count > 0
        ]
def test_with_gpkg_empty():
    """The validity query must report nothing for the empty-geometry fixture."""
    # An empty geometry is still considered valid, so no rows are expected.
    gpkg = open_dataset("tests/data/test_geometry_empty.gpkg")
    invalid_rows = [row for row in query_geometry_valid(gpkg, SQL_VALID_TEMPLATE)]
    assert not invalid_rows