diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..aec749e --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,17 @@ +## Description + +A short description of the changes in this PR. + +## Jira Issue ID + +[DAS-XXXX](https://bugs.earthdata.nasa.gov/browse/DAS-XXXX) + +## Local Test Steps + + +## PR Acceptance Checklist +* [ ] Jira ticket acceptance criteria met. +* [ ] `CHANGELOG.md` updated to include high level summary of PR changes. +* [ ] `docker/service_version.txt` updated if publishing a release. +* [ ] Tests added/updated and passing. +* [ ] Documentation updated (if needed). diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 0000000..9e973f7 --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,31 @@ +# This is a GitHub action to run mypy on the repository +name: Run mypy + +on: + workflow_call + +jobs: + mypy: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.12'] + + steps: + - name: Check out smap-l2-gridder code + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install mypy + pip install -r pip_requirements.txt -r pip_dev_requirements.txt + + - name: Run mypy + run: mypy . diff --git a/.github/workflows/run_tests_on_pull_requests.yml b/.github/workflows/run_tests_on_pull_requests.yml new file mode 100644 index 0000000..e57c98a --- /dev/null +++ b/.github/workflows/run_tests_on_pull_requests.yml @@ -0,0 +1,15 @@ +# This workflow will run when a PR is opened against the `main` branch. +# +# It will trigger the reusable workflows: +# `.github/workflows/mypy.yml` - which runs mypy on the repository. +# +name: Tests for PRs to main + +on: + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + mypy: + uses: ./.github/workflows/mypy.yml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eeb8a6e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +**/__pycache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0d5379f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +ci: + autofix_prs: false +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-json + - id: check-yaml + - id: check-added-large-files + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.7.1 + hooks: + - id: ruff + args: ["--fix", "--show-fixes", "--extend-select", "I"] + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.10.0 + hooks: + - id: black-jupyter + args: ["--skip-string-normalization"] + language_version: python3.12 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..472976c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- Initial codebase that transforms SPL2SMP_E granules into NetCDF4-CF grids. [#1](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/1) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..50186e0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,60 @@ +Copyright © 2022 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +--- + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + You must give any other recipients of the Work or Derivative Works a copy of this License; and + You must cause any modified files to carry prominent notices stating that You changed the files; and + You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md index c37efe9..4fb3d73 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,37 @@ # smap-l2-gridder This is a python service to transform NASA level 2 Grid trajectory data into gridded NetCDF4-CF output files. + + +## Transform Data + +To run the regridder you can create an isolated python 3.12 environment installing packages from the `pip_requirements.txt` file. + +From the commandline run: + +```python +python -m smap_l2_gridder --input path/to/granule.h5 --output path/to/output_granule.nc +``` + + +## pre-commit hooks: + +This repository uses [pre-commit](https://pre-commit.com/) to enable pre-commit +checks that enforce coding standard best practices. These include: + +* Removing trailing whitespaces. +* Removing blank lines at the end of a file. +* Ensure JSON files have valid formats. +* [ruff](https://github.com/astral-sh/ruff) Python linting checks. +* [black](https://black.readthedocs.io/en/stable/index.html) Python code + formatting checks. + +To enable these checks: + +```bash +# Install pre-commit Python package: +pip install pre-commit + +# Install the git hook scripts: +pre-commit install +``` diff --git a/pip_dev_requirements.txt b/pip_dev_requirements.txt new file mode 100644 index 0000000..8b1a6ab --- /dev/null +++ b/pip_dev_requirements.txt @@ -0,0 +1,3 @@ +pytest==8.3.3 +pytest-mock==3.14.0 +mypy==1.13.0 diff --git a/pip_requirements.txt b/pip_requirements.txt new file mode 100644 index 0000000..41f3ccc --- /dev/null +++ b/pip_requirements.txt @@ -0,0 +1,3 @@ +netcdf4==1.7.2 +xarray==2024.10.0 +pyproj==3.7.0 diff --git a/smap_l2_gridder/__init__.py b/smap_l2_gridder/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/smap_l2_gridder/__main__.py b/smap_l2_gridder/__main__.py new file mode 100644 index 0000000..d838a58 --- /dev/null +++ b/smap_l2_gridder/__main__.py @@ -0,0 +1,32 @@ +"""Module to allow running from a commandline.""" + +import argparse + +from xarray import open_datatree + +from smap_l2_gridder.grid import process_input + + +def parse_args(): + """Get input and output filenames from args.""" + parser = argparse.ArgumentParser(description='PoC script to process SPL2SMP_E data') + parser.add_argument('--input', help='Input h5 file path') + parser.add_argument('--output', help='Output NetCDF file path') + return parser.parse_args() + + +def main(): + """Entrypoint for local running and testing.""" + try: + args = parse_args() + with open_datatree(args.input, decode_times=False) as in_data: + process_input(in_data, args.output) + except Exception as e: + print(f"Error occurred: {e}") + raise e + print(f'successfully processed {args.input} into {args.output}') + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/smap_l2_gridder/crs.py b/smap_l2_gridder/crs.py new file mode 100644 index 0000000..6b7ff76 --- /dev/null +++ b/smap_l2_gridder/crs.py @@ -0,0 +1,259 @@ +"""Info particular to geolocating the input data to a target grid.""" + +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import xarray as xr +from pyproj.crs import CRS +from xarray import DataArray + +from smap_l2_gridder.exceptions import InvalidGPDError + + +@dataclass +class Geotransform: + """Class for holding a GDAL-style 6-element geotransform.""" + + top_left_x: np.float64 + pixel_width: np.float64 + row_rotation: np.float64 + top_left_y: np.float64 + column_rotation: np.float64 + pixel_height: np.float64 + + def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: + """Convert grid cell location to x,y coordinate.""" + # Geotransform is defined from upper left corner as (0,0), so adjust + # input value to the center of grid at (.5, .5) + adj_col = col + 0.5 + adj_row = row + 0.5 + + x = self.top_left_x + adj_col * self.pixel_width + adj_row * self.row_rotation + y = ( + self.top_left_y + + adj_col * self.column_rotation + + adj_row * self.pixel_height + ) + return x, y + + +# The authoritative value of well known text strings is from epsg.org + +# The pyproj CRS created from this WKT string is the same as a CRS that has been +# round tripped through the CRS creation process. But the output value on the +# files CRS metadata may not match the authoritative value because of the +# different varieties of WKT. That said, the CRS created by pyproj is the same. +# i.e. +# pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt() != epsg_6933_wkt +# but +# pyproj.crs.CRS.from_wkt(pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt()) +# == pyproj.crs.CRS.from_wkt(epsg_6933_wkt) + +# NSIDC EASE-Grid 2.0 Global CRS definition +# from: https://epsg.org/crs/wkt/id/6933 +epsg_6933_wkt = ( + 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 Global",' + 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' + 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' + 'MEMBER["World Geodetic System 1984 (G730)", ID["EPSG",1152]], ' + 'MEMBER["World Geodetic System 1984 (G873)", ID["EPSG",1153]], ' + 'MEMBER["World Geodetic System 1984 (G1150)", ID["EPSG",1154]], ' + 'MEMBER["World Geodetic System 1984 (G1674)", ID["EPSG",1155]], ' + 'MEMBER["World Geodetic System 1984 (G1762)", ID["EPSG",1156]], ' + 'MEMBER["World Geodetic System 1984 (G2139)", ID["EPSG",1309]], ' + 'MEMBER["World Geodetic System 1984 (G2296)", ID["EPSG",1383]], ' + 'ELLIPSOID["WGS 84",6378137,298.257223563,' + 'LENGTHUNIT["metre",1,ID["EPSG",9001]],ID["EPSG",7030]], ' + 'ENSEMBLEACCURACY[2],ID["EPSG",6326]],ID["EPSG",4326]],' + 'CONVERSION["US NSIDC EASE-Grid 2.0 Global",' + 'METHOD["Lambert Cylindrical Equal Area",ID["EPSG",9835]],' + 'PARAMETER["Latitude of 1st standard parallel",30,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]],' + 'ID["EPSG",8823]],PARAMETER["Longitude of natural origin",0,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]],' + 'ID["EPSG",8802]],PARAMETER["False easting",0,' + 'LENGTHUNIT["metre",1,ID["EPSG",9001]],ID["EPSG",8806]],' + 'PARAMETER["False northing",0,LENGTHUNIT["metre",1,ID["EPSG",9001]],' + 'ID["EPSG",8807]],ID["EPSG",6928]],CS[Cartesian,2,ID["EPSG",4499]],' + 'AXIS["Easting (X)",east],AXIS["Northing (Y)",north],' + 'LENGTHUNIT["metre",1,ID["EPSG",9001]],ID["EPSG",6933]]' +) + +# NSIDC EASE-Grid 2.0 North CRS definition +# from: https://epsg.org/crs/wkt/id/6931 +epsg_6931_wkt = ( + 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 North",' + 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' + 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' + 'MEMBER["World Geodetic System 1984 (G730)", ID["EPSG",1152]], ' + 'MEMBER["World Geodetic System 1984 (G873)", ID["EPSG",1153]], ' + 'MEMBER["World Geodetic System 1984 (G1150)", ID["EPSG",1154]], ' + 'MEMBER["World Geodetic System 1984 (G1674)", ID["EPSG",1155]], ' + 'MEMBER["World Geodetic System 1984 (G1762)", ID["EPSG",1156]], ' + 'MEMBER["World Geodetic System 1984 (G2139)", ID["EPSG",1309]], ' + 'MEMBER["World Geodetic System 1984 (G2296)", ID["EPSG",1383]], ' + 'ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1,' + 'ID["EPSG",9001]],ID["EPSG",7030]], ENSEMBLEACCURACY[2],' + 'ID["EPSG",6326]],ID["EPSG",4326]],' + 'CONVERSION["US NSIDC EASE-Grid 2.0 North",' + 'METHOD["Lambert Azimuthal Equal Area",' + 'ID["EPSG",9820]],PARAMETER["Latitude of natural origin",90,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]],ID["EPSG",8801]],' + 'PARAMETER["Longitude of natural origin",0,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]],ID["EPSG",8802]],' + 'PARAMETER["False easting",0,LENGTHUNIT["metre",1,ID["EPSG",9001]],' + 'ID["EPSG",8806]],PARAMETER["False northing",0,LENGTHUNIT["metre",1,' + 'ID["EPSG",9001]],ID["EPSG",8807]],ID["EPSG",6929]],CS[Cartesian,2,' + 'ID["EPSG",4469]],AXIS["Easting (X)",South,MERIDIAN[90.0,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]]]],' + 'AXIS["Northing (Y)",South,MERIDIAN[180.0,' + 'ANGLEUNIT["degree",0.0174532925199433,ID["EPSG",9102]]]],' + 'LENGTHUNIT["metre",1,ID["EPSG",9001]],ID["EPSG",6931]]' +) + +GPD_TO_WKT = { + "EASE2_N09km.gpd": epsg_6931_wkt, + "EASE2_M09km.gpd": epsg_6933_wkt, +} + + +def geotransform_from_target_info(target_info: dict) -> Geotransform: + """Return a geotransform from the grid_info dict. + + grid_info contains a parsed NSIDC gpd file. + + GT(0) x-coordinate of the upper-left corner of the upper-left pixel. + GT(1) w-e pixel resolution / pixel width. + GT(2) row rotation (typically zero). + GT(3) y-coordinate of the upper-left corner of the upper-left pixel. + GT(4) column rotation (typically zero). + GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). + + """ + validate_gpd_style(target_info) + return Geotransform( + target_info['Map Origin X'], + target_info['Grid Map Units per Cell'], + np.float64(0.0), + target_info['Map Origin Y'], + np.float64(0.0), + -1.0 * target_info['Grid Map Units per Cell'], + ) + + +def validate_gpd_style(target_info: dict) -> None: + """Raise error if the gpd is non-standard.""" + if (target_info['Grid Map Origin Column'] != -0.5) or ( + target_info['Grid Map Origin Row'] != -0.5 + ): + raise InvalidGPDError('Can not use non standard gpd.') + + +def convert_value(value: str) -> str | np.float64 | np.long: + """Convert gpd value to appropriate type.""" + value = value.strip() + try: + if '.' in value: + return np.float64(value) + else: + return np.long(value) + except ValueError: + return value + + +def parse_gpd_file(gpd_name: str) -> dict: + """Parse a grid parameter definition file and return a dictionary of parameters. + + Parameters: + ----------- + filename : str + Full path to a grid parameter definition file + or + Name of a gpd file in the references directory. + + Returns: + -------- + dict + Dictionary containing parameter names as keys and their values as values. + Decimal numbers are converted to np.float64 + Integer-like numbers are converted to np.long + Non-numeric values remain as strings + + """ + gpd_info = {} + + if Path(gpd_name).exists(): + filename = gpd_name + else: + filename = f'{Path(__file__).parent}/reference/{gpd_name}' + + with open(filename, encoding='utf-8') as f: + for line in f: + + line = line.strip() + if not line or line.startswith(';'): + continue + + if ':' in line: + key, value = line.split(':', 1) + key = key.strip() + # Remove comments and strip whitespace + value = value.split(';')[0].strip() + + if key and value: + gpd_info[key] = convert_value(value) + + return gpd_info + + +def create_crs(target_info: dict) -> DataArray: + """Return the correct CRS for the target grid.""" + crs = CRS(target_info['wkt']) + the_crs = xr.DataArray(data=b'', attrs={**crs.to_cf(), 'proj': crs.to_proj4()}) + return the_crs + + +def compute_dims(target_info: dict) -> tuple[DataArray, DataArray]: + """Compute the coordinate dimension. + + Parameters: + ---------- + target_info : dict + target grid information dictionary. + + """ + n_cols = target_info["Grid Width"] + n_rows = target_info["Grid Height"] + geotransform = geotransform_from_target_info(target_info) + + # compute the x,y locations along a column and row + column_dimensions = [geotransform.col_row_to_xy(i, 0) for i in range(n_cols)] + row_dimensions = [geotransform.col_row_to_xy(0, i) for i in range(n_rows)] + # pull out dimension values + x_values = np.array([x for x, y in column_dimensions], dtype=np.float64) + y_values = np.array([y for x, y in row_dimensions], dtype=np.float64) + + x_dim = xr.DataArray( + data=x_values, + dims=['x-dim'], + coords={'x-dim': x_values}, + attrs={ + 'standard_name': 'projection_x_coordinate', + 'long_name': 'x coordinate of projection', + 'units': 'm', + }, + ) + y_dim = xr.DataArray( + data=y_values, + dims=['y-dim'], + coords=({'y-dim': y_values}), + attrs={ + 'standard_name': 'projection_y_coordinate', + 'long_name': 'y coordinate of projection', + 'units': 'm', + }, + ) + x_dim.encoding = {'_FillValue': None} + y_dim.encoding = {'_FillValue': None} + return (x_dim, y_dim) diff --git a/smap_l2_gridder/exceptions.py b/smap_l2_gridder/exceptions.py new file mode 100644 index 0000000..dd5b7db --- /dev/null +++ b/smap_l2_gridder/exceptions.py @@ -0,0 +1,13 @@ +"""Module defining custom exceptions.""" + + +class SMAPL2GridderError(Exception): + """Base error class for exceptions raised by smap_l2_gridder library.""" + + def __init__(self, message=None): + """All smap-l2-gridder errors have a message field.""" + self.message = message + + +class InvalidGPDError(SMAPL2GridderError): + """Raised if an invalid GPD is used.""" diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py new file mode 100644 index 0000000..4540a3d --- /dev/null +++ b/smap_l2_gridder/grid.py @@ -0,0 +1,192 @@ +"""Grid L2G data into encoded EASE Grid output. + +L2G data represents a gridded swath in an EASE projected grid. These are the +routines to translate the 1D intput arrays into the EASE grid output format +""" + +from typing import Iterable + +import numpy as np +from xarray import DataArray, DataTree + +from .crs import compute_dims, create_crs, epsg_6931_wkt, epsg_6933_wkt, parse_gpd_file + + +def process_input(in_data: DataTree, output_file: str): + """Process input file to generate gridded output file.""" + out_data = DataTree() + + out_data = transfer_metadata(in_data, out_data) + + # Process grids from all top level groups that are not only Metadata + data_node_names = set(in_data['/'].children) - set(get_metadata_children(in_data)) + + for node_name in data_node_names: + + grid_info = get_grid_information(in_data, node_name) + vars_to_grid = get_target_variables(in_data, node_name) + + # Add coordinates and CRS metadata for this node_name + x_dim, y_dim = compute_dims(grid_info['target']) + out_data[f'{node_name}/crs'] = create_crs(grid_info['target']) + out_data[f'{node_name}/x-dim'] = x_dim + out_data[f'{node_name}/y-dim'] = y_dim + + for var_name in vars_to_grid: + full_var_name = f'{node_name}/{var_name}' + out_data[full_var_name] = prepare_variable( + in_data[full_var_name], grid_info + ) + + # write the output data file. + out_data.to_netcdf(output_file) + + +def prepare_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: + """Grid and annotate intput variable.""" + grid_data = grid_variable(var, grid_info) + grid_data.attrs = {**var.attrs, 'grid_mapping': "crs"} + encoding = { + '_FillValue': variable_fill_value(var), + 'coordinates': var.encoding.get('coordinates', None), + # can't zip strings + **({'zlib': True, 'complevel': 6} if var.name != 'tb_time_utc' else {}), + } + grid_data.encoding.update(encoding) + return grid_data + + +def grid_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: + """Regrid the input 1D variable into a 2D grid using the grid_info.""" + fill_val = variable_fill_value(var) + grid = np.full( + (grid_info['target']['Grid Height'], grid_info['target']['Grid Width']), + fill_val, + dtype=var.encoding['dtype'], + ) + try: + valid_mask = ~np.isnan(var.data) + except TypeError: + # tb_time_utc is type string + valid_mask = var.data != "" + valid_rows = grid_info['src']['rows'].data[valid_mask] + valid_cols = grid_info['src']['cols'].data[valid_mask] + valid_values = var.data[valid_mask] + grid[valid_rows, valid_cols] = valid_values + return DataArray(grid, dims=['y-dim', 'x-dim']) + + +def variable_fill_value(var: DataTree | DataArray) -> np.integer | np.floating | None: + """Determine the correct fill value for the input variable. + + a fill value is found by searching in order: + - encoding._FillValue + - attrs._FillValue + - missing_value + + If not found, a default_fill_value is determined base on the input + varaiable datatype. + """ + fill_value = var.encoding.get('_FillValue') + if fill_value is None: + fill_value = var.attrs.get('_FillValue') + if fill_value is None: + fill_value = var.attrs.get('missing_value') + if fill_value is None: + fill_value = default_fill_value(var.encoding.get('dtype')) + return fill_value + + +def default_fill_value(data_type: np.dtype | None) -> np.integer | np.floating | None: + """Return an appropriate fill value for the input data type. + + These values were pulled from the on-prem system's + get_special_fill_value routine. + + it returns: + - None if type isn't numeric + - -9999.0 if type is floating + - Max value if type is an integer. + """ + if not np.issubdtype(data_type, np.number): + return None + elif np.issubdtype(data_type, np.floating): + return np.dtype(data_type).type(-9999.0) + else: + # np.issubdtype(data_type, np.integer): + return np.dtype(data_type).type(np.iinfo(data_type).max) + + +def get_target_variables(in_data: DataTree, node: str) -> Iterable[str]: + """Get variables to be regridded in the output file. + + TODO [MHS, 11/07/2024]: This is all variables now, but also might have + some special handling cases in the future. + """ + return in_data[node].data_vars + + +def get_grid_information(in_data: DataTree, node: str) -> dict: + """Get the column and row index locations and grid information for this node. + + For the PoC this will always be "node/EASE_column_index", + "node/EASE_row_index", and EASE Grid 9km data either global or north grid. + + TODO [MHS, 11/06/2024] This might go in a different file later with logic to + suss which grid each node is representing. + + """ + src_grid_info = {} + row = in_data[f'/{node}/EASE_row_index'] + column = in_data[f'/{node}/EASE_column_index'] + src_grid_info['rows'] = row.astype(row.encoding.get('dtype', 'uint16')) + src_grid_info['cols'] = column.astype(column.encoding.get('dtype', 'uint16')) + + grid_info = {} + grid_info['src'] = src_grid_info + grid_info['target'] = get_target_grid_information(node) + + return grid_info + + +def get_target_grid_information(node: str) -> dict: + """Return the target grid informaton. + + TODO [MHS, 11/13/2024] This might be in the wrong file. + """ + if is_polar_node(node): + gpd_name = 'EASE2_N09km.gpd' + wkt = epsg_6931_wkt + else: + gpd_name = 'EASE2_M09km.gpd' + wkt = epsg_6933_wkt + + target_grid_info = parse_gpd_file(gpd_name) + target_grid_info['wkt'] = wkt + return target_grid_info + + +def is_polar_node(node: str) -> bool: + """If the node name ends with "_Polar" it's the Northern Hemisphere data.""" + return node.endswith('_Polar') + + +def get_metadata_children(in_data: DataTree) -> list[str]: + """Fetch nodes with metadata. + + List of top level datatree children containing metadata to transfer + directly to output file. + + Note: This returns a constant for now, but may require reading the in_data + in the future. + """ + return ['Metadata'] + + +def transfer_metadata(in_data: DataTree, out_data: DataTree) -> DataTree: + """Transfer all metadata groups from input to the output datatree.""" + updated_data = out_data.copy() + for group_name in get_metadata_children(in_data): + updated_data[group_name] = in_data[group_name] + + return updated_data diff --git a/smap_l2_gridder/reference/EASE2_M09km.gpd b/smap_l2_gridder/reference/EASE2_M09km.gpd new file mode 100644 index 0000000..a307963 --- /dev/null +++ b/smap_l2_gridder/reference/EASE2_M09km.gpd @@ -0,0 +1,34 @@ +; 9 km Global EASE-Grid-2.0 grid parameter definition (nested in 36 km gpd). +; This projection uses the WGS84 ellipsoid. +; The left and right edges of the grid are at -+180.0000000 deg longitude. +; The top and bottom edges of the grid are at +-85.0445664 deg latitude. +Map Projection: Cylindrical Equal-Area (ellipsoid) +Map Reference Latitude: 0.0 +Map Reference Longitude: 0.0 +Map Second Reference Latitude: 30.0 +Map Rotation: 0.0 +Map Equatorial Radius: 6378137.0 ; wgs84 +Map Eccentricity: 0.081819190843 ; wgs84 +Map Origin X: -17367530.4451615 ; meters, -180.0000 deg lon + ; mapped to X +Map Origin Y: 7314540.8306386 ; meters, Grid Map Units per Cell * + ; Grid Height / 2 +Grid Map Origin Column: -0.5 +Grid Map Origin Row: -0.5 +Grid Map Units per Cell: 9008.055210146 ; meters, -2 * Map Origin X / + ; Grid Width +Grid Width: 3856 +Grid Height: 1624 + +; The following parameters are used only by NSIDC programs that actually draw maps +Map Southern Bound: -90.0 +Map Northern Bound: 90.0 +Map Western Bound: -180.0 +Map Eastern Bound: 180.0 +Map Graticule Latitude Interval: 15.00 +Map Graticule Longitude Interval: 30.00 +Map Graticule Label Latitude: 0.0 +Map Graticule Label Longitude: 0.0 +Map CIL Detail Level: 1 +Map BDY Detail Level: 0 +Map RIV Detail Level: 0 diff --git a/smap_l2_gridder/reference/EASE2_N09km.gpd b/smap_l2_gridder/reference/EASE2_N09km.gpd new file mode 100644 index 0000000..0bd1fc4 --- /dev/null +++ b/smap_l2_gridder/reference/EASE2_N09km.gpd @@ -0,0 +1,31 @@ +; 9 km Northern Hemisphere EASE-Grid-2.0 grid parameter definition (nested in 36 km gpd). +; This projection uses the WGS84 ellipsoid. +; The pole is exactly at the intersection of the middle 4 cells. +; The left/right/top/bottom latitude is 0.127234 degrees North +; (the equator goes just outside the edges of the grid coverage along the sides) +Map Projection: Azimuthal Equal-Area (ellipsoid) +Map Reference Latitude: 90.0 +Map Reference Longitude: 0.0 +Map Rotation: 0.0 +Map Equatorial Radius: 6378137.0 ; wgs84 +Map Eccentricity: 0.081819190843 ; wgs84 +Map Origin X: -9000000. ; meters +Map Origin Y: 9000000. ; meters +Grid Map Origin Column: -0.5 +Grid Map Origin Row: -0.5 +Grid Map Units per Cell: 9000. ; meters +Grid Width: 2000 +Grid Height: 2000 + +; The following parameters are used only by NSIDC programs that actually draw maps +Map Southern Bound: 0.0 +Map Northern Bound: 90.0 +Map Western Bound: -180.0 +Map Eastern Bound: 180.0 +Map Graticule Latitude Interval: 15.00 +Map Graticule Longitude Interval: 30.00 +Map Graticule Label Latitude: 0.0 +Map Graticule Label Longitude: 0.0 +Map CIL Detail Level: 1 +Map BDY Detail Level: 0 +Map RIV Detail Level: 0 diff --git a/smap_l2_gridder/reference/README.md b/smap_l2_gridder/reference/README.md new file mode 100644 index 0000000..0b1baee --- /dev/null +++ b/smap_l2_gridder/reference/README.md @@ -0,0 +1,10 @@ +# Reference files for smap l2 gridder + +These are copies of the gpd files describing the EASE2 grids copied directly from the [maxmaps NSIDC repository](https://github.com/nsidc/mapxmaps). + +These files are stable and do not change regularly (over 13 years for M09 and +N09). Rather than use a gitsubmodule, I have chosen to copy the files to +prevent adding unnecessary complexity. I could have copied the information +into python dictionaries, but parsing these files and working with them +directly will ease the burden of verifying that we haven't missed anything. +Hopefully someday we well be able to work with the files directly online. diff --git a/tests/unit/test_crs.py b/tests/unit/test_crs.py new file mode 100644 index 0000000..f3b1a73 --- /dev/null +++ b/tests/unit/test_crs.py @@ -0,0 +1,163 @@ +"""Unit test module for crs functionality.""" + +from pathlib import Path +from textwrap import dedent + +import numpy as np +import pytest +from xarray import DataArray + +from smap_l2_gridder.crs import ( + Geotransform, + compute_dims, + convert_value, + create_crs, + epsg_6933_wkt, + geotransform_from_target_info, + parse_gpd_file, + validate_gpd_style, +) +from smap_l2_gridder.exceptions import InvalidGPDError + +EASE2_M09km_gpd_filename = ( + Path(__file__).parent.parent.parent / 'smap_l2_gridder/reference/EASE2_M09km.gpd' +) + +# Sample data for testing +sample_target_info = { + 'Map Origin X': -5000.0, + 'Map Origin Y': -5000.0, + 'Grid Map Units per Cell': 1000.0, + 'Grid Width': 10, + 'Grid Height': 10, + 'Grid Map Origin Column': -0.5, + 'Grid Map Origin Row': -0.5, + 'wkt': epsg_6933_wkt, +} + + +def test_geotransform(): + """Test basic geotransformation computations.""" + gt = Geotransform(-2000.0, 1000.0, 0.0, 0.0, 0.0, -1000.0) + x, y = gt.col_row_to_xy(0, 0) + assert np.isclose(x, -1500.0) + assert np.isclose(y, -500.0) + x2, y2 = gt.col_row_to_xy(1, 1) + assert np.isclose(x2, -500.0) + assert np.isclose(y2, -1500.0) + + +def test_geotransform_from_target_info(): + """Tests target info will be parsed for a geotransform.""" + gt = geotransform_from_target_info(sample_target_info) + assert isinstance(gt, Geotransform) + assert np.isclose(gt.top_left_x, -5000.0) + assert np.isclose(gt.top_left_y, -5000.0) + assert np.isclose(gt.row_rotation, 0.0) + assert np.isclose(gt.column_rotation, 0.0) + assert np.isclose(gt.pixel_width, 1000.0) + assert np.isclose(gt.pixel_height, -1000.0) + + +def test_validate_gpd_style_valid(): + """Valid target information will validate.""" + validate_gpd_style(sample_target_info) + + +def test_validate_gpd_style_invalid(): + """Tests that only the expected gpd format will convert to geotransform.""" + invalid_target_info = sample_target_info.copy() + invalid_target_info['Grid Map Origin Column'] = 0.0 + with pytest.raises(InvalidGPDError): + validate_gpd_style(invalid_target_info) + + +def test_convert_value(): + """Ensure gpd parsed values are converted to correct data types.""" + assert convert_value(' 123 ') == 123 + assert convert_value(' 123.45 ') == np.float64(123.45) + assert convert_value('abc ') == 'abc' + + +def test_parse_gpd_file(tmp_path): + """Test GPD files are correctly parsed.""" + gpd_content = dedent( + """ + ; Example gpd for parsing + Map Origin X: -7000.0 ; pinned location for Map Origin X + Map Origin Y: 0.0 + Grid Map Units per Cell: 1000.0 + Grid Map Origin Column: -0.5 + Grid Map Origin Row: -0.5 + Something That Exists But Is Not Used: Has A Value + """ + ).strip() + + gpd_file = tmp_path / "test.gpd" + gpd_file.write_text(gpd_content) + + result = parse_gpd_file(str(gpd_file)) + assert result['Map Origin X'] == -7000.0 + assert result['Map Origin Y'] == 0.0 + assert result['Grid Map Units per Cell'] == 1000.0 + assert result['Something That Exists But Is Not Used'] == 'Has A Value' + + +def test_create_crs(): + """Test create crs transforms input info into array with metadata.""" + crs_data_array = create_crs(sample_target_info) + assert 'crs_wkt' in crs_data_array.attrs + assert ( + 'NSIDC EASE-Grid 2.0 Global' in crs_data_array.crs_wkt + ), f'expected "NSIDC EASE-Grid 2.0 Global" in {crs_data_array.crs_wkt}' + assert crs_data_array.attrs['proj'] == ( + '+proj=cea +lat_ts=30 +lon_0=0 +x_0=0 ' + '+y_0=0 +datum=WGS84 +units=m +no_defs +type=crs' + ) + assert isinstance(crs_data_array, DataArray) + + +def test_compute_dims(): + """Test compute dims against a known entity. + + Use the EASE2 Global 9km grid. and verify a few values against known values + pulled form NSIDC-0772 dataset + + """ + EASE2_M09km_grid_info = parse_gpd_file(EASE2_M09km_gpd_filename) + x_dim, y_dim = compute_dims(EASE2_M09km_grid_info) + assert len(x_dim) == EASE2_M09km_grid_info['Grid Width'] + assert len(y_dim) == EASE2_M09km_grid_info['Grid Height'] + selected_y_index = np.array([0, 203, 406, 609, 812, 1014, 1217, 1420, 1623]) + selected_x_index = np.array([0, 482, 964, 1446, 1928, 2409, 2891, 3373, 3855]) + expected_x_values = np.array( + [ + -1.7363026417556427e07, + -1.3021143806266051e07, + -8.6792611949756760e06, + -4.3373785836853012e06, + 4.5040276050753891e03, + 4.3373785836853050e06, + 8.6792611949756779e06, + 1.3021143806266055e07, + 1.7363026417556427e07, + ], + dtype=np.float64, + ) + expected_y_values = np.array( + [ + 7.3100368030335270e06, + 5.4814015953738764e06, + 3.6527663877142267e06, + 1.8241311800545761e06, + -4.5040276050735265e03, + -1.8241311800545771e06, + -3.6527663877142277e06, + -5.4814015953738783e06, + -7.3100368030335270e06, + ], + dtype=np.float64, + ) + + np.testing.assert_array_almost_equal(x_dim[selected_x_index], expected_x_values) + np.testing.assert_array_almost_equal(y_dim[selected_y_index], expected_y_values) diff --git a/tests/unit/test_grid.py b/tests/unit/test_grid.py new file mode 100644 index 0000000..98c30bb --- /dev/null +++ b/tests/unit/test_grid.py @@ -0,0 +1,287 @@ +"""Tests for grid module.""" + +from pathlib import Path + +import numpy as np +import pytest +import xarray as xr +from xarray import DataArray, DataTree + +from smap_l2_gridder.grid import ( + default_fill_value, + get_grid_information, + get_target_grid_information, + grid_variable, + prepare_variable, + process_input, + transfer_metadata, + variable_fill_value, +) + + +# Fixtures +@pytest.fixture +def sample_datatree(tmp_path): + """Create a sample DataTree for testing. + + It is round tripped to a temporary disk location for easy of setting the + correct NetCDF attributes. + + This represents the expected shape of an SPL2SMP_E granule. + The data is repeated for both global and polar nodes. + + """ + dt = DataTree() + dt["Metadata/Lineage/DEMSLP"] = DataTree() + dt["Metadata/Lineage/DEMSLP"].attrs[ + "Description" + ] = "Representative surface slope data for each of the 9 km cells" + + nodes = ["Soil_Moisture_Retrieval_Data", "Soil_Moisture_Retrieval_Data_Polar"] + for node in nodes: + dt[f"{node}"] = DataTree() + dt[f"{node}/EASE_column_index"] = DataArray( + data=np.array([1175, 1175, 1175, 1175, 1175], dtype=np.uint16), + dims=["phony_dim_0"], + name="EASE_column_index", + attrs={ + "long_name": "The column index of the 9 km EASE grid cell...", + "valid_min": 0, + "valid_max": 3855, + "_FillValue": np.uint16(65534), + }, + ) + + dt[f"{node}/EASE_row_index"] = DataArray( + data=np.array([1603, 1604, 1605, 1606, 1607], dtype=np.uint16), + dims=["phony_dim_0"], + attrs={ + "long_name": "The row index of the 9 km EASE grid cell...", + "valid_min": 0, + "valid_max": 1623, + "_FillValue": np.uint16(65534), + }, + ) + + dt[f'{node}/albedo'] = DataArray( + data=np.array( + [0.0009434, 0.00136986, 0.0025, 0.0, -9999.0], dtype=np.float32 + ), + dims=['phony_dim_0'], + attrs={ + 'long_name': 'Diffuse reflecting power of the Earth's...', + 'valid_min': 0.0, + 'valid_max': 1.0, + '_FillValue': np.float32(-9999.0), + }, + ) + + dt[f'{node}/tb_time_utc'] = DataArray( + data=np.array( + [ + '2024-11-06T03:59:27.313Z', + '2024-11-06T03:59:25.754Z', + '2024-11-06T03:59:24.374Z', + '2024-11-06T03:59:22.735Z', + '2024-11-06T03:59:21.191Z', + ], + dtype='