From 0851075c5c1e7e22b5a5eb64531b582b677540ad Mon Sep 17 00:00:00 2001 From: Samuel Scherrer Date: Wed, 19 May 2021 17:22:54 +0200 Subject: [PATCH] fix for temporal matcher; publishing in separate CI workflow --- .github/workflows/ci.yml | 26 ++++--- .../validation_framework/temporal_matchers.py | 8 ++- .../validation_framework/validation.py | 11 +-- .../test_metric_calculators.py | 72 ++++++++++++++++++- .../test_temporal_matchers.py | 14 +++- 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2852a6e..08abe884 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,17 +85,6 @@ jobs: with: name: Artifacts path: .artifacts/* - - name: Publish to PyPI Test - if: ${{ github.event_name == 'release' && (matrix.os == 'windows-latest' || matrix.python-version == '3.8') }} - uses: pypa/gh-action-pypi-publish@v1.4.1 - with: - skip_existing: true - verbose: true - verify_metadata: true - packages_dir: .artifacts/dist/ - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets - repository_url: https://test.pypi.org/legacy/ # todo: delete for real pypi coveralls: name: Submit Coveralls 👚 needs: build @@ -107,3 +96,18 @@ jobs: pip3 install --upgrade coveralls && coveralls --service=github --finish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + publish: + name: Upload to PyPI + if: ${{ github.event_name == 'release' }} + runs-on: ubuntu-latest + steps: + - name: Publish to PyPI Test + uses: pypa/gh-action-pypi-publish@v1.4.1 + with: + skip_existing: true + verbose: true + verify_metadata: true + packages_dir: .artifacts/dist/ + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets + repository_url: https://test.pypi.org/legacy/ # todo: delete for real pypi diff --git a/src/pytesmo/validation_framework/temporal_matchers.py 
b/src/pytesmo/validation_framework/temporal_matchers.py index 066b3682..47e261d0 100644 --- a/src/pytesmo/validation_framework/temporal_matchers.py +++ b/src/pytesmo/validation_framework/temporal_matchers.py @@ -120,7 +120,7 @@ def combinatory_matcher(self, df_dict, refkey, n=2, **kwargs): def dfdict_combined_temporal_collocation( - dfs, refname, k, window=None, n=None, combined_dropna=False, **kwargs + dfs, refname, k, window=None, n=None, **kwargs ): """ Applies :py:func:`combined_temporal_collocation` on a dictionary of @@ -161,7 +161,7 @@ def dfdict_combined_temporal_collocation( others.append(df_name_multiindex(dfs[name], name)) ref = df_name_multiindex(dfs[refname], refname) matched_df = temporal_matching.combined_temporal_collocation( - ref, others, window, add_ref_data=True, combined_dropna=combined_dropna, **kwargs + ref, others, window, add_ref_data=True, **kwargs ) # unpack again to dictionary @@ -194,7 +194,9 @@ def matcher(dfs, refname, k=None, **kwargs): # this comes from Validation.temporal_match_datasets but is not # required return dfdict_combined_temporal_collocation( - dfs, refname, k, window=window, **kwargs + dfs, refname, k, window=window, dropna=True, + combined_dropna="any", dropduplicates=True, + **kwargs ) return matcher diff --git a/src/pytesmo/validation_framework/validation.py b/src/pytesmo/validation_framework/validation.py index d91726ef..05415bb6 100644 --- a/src/pytesmo/validation_framework/validation.py +++ b/src/pytesmo/validation_framework/validation.py @@ -285,10 +285,13 @@ def perform_validation( df_dict[self.temporal_ref] = masked_ref_df - # matched_n is a dictionary with the same keys as self.metrics_c, i.e. - # (n1, k1), (n2, k2), ... 
- # each entry is a dictionary whose - matched_n = self.temporal_match_datasets(df_dict) + # we only need the data columns + data_df_dict = {} + for ds in df_dict: + columns = self.data_manager.datasets[ds]["columns"] + data_df_dict[ds] = df_dict[ds][columns] + + matched_n = self.temporal_match_datasets(data_df_dict) for n, k in self.metrics_c: n_matched_data = matched_n[(n, k)] diff --git a/tests/test_validation_framework/test_metric_calculators.py b/tests/test_validation_framework/test_metric_calculators.py index 4e7142ac..506e63e4 100644 --- a/tests/test_validation_framework/test_metric_calculators.py +++ b/tests/test_validation_framework/test_metric_calculators.py @@ -56,7 +56,7 @@ TripleCollocationMetrics, ) from pytesmo.validation_framework.temporal_matchers import ( - make_combined_temporal_matcher + make_combined_temporal_matcher, BasicTemporalMatching ) from pytesmo.validation_framework.results_manager import netcdf_results_manager import pytesmo.metrics as metrics @@ -753,7 +753,8 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator): metrics_calculators={(4, 4): metrics.calc_metrics} ) - results = val.calc(1, 1, 1, rename_cols=False, only_with_temporal_ref=True) + print("running old setup") + results = val.calc(1, 1, 1, rename_cols=False) # results is a dictionary with one entry and key # (('c1name', 'c1'), ('c2name', 'c2'), ('c3name', 'c3'), ('refname', @@ -972,6 +973,73 @@ def test_TripleCollocationMetrics(testdata_generator): ) +def test_temporal_matching_ascat_ismn(): + """ + This test uses a CSV file of ASCAT and ISMN data to test if the temporal + matching within the validation works as expected in a "real" setup. + This only tests whether the number of observations matches, because this is + the main thing the temporal matching influences. 
+ """ + + # test with ASCAT and ISMN data + here = Path(__file__).resolve().parent + ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True) + ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True) + dfs = {"ASCAT": ascat, "ISMN": ismn} + columns = {"ASCAT": "sm", "ISMN": "soil_moisture"} + refname = "ISMN" + window = pd.Timedelta(12, "H") + + old_matcher = BasicTemporalMatching().combinatory_matcher + new_matcher = make_combined_temporal_matcher(window) + + datasets = {} + for key in ["ISMN", "ASCAT"]: + all_columns = list(dfs[key].columns) + ds = {"columns": [columns[key]], "class": DummyReader(dfs[key], all_columns)} + datasets[key] = ds + + new_val = Validation( + datasets, + refname, + scaling=None, # doesn't work with the constant test data + temporal_matcher=new_matcher, + metrics_calculators={ + (2, 2): PairwiseIntercomparisonMetrics().calc_metrics + } + ) + new_results = new_val.calc( + 1, 1, 1, rename_cols=False, only_with_temporal_ref=True + ) + + # old setup + ds_names = list(datasets.keys()) + metrics = IntercomparisonMetrics( + dataset_names=ds_names, + # passing the names here explicitly, see GH issue #220 + refname=refname, + other_names=ds_names[1:], + calc_tau=True, + ) + old_val = Validation( + datasets, + refname, + scaling=None, # doesn't work with the constant test data + temporal_matcher=old_matcher, + metrics_calculators={ + (2, 2): metrics.calc_metrics + } + ) + old_results = old_val.calc( + 1, 1, 1, rename_cols=False + ) + + old_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture')) + new_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture')) + + assert old_results[old_key]["n_obs"] == new_results[new_key]["n_obs"] + + # def test_sorting_issue(): # GH #220 # might be a good start for fixing the issue diff --git a/tests/test_validation_framework/test_temporal_matchers.py b/tests/test_validation_framework/test_temporal_matchers.py index 82481b90..dc85956c 100644 --- 
a/tests/test_validation_framework/test_temporal_matchers.py +++ b/tests/test_validation_framework/test_temporal_matchers.py @@ -165,7 +165,7 @@ def test_dfdict_combined_temporal_collocation(): ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True) ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True) - dfs = {"ASCAT": ascat, "ISMN": ismn} + dfs = {"ASCAT": ascat[["sm"]], "ISMN": ismn[["soil_moisture"]]} refname = "ISMN" window = pd.Timedelta(12, "H") @@ -178,6 +178,14 @@ def test_dfdict_combined_temporal_collocation(): key = ("ISMN", "ASCAT") assert list(expected.keys()) == [key] assert list(new.keys()) == [key] - assert expected[key].shape == new[key].shape + # We have to do an extra dropna for the old matcher, because the old + # matcher doesn't do this by itself. + # This is normally done within validation.py, `get_data_for_result_tuple`, + # but since the combined matcher should exclude all data where even a + # single entry misses (so that all only have common data) this is done + # before in the new matcher (the combined matcher, whereas the old one is + # the combinatory matcher) + exp = expected[key].dropna() + assert exp.shape == new[key].shape for col in new[key]: - np.testing.assert_equal(expected[key][col].values, new[key][col].values) + np.testing.assert_equal(exp[col].values, new[key][col].values)