Skip to content

Commit

Permalink
fix for temporal matcher; publishing in separate CI workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
s-scherrer committed May 19, 2021
1 parent 13950bb commit 0851075
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 23 deletions.
26 changes: 15 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,6 @@ jobs:
with:
name: Artifacts
path: .artifacts/*
- name: Publish to PyPI Test
if: ${{ github.event_name == 'release' && (matrix.os == 'windows-latest' || matrix.python-version == '3.8') }}
uses: pypa/[email protected]
with:
skip_existing: true
verbose: true
verify_metadata: true
packages_dir: .artifacts/dist/
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets
repository_url: https://test.pypi.org/legacy/ # todo: delete for real pypi
coveralls:
name: Submit Coveralls 👚
needs: build
Expand All @@ -107,3 +96,18 @@ jobs:
pip3 install --upgrade coveralls && coveralls --service=github --finish
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Publishing runs in its own job so it executes exactly once per release
# (the build job runs per-OS/per-Python in a matrix).
publish:
  name: Upload to PyPI
  if: ${{ github.event_name == 'release' }}
  # The distribution files are produced and uploaded by the build job;
  # without this dependency the publish job could start before they exist.
  needs: build
  runs-on: ubuntu-latest
  steps:
    # A fresh runner has no workspace files from previous jobs: the
    # artifacts uploaded at the end of the build job ("name: Artifacts",
    # "path: .artifacts/*") must be downloaded before publishing.
    - name: Download Artifacts
      uses: actions/download-artifact@v2
      with:
        name: Artifacts
        path: .artifacts/
    - name: Publish to PyPI Test
      # NOTE(review): the action reference below looks garbled in this view
      # ("[email protected]"); it should be pypa/gh-action-pypi-publish —
      # confirm the intended version pin.
      uses: pypa/[email protected]
      with:
        skip_existing: true
        verbose: true
        verify_metadata: true
        packages_dir: .artifacts/dist/
        user: __token__
        password: ${{ secrets.TEST_PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets
        repository_url: https://test.pypi.org/legacy/ # todo: delete for real pypi
8 changes: 5 additions & 3 deletions src/pytesmo/validation_framework/temporal_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def combinatory_matcher(self, df_dict, refkey, n=2, **kwargs):


def dfdict_combined_temporal_collocation(
dfs, refname, k, window=None, n=None, combined_dropna=False, **kwargs
dfs, refname, k, window=None, n=None, **kwargs
):
"""
Applies :py:func:`combined_temporal_collocation` on a dictionary of
Expand Down Expand Up @@ -161,7 +161,7 @@ def dfdict_combined_temporal_collocation(
others.append(df_name_multiindex(dfs[name], name))
ref = df_name_multiindex(dfs[refname], refname)
matched_df = temporal_matching.combined_temporal_collocation(
ref, others, window, add_ref_data=True, combined_dropna=combined_dropna, **kwargs
ref, others, window, add_ref_data=True, **kwargs
)

# unpack again to dictionary
Expand Down Expand Up @@ -194,7 +194,9 @@ def matcher(dfs, refname, k=None, **kwargs):
# this comes from Validation.temporal_match_datasets but is not
# required
return dfdict_combined_temporal_collocation(
dfs, refname, k, window=window, **kwargs
dfs, refname, k, window=window, dropna=True,
combined_dropna="any", dropduplicates=True,
**kwargs
)
return matcher

Expand Down
11 changes: 7 additions & 4 deletions src/pytesmo/validation_framework/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,13 @@ def perform_validation(

df_dict[self.temporal_ref] = masked_ref_df

# matched_n is a dictionary with the same keys as self.metrics_c, i.e.
# (n1, k1), (n2, k2), ...
# each entry is a dictionary whose
matched_n = self.temporal_match_datasets(df_dict)
# we only need the data columns
data_df_dict = {}
for ds in df_dict:
columns = self.data_manager.datasets[ds]["columns"]
data_df_dict[ds] = df_dict[ds][columns]

matched_n = self.temporal_match_datasets(data_df_dict)

for n, k in self.metrics_c:
n_matched_data = matched_n[(n, k)]
Expand Down
72 changes: 70 additions & 2 deletions tests/test_validation_framework/test_metric_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
TripleCollocationMetrics,
)
from pytesmo.validation_framework.temporal_matchers import (
make_combined_temporal_matcher
make_combined_temporal_matcher, BasicTemporalMatching
)
from pytesmo.validation_framework.results_manager import netcdf_results_manager
import pytesmo.metrics as metrics
Expand Down Expand Up @@ -753,7 +753,8 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator):
metrics_calculators={(4, 4): metrics.calc_metrics}
)

results = val.calc(1, 1, 1, rename_cols=False, only_with_temporal_ref=True)
print("running old setup")
results = val.calc(1, 1, 1, rename_cols=False)

# results is a dictionary with one entry and key
# (('c1name', 'c1'), ('c2name', 'c2'), ('c3name', 'c3'), ('refname',
Expand Down Expand Up @@ -972,6 +973,73 @@ def test_TripleCollocationMetrics(testdata_generator):
)


def test_temporal_matching_ascat_ismn():
    """
    Compare old and new temporal matchers on real ASCAT/ISMN data.

    A CSV file of ASCAT and ISMN data is used to test whether the temporal
    matching within the validation works as expected in a "real" setup.
    Only the number of observations is compared, because this is the main
    thing the temporal matching influences.
    """
    here = Path(__file__).resolve().parent
    frames = {
        "ASCAT": pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True),
        "ISMN": pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True),
    }
    data_columns = {"ASCAT": "sm", "ISMN": "soil_moisture"}
    refname = "ISMN"
    window = pd.Timedelta(12, "H")

    # old = combinatory matcher, new = combined matcher
    old_matcher = BasicTemporalMatching().combinatory_matcher
    new_matcher = make_combined_temporal_matcher(window)

    # reference dataset ("ISMN") first, so that ds_names below starts with it
    datasets = {
        name: {
            "columns": [data_columns[name]],
            "class": DummyReader(frames[name], list(frames[name].columns)),
        }
        for name in ("ISMN", "ASCAT")
    }

    new_val = Validation(
        datasets,
        refname,
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=new_matcher,
        metrics_calculators={
            (2, 2): PairwiseIntercomparisonMetrics().calc_metrics
        }
    )
    new_results = new_val.calc(
        1, 1, 1, rename_cols=False, only_with_temporal_ref=True
    )

    # old setup
    ds_names = list(datasets.keys())
    # don't call this "metrics": it would shadow the pytesmo.metrics module
    intercomparison_metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname=refname,
        other_names=ds_names[1:],
        calc_tau=True,
    )
    old_val = Validation(
        datasets,
        refname,
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=old_matcher,
        metrics_calculators={
            (2, 2): intercomparison_metrics.calc_metrics
        }
    )
    old_results = old_val.calc(1, 1, 1, rename_cols=False)

    # both setups report results under the same (dataset, column) key pair
    key = (("ASCAT", "sm"), ("ISMN", "soil_moisture"))
    assert old_results[key]["n_obs"] == new_results[key]["n_obs"]


# def test_sorting_issue():
# GH #220
# might be a good start for fixing the issue
Expand Down
14 changes: 11 additions & 3 deletions tests/test_validation_framework/test_temporal_matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_dfdict_combined_temporal_collocation():
ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True)
ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True)

dfs = {"ASCAT": ascat, "ISMN": ismn}
dfs = {"ASCAT": ascat[["sm"]], "ISMN": ismn[["soil_moisture"]]}
refname = "ISMN"
window = pd.Timedelta(12, "H")

Expand All @@ -178,6 +178,14 @@ def test_dfdict_combined_temporal_collocation():
key = ("ISMN", "ASCAT")
assert list(expected.keys()) == [key]
assert list(new.keys()) == [key]
assert expected[key].shape == new[key].shape
# We have to do an extra dropna for the old matcher, because the old
# matcher doesn't drop missing values by itself.
# This is normally done within validation.py, `get_data_for_result_tuple`,
# but since the combined matcher should exclude all rows where even a
# single entry is missing (so that all datasets share only their common
# data), the new matcher (the combined matcher) already does this itself;
# the old one is the combinatory matcher.
exp = expected[key].dropna()
assert exp.shape == new[key].shape
for col in new[key]:
np.testing.assert_equal(expected[key][col].values, new[key][col].values)
np.testing.assert_equal(exp[col].values, new[key][col].values)

0 comments on commit 0851075

Please sign in to comment.