Skip to content

Commit

Permalink
Merge pull request #73 from TUW-GEO/improve-climatology
Browse files Browse the repository at this point in the history
Improve climatology
  • Loading branch information
cpaulik committed Dec 10, 2015
2 parents 875f295 + 2312136 commit e6cd0a2
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# v0.3.6, 2015-12-10

* make sure that climatologies are always 366 elements
* add new options to climatology calculation for filling NaN values
* add option to climatology calculation for wraparound before the smoothing

# v0.3.5, 2015-11-04

* fix bug in anomaly calculation that occurred when the climatology series had
Expand Down
39 changes: 35 additions & 4 deletions pytesmo/time_series/anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
'''

import pandas as pd
import numpy as np
from pytesmo.timedate.julian import doy, julian2date
from pytesmo.time_series.filtering import moving_average

Expand Down Expand Up @@ -76,7 +77,9 @@ def calc_climatology(Ser,
moving_avg_orig=5,
moving_avg_clim=30,
median=False,
timespan=None):
timespan=None,
fill=np.nan,
wraparound=False):
'''
Calculates the climatology of a data set.
Expand All @@ -101,6 +104,13 @@ def calc_climatology(Ser,
Set this to calculate the climatology based on a subset of the input
Series
fill : float or int, optional
Fill value to use for days on which no climatology exists
wraparound : boolean, optional
If set then the climatology is wrapped around at the edges before
doing the second running average (long-term event correction)
Returns
-------
climatology : pandas.Series
Expand Down Expand Up @@ -130,6 +140,27 @@ def calc_climatology(Ser,
else:
clim = Ser.groupby('doy').mean()

return moving_average(pd.Series(clim.values.flatten(),
index=clim.index.values),
window_size=moving_avg_clim)
clim_ser = pd.Series(clim.values.flatten(),
index=clim.index.values)

if wraparound:
index_old = clim_ser.index.copy()
left_mirror = clim_ser.iloc[-moving_avg_clim:]
right_mirror = clim_ser.iloc[:moving_avg_clim]
# Shift index to start at 366 - index at -moving_avg_clim
# to run over a whole year while keeping gaps the same size
right_mirror.index = right_mirror.index + 366 * 2
clim_ser.index = clim_ser.index + 366
clim_ser = pd.concat([left_mirror,
clim_ser,
right_mirror])

clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)
clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
clim_ser.index = index_old
else:
clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)

clim_ser = clim_ser.reindex(np.arange(366) + 1)
clim_ser = clim_ser.fillna(fill)
return clim_ser
39 changes: 37 additions & 2 deletions tests/test_time_series/test_anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def test_anomaly_calc_given_climatology():
np.arange(366), index=pd.date_range('2000-01-01', periods=366))
anom_should = pd.Series(
np.zeros(366), index=pd.date_range('2000-01-01', periods=366))
anom = anomaly.calc_anomaly(data, climatology=clim, respect_leap_years=False)
anom = anomaly.calc_anomaly(
data, climatology=clim, respect_leap_years=False)

pdt.assert_series_equal(anom_should, anom, check_dtype=False)

Expand All @@ -56,6 +57,40 @@ def test_anomaly_calc_given_climatology_no_leap_year():
np.arange(365), index=pd.date_range('2007-01-01', periods=365))
anom_should = pd.Series(
np.zeros(365), index=pd.date_range('2007-01-01', periods=365))
anom = anomaly.calc_anomaly(data, climatology=clim, respect_leap_years=True)
anom = anomaly.calc_anomaly(
data, climatology=clim, respect_leap_years=True)

pdt.assert_series_equal(anom_should, anom, check_dtype=False)


def test_climatology_always_366():
ts = pd.Series(np.sin(np.arange(366) / 366. * 2 * np.pi), index=pd.date_range(
'2000-01-01', freq='D', periods=366))
# remove a part of the time series
ts['2000-02-01': '2000-02-28'] = np.nan
ts = ts.dropna()
clim = anomaly.calc_climatology(ts)
assert clim.size == 366


def test_climatology_always_366_fill():
ts = pd.Series(np.sin(np.arange(366) / 366. * 2 * np.pi), index=pd.date_range(
'2000-01-01', freq='D', periods=366))
# remove a part of the time series
ts['2000-02-01': '2000-02-28'] = np.nan
ts = ts.dropna()
clim = anomaly.calc_climatology(ts, fill=-1)
assert clim.size == 366
assert clim.iloc[31] == -1


def test_climatology_closed():
ts = pd.Series(np.arange(366), index=pd.date_range(
'2000-01-01', freq='D', periods=366))
# remove a part of the time series
ts['2000-02-01': '2000-02-28'] = np.nan
ts = ts.dropna()
clim = anomaly.calc_climatology(ts, wraparound=True)
assert clim.size == 366
# test that the arange was closed during the second moving average
assert clim.iloc[365] - 187.90 < 0.01

0 comments on commit e6cd0a2

Please sign in to comment.