# Feat/nf evaluation #891
# Comprehensive Evaluation of NeuralForecast models

In this experiment, we tested all available models in NeuralForecast on benchmark datasets to evaluate their speed and forecasting performance.

The datasets used for this benchmark are:
- M4 (yearly)
- M4 (quarterly)
- M4 (monthly)
- M4 (daily)
- ETTm2 (15 min)
- Electricity (hourly)
- Weather (10 min)
- Traffic (hourly)
- ILI (weekly)

Each model went through hyperparameter optimization. All experiments were run locally on CPU.

The table below summarizes the results.
*Table will be updated as results are obtained.*

## Reproducibility

1. Create a conda environment `nf_evaluation` using the `environment.yml` file:
```shell
conda env create -f environment.yml
```

2. Activate the conda environment:
```shell
conda activate nf_evaluation
```

Alternatively, simply installing neuralforecast and datasetsforecast with pip may suffice:
```
pip install git+https://github.com/Nixtla/datasetsforecast.git
pip install git+https://github.com/Nixtla/neuralforecast.git
```

3. Run the experiments for each dataset and each model, with
- the `--dataset` parameter in `[M4-yearly, M4-quarterly, M4-monthly, M4-daily, ETTm2, Electricity, Weather, Traffic, ILI]`
- the `--model` parameter in `['AutoLSTM', 'AutoRNN', 'AutoGRU', 'AutoDilatedRNN', 'AutoDeepAR', 'AutoTCN', 'AutoMLP', 'AutoNBEATS', 'AutoNHITS', 'AutoDLinear', 'AutoTFT', 'AutoVanillaTransformer', 'AutoInformer', 'AutoAutoformer', 'AutoFEDformer', 'AutoTimesNet', 'AutoPatchTST']`

```shell
python run_experiments.py --dataset M4-yearly --model AutoMLP
```

4. The script creates a folder `results/<dataset>` which contains a CSV file with the metrics for the specified model.
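To compare models after several runs, the per-model CSVs can be combined into one table. Below is a minimal sketch assuming the `results/<dataset>/<model>_metrics.csv` layout produced by the script; the `aggregate_results` helper is illustrative and not part of this PR:

```python
import glob
import os

import pandas as pd


def aggregate_results(dataset):
    """Concatenate every <model>_metrics.csv found under results/<dataset>."""
    paths = sorted(glob.glob(os.path.join("results", dataset, "*_metrics.csv")))
    if not paths:
        raise FileNotFoundError(f"no metrics found under results/{dataset}")
    # Each CSV holds one row: Model, MAE, MSE, sMAPE, MAPE, time
    return pd.concat(map(pd.read_csv, paths), ignore_index=True).sort_values("MAE")


if __name__ == "__main__":
    print(aggregate_results("M4-yearly").to_string(index=False))
```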
## datasets.py

```python
import pandas as pd

from datasetsforecast.m4 import M4
from datasetsforecast.long_horizon import LongHorizon


def get_dataset(name):
    """Load a benchmark dataset and return (Y_df, h, freq, val_size, test_size)."""
    # Read data and parameters
    if name == 'M4-yearly':
        Y_df, *_ = M4.load(directory='./', group='Yearly')
        Y_df['ds'] = Y_df['ds'].astype(int)
        freq = 1
        h = 6
        val_size = 6
        test_size = 6
    elif name == 'M4-quarterly':
        Y_df, *_ = M4.load(directory='./', group='Quarterly')
        Y_df['ds'] = Y_df['ds'].astype(int)
        freq = 4
        h = 8
        val_size = 8
        test_size = 8
    elif name == 'M4-monthly':
        Y_df, *_ = M4.load(directory='./', group='Monthly')
        Y_df['ds'] = Y_df['ds'].astype(int)
        freq = 12
        h = 18
        val_size = 18
        test_size = 18
    elif name == 'M4-daily':
        Y_df, *_ = M4.load(directory='./', group='Daily')
        Y_df['ds'] = Y_df['ds'].astype(int)
        freq = 365
        h = 14
        val_size = 14
        test_size = 14
    elif name == 'ETTm2':
        Y_df, *_ = LongHorizon.load(directory='./', group='ETTm2')
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        freq = '15T'
        h = 720
        val_size = 11520
        test_size = 11520
    elif name == 'Electricity':
        Y_df, *_ = LongHorizon.load(directory='./', group='ECL')
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        freq = 'H'
        h = 720
        val_size = 2632
        test_size = 5260
    elif name == 'Weather':
        Y_df, *_ = LongHorizon.load(directory='./', group='Weather')
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        freq = '10T'
        h = 720
        val_size = 5270
        test_size = 10539
    elif name == 'Traffic':
        Y_df, *_ = LongHorizon.load(directory='./', group='Traffic')
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        freq = 'H'
        h = 720
        val_size = 1756
        test_size = 3508
    elif name == 'ILI':
        Y_df, *_ = LongHorizon.load(directory='./', group='ILI')
        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
        freq = 'W'
        h = 60
        val_size = 97
        test_size = 193
    else:
        raise ValueError(f"Dataset '{name}' not recognized")

    return Y_df, h, freq, val_size, test_size
```
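For reference, a quick smoke test of the loader (the dataset name must match one of the strings above exactly, including the `ETTm2` capitalization):

```python
from datasets import get_dataset

# Downloads the data on first call, then returns the frame and split parameters
Y_df, h, freq, val_size, test_size = get_dataset('M4-yearly')
print(Y_df.head())
print(f"h={h}, freq={freq}, val_size={val_size}, test_size={test_size}")
```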
## environment.yml

```yaml
name: nf_evaluation
channels:
  - conda-forge
dependencies:
  - numpy<1.24
  - pip
  - pip:
      - "git+https://github.com/Nixtla/datasetsforecast.git"
      - "git+https://github.com/Nixtla/neuralforecast.git"
```
## models.py

```python
from neuralforecast.auto import *

from neuralforecast.losses.pytorch import HuberLoss, DistributionLoss


def get_model(model_name, horizon, num_samples):
    """Returns the model class given the model name.
    """
    model_dict = {
        'AutoLSTM': AutoLSTM(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoRNN': AutoRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoGRU': AutoGRU(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoDilatedRNN': AutoDilatedRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoDeepAR': AutoDeepAR(config=None, h=horizon,
                                 loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=False),
                                 num_samples=num_samples),
        'AutoTCN': AutoTCN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoMLP': AutoMLP(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoNBEATS': AutoNBEATS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoNHITS': AutoNHITS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoDLinear': AutoDLinear(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoTFT': AutoTFT(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoVanillaTransformer': AutoVanillaTransformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoInformer': AutoInformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoAutoformer': AutoAutoformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoFEDformer': AutoFEDformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoTimesNet': AutoTimesNet(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
        'AutoPatchTST': AutoPatchTST(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples)
    }

    return model_dict[model_name]
```

> **Review comments:**
> - (on `from neuralforecast.auto import *`) I will import just what is needed and not everything.
> - (on the docstring) nit: docstring on one line, i.e., `"""Returns the model class given the model name."""`
> - (on `return model_dict[model_name]`) You could just create a dictionary and import it, instead of creating a dictionary, putting it inside a method, and using the method.
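A sketch of that last suggestion, assuming the same constructor arguments as above; the `MODEL_FACTORIES` name is illustrative. Mapping names to classes rather than instances also means only the requested model is ever built:

```python
from neuralforecast.auto import AutoLSTM, AutoMLP, AutoNHITS  # extend with the remaining Auto* models
from neuralforecast.losses.pytorch import HuberLoss

# Names map to constructors, so nothing is instantiated at import time
MODEL_FACTORIES = {
    'AutoLSTM': AutoLSTM,
    'AutoMLP': AutoMLP,
    'AutoNHITS': AutoNHITS,
    # ...
}


def get_model(model_name, horizon, num_samples):
    """Instantiate only the requested model."""
    # AutoDeepAR would need its DistributionLoss handled as a special case
    return MODEL_FACTORIES[model_name](
        config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples
    )
```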
## run_experiments.py

```python
import os
import argparse
import time

import pandas as pd
from neuralforecast.core import NeuralForecast
from neuralforecast.losses.numpy import mae, mse, smape, mape

from models import get_model
from datasets import get_dataset

# For compatibility with Macs with an M chip
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


def main(args):
    # Load dataset and model
    Y_df, h, freq, val_size, test_size = get_dataset(args.dataset)
    model = get_model(model_name=args.model, horizon=h, num_samples=20)

    # Start time
    start_time = time.time()

    # Train model
    nf = NeuralForecast(models=[model], freq=freq)
    forecasts_df = nf.cross_validation(df=Y_df, val_size=val_size, test_size=test_size, n_windows=None, verbose=True)

    # Calculate elapsed time
    elapsed_time = time.time() - start_time

    # Evaluation
    model_mae = mae(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
    model_mse = mse(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
    model_smape = smape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
    model_mape = mape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])

    metrics = {
        'Model': [args.model],
        'MAE': [model_mae],
        'MSE': [model_mse],
        'sMAPE': [model_smape],
        'MAPE': [model_mape],
        'time': [elapsed_time]
    }

    # Save results
    results_path = f'./results/{args.dataset}'
    os.makedirs(results_path, exist_ok=True)

    metrics_df = pd.DataFrame(metrics)
    metrics_df.to_csv(f'{results_path}/{args.model}_metrics.csv', header=True, index=False)


def parse_args():
    parser = argparse.ArgumentParser(description="script arguments")
    parser.add_argument('--dataset', type=str, help='dataset to train models on')
    parser.add_argument('--model', type=str, help='name of the model')
    # parser.add_argument('--experiment_id', type=str, help='identify experiment')
    return parser.parse_args()


if __name__ == '__main__':
    # Parse arguments
    args = parse_args()

    # Run experiment
    main(args)
```

> **Review comments:**
> - (on `def main(args):`) I will call it
> - (on `'time': [elapsed_time]`) `'elapsed_time'` instead of `time`?
> - (on `metrics_df = pd.DataFrame(metrics)`) For every model you are creating a pandas DataFrame. What if you looped over every model and created one DataFrame with all the results? It would be much easier to compare different models.
> - (on `get_dataset` in `datasets.py`) The name of the method is not correct: you are returning not only the dataset but also other parameters.
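One way to address that comment (a sketch, not part of this PR; the `DatasetConfig` name is illustrative) is to return a named tuple under a more accurate function name, so callers unpack by field rather than by position:

```python
from typing import NamedTuple, Union

import pandas as pd


class DatasetConfig(NamedTuple):
    """A dataset together with its evaluation parameters."""
    Y_df: pd.DataFrame
    h: int                 # forecast horizon
    freq: Union[int, str]  # pandas frequency alias, or an integer step for M4
    val_size: int
    test_size: int


# Hypothetical usage with a renamed loader, e.g. load_dataset_config(name):
#   cfg = load_dataset_config('M4-yearly')
#   nf.cross_validation(df=cfg.Y_df, val_size=cfg.val_size, test_size=cfg.test_size)
```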