diff --git a/experiments/nf_evaluation/README.md b/experiments/nf_evaluation/README.md
new file mode 100644
index 000000000..52273a52a
--- /dev/null
+++ b/experiments/nf_evaluation/README.md
@@ -0,0 +1,56 @@
+# Comprehensive Evaluation of NeuralForecast Models
+
+In this experiment, we tested all available NeuralForecast models on benchmark datasets to evaluate their speed and forecasting performance.
+
+The datasets used for this benchmark are:
+- M3 (yearly, quarterly, monthly)
+- M4 (yearly, quarterly, monthly, daily, hourly)
+- Ettm1 & Ettm2 (15 min)
+- Etth1 & Etth2 (hourly)
+- Electricity (hourly)
+- Exchange (daily)
+- Weather (10 min)
+- Traffic (hourly)
+- ILI (weekly)
+
+Each model went through hyperparameter optimization. The tests were run locally on CPU.
+
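+Tuning uses each model's default Ray Tune search space (`config=None`), as set up in `models.py`. A minimal sketch of what this amounts to for a single model:
+
+```python
+from neuralforecast.auto import AutoMLP
+from neuralforecast.losses.pytorch import HuberLoss
+
+# config=None -> use the model's default hyperparameter search space;
+# num_samples controls how many configurations Ray Tune evaluates.
+model = AutoMLP(config=None, h=12, loss=HuberLoss(), num_samples=20)
+```
+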
+The table below summarizes the results.
+*The table will be updated as results are obtained.*
+
+## Reproducibility
+
+1. Create a conda environment `nf_evaluation` using the `environment.yml` file.
+ ```shell
+ conda env create -f environment.yml
+ ```
+
+2. Activate the conda environment using
+   ```shell
+   conda activate nf_evaluation
+   ```
+
+Alternatively, simply installing neuralforecast and datasetsforecast with pip may suffice:
+```shell
+pip install git+https://github.com/Nixtla/datasetsforecast.git
+pip install git+https://github.com/Nixtla/neuralforecast.git
+```
+
+3. Run the experiments for each dataset and each model using the
+- `--dataset` parameter in `[M3-yearly, M3-quarterly, M3-monthly, M4-yearly, M4-quarterly, M4-monthly, M4-daily, M4-hourly, Ettm1, Ettm2, Etth1, Etth2, Electricity, Exchange, Weather, Traffic, ILI]`
+- `--model` parameter in `['AutoLSTM', 'AutoRNN', 'AutoGRU', 'AutoDilatedRNN', 'AutoDeepAR', 'AutoTCN', 'AutoMLP', 'AutoNBEATS', 'AutoNHITS', 'AutoDLinear', 'AutoTFT', 'AutoVanillaTransformer', 'AutoInformer', 'AutoAutoformer', 'AutoFEDformer', 'AutoTimesNet', 'AutoPatchTST', 'AutoTSMixer', 'AutoiTransformer']`
+
+```shell
+python run_experiments.py --dataset M4-yearly --model AutoMLP
+```
+
+You can also run all experiments in a single command using
+
+```shell
+bash run.sh
+```
+
+4. The script creates a folder `results/<dataset>/` containing a CSV file `<model>_metrics.csv` with the evaluation metrics (MAE, MSE, sMAPE, MAPE) and the elapsed time for the specified model.
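+
+To compare models on a given dataset, the per-model CSVs can be concatenated into a single table. A minimal sketch (the `results/M4-yearly` path is just an example):
+
+```python
+import glob
+import pandas as pd
+
+# Each run writes results/<dataset>/<model>_metrics.csv with columns
+# Model, MAE, MSE, sMAPE, MAPE, time.
+files = glob.glob('results/M4-yearly/*_metrics.csv')
+summary = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
+print(summary.sort_values('MAE'))
+```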
\ No newline at end of file
diff --git a/experiments/nf_evaluation/datasets.py b/experiments/nf_evaluation/datasets.py
new file mode 100644
index 000000000..d95b09e41
--- /dev/null
+++ b/experiments/nf_evaluation/datasets.py
@@ -0,0 +1,133 @@
+import pandas as pd
+
+from datasetsforecast.m4 import M4
+from datasetsforecast.m3 import M3
+from datasetsforecast.long_horizon import LongHorizon
+
+
+def get_dataset(name):
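+    """Load a benchmark dataset with its evaluation settings.
+
+    Returns a tuple `(Y_df, h, freq, val_size, test_size)`: the series in long
+    format, the forecast horizon, the frequency, and the validation/test sizes
+    used for cross-validation.
+    """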
+
+ # Read data and parameters
+ if name == 'M3-yearly':
+ Y_df, *_ = M3.load(directory='./', group='Yearly')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'Y'
+ h = 6
+ val_size = 6
+ test_size = 6
+ elif name == 'M3-quarterly':
+ Y_df, *_ = M3.load(directory='./', group='Quarterly')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'Q'
+ h = 8
+ val_size = 8
+ test_size = 8
+ elif name == 'M3-monthly':
+ Y_df, *_ = M3.load(directory='./', group='Monthly')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'M'
+ h = 12
+ val_size = 12
+ test_size = 12
+ elif name == 'M4-yearly':
+ Y_df, *_ = M4.load(directory='./', group='Yearly')
+ Y_df['ds'] = Y_df['ds'].astype(int)
+ freq = 1
+ h = 6
+ val_size = 6
+ test_size = 6
+ elif name == 'M4-quarterly':
+ Y_df, *_ = M4.load(directory='./', group='Quarterly')
+ Y_df['ds'] = Y_df['ds'].astype(int)
+ freq = 4
+ h = 8
+ val_size = 8
+ test_size = 8
+ elif name == 'M4-monthly':
+ Y_df, *_ = M4.load(directory='./', group='Monthly')
+ Y_df['ds'] = Y_df['ds'].astype(int)
+ freq = 12
+ h = 18
+ val_size = 18
+ test_size = 18
+ elif name == 'M4-daily':
+ Y_df, *_ = M4.load(directory='./', group='Daily')
+ Y_df['ds'] = Y_df['ds'].astype(int)
+ freq = 365
+ h = 14
+ val_size = 14
+ test_size = 14
+ elif name == 'M4-hourly':
+ Y_df, *_ = M4.load(directory='./', group='Hourly')
+ Y_df['ds'] = Y_df['ds'].astype(int)
+ freq = 24
+ h = 48
+ val_size = 48
+ test_size = 48
+ elif name == 'Ettm2':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ETTm2')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = '15T'
+ h = 720
+ val_size = 11520
+ test_size = 11520
+ elif name == 'Ettm1':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ETTm1')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = '15T'
+ h = 720
+ val_size = 11520
+ test_size = 11520
+ elif name == 'Etth1':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ETTh1')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'H'
+ h = 720
+ val_size = 2880
+ test_size = 2880
+ elif name == 'Etth2':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ETTh2')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'H'
+ h = 720
+ val_size = 2880
+ test_size = 2880
+ elif name == 'Electricity':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ECL')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'H'
+ h = 720
+ val_size = 2632
+ test_size = 5260
+ elif name == 'Exchange':
+ Y_df, *_ = LongHorizon.load(directory='./', group='Exchange')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'D'
+ h = 720
+ val_size = 760
+ test_size = 1517
+ elif name == 'Weather':
+ Y_df, *_ = LongHorizon.load(directory='./', group='Weather')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = '10T'
+ h = 720
+ val_size = 5270
+ test_size = 10539
+ elif name == 'Traffic':
+ Y_df, *_ = LongHorizon.load(directory='./', group='TrafficL')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'H'
+ h = 720
+ val_size = 1756
+ test_size = 3508
+ elif name == 'ILI':
+ Y_df, *_ = LongHorizon.load(directory='./', group='ILI')
+ Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+ freq = 'W'
+ h = 60
+ val_size = 97
+ test_size = 193
+ else:
+        raise ValueError(f"Unknown dataset: {name}")
+
+ return Y_df, h, freq, val_size, test_size
\ No newline at end of file
diff --git a/experiments/nf_evaluation/environment.yml b/experiments/nf_evaluation/environment.yml
new file mode 100644
index 000000000..744060d0f
--- /dev/null
+++ b/experiments/nf_evaluation/environment.yml
@@ -0,0 +1,9 @@
+name: nf_evaluation
+channels:
+ - conda-forge
+dependencies:
+ - numpy<1.24
+ - pip
+ - pip:
+ - "git+https://github.com/Nixtla/datasetsforecast.git"
+ - "git+https://github.com/Nixtla/neuralforecast.git"
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_1.sh b/experiments/nf_evaluation/gpu_1.sh
new file mode 100755
index 000000000..59cbf1ecf
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_1.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
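+# Batch 1: M3 groups and M4-yearly, pinned to GPU 0.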
+
+datasets=("M3-yearly" "M3-quarterly" "M3-monthly" "M4-yearly")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST", "AutoTSMixer", "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+ for model in "${models[@]}"; do
+ CUDA_VISIBLE_DEVICES=0 python run_experiments.py --dataset "$dataset" --model "$model"
+ done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_2.sh b/experiments/nf_evaluation/gpu_2.sh
new file mode 100755
index 000000000..301914c31
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_2.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
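+# Batch 2: remaining M4 groups, pinned to GPU 1.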
+
+datasets=("M4-quarterly" "M4-monthly" "M4-daily" "M4-hourly")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer", "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+ for model in "${models[@]}"; do
+ CUDA_VISIBLE_DEVICES=1 python run_experiments.py --dataset "$dataset" --model "$model"
+ done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_3.sh b/experiments/nf_evaluation/gpu_3.sh
new file mode 100755
index 000000000..358dc38f1
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_3.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
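+# Batch 3: ETT datasets, pinned to GPU 2.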
+
+datasets=("Ettm2" "Ettm1" "Etth1" "Etth2")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer", "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+ for model in "${models[@]}"; do
+ CUDA_VISIBLE_DEVICES=2 python run_experiments.py --dataset "$dataset" --model "$model"
+ done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_4.sh b/experiments/nf_evaluation/gpu_4.sh
new file mode 100755
index 000000000..562cc8a3c
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_4.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
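+# Batch 4: long-horizon datasets, pinned to GPU 3.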
+
+datasets=("Electricity" "Exchange" "Weather" "Traffic" "ILI")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer", "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+ for model in "${models[@]}"; do
+ CUDA_VISIBLE_DEVICES=3 python run_experiments.py --dataset "$dataset" --model "$model"
+ done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/models.py b/experiments/nf_evaluation/models.py
new file mode 100644
index 000000000..bd6e228c7
--- /dev/null
+++ b/experiments/nf_evaluation/models.py
@@ -0,0 +1,32 @@
+from neuralforecast.auto import *
+from neuralforecast.losses.pytorch import HuberLoss, DistributionLoss
+
+
+def get_model(model_name, horizon, num_samples):
+    """Returns an instance of the requested Auto model, configured with its
+    default hyperparameter search space (`config=None`).
+    """
+    model_dict = {
+        'AutoLSTM': lambda: AutoLSTM(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoRNN': lambda: AutoRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoGRU': lambda: AutoGRU(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoDilatedRNN': lambda: AutoDilatedRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoDeepAR': lambda: AutoDeepAR(config=None, h=horizon,
+                                         loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=False),
+                                         num_samples=num_samples),
+        'AutoTCN': lambda: AutoTCN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoMLP': lambda: AutoMLP(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoNBEATS': lambda: AutoNBEATS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoNHITS': lambda: AutoNHITS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoDLinear': lambda: AutoDLinear(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoTFT': lambda: AutoTFT(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoVanillaTransformer': lambda: AutoVanillaTransformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoInformer': lambda: AutoInformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoAutoformer': lambda: AutoAutoformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoFEDformer': lambda: AutoFEDformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoTimesNet': lambda: AutoTimesNet(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoPatchTST': lambda: AutoPatchTST(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples),
+        'AutoTSMixer': lambda: AutoTSMixer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples, n_series=1),
+        'AutoiTransformer': lambda: AutoiTransformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples, n_series=1),
+    }
+
+    if model_name not in model_dict:
+        raise ValueError(f"Unknown model: {model_name}")
+
+    # Instantiate lazily so only the requested model (and its search space)
+    # is built.
+    return model_dict[model_name]()
\ No newline at end of file
diff --git a/experiments/nf_evaluation/run.sh b/experiments/nf_evaluation/run.sh
new file mode 100644
index 000000000..1e75bd3ae
--- /dev/null
+++ b/experiments/nf_evaluation/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
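+# Runs every dataset/model combination sequentially on the default device.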
+
+datasets=("M3-yearly" "M3-quarterly" "M3-monthly" "M4-yearly" "M4-quarterly" "M4-monthly" "M4-daily" "M4-hourly" "Ettm2" "Ettm1" "Etth1" "Etth2" "Electricity" "Exchange" "Weather" "Traffic" "ILI")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST")
+
+for dataset in "${datasets[@]}"; do
+ for model in "${models[@]}"; do
+ python run_experiments.py --dataset "$dataset" --model "$model"
+ done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/run_experiments.py b/experiments/nf_evaluation/run_experiments.py
new file mode 100644
index 000000000..9bdfc7af9
--- /dev/null
+++ b/experiments/nf_evaluation/run_experiments.py
@@ -0,0 +1,68 @@
+import os
+import argparse
+import time
+
+import pandas as pd
+from neuralforecast.core import NeuralForecast
+from neuralforecast.losses.numpy import mae, mse, smape, mape
+
+from models import get_model
+from datasets import get_dataset
+
+# For compatibility with Macs with Apple Silicon (M-series) chips
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+# For memory allocation
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+def main(args):
+
+ # Load dataset and model
+ Y_df, h, freq, val_size, test_size = get_dataset(args.dataset)
+ model = get_model(model_name=args.model, horizon=h, num_samples=20)
+
+ # Start time
+ start_time = time.time()
+
+ # Train model
+ nf = NeuralForecast(models=[model], freq=freq)
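+    # With n_windows=None, the evaluation span is determined by test_size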
+ forecasts_df = nf.cross_validation(df=Y_df, val_size=val_size, test_size=test_size, n_windows=None, verbose=True)
+
+ # Calculate elapsed time
+ elapsed_time = time.time() - start_time
+
+ # Evaluation
+ model_mae = mae(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+ model_mse = mse(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+ model_smape = smape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+ model_mape = mape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+
+ metrics = {
+ 'Model': [args.model],
+ 'MAE': [model_mae],
+ 'MSE': [model_mse],
+ 'sMAPE': [model_smape],
+ 'MAPE': [model_mape],
+ 'time': [elapsed_time]
+ }
+
+ # Save results
+ results_path = f'./results/{args.dataset}'
+ os.makedirs(results_path, exist_ok=True)
+
+ metrics_df = pd.DataFrame(metrics)
+ metrics_df.to_csv(f'{results_path}/{args.model}_metrics.csv', header=True, index=False)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="script arguments")
+    parser.add_argument('--dataset', type=str, required=True, help='dataset to train models on')
+    parser.add_argument('--model', type=str, required=True, help='name of the model')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+ # parse arguments
+ args = parse_args()
+
+ # Run experiment
+ main(args)
\ No newline at end of file
diff --git a/nbs/models.ipynb b/nbs/models.ipynb
index 82331c3b2..6895a8434 100644
--- a/nbs/models.ipynb
+++ b/nbs/models.ipynb
@@ -226,7 +226,7 @@
" \"context_size\": tune.choice([5, 10, 50]),\n",
" \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20)\n",
@@ -369,7 +369,7 @@
" \"context_size\": tune.choice([5, 10, 50]),\n",
" \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20)\n",
@@ -508,7 +508,7 @@
" \"context_size\": tune.choice([5, 10, 50]),\n",
" \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20)\n",
@@ -646,7 +646,7 @@
" \"context_size\": tune.choice([5, 10, 50]),\n",
" \"decoder_hidden_size\": tune.choice([64, 128]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20)\n",
@@ -784,7 +784,7 @@
" \"lstm_dropout\": tune.uniform(0.0, 0.5),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice(['robust', 'minmax1']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -924,7 +924,7 @@
" \"context_size\": tune.choice([5, 10, 50]),\n",
" \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20)\n",
@@ -1070,7 +1070,7 @@
" \"num_layers\": tune.randint(2, 6),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -1205,7 +1205,7 @@
" \"h\": None,\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -1341,7 +1341,7 @@
" \"h\": None,\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -1482,7 +1482,7 @@
" [40, 20, 1], [1, 1, 1]]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.quniform(lower=500, upper=1500, q=100),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -1620,7 +1620,7 @@
" \"moving_avg_window\": tune.choice([11, 25, 51]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.quniform(lower=500, upper=1500, q=100),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -1901,7 +1901,7 @@
" \"n_head\": tune.choice([4, 8]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -2038,7 +2038,7 @@
" \"n_head\": tune.choice([4, 8]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -2175,7 +2175,7 @@
" \"n_head\": tune.choice([4, 8]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -2312,7 +2312,7 @@
" \"n_head\": tune.choice([4, 8]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -2448,7 +2448,7 @@
" \"hidden_size\": tune.choice([64, 128, 256]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -2587,7 +2587,7 @@
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
" \"revin\": tune.choice([False, True]),\n",
- " \"max_steps\": tune.choice([500, 1000, 5000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
@@ -3031,7 +3031,7 @@
" \"multi_layer\": tune.choice([3, 5, 7]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
- " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20),\n",
diff --git a/neuralforecast/auto.py b/neuralforecast/auto.py
index 20932619a..a64676dac 100644
--- a/neuralforecast/auto.py
+++ b/neuralforecast/auto.py
@@ -59,7 +59,7 @@ class AutoRNN(BaseAuto):
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
@@ -134,7 +134,7 @@ class AutoLSTM(BaseAuto):
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
@@ -205,7 +205,7 @@ class AutoGRU(BaseAuto):
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
@@ -275,7 +275,7 @@ class AutoTCN(BaseAuto):
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
@@ -345,7 +345,7 @@ class AutoDeepAR(BaseAuto):
"lstm_dropout": tune.uniform(0.0, 0.5),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice(["robust", "minmax1"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -418,7 +418,7 @@ class AutoDilatedRNN(BaseAuto):
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
@@ -487,7 +487,7 @@ class AutoMLP(BaseAuto):
"num_layers": tune.randint(2, 6),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -553,7 +553,7 @@ class AutoNBEATS(BaseAuto):
"h": None,
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -619,7 +619,7 @@ class AutoNBEATSx(BaseAuto):
"h": None,
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -698,7 +698,7 @@ class AutoNHITS(BaseAuto):
),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.quniform(lower=500, upper=1500, q=100),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -765,7 +765,7 @@ class AutoDLinear(BaseAuto):
"moving_avg_window": tune.choice([11, 25, 51]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.quniform(lower=500, upper=1500, q=100),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -899,7 +899,7 @@ class AutoTFT(BaseAuto):
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -967,7 +967,7 @@ class AutoVanillaTransformer(BaseAuto):
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -1035,7 +1035,7 @@ class AutoInformer(BaseAuto):
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -1103,7 +1103,7 @@ class AutoAutoformer(BaseAuto):
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -1170,7 +1170,7 @@ class AutoFEDformer(BaseAuto):
"hidden_size": tune.choice([64, 128, 256]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -1240,7 +1240,7 @@ class AutoPatchTST(BaseAuto):
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"revin": tune.choice([False, True]),
- "max_steps": tune.choice([500, 1000, 5000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
@@ -1462,7 +1462,7 @@ class AutoStemGNN(BaseAuto):
"multi_layer": tune.choice([3, 5, 7]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
- "max_steps": tune.choice([500, 1000, 2000]),
+ "max_steps": tune.quniform(lower=500, upper=5000, q=500),
"batch_size": tune.choice([32, 64, 128, 256]),
"loss": None,
"random_seed": tune.randint(1, 20),