diff --git a/experiments/nf_evaluation/README.md b/experiments/nf_evaluation/README.md
new file mode 100644
index 000000000..52273a52a
--- /dev/null
+++ b/experiments/nf_evaluation/README.md
@@ -0,0 +1,56 @@
+# Comprehensive Evaluation of NeuralForecast Models
+
+In this experiment, we tested all available models in NeuralForecast on benchmark datasets to evaluate their speed and forecasting performance.
+
+The datasets used for this benchmark are:
+- M4 (yearly)
+- M4 (quarterly)
+- M4 (monthly)
+- M4 (daily)
+- Ettm2 (15 min)
+- Electricity (hourly)
+- Weather (10 min)
+- Traffic (hourly)
+- ILI (weekly)
+
+Each model went through hyperparameter optimization, as sketched below. The test was completed locally on CPU.
+
+The table below summarizes the results.
+*The table will be updated as results are obtained.*
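For concreteness, here is a minimal sketch of what one evaluation run does: each `Auto*` model tunes its own hyperparameters internally, then `cross_validation` produces the held-out forecasts the metrics are computed on. The dataset, horizon, and `num_samples` below are illustrative placeholders, not the benchmark settings.

```python
# Minimal sketch of a single run (illustrative, not the exact script).
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoMLP
from neuralforecast.losses.pytorch import HuberLoss

# Placeholder dataset: the air-passengers series used in the Nixtla tutorials.
Y_df = pd.read_csv(
    'https://datasets-nixtla.s3.amazonaws.com/air-passengers.csv',
    parse_dates=['ds'],
)

model = AutoMLP(h=12, loss=HuberLoss(), num_samples=5)  # 5 tuning trials, for speed
nf = NeuralForecast(models=[model], freq='M')
cv_df = nf.cross_validation(df=Y_df, val_size=12, test_size=12, n_windows=None)
print(cv_df[['ds', 'y', 'AutoMLP']].head())
```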
+
+## Reproducibility
+
+1. Create a conda environment `nf_evaluation` using the `environment.yml` file:
+   ```shell
+   conda env create -f environment.yml
+   ```
+
+2. Activate the conda environment:
+   ```shell
+   conda activate nf_evaluation
+   ```
+
+Alternatively, installing `neuralforecast` and `datasetsforecast` directly with pip may suffice:
+```shell
+pip install git+https://github.com/Nixtla/datasetsforecast.git
+pip install git+https://github.com/Nixtla/neuralforecast.git
+```
+
+3. Run the experiments for each dataset and each model using the
+- `--dataset` parameter in `[M4-yearly, M4-quarterly, M4-monthly, M4-daily, Ettm2, Electricity, Weather, Traffic, ILI]`
+- `--model` parameter in `['AutoLSTM', 'AutoRNN', 'AutoGRU', 'AutoDilatedRNN', 'AutoDeepAR', 'AutoTCN', 'AutoMLP', 'AutoNBEATS', 'AutoNHITS', 'AutoDLinear', 'AutoTFT', 'AutoVanillaTransformer', 'AutoInformer', 'AutoAutoformer', 'AutoFEDformer', 'AutoTimesNet', 'AutoPatchTST']`
+
+```shell
+python run_experiments.py --dataset M4-yearly --model AutoMLP
+```
+
+You can also run all experiments in a single command using:
+```shell
+bash run.sh
+```
+
+4. The script creates a folder `results/` which contains a CSV file with the metrics for the specified model and dataset (`results/<dataset>/<model>_metrics.csv`).
\ No newline at end of file
diff --git a/experiments/nf_evaluation/datasets.py b/experiments/nf_evaluation/datasets.py
new file mode 100644
index 000000000..d95b09e41
--- /dev/null
+++ b/experiments/nf_evaluation/datasets.py
@@ -0,0 +1,133 @@
+import pandas as pd
+
+from datasetsforecast.m4 import M4
+from datasetsforecast.m3 import M3
+from datasetsforecast.long_horizon import LongHorizon
+
+
+def get_dataset(name):
+
+    # Read data and parameters
+    if name == 'M3-yearly':
+        Y_df, *_ = M3.load(directory='./', group='Yearly')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'Y'
+        h = 6
+        val_size = 6
+        test_size = 6
+    elif name == 'M3-quarterly':
+        Y_df, *_ = M3.load(directory='./', group='Quarterly')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'Q'
+        h = 8
+        val_size = 8
+        test_size = 8
+    elif name == 'M3-monthly':
+        Y_df, *_ = M3.load(directory='./', group='Monthly')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'M'
+        h = 12
+        val_size = 12
+        test_size = 12
+    elif name == 'M4-yearly':
+        Y_df, *_ = M4.load(directory='./', group='Yearly')
+        Y_df['ds'] = Y_df['ds'].astype(int)
+        freq = 1
+        h = 6
+        val_size = 6
+        test_size = 6
+    elif name == 'M4-quarterly':
+        Y_df, *_ = M4.load(directory='./', group='Quarterly')
+        Y_df['ds'] = Y_df['ds'].astype(int)
+        freq = 4
+        h = 8
+        val_size = 8
+        test_size = 8
+    elif name == 'M4-monthly':
+        Y_df, *_ = M4.load(directory='./', group='Monthly')
+        Y_df['ds'] = Y_df['ds'].astype(int)
+        freq = 12
+        h = 18
+        val_size = 18
+        test_size = 18
+    elif name == 'M4-daily':
+        Y_df, *_ = M4.load(directory='./', group='Daily')
+        Y_df['ds'] = Y_df['ds'].astype(int)
+        freq = 365
+        h = 14
+        val_size = 14
+        test_size = 14
+    elif name == 'M4-hourly':
+        Y_df, *_ = M4.load(directory='./', group='Hourly')
+        Y_df['ds'] = Y_df['ds'].astype(int)
+        freq = 24
+        h = 48
+        val_size = 48
+        test_size = 48
+    elif name == 'Ettm2':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ETTm2')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = '15T'
+        h = 720
+        val_size = 11520
+        test_size = 11520
+    elif name == 'Ettm1':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ETTm1')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = '15T'
+        h = 720
+        val_size = 11520
+        test_size = 11520
+    elif name == 'Etth1':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ETTh1')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'H'
+        h = 720
+        val_size = 2880
+        test_size = 2880
+    elif name == 'Etth2':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ETTh2')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'H'
+        h = 720
+        val_size = 2880
+        test_size = 2880
+    elif name == 'Electricity':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ECL')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'H'
+        h = 720
+        val_size = 2632
+        test_size = 5260
+    elif name == 'Exchange':
+        Y_df, *_ = LongHorizon.load(directory='./', group='Exchange')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'D'
+        h = 720
+        val_size = 760
+        test_size = 1517
+    elif name == 'Weather':
+        Y_df, *_ = LongHorizon.load(directory='./', group='Weather')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = '10T'
+        h = 720
+        val_size = 5270
+        test_size = 10539
+    elif name == 'Traffic':
+        Y_df, *_ = LongHorizon.load(directory='./', group='TrafficL')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'H'
+        h = 720
+        val_size = 1756
+        test_size = 3508
+    elif name == 'ILI':
+        Y_df, *_ = LongHorizon.load(directory='./', group='ILI')
+        Y_df['ds'] = pd.to_datetime(Y_df['ds'])
+        freq = 'W'
+        h = 60
+        val_size = 97
+        test_size = 193
+    else:
+        raise ValueError(f"Unknown dataset: {name}")
+
+    return Y_df, h, freq, val_size, test_size
\ No newline at end of file
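The `if`/`elif` chain in `datasets.py` is essentially a lookup table. A hypothetical table-driven equivalent for a few of the M4 groups (same behavior for those entries, not part of the patch) could look like this:

```python
# Hypothetical table-driven variant of get_dataset for a few M4 groups;
# each entry is (loader, group, freq, horizon, val_size, test_size).
from datasetsforecast.m4 import M4

_SPECS = {
    'M4-yearly':    (M4, 'Yearly',     1,  6,  6,  6),
    'M4-quarterly': (M4, 'Quarterly',  4,  8,  8,  8),
    'M4-monthly':   (M4, 'Monthly',   12, 18, 18, 18),
}

def get_dataset_v2(name):
    if name not in _SPECS:
        raise ValueError(f'Unknown dataset: {name}')
    loader, group, freq, h, val_size, test_size = _SPECS[name]
    Y_df, *_ = loader.load(directory='./', group=group)
    Y_df['ds'] = Y_df['ds'].astype(int)  # M4 uses integer time indices
    return Y_df, h, freq, val_size, test_size
```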
diff --git a/experiments/nf_evaluation/environment.yml b/experiments/nf_evaluation/environment.yml
new file mode 100644
index 000000000..744060d0f
--- /dev/null
+++ b/experiments/nf_evaluation/environment.yml
@@ -0,0 +1,9 @@
+name: nf_evaluation
+channels:
+  - conda-forge
+dependencies:
+  - numpy<1.24
+  - pip
+  - pip:
+      - "git+https://github.com/Nixtla/datasetsforecast.git"
+      - "git+https://github.com/Nixtla/neuralforecast.git"
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_1.sh b/experiments/nf_evaluation/gpu_1.sh
new file mode 100755
index 000000000..59cbf1ecf
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_1.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+datasets=("M3-yearly" "M3-quarterly" "M3-monthly" "M4-yearly")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer" "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+    for model in "${models[@]}"; do
+        CUDA_VISIBLE_DEVICES=0 python run_experiments.py --dataset "$dataset" --model "$model"
+    done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_2.sh b/experiments/nf_evaluation/gpu_2.sh
new file mode 100755
index 000000000..301914c31
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_2.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+datasets=("M4-quarterly" "M4-monthly" "M4-daily" "M4-hourly")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer" "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+    for model in "${models[@]}"; do
+        CUDA_VISIBLE_DEVICES=1 python run_experiments.py --dataset "$dataset" --model "$model"
+    done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_3.sh b/experiments/nf_evaluation/gpu_3.sh
new file mode 100755
index 000000000..358dc38f1
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_3.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+datasets=("Ettm2" "Ettm1" "Etth1" "Etth2")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer" "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+    for model in "${models[@]}"; do
+        CUDA_VISIBLE_DEVICES=2 python run_experiments.py --dataset "$dataset" --model "$model"
+    done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/gpu_4.sh b/experiments/nf_evaluation/gpu_4.sh
new file mode 100755
index 000000000..562cc8a3c
--- /dev/null
+++ b/experiments/nf_evaluation/gpu_4.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+datasets=("Electricity" "Exchange" "Weather" "Traffic" "ILI")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST" "AutoTSMixer" "AutoiTransformer")
+
+for dataset in "${datasets[@]}"; do
+    for model in "${models[@]}"; do
+        CUDA_VISIBLE_DEVICES=3 python run_experiments.py --dataset "$dataset" --model "$model"
+    done
+done
\ No newline at end of file
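Note the stray commas inside the original `models=(...)` arrays have been removed: bash arrays are whitespace-separated, so `"AutoPatchTST",` would have produced the literal element `AutoPatchTST,` and the corresponding runs would have failed. The four `gpu_*.sh` scripts otherwise differ only in their dataset shard and `CUDA_VISIBLE_DEVICES` value; a hypothetical Python driver expressing the same sharding (model list abbreviated for brevity) might look like this:

```python
# Hypothetical driver replacing gpu_1.sh..gpu_4.sh: shard datasets across GPUs.
import os
import subprocess

SHARDS = {
    0: ["M3-yearly", "M3-quarterly", "M3-monthly", "M4-yearly"],
    1: ["M4-quarterly", "M4-monthly", "M4-daily", "M4-hourly"],
    2: ["Ettm2", "Ettm1", "Etth1", "Etth2"],
    3: ["Electricity", "Exchange", "Weather", "Traffic", "ILI"],
}
MODELS = ["AutoMLP", "AutoNHITS"]  # abbreviated; the scripts list all Auto* models

def run_shard(gpu_id: int) -> None:
    # Pin each shard to one GPU via CUDA_VISIBLE_DEVICES, as the scripts do.
    env = {**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)}
    for dataset in SHARDS[gpu_id]:
        for model in MODELS:
            subprocess.run(
                ["python", "run_experiments.py", "--dataset", dataset, "--model", model],
                env=env, check=True,
            )

if __name__ == "__main__":
    run_shard(0)  # launch one process per GPU, each with its own id
```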
"AutoiTransformer") + +for dataset in "${datasets[@]}"; do + for model in "${models[@]}"; do + CUDA_VISIBLE_DEVICES=3 python run_experiments.py --dataset "$dataset" --model "$model" + done +done \ No newline at end of file diff --git a/experiments/nf_evaluation/models.py b/experiments/nf_evaluation/models.py new file mode 100644 index 000000000..bd6e228c7 --- /dev/null +++ b/experiments/nf_evaluation/models.py @@ -0,0 +1,32 @@ +from neuralforecast.auto import * +from neuralforecast.losses.pytorch import HuberLoss, DistributionLoss + + +def get_model(model_name, horizon, num_samples): + """Returns the model class given the model name. + """ + model_dict = { + 'AutoLSTM': AutoLSTM(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoRNN': AutoRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoGRU': AutoGRU(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoDilatedRNN': AutoDilatedRNN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoDeepAR': AutoDeepAR(config=None, h=horizon, + loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=False), + num_samples=num_samples), + 'AutoTCN': AutoTCN(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoMLP': AutoMLP(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoNBEATS': AutoNBEATS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoNHITS': AutoNHITS(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoDLinear': AutoDLinear(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoTFT': AutoTFT(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoVanillaTransformer': AutoVanillaTransformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoInformer': AutoInformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoAutoformer': AutoAutoformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoFEDformer': AutoFEDformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoTimesNet': AutoTimesNet(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoPatchTST': AutoPatchTST(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples), + 'AutoTSMixer': AutoTSMixer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples, n_series=1), + 'AutoiTransformer': AutoiTransformer(config=None, h=horizon, loss=HuberLoss(), num_samples=num_samples, n_series=1) + } + + return model_dict[model_name] \ No newline at end of file diff --git a/experiments/nf_evaluation/run.sh b/experiments/nf_evaluation/run.sh new file mode 100644 index 000000000..1e75bd3ae --- /dev/null +++ b/experiments/nf_evaluation/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +datasets=("M3-yearly" "M3-quarterly" "M3-monthly" "M4-yearly" "M4-quarterly" "M4-monthly" "M4-daily" "M4-hourly" "Ettm2" "Ettm1" "Etth1" "Etth2" "Electricity" "Exchange" "Weather" "Traffic" "ILI") + +models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST") + +for dataset in "${datasets[@]}"; do + for model in "${models[@]}"; do + python run_experiments.py --dataset "$dataset" --model "$model" + done +done \ No newline at end of file diff --git 
diff --git a/experiments/nf_evaluation/run.sh b/experiments/nf_evaluation/run.sh
new file mode 100644
index 000000000..1e75bd3ae
--- /dev/null
+++ b/experiments/nf_evaluation/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+datasets=("M3-yearly" "M3-quarterly" "M3-monthly" "M4-yearly" "M4-quarterly" "M4-monthly" "M4-daily" "M4-hourly" "Ettm2" "Ettm1" "Etth1" "Etth2" "Electricity" "Exchange" "Weather" "Traffic" "ILI")
+
+models=("AutoLSTM" "AutoRNN" "AutoGRU" "AutoDilatedRNN" "AutoDeepAR" "AutoTCN" "AutoMLP" "AutoNBEATS" "AutoNHITS" "AutoDLinear" "AutoTFT" "AutoVanillaTransformer" "AutoInformer" "AutoAutoformer" "AutoFEDformer" "AutoTimesNet" "AutoPatchTST")
+
+for dataset in "${datasets[@]}"; do
+    for model in "${models[@]}"; do
+        python run_experiments.py --dataset "$dataset" --model "$model"
+    done
+done
\ No newline at end of file
diff --git a/experiments/nf_evaluation/run_experiments.py b/experiments/nf_evaluation/run_experiments.py
new file mode 100644
index 000000000..9bdfc7af9
--- /dev/null
+++ b/experiments/nf_evaluation/run_experiments.py
@@ -0,0 +1,68 @@
+import os
+import argparse
+import time
+
+import pandas as pd
+from neuralforecast.core import NeuralForecast
+from neuralforecast.losses.numpy import mae, mse, smape, mape
+
+from models import get_model
+from datasets import get_dataset
+
+# For compatibility with Macs with Apple silicon
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+# For memory allocation
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+def main(args):
+
+    # Load dataset and model
+    Y_df, h, freq, val_size, test_size = get_dataset(args.dataset)
+    model = get_model(model_name=args.model, horizon=h, num_samples=20)
+
+    # Start time
+    start_time = time.time()
+
+    # Train model
+    nf = NeuralForecast(models=[model], freq=freq)
+    forecasts_df = nf.cross_validation(df=Y_df, val_size=val_size, test_size=test_size, n_windows=None, verbose=True)
+
+    # Calculate elapsed time
+    elapsed_time = time.time() - start_time
+
+    # Evaluation
+    model_mae = mae(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+    model_mse = mse(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+    model_smape = smape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+    model_mape = mape(y=forecasts_df['y'], y_hat=forecasts_df[f'{args.model}'])
+
+    metrics = {
+        'Model': [args.model],
+        'MAE': [model_mae],
+        'MSE': [model_mse],
+        'sMAPE': [model_smape],
+        'MAPE': [model_mape],
+        'time': [elapsed_time]
+    }
+
+    # Save results
+    results_path = f'./results/{args.dataset}'
+    os.makedirs(results_path, exist_ok=True)
+
+    metrics_df = pd.DataFrame(metrics)
+    metrics_df.to_csv(f'{results_path}/{args.model}_metrics.csv', header=True, index=False)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="script arguments")
+    parser.add_argument('--dataset', type=str, help='dataset to train models on')
+    parser.add_argument('--model', type=str, help='name of the model')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    # parse arguments
+    args = parse_args()
+
+    # Run experiment
+    main(args)
\ No newline at end of file
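Since each run writes one `results/<dataset>/<model>_metrics.csv`, assembling the summary table promised in the README is a small post-processing step. A hypothetical collector (assuming the layout above):

```python
# Hypothetical helper: gather the per-run CSVs written by run_experiments.py
# into a single summary table.
import glob
import os

import pandas as pd

frames = []
for path in glob.glob('./results/*/*_metrics.csv'):
    dataset = os.path.basename(os.path.dirname(path))  # results/<dataset>/...
    df = pd.read_csv(path)
    df.insert(0, 'Dataset', dataset)
    frames.append(df)

summary = pd.concat(frames, ignore_index=True)
print(summary.sort_values(['Dataset', 'MAE']).to_string(index=False))
```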
diff --git a/nbs/models.ipynb b/nbs/models.ipynb
index 82331c3b2..6895a8434 100644
--- a/nbs/models.ipynb
+++ b/nbs/models.ipynb
@@ -226,7 +226,7 @@
     "        \"context_size\": tune.choice([5, 10, 50]),\n",
     "        \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([16, 32]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20)\n",
@@ -369,7 +369,7 @@
     "        \"context_size\": tune.choice([5, 10, 50]),\n",
     "        \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([16, 32]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20)\n",
@@ -508,7 +508,7 @@
     "        \"context_size\": tune.choice([5, 10, 50]),\n",
     "        \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([16, 32]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20)\n",
@@ -646,7 +646,7 @@
     "        \"context_size\": tune.choice([5, 10, 50]),\n",
     "        \"decoder_hidden_size\": tune.choice([64, 128]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([16, 32]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20)\n",
@@ -784,7 +784,7 @@
     "        \"lstm_dropout\": tune.uniform(0.0, 0.5),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice(['robust', 'minmax1']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -924,7 +924,7 @@
     "        \"context_size\": tune.choice([5, 10, 50]),\n",
     "        \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([16, 32]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20)\n",
@@ -1070,7 +1070,7 @@
     "        \"num_layers\": tune.randint(2, 6),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -1205,7 +1205,7 @@
     "        \"h\": None,\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -1341,7 +1341,7 @@
     "        \"h\": None,\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -1482,7 +1482,7 @@
     "                                          [40, 20, 1], [1, 1, 1]]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.quniform(lower=500, upper=1500, q=100),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -1620,7 +1620,7 @@
     "        \"moving_avg_window\": tune.choice([11, 25, 51]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.quniform(lower=500, upper=1500, q=100),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -1901,7 +1901,7 @@
     "        \"n_head\": tune.choice([4, 8]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -2038,7 +2038,7 @@
     "        \"n_head\": tune.choice([4, 8]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -2175,7 +2175,7 @@
     "        \"n_head\": tune.choice([4, 8]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -2312,7 +2312,7 @@
     "        \"n_head\": tune.choice([4, 8]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -2448,7 +2448,7 @@
     "        \"hidden_size\": tune.choice([64, 128, 256]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -2587,7 +2587,7 @@
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
     "        \"revin\": tune.choice([False, True]),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 5000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
     "        \"loss\": None,\n",
@@ -3031,7 +3031,7 @@
     "        \"multi_layer\": tune.choice([3, 5, 7]),\n",
     "        \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
     "        \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
-    "        \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+    "        \"max_steps\": tune.quniform(lower=500, upper=5000, q=500),\n",
     "        \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
     "        \"loss\": None,\n",
     "        \"random_seed\": tune.randint(1, 20),\n",
diff --git a/neuralforecast/auto.py b/neuralforecast/auto.py
index 20932619a..a64676dac 100644
--- a/neuralforecast/auto.py
+++ b/neuralforecast/auto.py
@@ -59,7 +59,7 @@ class AutoRNN(BaseAuto):
         "context_size": tune.choice([5, 10, 50]),
         "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([16, 32]),
         "loss": None,
         "random_seed": tune.randint(1, 20),
@@ -134,7 +134,7 @@ class AutoLSTM(BaseAuto):
         "context_size": tune.choice([5, 10, 50]),
         "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([16, 32]),
         "loss": None,
         "random_seed": tune.randint(1, 20),
@@ -205,7 +205,7 @@ class AutoGRU(BaseAuto):
         "context_size": tune.choice([5, 10, 50]),
         "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([16, 32]),
         "loss": None,
         "random_seed": tune.randint(1, 20),
@@ -275,7 +275,7 @@ class AutoTCN(BaseAuto):
         "context_size": tune.choice([5, 10, 50]),
         "decoder_hidden_size": tune.choice([64, 128]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([16, 32]),
         "loss": None,
         "random_seed": tune.randint(1, 20),
@@ -345,7 +345,7 @@ class AutoDeepAR(BaseAuto):
         "lstm_dropout": tune.uniform(0.0, 0.5),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice(["robust", "minmax1"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -418,7 +418,7 @@ class AutoDilatedRNN(BaseAuto):
         "context_size": tune.choice([5, 10, 50]),
         "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([16, 32]),
         "loss": None,
         "random_seed": tune.randint(1, 20),
@@ -487,7 +487,7 @@ class AutoMLP(BaseAuto):
         "num_layers": tune.randint(2, 6),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -553,7 +553,7 @@ class AutoNBEATS(BaseAuto):
         "h": None,
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -619,7 +619,7 @@ class AutoNBEATSx(BaseAuto):
         "h": None,
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -698,7 +698,7 @@ class AutoNHITS(BaseAuto):
         ),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.quniform(lower=500, upper=1500, q=100),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -765,7 +765,7 @@ class AutoDLinear(BaseAuto):
         "moving_avg_window": tune.choice([11, 25, 51]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.quniform(lower=500, upper=1500, q=100),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -899,7 +899,7 @@ class AutoTFT(BaseAuto):
         "n_head": tune.choice([4, 8]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -967,7 +967,7 @@ class AutoVanillaTransformer(BaseAuto):
         "n_head": tune.choice([4, 8]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -1035,7 +1035,7 @@ class AutoInformer(BaseAuto):
         "n_head": tune.choice([4, 8]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -1103,7 +1103,7 @@ class AutoAutoformer(BaseAuto):
         "n_head": tune.choice([4, 8]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -1170,7 +1170,7 @@ class AutoFEDformer(BaseAuto):
         "hidden_size": tune.choice([64, 128, 256]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -1240,7 +1240,7 @@ class AutoPatchTST(BaseAuto):
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
         "revin": tune.choice([False, True]),
-        "max_steps": tune.choice([500, 1000, 5000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "windows_batch_size": tune.choice([128, 256, 512, 1024]),
         "loss": None,
@@ -1462,7 +1462,7 @@ class AutoStemGNN(BaseAuto):
         "multi_layer": tune.choice([3, 5, 7]),
         "learning_rate": tune.loguniform(1e-4, 1e-1),
         "scaler_type": tune.choice([None, "robust", "standard"]),
-        "max_steps": tune.choice([500, 1000, 2000]),
+        "max_steps": tune.quniform(lower=500, upper=5000, q=500),
         "batch_size": tune.choice([32, 64, 128, 256]),
         "loss": None,
         "random_seed": tune.randint(1, 20),