From 48c64e83a966ca4d7bf5c5bc72a5c7a22efbca06 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Sun, 24 Mar 2024 16:52:52 +0000 Subject: [PATCH] Changed yaml file arg order --- proteinworkshop/config/dataset/pdb.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/proteinworkshop/config/dataset/pdb.yaml b/proteinworkshop/config/dataset/pdb.yaml index 9f2b8a4e..3954e199 100644 --- a/proteinworkshop/config/dataset/pdb.yaml +++ b/proteinworkshop/config/dataset/pdb.yaml @@ -10,6 +10,11 @@ datamodule: pdb_dataset: _target_: "proteinworkshop.datasets.pdb_dataset.PDBData" + split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff" + split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity") + overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten + split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"] + train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits fraction: 1.0 # Fraction of dataset to use molecule_type: "protein" # Type of molecule for which to select experiment_types: ["diffraction", "NMR", "EM", "other"] # All experiment types @@ -23,8 +28,4 @@ datamodule: remove_ligands: [] # Exclude specific ligands from any available protein-ligand complexes remove_non_standard_residues: True # Include only proteins containing standard amino acid residues remove_pdb_unavailable: True # Include only proteins that are available to download - train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits - split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff" - split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity") - overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten - split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"] +