Skip to content

Commit

Permalink
Changed yaml file arg order
Browse files Browse the repository at this point in the history
  • Loading branch information
kierandidi committed Mar 24, 2024
1 parent cb32b81 commit 48c64e8
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions proteinworkshop/config/dataset/pdb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ datamodule:

pdb_dataset:
_target_: "proteinworkshop.datasets.pdb_dataset.PDBData"
split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff"
split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]
train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits
fraction: 1.0 # Fraction of dataset to use
molecule_type: "protein" # Type of molecule to select
experiment_types: ["diffraction", "NMR", "EM", "other"] # All experiment types
Expand All @@ -23,8 +28,4 @@ datamodule:
remove_ligands: [] # Exclude specific ligands from any available protein-ligand complexes
remove_non_standard_residues: True # Include only proteins containing standard amino acid residues
remove_pdb_unavailable: True # Include only proteins that are available to download
train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits
split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff"
split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]

0 comments on commit 48c64e8

Please sign in to comment.