Cb release (#413)
* poetry update & changelog update for release
camillebrianceau authored Apr 13, 2023
1 parent a0c73d4 commit 22b49c6
Showing 30 changed files with 1,313 additions and 1,070 deletions.
24 changes: 24 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,30 @@ Main changes to this code/ project are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## ClinicaDL 1.3.0

### New

* Add new command `quality-check pet-linear`.
* Add new command `generate hypometabolic`.
* Add new network architectures: `Resnet3D` and `SqueezeExcitationCNN`.
* Add `flair-linear` modality for `prepare-data` command.
* Add PyTorch profiler (see the sketch after this list).
* Add `--save_nifti` option for `interpret` command.
* Add `--output_dir` argument for `tsvtools get-labels` command.
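
Of these, the profiler entry is the easiest to picture in code. Below is a minimal, generic sketch of wrapping a training loop in the PyTorch profiler; it shows standard `torch.profiler` usage, not ClinicaDL's exact integration (model, data, and settings are illustrative).

```python
import torch
from torch.profiler import ProfilerActivity, profile

# Toy model and data, purely for illustration.
model = torch.nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
batches = [(torch.randn(4, 16), torch.randint(0, 2, (4,))) for _ in range(3)]

with profile(activities=[ProfilerActivity.CPU]) as prof:
    for inputs, labels in batches:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

# Summarize where time was spent.
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))
```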

### Changed

**Core:**

* Transition from `os` to `pathlib` (see the sketch below).
* Update data CI.
* Improve `maps_manager`.
* Rename `--acq_label` option to `--tracer`.
* Update tutorial.
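
As a reading aid for the os-to-pathlib item above, here is the kind of change such a transition implies; the file names are made up, not taken from the repository.

```python
import os.path
from pathlib import Path

# Before: paths as strings, manipulated with os.path helpers.
tsv_file = os.path.join("results", "split-0", "data.tsv")
stem = os.path.splitext(os.path.basename(tsv_file))[0]

# After: Path objects compose with "/" and carry their own helpers.
tsv_path = Path("results") / "split-0" / "data.tsv"
tsv_path.parent.mkdir(parents=True, exist_ok=True)
stem = tsv_path.stem  # "data"
```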


## ClinicaDL 1.2.0

### Changed
5 changes: 1 addition & 4 deletions clinicadl/generate/generate_utils.py
@@ -28,7 +28,7 @@ def find_file_type(
elif preprocessing == "pet-linear":
if tracer is None or suvr_reference_region is None:
raise ClinicaDLArgumentError(
"tracer and suvr_reference_region must be defined "
"`tracer` and `suvr_reference_region` must be defined "
"when using `pet-linear` preprocessing."
)
file_type = pet_linear_nii(tracer, suvr_reference_region, uncropped_image)
@@ -41,7 +41,6 @@


def write_missing_mods(output_dir: Path, output_df: pd.DataFrame):

missing_path = output_dir / "missing_mods"
missing_path.mkdir(parents=True, exist_ok=True)

@@ -58,7 +57,6 @@ def write_missing_mods(output_dir: Path, output_df: pd.DataFrame):
def load_and_check_tsv(
tsv_path: Path, caps_dict: Dict[str, Path], output_path: Path
) -> pd.DataFrame:

from clinica.iotools.utils.data_handling import create_subs_sess_list

from clinicadl.utils.caps_dataset.data import check_multi_cohort_tsv
@@ -320,7 +318,6 @@ def generate_shepplogan_phantom(


def mask_processing(mask, percentage, sigma):

inverse_mask = 1 - mask
inverse_mask[inverse_mask == 0] = 1 - percentage / 100
gaussian_mask = gaussian_filter(inverse_mask, sigma=sigma)
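
For context, a self-contained sketch of what the visible body computes, assuming a binary floating-point mask and that `gaussian_filter` comes from `scipy.ndimage` (both are assumptions, since the diff hides the imports). Multiplying an image by the result dims the masked region by roughly `percentage` percent, with smoothed edges.

```python
import numpy as np
from scipy.ndimage import gaussian_filter  # assumed import

def mask_processing_sketch(mask: np.ndarray, percentage: float, sigma: float) -> np.ndarray:
    inverse_mask = 1 - mask  # 0 inside the masked region, 1 outside
    # Inside the region, replace 0 with the attenuation factor.
    inverse_mask[inverse_mask == 0] = 1 - percentage / 100
    return gaussian_filter(inverse_mask, sigma=sigma)

image = np.random.rand(8, 8)
mask = np.zeros((8, 8))
mask[2:6, 2:6] = 1.0
dimmed = image * mask_processing_sketch(mask, percentage=30, sigma=1.0)  # illustrative use
```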
3 changes: 1 addition & 2 deletions clinicadl/prepare_data/prepare_data.py
@@ -3,7 +3,6 @@


def DeepLearningPrepareData(caps_directory: Path, tsv_file: Path, n_proc, parameters):

from clinica.utils.inputs import check_caps_folder, clinica_file_reader
from clinica.utils.nipype import container_from_filename
from clinica.utils.participant import get_subject_session_list
@@ -19,7 +18,7 @@ def DeepLearningPrepareData(caps_directory: Path, tsv_file: Path, n_proc, parame

# Get subject and session list
check_caps_folder(caps_directory)
logger.debug(f"CAPS directory : {caps_directory}.")
logger.debug(f"CAPS directory: {caps_directory}.")
is_bids_dir = False
sessions, subjects = get_subject_session_list(
caps_directory, tsv_file, is_bids_dir, False, None
34 changes: 20 additions & 14 deletions clinicadl/prepare_data/prepare_data_utils.py
@@ -18,18 +18,26 @@ def get_parameters_dict(
suvr_reference_region: str,
) -> Dict[str, Any]:
"""
Args:
modality: preprocessing procedure performed with Clinica.
extract_method: mode of extraction (image, slice, patch, roi).
save_features: If True modes are extracted, else images are extracted
and the extraction of modes is done on-the-fly during training.
extract_json: Name of the JSON file created to sum up the arguments of tensor extraction.
use_uncropped_image: If True the cropped version of the image is used
(specific to t1-linear and pet-linear).
custom_suffix: string used to identify images when modality is custom.
tracer: name of the tracer (specific to PET pipelines).
suvr_reference_region: name of the reference region for normalization
specific to PET pipelines)
Parameters
----------
modality: str
Preprocessing procedure performed with Clinica.
extract_method: str
Mode of extraction (image, slice, patch, roi).
save_features: bool
If True modes are extracted, else images are extracted
and the extraction of modes is done on-the-fly during training.
extract_json: str
Name of the JSON file created to sum up the arguments of tensor extraction.
use_uncropped_image: bool
If True the cropped version of the image is used
(specific to t1-linear and pet-linear).
custom_suffix: str
String used to identify images when modality is custom.
tracer: str
Name of the tracer (specific to PET pipelines).
suvr_reference_region: str
Name of the reference region for normalization (specific to PET pipelines).
Returns:
The dictionary of parameters specific to the preprocessing
"""
@@ -199,7 +207,6 @@ def extract_slice_tensor(
def extract_slice_path(
img_path: Path, slice_direction: int, slice_mode: str, slice_index: int
) -> str:

direction_dict = {0: "sag", 1: "cor", 2: "axi"}
if slice_direction not in direction_dict:
raise KeyError(
@@ -471,7 +478,6 @@ def extract_roi_tensor(
mask_np,
uncrop_output: bool,
) -> torch.Tensor:

if len(mask_np.shape) == 3:
mask_np = np.expand_dims(mask_np, axis=0)
elif len(mask_np.shape) == 4:
4 changes: 1 addition & 3 deletions clinicadl/quality_check/t1_linear/utils.py
@@ -40,7 +40,7 @@ def __init__(
"participant_id" not in list(self.df.columns.values)
):
raise Exception(
"the data file is not in the correct format."
"The data file is not in the correct format."
"Columns should include ['participant_id', 'session_id']"
)

@@ -84,7 +84,6 @@ def __getitem__(self, idx):
image = torch.load(image_path)
image = self.pt_transform(image)
else:

image_path = clinica_file_reader(
[subject],
[session],
@@ -196,7 +195,6 @@ def pt_transform(self, image):
# direction with the pretrained model

if len(input_images[i].shape) == 3:

slice = np.reshape(
input_images[i],
(input_images[i].shape[0], input_images[i].shape[1]),
3 changes: 1 addition & 2 deletions clinicadl/quality_check/t1_volume/quality_check.py
@@ -13,7 +13,6 @@


def quality_check(caps_dir: Path, output_directory: Path, group_label):

logger = getLogger("clinicadl.quality_check")

extract_metrics(
@@ -27,7 +26,7 @@ def quality_check(caps_dir: Path, output_directory: Path, group_label):
rejection1_df = qc_df[qc_df.max_intensity > 0.95]
rejection1_df.to_csv(output_directory / "pass_step-1.tsv", sep="\t", index=False)
logger.info(
f"Number of sessions removed based on max intensity: {len(qc_df) - len(rejection1_df)}."
f"Number of sessions removed, based on max intensity: {len(qc_df) - len(rejection1_df)}."
)
logger.debug(f"{rejection1_df}")

3 changes: 1 addition & 2 deletions clinicadl/random_search/random_search.py
@@ -8,10 +8,9 @@


def launch_search(launch_directory: Path, job_name):

if not (launch_directory / "random_search.toml").is_file():
raise FileNotFoundError(
f"TOML file 'random_search.toml' must be written in directory {launch_directory}."
f"TOML file 'random_search.toml' must be written in directory: {launch_directory}."
)
space_options = get_space_dict(launch_directory)
options = random_sampling(space_options)
1 change: 0 additions & 1 deletion clinicadl/train/train.py
@@ -11,6 +11,5 @@ def train(
split_list: List[int],
erase_existing: bool = True,
):

maps_manager = MapsManager(maps_dir, train_dict, verbose=None)
maps_manager.train(split_list=split_list, overwrite=erase_existing)
3 changes: 1 addition & 2 deletions clinicadl/tsvtools/analysis/analysis.py
@@ -27,7 +27,7 @@ def demographics_analysis(
Produces a tsv file with rows corresponding to the labels defined by the diagnoses list,
and the columns being demographic statistics.
Writes one tsv file at results_tsv containing the demographic analysis of the tsv files in data_tsv.
Writes one tsv file at results_tsv, containing the demographic analysis of the tsv files in data_tsv.
Parameters
----------
@@ -173,7 +173,6 @@ def demographics_analysis(
f"There is no subject with diagnosis {diagnosis}"
)
for diagnosis in diagnoses:

logger.debug(f"compute stats for diagnosis {diagnosis}")

results_df.loc[diagnosis, "mean_age"] = np.nanmean(
7 changes: 3 additions & 4 deletions clinicadl/tsvtools/get_labels/get_labels.py
@@ -39,12 +39,12 @@ def infer_or_drop_diagnosis(bids_df: pd.DataFrame) -> pd.DataFrame:
Parameters
----------
bids_df: DataFrame
Columns including ['participant_id', 'session_id', 'diagnosis']
Columns including ['participant_id', 'session_id', 'diagnosis'].
Returns
-------
bids_copy_df: DataFrame
Cleaned copy of the input bids_df
Cleaned copy of the input bids_df.
"""
bids_copy_df = copy(bids_df)
found_diag_interpol = 0
@@ -161,7 +161,6 @@ def remove_unique_session(bids_df: pd.DataFrame) -> pd.DataFrame:
nb_unique = 0

for subject, subject_df in bids_df.groupby(level=0):

session_list = [session for _, session in subject_df.index.values]
session_list.sort()
nb_session = len(session_list)
@@ -195,7 +194,7 @@ def diagnosis_removal(bids_df: pd.DataFrame, diagnosis_list: List[str]) -> pd.Da
output_df = copy(bids_df)
nb_subjects = 0
for subject, subject_df in bids_df.groupby(level=0):
for (_, session) in subject_df.index.values:
for _, session in subject_df.index.values:
group = subject_df.loc[(subject, session), "diagnosis"]
if group not in diagnosis_list:
output_df.drop((subject, session), inplace=True)
4 changes: 1 addition & 3 deletions clinicadl/tsvtools/get_metadata/get_metadata.py
@@ -22,7 +22,7 @@ def get_metadata(
data_tsv: str (Path)
Columns must include ['participant_id', 'session_id']
merged_tsv: str (Path)
output of `clinica merge-tsv`
Output of `clinica merge-tsv`
variables_of_interest: list of str
List of columns that will be added in the output DataFrame.
@@ -41,7 +41,6 @@
)

if variables_of_interest is None:

variables_list = np.unique(variables_metadata)
logger.debug(
f"Adding the following columns to the input tsv file: {variables_list}"
@@ -50,7 +49,6 @@
result_df.set_index(["participant_id", "session_id"], inplace=True)

else:

if not set(variables_of_interest).issubset(set(metadata_df.columns.values)):
raise ClinicaDLArgumentError(
f"The variables asked by the user {variables_of_interest} do not "
3 changes: 1 addition & 2 deletions clinicadl/tsvtools/get_progression/get_progression.py
@@ -20,9 +20,8 @@ def get_progression(
horizon_time: int = 36,
stability_dict: dict = None,
):

"""
A method to get the progression for each sessions depending on their stability on the time horizon
A method to get the progression for each sessions, depending on their stability on the time horizon
Outputs are written in data_tsv
Parameters
5 changes: 1 addition & 4 deletions clinicadl/tsvtools/split/split.py
@@ -63,9 +63,8 @@ def create_split(
supplementary_train_df=None,
ignore_demographics=False,
):

"""
Split data at the subject-level in training and test set with equivalent age, sex and split_label distributions
Split data at the subject-level in training and test set with equivalent age, sex and split_label distributions.
Parameters
----------
@@ -133,10 +132,8 @@
n_try = 0

while flag_selection:

splits = StratifiedShuffleSplit(n_splits=1, test_size=n_test)
for train_index, test_index in splits.split(category, category):

# Find the value for different demographics (age & sex)
if len(set(age)) != 1:
age_test = [float(age[idx]) for idx in test_index]
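
For readers unfamiliar with the splitter used above, here is a generic scikit-learn example with made-up labels; the `splits.split(category, category)` call in the diff follows the same pattern.

```python
from sklearn.model_selection import StratifiedShuffleSplit

labels = ["AD", "CN", "AD", "CN", "AD", "CN", "AD", "CN"]  # made-up diagnoses
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.25)
# Stratification keeps the AD/CN ratio the same in train and test.
for train_index, test_index in splitter.split(labels, labels):
    print(train_index, test_index)
```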
5 changes: 1 addition & 4 deletions clinicadl/utils/caps_dataset/data.py
@@ -67,7 +67,7 @@ def __init__(
"Child class of CapsDataset must set elem_index attribute."
)
if not hasattr(self, "mode"):
raise AttributeError("Child class of CapsDataset must set mode attribute.")
raise AttributeError("Child class of CapsDataset, must set mode attribute.")

self.df = data_df

@@ -76,7 +76,6 @@
mandatory_col.add(self.label)

if not mandatory_col.issubset(set(self.df.columns.values)):

raise Exception(
f"the data file is not in the correct format."
f"Columns should include {mandatory_col}"
@@ -113,7 +112,6 @@ def __len__(self) -> int:

@staticmethod
def create_caps_dict(caps_directory: Path, multi_cohort: bool) -> Dict[str, Path]:

from clinica.utils.inputs import check_caps_folder

if multi_cohort:
@@ -1020,7 +1018,6 @@ def load_data_test(test_path: Path, diagnoses_list, baseline=True, multi_cohort=


def load_data_test_single(test_path: Path, diagnoses_list, baseline=True):

if test_path.suffix == ".tsv":
test_df = pd.read_csv(test_path, sep="\t")
if "diagnosis" not in test_df.columns.values:
9 changes: 5 additions & 4 deletions clinicadl/utils/logger.py
@@ -16,7 +16,6 @@ def filter(self, record):

# Create formatter for console
class ConsoleFormatter(logging.Formatter):

FORMATS = {
logging.INFO: "%(asctime)s - %(message)s",
logging.WARNING: "%(asctime)s - %(levelname)s: %(message)s",
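
The diff view truncates `ConsoleFormatter.format`; the per-level `FORMATS` mapping above is typically consumed as below. This is the standard logging pattern, not necessarily the repository's exact body.

```python
import logging

class ConsoleFormatterSketch(logging.Formatter):
    FORMATS = {
        logging.INFO: "%(asctime)s - %(message)s",
        logging.WARNING: "%(asctime)s - %(levelname)s: %(message)s",
    }

    def format(self, record):
        # Pick the level-specific format string, with a fallback.
        log_fmt = self.FORMATS.get(record.levelno, "%(levelname)s: %(message)s")
        return logging.Formatter(log_fmt).format(record)
```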
@@ -31,9 +30,11 @@ def format(self, record):
def setup_logging(verbose: bool = False) -> None:
"""
Setup ClinicaDL's logging facilities.
Args:
verbose: The desired level of verbosity for logging.
(False (default): INFO, True: DEBUG)
Parameters
----------
verbose: bool
The desired level of verbosity for logging.
(False (default): INFO, True: DEBUG)
"""
logging_level = "DEBUG" if verbose else "INFO"

6 changes: 2 additions & 4 deletions clinicadl/utils/maps_manager/maps_manager.py
@@ -55,9 +55,9 @@ def __init__(
Parameters
----------
maps_path: str (path)
path of the MAPS
Path of the MAPS
parameters: Dict[str, Any]
parameters of the training step. If given a new MAPS is created.
Parameters of the training step. If given a new MAPS is created.
verbose: str
Logging level ("debug", "info", "warning")
"""
@@ -826,7 +826,6 @@

with profiler:
for i, data in enumerate(train_loader):

_, loss_dict = model.compute_outputs_and_loss(data, criterion)
logger.debug(f"Train loss dictionnary {loss_dict}")
loss = loss_dict["loss"]
@@ -998,7 +997,6 @@
network (int): Index of the network tested (only used in multi-network setting).
"""
for selection_metric in selection_metrics:

log_dir = (
self.maps_path
/ f"{self.split_name}-{split}"
