From 52f64a838300f4d862c6ea329b356c13093a4172 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Wed, 30 Aug 2023 23:39:20 +0200 Subject: [PATCH 1/3] Remove in-place operations Signed-off-by: Beat Buesser --- .../object_detection/pytorch_object_detector.py | 4 ++-- art/estimators/object_detection/pytorch_yolo.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6317c10a5..3e7fd445ac 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -215,7 +215,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_tensor = torch.permute(x_tensor, (0, 3, 1, 2)) - x_tensor /= norm_factor + x_tensor = x_tensor / norm_factor # Set gradients if not no_grad: @@ -236,7 +236,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_preprocessed = torch.permute(x_preprocessed, (0, 3, 1, 2)) - x_preprocessed /= norm_factor + x_preprocessed = x_preprocessed / norm_factor # Set gradients if not no_grad: diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py index cdc4c72446..f680795443 100644 --- a/art/estimators/object_detection/pytorch_yolo.py +++ b/art/estimators/object_detection/pytorch_yolo.py @@ -103,8 +103,8 @@ def translate_labels_x1y1x2y2_to_xcycwh( labels[:, 2:6] = label_dict["boxes"] # normalize bounding boxes to [0, 1] - labels[:, 2:6:2] /= width - labels[:, 3:6:2] /= height + labels[:, 2:6:2] = labels[:, 2:6:2] / width + labels[:, 3:6:2] = labels[:, 3:6:2] / height # convert from x1y1x2y2 to xcycwh labels[:, 4] -= labels[:, 2] @@ -290,7 +290,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_tensor = torch.permute(x_tensor, (0, 3, 1, 2)) - x_tensor /= norm_factor + x_tensor = x_tensor / norm_factor # Set gradients if not no_grad: @@ -311,7 +311,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_preprocessed = torch.permute(x_preprocessed, (0, 3, 1, 2)) - x_preprocessed /= norm_factor + x_preprocessed = x_preprocessed / norm_factor # Set gradients if not no_grad: From 0b2ee2a3be8c703432dbc3c53ddbc406afdb53a4 Mon Sep 17 00:00:00 2001 From: Farhan Ahmed Date: Wed, 30 Aug 2023 14:42:04 -0700 Subject: [PATCH 2/3] update docstrings and default parameters Signed-off-by: Farhan Ahmed --- .../object_detection/pytorch_faster_rcnn.py | 20 +++++++++---------- .../pytorch_object_detector.py | 16 +++++++-------- .../object_detection/pytorch_yolo.py | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/art/estimators/object_detection/pytorch_faster_rcnn.py b/art/estimators/object_detection/pytorch_faster_rcnn.py index ce18fea694..cc606ad098 100644 --- a/art/estimators/object_detection/pytorch_faster_rcnn.py +++ b/art/estimators/object_detection/pytorch_faster_rcnn.py @@ -38,17 +38,17 @@ class PyTorchFasterRCNN(PyTorchObjectDetector): """ - This class implements a model-specific object detector using Faster-RCNN and PyTorch following the input and output + This class implements a model-specific object detector using Faster R-CNN and PyTorch following the input and output formats of torchvision. """ def __init__( self, - model: Optional["torchvision.models.detection.fasterrcnn_resnet50_fpn"] = None, - input_shape: Tuple[int, ...] = (-1, -1, -1), + model: Optional["torchvision.models.detection.FasterRCNN"] = None, + input_shape: Tuple[int, ...] 
= (3, 416, 416), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, - channels_first: Optional[bool] = False, + channels_first: Optional[bool] = True, preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None, postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None, preprocessing: "PREPROCESSING_TYPE" = None, @@ -63,13 +63,13 @@ def __init__( """ Initialization. - :param model: Faster-RCNN model. The output of the model is `List[Dict[Tensor]]`, one for each input image. The - fields of the Dict are as follows: + :param model: Faster R-CNN model. The output of the model is `List[Dict[str, torch.Tensor]]`, one for + each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values \ - between 0 and H and 0 and W - - labels (Int64Tensor[N]): the predicted labels for each image - - scores (Tensor[N]): the scores or each prediction + - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and + 0 <= y1 < y2 <= H. + - labels [N]: the labels for each image. + - scores [N]: the scores of each prediction. :param input_shape: The shape of one input sample. :param optimizer: The optimizer for training the classifier. :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6317c10a5..45afb7e6d9 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -49,10 +49,10 @@ class PyTorchObjectDetector(ObjectDetectorMixin, PyTorchEstimator): def __init__( self, model: "torch.nn.Module", - input_shape: Tuple[int, ...] = (-1, -1, -1), + input_shape: Tuple[int, ...] = (3, 416, 416), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, - channels_first: Optional[bool] = False, + channels_first: Optional[bool] = True, preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None, postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None, preprocessing: "PREPROCESSING_TYPE" = None, @@ -67,13 +67,13 @@ def __init__( """ Initialization. - :param model: Object detection model. The output of the model is `List[Dict[Tensor]]`, one for each input - image. The fields of the Dict are as follows: + :param model: Object detection model. The output of the model is `List[Dict[str, torch.Tensor]]`, one for + each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values - between 0 and H and 0 and W - - labels (Int64Tensor[N]): the predicted labels for each image - - scores (Tensor[N]): the scores or each prediction + - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and + 0 <= y1 < y2 <= H. + - labels [N]: the labels for each image. + - scores [N]: the scores of each prediction. :param input_shape: The shape of one input sample. :param optimizer: The optimizer for training the classifier. 
:param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py index cdc4c72446..a2c3ab06ef 100644 --- a/art/estimators/object_detection/pytorch_yolo.py +++ b/art/estimators/object_detection/pytorch_yolo.py @@ -148,7 +148,7 @@ def __init__( """ Initialization. - :param model: Object detection model wrapped as demonstrated in examples/get_started_yolo.py. + :param model: YOLO v3 or v5 model wrapped as demonstrated in examples/get_started_yolo.py. The output of the model is `List[Dict[str, torch.Tensor]]`, one for each input image. The fields of the Dict are as follows: From 98016c67ddaa5b1426dddb5f9d6a18cf3265bb34 Mon Sep 17 00:00:00 2001 From: Farhan Ahmed Date: Tue, 5 Sep 2023 12:40:03 -0700 Subject: [PATCH 3/3] address review comments Signed-off-by: Farhan Ahmed --- art/estimators/object_detection/pytorch_faster_rcnn.py | 2 +- art/estimators/object_detection/pytorch_object_detector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/art/estimators/object_detection/pytorch_faster_rcnn.py b/art/estimators/object_detection/pytorch_faster_rcnn.py index cc606ad098..ddd635b03f 100644 --- a/art/estimators/object_detection/pytorch_faster_rcnn.py +++ b/art/estimators/object_detection/pytorch_faster_rcnn.py @@ -45,7 +45,7 @@ class PyTorchFasterRCNN(PyTorchObjectDetector): def __init__( self, model: Optional["torchvision.models.detection.FasterRCNN"] = None, - input_shape: Tuple[int, ...] = (3, 416, 416), + input_shape: Tuple[int, ...] = (-1, -1, -1), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, channels_first: Optional[bool] = True, diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6c6180971..4316d4e202 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -49,7 +49,7 @@ class PyTorchObjectDetector(ObjectDetectorMixin, PyTorchEstimator): def __init__( self, model: "torch.nn.Module", - input_shape: Tuple[int, ...] = (3, 416, 416), + input_shape: Tuple[int, ...] = (-1, -1, -1), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, channels_first: Optional[bool] = True,
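
Background for PATCH 1/3 (a standalone sketch, not part of the patch): autograd
rejects in-place updates on leaf tensors that require gradients, which is why
`x_tensor /= norm_factor` is replaced with `x_tensor = x_tensor / norm_factor`.
A minimal reproduction of the failure mode and the out-of-place fix:

    import torch

    # A leaf tensor with requires_grad=True, analogous to x_tensor in
    # _preprocess_and_convert_inputs before normalization.
    x = torch.ones(1, 3, 416, 416, requires_grad=True)

    try:
        x /= 255.0  # in-place division on a leaf that requires grad
    except RuntimeError as err:
        print(f"rejected by autograd: {err}")

    # The out-of-place form builds a new node in the autograd graph instead
    # of mutating the leaf, so gradients flow back to x as expected.
    y = x / 255.0
    y.sum().backward()
    assert x.grad is not None and x.grad.shape == x.shape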
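
The strided slicing in translate_labels_x1y1x2y2_to_xcycwh is easier to follow
on a plain [N, 4] box tensor. A self-contained sketch of the same conversion
(the helper name, shapes, and sample numbers are illustrative assumptions; the
real function also carries batch and class columns at indices 0:2):

    import torch

    def x1y1x2y2_to_xcycwh(boxes: torch.Tensor, height: int, width: int) -> torch.Tensor:
        """Pixel [x1, y1, x2, y2] boxes -> normalized [xc, yc, w, h], out of place."""
        out = boxes.clone().float()
        out[:, 0::2] = out[:, 0::2] / width   # normalize x1, x2 to [0, 1]
        out[:, 1::2] = out[:, 1::2] / height  # normalize y1, y2 to [0, 1]
        wh = out[:, 2:] - out[:, :2]          # w = x2 - x1, h = y2 - y1
        xcyc = out[:, :2] + wh / 2.0          # box centers
        return torch.cat([xcyc, wh], dim=1)

    boxes = torch.tensor([[10.0, 20.0, 110.0, 220.0]])
    print(x1y1x2y2_to_xcycwh(boxes, height=416, width=416))
    # tensor([[0.1442, 0.2885, 0.2404, 0.4808]])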
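
After PATCH 3/3 the generic estimators keep `input_shape=(-1, -1, -1)`
(variable-size inputs) while `channels_first` defaults to True (NCHW). A hedged
usage sketch (constructor keywords as in the diff; the torchvision model and
`clip_values` are illustrative choices, not mandated by the patch):

    import torchvision
    from art.estimators.object_detection import PyTorchFasterRCNN

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    detector = PyTorchFasterRCNN(
        model=model,
        input_shape=(-1, -1, -1),  # default after PATCH 3/3: variable-size inputs
        channels_first=True,       # default after PATCH 2/3: NCHW tensors
        clip_values=(0, 255),      # illustrative; match your pixel range
    )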