From 52f64a838300f4d862c6ea329b356c13093a4172 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Wed, 30 Aug 2023 23:39:20 +0200 Subject: [PATCH 1/3] Remove in-place operations Signed-off-by: Beat Buesser --- .../object_detection/pytorch_object_detector.py | 4 ++-- art/estimators/object_detection/pytorch_yolo.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6317c10a5..3e7fd445ac 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -215,7 +215,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_tensor = torch.permute(x_tensor, (0, 3, 1, 2)) - x_tensor /= norm_factor + x_tensor = x_tensor / norm_factor # Set gradients if not no_grad: @@ -236,7 +236,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_preprocessed = torch.permute(x_preprocessed, (0, 3, 1, 2)) - x_preprocessed /= norm_factor + x_preprocessed = x_preprocessed / norm_factor # Set gradients if not no_grad: diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py index cdc4c72446..f680795443 100644 --- a/art/estimators/object_detection/pytorch_yolo.py +++ b/art/estimators/object_detection/pytorch_yolo.py @@ -103,8 +103,8 @@ def translate_labels_x1y1x2y2_to_xcycwh( labels[:, 2:6] = label_dict["boxes"] # normalize bounding boxes to [0, 1] - labels[:, 2:6:2] /= width - labels[:, 3:6:2] /= height + labels[:, 2:6:2] = labels[:, 2:6:2] / width + labels[:, 3:6:2] = labels[:, 3:6:2] / height # convert from x1y1x2y2 to xcycwh labels[:, 4] -= labels[:, 2] @@ -290,7 +290,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_tensor = torch.permute(x_tensor, (0, 3, 1, 2)) - x_tensor /= norm_factor + x_tensor = x_tensor / norm_factor # Set gradients if not no_grad: @@ -311,7 +311,7 @@ def _preprocess_and_convert_inputs( if not self.channels_first: x_preprocessed = torch.permute(x_preprocessed, (0, 3, 1, 2)) - x_preprocessed /= norm_factor + x_preprocessed = x_preprocessed / norm_factor # Set gradients if not no_grad: From 0b2ee2a3be8c703432dbc3c53ddbc406afdb53a4 Mon Sep 17 00:00:00 2001 From: Farhan Ahmed Date: Wed, 30 Aug 2023 14:42:04 -0700 Subject: [PATCH 2/3] update docstrings and default parameters Signed-off-by: Farhan Ahmed --- .../object_detection/pytorch_faster_rcnn.py | 20 +++++++++---------- .../pytorch_object_detector.py | 16 +++++++-------- .../object_detection/pytorch_yolo.py | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/art/estimators/object_detection/pytorch_faster_rcnn.py b/art/estimators/object_detection/pytorch_faster_rcnn.py index ce18fea694..cc606ad098 100644 --- a/art/estimators/object_detection/pytorch_faster_rcnn.py +++ b/art/estimators/object_detection/pytorch_faster_rcnn.py @@ -38,17 +38,17 @@ class PyTorchFasterRCNN(PyTorchObjectDetector): """ - This class implements a model-specific object detector using Faster-RCNN and PyTorch following the input and output + This class implements a model-specific object detector using Faster R-CNN and PyTorch following the input and output formats of torchvision. """ def __init__( self, - model: Optional["torchvision.models.detection.fasterrcnn_resnet50_fpn"] = None, - input_shape: Tuple[int, ...] = (-1, -1, -1), + model: Optional["torchvision.models.detection.FasterRCNN"] = None, + input_shape: Tuple[int, ...] 
= (3, 416, 416), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, - channels_first: Optional[bool] = False, + channels_first: Optional[bool] = True, preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None, postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None, preprocessing: "PREPROCESSING_TYPE" = None, @@ -63,13 +63,13 @@ def __init__( """ Initialization. - :param model: Faster-RCNN model. The output of the model is `List[Dict[Tensor]]`, one for each input image. The - fields of the Dict are as follows: + :param model: Faster R-CNN model. The output of the model is `List[Dict[str, torch.Tensor]]`, one for + each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values \ - between 0 and H and 0 and W - - labels (Int64Tensor[N]): the predicted labels for each image - - scores (Tensor[N]): the scores or each prediction + - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and + 0 <= y1 < y2 <= H. + - labels [N]: the labels for each image. + - scores [N]: the scores of each prediction. :param input_shape: The shape of one input sample. :param optimizer: The optimizer for training the classifier. :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6317c10a5..45afb7e6d9 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -49,10 +49,10 @@ class PyTorchObjectDetector(ObjectDetectorMixin, PyTorchEstimator): def __init__( self, model: "torch.nn.Module", - input_shape: Tuple[int, ...] = (-1, -1, -1), + input_shape: Tuple[int, ...] = (3, 416, 416), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, - channels_first: Optional[bool] = False, + channels_first: Optional[bool] = True, preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None, postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None, preprocessing: "PREPROCESSING_TYPE" = None, @@ -67,13 +67,13 @@ def __init__( """ Initialization. - :param model: Object detection model. The output of the model is `List[Dict[Tensor]]`, one for each input - image. The fields of the Dict are as follows: + :param model: Object detection model. The output of the model is `List[Dict[str, torch.Tensor]]`, one for + each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values - between 0 and H and 0 and W - - labels (Int64Tensor[N]): the predicted labels for each image - - scores (Tensor[N]): the scores or each prediction + - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and + 0 <= y1 < y2 <= H. + - labels [N]: the labels for each image. + - scores [N]: the scores of each prediction. :param input_shape: The shape of one input sample. :param optimizer: The optimizer for training the classifier. 
:param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py index cdc4c72446..a2c3ab06ef 100644 --- a/art/estimators/object_detection/pytorch_yolo.py +++ b/art/estimators/object_detection/pytorch_yolo.py @@ -148,7 +148,7 @@ def __init__( """ Initialization. - :param model: Object detection model wrapped as demonstrated in examples/get_started_yolo.py. + :param model: YOLO v3 or v5 model wrapped as demonstrated in examples/get_started_yolo.py. The output of the model is `List[Dict[str, torch.Tensor]]`, one for each input image. The fields of the Dict are as follows: From 98016c67ddaa5b1426dddb5f9d6a18cf3265bb34 Mon Sep 17 00:00:00 2001 From: Farhan Ahmed Date: Tue, 5 Sep 2023 12:40:03 -0700 Subject: [PATCH 3/3] address review comments Signed-off-by: Farhan Ahmed --- art/estimators/object_detection/pytorch_faster_rcnn.py | 2 +- art/estimators/object_detection/pytorch_object_detector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/art/estimators/object_detection/pytorch_faster_rcnn.py b/art/estimators/object_detection/pytorch_faster_rcnn.py index cc606ad098..ddd635b03f 100644 --- a/art/estimators/object_detection/pytorch_faster_rcnn.py +++ b/art/estimators/object_detection/pytorch_faster_rcnn.py @@ -45,7 +45,7 @@ class PyTorchFasterRCNN(PyTorchObjectDetector): def __init__( self, model: Optional["torchvision.models.detection.FasterRCNN"] = None, - input_shape: Tuple[int, ...] = (3, 416, 416), + input_shape: Tuple[int, ...] = (-1, -1, -1), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, channels_first: Optional[bool] = True, diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index d6c6180971..4316d4e202 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -49,7 +49,7 @@ class PyTorchObjectDetector(ObjectDetectorMixin, PyTorchEstimator): def __init__( self, model: "torch.nn.Module", - input_shape: Tuple[int, ...] = (3, 416, 416), + input_shape: Tuple[int, ...] = (-1, -1, -1), optimizer: Optional["torch.optim.Optimizer"] = None, clip_values: Optional["CLIP_VALUES_TYPE"] = None, channels_first: Optional[bool] = True,
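
Background for PATCH 1/3 (a standalone sketch, not part of the patch): autograd
rejects in-place updates on leaf tensors that require gradients, which is why
`x_tensor /= norm_factor` is replaced with `x_tensor = x_tensor / norm_factor`.
A minimal reproduction of the failure mode and the out-of-place fix:

    import torch

    # A leaf tensor with requires_grad=True, analogous to x_tensor in
    # _preprocess_and_convert_inputs before normalization.
    x = torch.ones(1, 3, 416, 416, requires_grad=True)

    try:
        x /= 255.0  # in-place division on a leaf that requires grad
    except RuntimeError as err:
        print(f"rejected by autograd: {err}")

    # The out-of-place form builds a new node in the autograd graph instead
    # of mutating the leaf, so gradients flow back to x as expected.
    y = x / 255.0
    y.sum().backward()
    assert x.grad is not None and x.grad.shape == x.shape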
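
The strided slicing in translate_labels_x1y1x2y2_to_xcycwh is easier to follow
on a plain [N, 4] box tensor. A self-contained sketch of the same conversion
(the helper name, shapes, and sample numbers are illustrative assumptions; the
real function also carries batch and class columns at indices 0:2):

    import torch

    def x1y1x2y2_to_xcycwh(boxes: torch.Tensor, height: int, width: int) -> torch.Tensor:
        """Pixel [x1, y1, x2, y2] boxes -> normalized [xc, yc, w, h], out of place."""
        out = boxes.clone().float()
        out[:, 0::2] = out[:, 0::2] / width   # normalize x1, x2 to [0, 1]
        out[:, 1::2] = out[:, 1::2] / height  # normalize y1, y2 to [0, 1]
        wh = out[:, 2:] - out[:, :2]          # w = x2 - x1, h = y2 - y1
        xcyc = out[:, :2] + wh / 2.0          # box centers
        return torch.cat([xcyc, wh], dim=1)

    boxes = torch.tensor([[10.0, 20.0, 110.0, 220.0]])
    print(x1y1x2y2_to_xcycwh(boxes, height=416, width=416))
    # tensor([[0.1442, 0.2885, 0.2404, 0.4808]])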
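
After PATCH 3/3 the generic estimators keep `input_shape=(-1, -1, -1)`
(variable-size inputs) while `channels_first` defaults to True (NCHW). A hedged
usage sketch (constructor keywords as in the diff; the torchvision model and
`clip_values` are illustrative choices, not mandated by the patch):

    import torchvision
    from art.estimators.object_detection import PyTorchFasterRCNN

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    detector = PyTorchFasterRCNN(
        model=model,
        input_shape=(-1, -1, -1),  # default after PATCH 3/3: variable-size inputs
        channels_first=True,       # default after PATCH 2/3: NCHW tensors
        clip_values=(0, 255),      # illustrative; match your pixel range
    )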