PGD attack on multi-modal CLIP model #2340

Closed
wants to merge 46 commits
46 commits
d37aa67
initial demo attack on clip
GiulioZizzo Aug 28, 2023
a1f3b6a
initial POC of attacking CLIP with ART tools
GiulioZizzo Sep 18, 2023
8bbb18e
fix assignment with torch.no_grad
GiulioZizzo Sep 18, 2023
852229e
fix bug in which x.copy() required a deepcopy for the new hf input type
GiulioZizzo Sep 20, 2023
988d348
general updates
GiulioZizzo Sep 25, 2023
5483e81
Rename of input, type hinting, function commenting
GiulioZizzo Sep 29, 2023
699527b
initial adversarial training scripts
GiulioZizzo Oct 13, 2023
c4b28a1
adding initial notebook and cuda compatibility
GiulioZizzo Oct 17, 2023
f04f132
pylint and mypy edits
GiulioZizzo Oct 17, 2023
f75ef03
refactor to experimental
GiulioZizzo Oct 21, 2023
f5774b5
run ci
GiulioZizzo Oct 25, 2023
9aa1365
commenting and formatting edits
GiulioZizzo Oct 25, 2023
109905c
move pgd changes to experimental
GiulioZizzo Oct 26, 2023
7589750
restore orignal fgsm and pgd files
GiulioZizzo Oct 26, 2023
b6f9cf0
moving to experimental
GiulioZizzo Nov 1, 2023
4356c08
moving labels to correct device, remove repeated code
GiulioZizzo Nov 7, 2023
95b6629
update notebook and formatting edits
GiulioZizzo Nov 7, 2023
45616bd
update tests
GiulioZizzo Nov 7, 2023
469f6bf
adding comments to mm_inputs
GiulioZizzo Nov 27, 2023
70f01cb
remove old files and redundant changes
GiulioZizzo Nov 27, 2023
3d2e075
moving functionality to experimental
GiulioZizzo Nov 27, 2023
9241fea
re-add original test bash script
GiulioZizzo Nov 28, 2023
19f6493
updated naming
GiulioZizzo Nov 28, 2023
09c8461
mypy fixes
GiulioZizzo Nov 28, 2023
a372550
updating tests
GiulioZizzo Nov 29, 2023
a084756
fix spelling error
GiulioZizzo Nov 30, 2023
5afe1e3
moving some tests to new script for estimator
GiulioZizzo Nov 30, 2023
d85b267
remove development files
GiulioZizzo Nov 30, 2023
a6ccea1
updates to tests
GiulioZizzo Nov 30, 2023
9df9d14
consistancy in naming
GiulioZizzo Nov 30, 2023
4bb4139
remove feature branch in ci pipeline
GiulioZizzo Nov 30, 2023
48391d1
mypy fixes
GiulioZizzo Nov 30, 2023
0defd9d
mypy fixes
GiulioZizzo Dec 1, 2023
0b5b773
checking codeql error
GiulioZizzo Dec 1, 2023
e8e4746
Formatting fix. Check if deepcopy is the problem with codeQL
GiulioZizzo Dec 1, 2023
c7573ee
check sentinel fix
GiulioZizzo Dec 1, 2023
8bbf92c
refactor to address codeQL
GiulioZizzo Dec 1, 2023
ae9a261
refactor for codeQL
GiulioZizzo Dec 1, 2023
455f31e
refactor for codeQL
GiulioZizzo Dec 1, 2023
fc37e87
refactor for codeQL
GiulioZizzo Dec 1, 2023
689777e
try sentinel fix
GiulioZizzo Dec 1, 2023
5a92140
refactor with setter method for codeQL
GiulioZizzo Dec 1, 2023
105c881
refactor for codeQl fix
GiulioZizzo Dec 1, 2023
31cbcca
refactor for codeQl fix
GiulioZizzo Dec 1, 2023
ea35d39
explicitly removing random restarts as ART currently only supports re…
GiulioZizzo Dec 7, 2023
6951923
updating notebook
GiulioZizzo Feb 2, 2024
2 changes: 1 addition & 1 deletion .github/workflows/codeql-analysis.yml
@@ -7,7 +7,7 @@ name: "CodeQL"

on:
push:
branches: [main, dev_*]
branches: [main, dev_*, clip_1.17_dev]
pull_request:
# The branches below must be a subset of the branches above
branches: [main, dev_*]
4 changes: 4 additions & 0 deletions art/experimental/attacks/__init__.py
@@ -0,0 +1,4 @@
"""
This module contains the experimental attacks for ART
"""
from art.experimental.attacks import evasion
6 changes: 6 additions & 0 deletions art/experimental/attacks/evasion/__init__.py
@@ -0,0 +1,6 @@
"""
This module contains the fgsm attack for the multimodal CLIP model
"""
from art.experimental.attacks.evasion.projected_gradient_descent.projected_gradient_descent_numpy import (
CLIPProjectedGradientDescentNumpy,
)
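
For orientation, the public import path this `__init__.py` exposes would be (a minimal sketch, assuming the package is installed from this branch):

from art.experimental.attacks.evasion import CLIPProjectedGradientDescentNumpy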
373 changes: 373 additions & 0 deletions art/experimental/attacks/evasion/fast_gradient.py

Large diffs are not rendered by default.

Empty file.
@@ -0,0 +1,238 @@
# MIT License
#
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2023
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
This module contains an experimental PGD attack for multimodal models.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import copy
import logging
from typing import Optional, Union, TYPE_CHECKING

import numpy as np
from tqdm.auto import trange

from art.config import ART_NUMPY_DTYPE
from art.estimators.classification.classifier import ClassifierMixin
from art.utils import compute_success, compute_success_array

from art.attacks.evasion.projected_gradient_descent.projected_gradient_descent_numpy import (
ProjectedGradientDescentNumpy,
)
from art.experimental.attacks.evasion.fast_gradient import (
FastGradientMethodCLIP,
)
from art.summary_writer import SummaryWriter

if TYPE_CHECKING:
from art.utils import CLASSIFIER_LOSS_GRADIENTS_TYPE, OBJECT_DETECTOR_TYPE

logger = logging.getLogger(__name__)


class CLIPProjectedGradientDescentNumpy(ProjectedGradientDescentNumpy, FastGradientMethodCLIP):

[CodeQL code-scanning warning on this line: "Conflicting attributes in base classes" — the base classes have conflicting values for the attribute '_estimator_requirements' (Tuple and Tuple).]
"""
Implementation of the PGD attack operating on the image portion of multimodal inputs
to the CLIP model.
"""

def __init__(
self,
estimator: Union["CLASSIFIER_LOSS_GRADIENTS_TYPE", "OBJECT_DETECTOR_TYPE"],
norm: Union[int, float, str] = np.inf,
eps: Union[int, float, np.ndarray] = 0.3,
eps_step: Union[int, float, np.ndarray] = 0.1,
decay: Optional[float] = None,
max_iter: int = 100,
targeted: bool = False,
batch_size: int = 32,
random_eps: bool = False,
summary_writer: Union[str, bool, SummaryWriter] = False,
verbose: bool = True,
) -> None:
"""
Create a :class:`.ProjectedGradientDescentNumpy` instance.

:param estimator: A trained estimator.
:param norm: The norm of the adversarial perturbation supporting "inf", np.inf, 1 or 2.
:param eps: Maximum perturbation that the attacker can introduce.
:param eps_step: Attack step size (input variation) at each iteration.
:param random_eps: When True, epsilon is drawn randomly from truncated normal distribution. The literature
suggests this for FGSM based training to generalize across different epsilons. eps_step
is modified to preserve the ratio of eps / eps_step. The effectiveness of this method with
PGD is untested (https://arxiv.org/pdf/1611.01236.pdf).
:param max_iter: The maximum number of iterations.
:param targeted: Indicates whether the attack is targeted (True) or untargeted (False)
:param batch_size: Size of the batch on which adversarial samples are generated.
:param summary_writer: Activate summary writer for TensorBoard.
Default is `False` and deactivated summary writer.
If `True` save runs/CURRENT_DATETIME_HOSTNAME in current directory.
If of type `str` save in path.
If of type `SummaryWriter` apply provided custom summary writer.
Use hierarchical folder structure to compare between runs easily. e.g. pass in
‘runs/exp1’, ‘runs/exp2’, etc. for each new experiment to compare across them.
:param verbose: Show progress bars.
"""

super().__init__(
estimator=estimator,
norm=norm,
eps=eps,
eps_step=eps_step,
decay=decay,
max_iter=max_iter,
targeted=targeted,
num_random_init=0,
batch_size=batch_size,
random_eps=random_eps,
summary_writer=summary_writer,
verbose=verbose,
)

def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
"""
Generate adversarial samples and return them in an array.

:param x: An array with the original inputs.
:param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
(nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
(explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.

:param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
features for which the mask is zero will not be adversarially perturbed.
:type mask: `np.ndarray`
:return: An array holding the adversarial examples.
"""
mask = self._get_mask(x, **kwargs)

# Ensure eps is broadcastable
self._check_compatibility_input_and_eps(x=x)

# Check whether random eps is enabled
self._random_eps()

if isinstance(self.estimator, ClassifierMixin):
# Set up targets
targets = self._set_targets(x, y)

# Start to compute adversarial examples
adv_x = x.astype(ART_NUMPY_DTYPE)

for batch_id in range(int(np.ceil(x.shape[0] / float(self.batch_size)))):

self._batch_id = batch_id

for rand_init_num in trange(
max(1, self.num_random_init), desc="PGD - Random Initializations", disable=not self.verbose
):
batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
batch_index_2 = min(batch_index_2, x.shape[0])
batch = x[batch_index_1:batch_index_2]
batch_labels = targets[batch_index_1:batch_index_2]
mask_batch = mask

if mask is not None:
if len(mask.shape) == len(x.shape):
mask_batch = mask[batch_index_1:batch_index_2]

momentum = np.zeros(batch.shape)

for i_max_iter in trange(
self.max_iter, desc="PGD - Iterations", leave=False, disable=not self.verbose
):
self._i_max_iter = i_max_iter

batch = self._compute(
batch,
x[batch_index_1:batch_index_2],
batch_labels,
mask_batch,
self.eps,
self.eps_step,
self._project,
self.num_random_init > 0 and i_max_iter == 0,
self._batch_id,
decay=self.decay,
momentum=momentum,
)

if rand_init_num == 0:
# initial (and possibly only) random restart: we only have this set of
# adversarial examples for now
adv_x[batch_index_1:batch_index_2] = copy.deepcopy(batch)
else:
# replace adversarial examples if they are successful
attack_success = compute_success_array(
self.estimator, # type: ignore
x[batch_index_1:batch_index_2],
targets[batch_index_1:batch_index_2],
batch,
self.targeted,
batch_size=self.batch_size,
)
adv_x[batch_index_1:batch_index_2][attack_success] = batch[attack_success]

logger.info(
"Success rate of attack: %.2f%%",
100
* compute_success(
self.estimator, # type: ignore
x,
targets,
adv_x,
self.targeted,
batch_size=self.batch_size, # type: ignore
),
)
else:
if self.num_random_init > 0: # pragma: no cover
raise ValueError("Random initialisation is only supported for classification.")

# Set up targets
targets = self._set_targets(x, y, classifier_mixin=False)

# Start to compute adversarial examples
if x.dtype == object:
adv_x = copy.deepcopy(x)
else:
adv_x = x.astype(ART_NUMPY_DTYPE)

momentum = np.zeros(adv_x.shape)

for i_max_iter in trange(self.max_iter, desc="PGD - Iterations", disable=not self.verbose):
self._i_max_iter = i_max_iter

adv_x = self._compute(
adv_x,
x,
targets,
mask,
self.eps,
self.eps_step,
self._project,
self.num_random_init > 0 and i_max_iter == 0,
decay=self.decay,
momentum=momentum,
)

if self.summary_writer is not None:
self.summary_writer.reset()

return adv_x
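
As a usage illustration only: the sketch below wires the class above into a short attack call. The constructor arguments and the `generate(x, y)` signature are taken from this diff; `clip_estimator`, `x`, and `y` are assumed placeholders (the `HuggingFaceMultiModalPyTorch` constructor and the exact input format are not shown in this file), so treat this as a sketch under those assumptions rather than a tested recipe.

import numpy as np

from art.experimental.attacks.evasion import CLIPProjectedGradientDescentNumpy

# Assumption: `clip_estimator` is an already-built HuggingFaceMultiModalPyTorch
# instance wrapping a CLIP model, and `x` / `y` are the multimodal inputs and
# labels it expects; none of that is constructed here.
attack = CLIPProjectedGradientDescentNumpy(
    estimator=clip_estimator,
    norm=np.inf,        # L_inf perturbation ball
    eps=8 / 255,        # maximum perturbation on the image portion of the input
    eps_step=1 / 255,   # per-iteration step size
    max_iter=10,
    targeted=False,
    batch_size=16,
)

# Perturbs only the image portion of the multimodal input, as described in the
# class docstring above.
x_adv = attack.generate(x=x, y=y)

Note that the constructor above hard-codes num_random_init=0 when calling the parent class, so random restarts are not exposed for this attack (consistent with commit ea35d39 in the list above).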
2 changes: 2 additions & 0 deletions art/experimental/estimators/__init__.py
@@ -1,4 +1,6 @@
"""
Experimental Estimator API
"""
from art.experimental.estimators.hugging_face_multimodal.hugging_face_mm import HuggingFaceMultiModalPyTorch
from art.experimental.estimators.hugging_face_multimodal.hugging_face_mm_inputs import HuggingFaceMultiModalInput
from art.experimental.estimators.jax import JaxEstimator
@@ -0,0 +1,5 @@
"""
Module containing estimators for CLIP.
"""
from art.experimental.estimators.hugging_face_multimodal.hugging_face_mm import HuggingFaceMultiModalPyTorch
from art.experimental.estimators.hugging_face_multimodal.hugging_face_mm_inputs import HuggingFaceMultiModalInput
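
Correspondingly, the estimator-side classes re-exported by these two `__init__.py` files can be imported as below (import paths only; constructor arguments for either class are not shown in this diff):

from art.experimental.estimators import HuggingFaceMultiModalPyTorch, HuggingFaceMultiModalInput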