Skip to content

Commit

Permalink
Add support for the FluxFill inpainting pipeline (#1095)
Browse files Browse the repository at this point in the history
* add support for the FluxFill inpainting pipeline

* add tests

* register dummy model class

* enable cli export tests
  • Loading branch information
eaidova authored Jan 8, 2025
1 parent 124e4ca commit 58aec63
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 28 deletions.
39 changes: 26 additions & 13 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
_torch_version,
_transformers_version,
compare_versions,
is_diffusers_version,
is_openvino_tokenizers_version,
is_tokenizers_version,
is_transformers_version,
Expand Down Expand Up @@ -988,24 +989,36 @@ def _get_submodels_and_export_configs(
def get_diffusion_models_for_export_ext(
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
try:
from diffusers import (
StableDiffusion3Img2ImgPipeline,
StableDiffusion3InpaintPipeline,
StableDiffusion3Pipeline,
)
if is_diffusers_version(">=", "0.29.0"):
from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline

is_sd3 = isinstance(
pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
)
except ImportError:
sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
if is_diffusers_version(">=", "0.30.0"):
from diffusers import StableDiffusion3InpaintPipeline

sd3_pipes.append(StableDiffusion3InpaintPipeline)

is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
else:
is_sd3 = False

try:
if is_diffusers_version(">=", "0.30.0"):
from diffusers import FluxPipeline

is_flux = isinstance(pipeline, FluxPipeline)
except ImportError:
flux_pipes = [FluxPipeline]

if is_diffusers_version(">=", "0.31.0"):
from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline

flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])

if is_diffusers_version(">=", "0.32.0"):
from diffusers import FluxFillPipeline

flux_pipes.append(FluxFillPipeline)

is_flux = isinstance(pipeline, tuple(flux_pipes))
else:
is_flux = False

if not is_sd3 and not is_flux:
Expand Down
11 changes: 10 additions & 1 deletion optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@
)
from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig

from ...intel.utils.import_utils import _transformers_version, is_diffusers_version, is_transformers_version
from ...intel.utils.import_utils import (
_transformers_version,
is_diffusers_available,
is_diffusers_version,
is_transformers_version,
)
from .model_patcher import (
AquilaModelPatcher,
ArcticModelPatcher,
Expand Down Expand Up @@ -119,6 +124,10 @@ def init_model_configs():
"image-text-to-text"
] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]

if is_diffusers_available() and "fill" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS:
TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["fill"] = "FluxFillPipeline"
TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["fill"] = {"flux": "FluxFillPipeline"}

supported_model_types = [
"_SUPPORTED_MODEL_TYPE",
"_DIFFUSERS_SUPPORTED_MODEL_TYPE",
Expand Down
2 changes: 2 additions & 0 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"OVFluxPipeline",
"OVFluxImg2ImgPipeline",
"OVFluxInpaintPipeline",
"OVFluxFillPipeline",
"OVPipelineForImage2Image",
"OVPipelineForText2Image",
"OVPipelineForInpainting",
Expand All @@ -148,6 +149,7 @@
"OVFluxPipeline",
"OVFluxImg2ImgPipeline",
"OVFluxInpaintPipeline",
"OVFluxFillPipeline",
"OVPipelineForImage2Image",
"OVPipelineForText2Image",
"OVPipelineForInpainting",
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
if is_diffusers_available():
from .modeling_diffusion import (
OVDiffusionPipeline,
OVFluxFillPipeline,
OVFluxImg2ImgPipeline,
OVFluxInpaintPipeline,
OVFluxPipeline,
Expand Down
19 changes: 17 additions & 2 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@
FluxImg2ImgPipeline = object
FluxInpaintPipeline = object

if is_diffusers_version(">=", "0.32.0"):
from diffusers import FluxFillPipeline
else:
FluxFillPipeline = object


DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer"
DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3"
Expand Down Expand Up @@ -1458,17 +1463,23 @@ class OVFluxPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxPip


class OVFluxImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxImg2ImgPipeline):
main_input_name = "prompt"
main_input_name = "image"
export_feature = "image-to-image"
auto_model_class = FluxImg2ImgPipeline


class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxInpaintPipeline):
main_input_name = "prompt"
main_input_name = "image"
export_feature = "inpainting"
auto_model_class = FluxInpaintPipeline


# OpenVINO pipeline class for FluxFillPipeline, composed from OVDiffusionPipeline and
# the textual-inversion loader mixin. When diffusers is older than 0.32.0 the
# FluxFillPipeline name is bound to the `object` placeholder instead of the real class.
class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline):
# same main input as the other image-conditioned Flux pipelines in this file
main_input_name = "image"
# shares the "inpainting" export feature with OVFluxInpaintPipeline
export_feature = "inpainting"
# diffusers class this pipeline is matched against (the tests compare it to the loaded pipeline's class)
auto_model_class = FluxFillPipeline


SUPPORTED_OV_PIPELINES = [
OVStableDiffusionPipeline,
OVStableDiffusionImg2ImgPipeline,
Expand Down Expand Up @@ -1537,6 +1548,10 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru
OV_INPAINT_PIPELINES_MAPPING["flux"] = OVFluxInpaintPipeline
OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxImg2ImgPipeline

if is_diffusers_version(">=", "0.32.0"):
OV_INPAINT_PIPELINES_MAPPING["flux-fill"] = OVFluxFillPipeline
SUPPORTED_OV_PIPELINES.append(OVFluxFillPipeline)

SUPPORTED_OV_PIPELINES_MAPPINGS = [
OV_TEXT2IMAGE_PIPELINES_MAPPING,
OV_IMAGE2IMAGE_PIPELINES_MAPPING,
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"stable-diffusion-xl": "OVStableDiffusionXLPipeline",
"stable-diffusion-3": "OVStableDiffusion3Pipeline",
"flux": "OVFluxPipeline",
"flux-fill": "OVFluxFillPipeline",
"pix2struct": "OVModelForPix2Struct",
"latent-consistency": "OVLatentConsistencyModelPipeline",
"open_clip_text": "OVModelOpenCLIPText",
Expand Down
11 changes: 11 additions & 0 deletions optimum/intel/utils/dummy_openvino_and_diffusers_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,14 @@ def __init__(self, *args, **kwargs):
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["openvino", "diffusers"])


# Placeholder registered under the OVFluxFillPipeline name. Both construction and
# from_pretrained hand off to requires_backends with the "openvino" and "diffusers" backends.
# NOTE(review): presumably requires_backends raises when a backend is missing - confirm in its definition.
class OVFluxFillPipeline(metaclass=DummyObject):
# backends this placeholder stands in for
_backends = ["openvino", "diffusers"]

def __init__(self, *args, **kwargs):
requires_backends(self, ["openvino", "diffusers"])

@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["openvino", "diffusers"])
42 changes: 31 additions & 11 deletions tests/openvino/test_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,13 +667,14 @@ class OVPipelineForInpaintingTest(unittest.TestCase):
if is_transformers_version(">=", "4.40.0"):
SUPPORTED_ARCHITECTURES.append("stable-diffusion-3")
SUPPORTED_ARCHITECTURES.append("flux")
SUPPORTED_ARCHITECTURES.append("flux-fill")

AUTOMODEL_CLASS = AutoPipelineForInpainting
OVMODEL_CLASS = OVPipelineForInpainting

TASK = "inpainting"

def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"):
def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil", model_arch=""):
inputs = _generate_prompts(batch_size=batch_size)

inputs["image"] = _generate_images(
Expand All @@ -683,7 +684,8 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_
height=height, width=width, batch_size=batch_size, channel=1, input_type=input_type
)

inputs["strength"] = 0.75
if model_arch != "flux-fill":
inputs["strength"] = 0.75
inputs["height"] = height
inputs["width"] = width

Expand All @@ -699,7 +701,12 @@ def test_load_vanilla_model_which_is_not_supported(self):
@parameterized.expand(SUPPORTED_ARCHITECTURES)
@require_diffusers
def test_ov_pipeline_class_dispatch(self, model_arch: str):
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
if model_arch != "flux-fill":
auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
else:
from diffusers import FluxFillPipeline

auto_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])

self.assertEqual(ov_pipeline.auto_model_class, auto_pipeline.__class__)
Expand All @@ -713,7 +720,9 @@ def test_num_images_per_prompt(self, model_arch: str):
for height in [64, 128]:
for width in [64, 128]:
for num_images_per_prompt in [1, 3]:
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(
height=height, width=width, batch_size=batch_size, model_arch=model_arch
)
outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images
self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3))

Expand Down Expand Up @@ -752,7 +761,9 @@ def test_shape(self, model_arch: str):
height, width, batch_size = 128, 64, 1

for input_type in ["pil", "np", "pt"]:
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type)
inputs = self.generate_inputs(
height=height, width=width, batch_size=batch_size, input_type=input_type, model_arch=model_arch
)

for output_type in ["pil", "np", "pt", "latent"]:
inputs["output_type"] = output_type
Expand All @@ -764,7 +775,7 @@ def test_shape(self, model_arch: str):
elif output_type == "pt":
self.assertEqual(outputs.shape, (batch_size, 3, height, width))
else:
if model_arch != "flux":
if not model_arch.startswith("flux"):
out_channels = (
pipeline.unet.config.out_channels
if pipeline.unet is not None
Expand All @@ -782,17 +793,26 @@ def test_shape(self, model_arch: str):
else:
packed_height = height // pipeline.vae_scale_factor // 2
packed_width = width // pipeline.vae_scale_factor // 2
channels = pipeline.transformer.config.in_channels
channels = (
pipeline.transformer.config.in_channels
if model_arch != "flux-fill"
else pipeline.transformer.out_channels
)
self.assertEqual(outputs.shape, (batch_size, packed_height * packed_width, channels))

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@require_diffusers
def test_compare_to_diffusers_pipeline(self, model_arch: str):
ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
if model_arch != "flux-fill":
diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
else:
from diffusers import FluxFillPipeline

diffusers_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch])

height, width, batch_size = 64, 64, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for output_type in ["latent", "np", "pt"]:
inputs["output_type"] = output_type
Expand All @@ -804,7 +824,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str):

# test generation when the input resolution is not divisible by 64
height, width, batch_size = 96, 96, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for output_type in ["latent", "np", "pt"]:
inputs["output_type"] = output_type
Expand All @@ -820,7 +840,7 @@ def test_image_reproducibility(self, model_arch: str):
pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])

height, width, batch_size = 64, 64, 1
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch)

for generator_framework in ["np", "pt"]:
ov_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED))
Expand Down
6 changes: 5 additions & 1 deletion tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from optimum.exporters.openvino.__main__ import main_export
from optimum.intel import ( # noqa
OVFluxFillPipeline,
OVFluxPipeline,
OVLatentConsistencyModelPipeline,
OVModelForAudioClassification,
Expand Down Expand Up @@ -82,7 +83,9 @@ class OVCLIExportTestCase(unittest.TestCase):
]

if is_transformers_version(">=", "4.45"):
SUPPORTED_ARCHITECTURES.extend([("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux")])
SUPPORTED_ARCHITECTURES.extend(
[("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux"), ("inpainting", "flux-fill")]
)
EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
"gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"t5": 0, # no .model file in the repository
Expand All @@ -97,6 +100,7 @@ class OVCLIExportTestCase(unittest.TestCase):
"stable-diffusion-xl": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2,
"flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"flux-fill": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
"llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
}

Expand Down
2 changes: 2 additions & 0 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"falcon-40b": "katuni4ka/tiny-random-falcon-40b",
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"flux": "katuni4ka/tiny-random-flux",
"flux-fill": "katuni4ka/tiny-random-flux-fill",
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
Expand Down Expand Up @@ -193,6 +194,7 @@
"open-clip": (20, 28),
"stable-diffusion-3": (66, 42, 58, 30),
"flux": (56, 24, 28, 64),
"flux-fill": (56, 24, 28, 64),
"llava": (30, 9, 1),
"llava_next": (30, 9, 1),
"minicpmv": (30, 26, 1, 6),
Expand Down

0 comments on commit 58aec63

Please sign in to comment.