Skip to content

Commit

Permalink
Fix for AUTO:device; add device to compile info msg (#434)
Browse files Browse the repository at this point in the history
Also remove _SUPPORTED_DEVICES and _ensure_supported_device, since they are not used
  • Loading branch information
helena-intel authored Sep 25, 2023
1 parent 1db2651 commit 985d0d1
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 27 deletions.
18 changes: 1 addition & 17 deletions optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,6 @@

logger = logging.getLogger(__name__)

_SUPPORTED_DEVICES = {
"CPU",
"GPU",
"AUTO",
"AUTO:CPU,GPU",
"AUTO:GPU,CPU",
"MULTI",
"MULTI:CPU,GPU",
"MULTI:GPU,CPU",
}


# workaround to enable compatibility between openvino models and transformers pipelines
class PreTrainedModel(OptimizedModel):
Expand Down Expand Up @@ -325,7 +314,7 @@ def _to_load(

def compile(self):
if self.request is None:
logger.info("Compiling the model...")
logger.info(f"Compiling the model to {self._device} ...")
ov_config = {**self.ov_config}
if "CACHE_DIR" not in self.ov_config.keys():
# Set default CACHE_DIR only if it is not set.
Expand Down Expand Up @@ -382,11 +371,6 @@ def half(self):
self.request = None
return self

def _ensure_supported_device(self, device: str = None):
device = device if device is not None else self._device
if device not in _SUPPORTED_DEVICES:
raise ValueError(f"Unknown device: {device}. Expected one of {_SUPPORTED_DEVICES}.")

def forward(self, *args, **kwargs):
raise NotImplementedError

Expand Down
8 changes: 6 additions & 2 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,12 @@ def update_pkv_precision(self, force_fp32=False):
pkv_precision = Type.f32
if not force_fp32:
device = self._device.upper()
if "INFERENCE_PRECISION_HINT" in core.get_property(device, "SUPPORTED_PROPERTIES"):
pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
try:
if "INFERENCE_PRECISION_HINT" in core.get_property(device, "SUPPORTED_PROPERTIES"):
pkv_precision = core.get_property(device, "INFERENCE_PRECISION_HINT")
except RuntimeError: # use default precision when get_property fails, e.g. when device is "AUTO:GPU"
pass

# ov_config["INFERENCE_PRECISION_HINT"] may override the preferred precision
if self.ov_config:
inference_precision_hint = self.ov_config.get("INFERENCE_PRECISION_HINT", "")
Expand Down
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def __init__(

def _compile(self):
if self.request is None:
logger.info(f"Compiling the {self._model_name}...")
logger.info(f"Compiling the {self._model_name} to {self.device} ...")
self.request = core.compile_model(self.model, self.device, self.ov_config)

@property
Expand Down
4 changes: 2 additions & 2 deletions optimum/intel/openvino/modeling_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def __call__(self, *args, **kwargs):

def _compile(self):
if self.request is None:
logger.info("Compiling the encoder...")
logger.info(f"Compiling the encoder to {self._device} ...")
self.request = core.compile_model(self.model, self._device, self.ov_config)


Expand Down Expand Up @@ -442,5 +442,5 @@ def __call__(self, *args, **kwargs):

def _compile(self):
if self.request is None:
logger.info("Compiling the decoder...")
logger.info(f"Compiling the decoder to {self._device} ...")
self.request = core.compile_model(self.model, self._device, self.ov_config).create_infer_request()
11 changes: 6 additions & 5 deletions tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,11 +560,12 @@ def test_compare_with_and_without_past_key_values(self):

def test_auto_device_loading(self):
model_id = MODEL_NAMES["gpt2"]
model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, device="AUTO")
model.half()
self.assertEqual(model._device, "AUTO")
del model
gc.collect()
for device in ("AUTO", "AUTO:CPU"):
model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, device=device)
model.half()
self.assertEqual(model._device, device)
del model
gc.collect()


class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
Expand Down

0 comments on commit 985d0d1

Please sign in to comment.