Commit 1aaac07

add input preprocessing
eaidova committed Nov 14, 2024
1 parent 0a3041f commit 1aaac07
Showing 2 changed files with 20 additions and 3 deletions.
21 changes: 19 additions & 2 deletions optimum/intel/openvino/modeling_visual_language.py
@@ -1809,8 +1809,8 @@ def preprocess_inputs(
             raise ValueError("Tokenizer is required.")
         if image is not None and processor is None:
             raise ValueError("Processor is required.")
-        text_content = f"<image>\n{text}" if image is not None else text
-        messages = [{"role": "user", "content": text_content}]
+        text = f"<image>\n{text}" if image is not None else text
+        messages = [{"role": "user", "content": text}]
         if tokenizer.chat_template is not None:
             text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         if image is not None:
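
The rename above fixes a subtle bug: the "<image>" prefix used to live only in text_content, which fed the chat template, so when no chat template was set the downstream code kept working with the unprefixed text. Reassigning text itself makes both paths carry the tag. A minimal sketch of the fixed flow (the checkpoint id is an illustrative assumption, not part of this commit):

# Sketch of the fixed flow; "llava-hf/llava-1.5-7b-hf" is an example
# checkpoint, not one pinned by this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("llava-hf/llava-1.5-7b-hf")

text = "What is in this picture?"
text = f"<image>\n{text}"  # the tag now lives on `text` itself
messages = [{"role": "user", "content": text}]
if tokenizer.chat_template is not None:
    # templated path: the tag reaches the template via `messages`
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# untemplated path: `text` still carries "<image>\n", which the old
# `text_content` variant dropped
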
@@ -1957,6 +1957,23 @@ def get_multimodal_embeddings(

         return inputs_embeds, attention_mask, position_ids
 
+    @staticmethod
+    def preprocess_inputs(
+        text: str,
+        image: Optional[Image] = None,
+        processor: Optional[AutoImageProcessor] = None,
+        tokenizer: Optional[PreTrainedTokenizer] = None,
+    ):
+        if processor is None:
+            raise ValueError("Processor is required.")
+        if image is not None and "<|image_1|>" not in text:
+            text = "<|image_1|>\n" + text
+        if getattr(processor.tokenizer, "chat_template", None) is not None:
+            chat_prompt = [{"role": "user", "content": text}]
+            text = processor.tokenizer.apply_chat_template(chat_prompt, add_generation_prompt=True, tokenize=False)
+        inputs = processor(images=image, text=text, return_tensors="pt")
+        return inputs
+
 
 MODEL_TYPE_TO_CLS_MAPPING = {
     "llava": _OVLlavaForCausalLM,
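
A minimal usage sketch of the new static method. The class name (_OVPhi3VisionForCausalLM), checkpoint id, and image path are illustrative assumptions; the diff does not show which class the method is attached to.

# Hypothetical usage; class name, model id, and image path are assumptions.
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "microsoft/Phi-3-vision-128k-instruct", trust_remote_code=True
)
image = Image.open("cat.jpg")

inputs = _OVPhi3VisionForCausalLM.preprocess_inputs(
    text="Describe this image.",
    image=image,
    processor=processor,
)
# `inputs` holds the processor output (input_ids, pixel values,
# attention mask, ...), ready to feed the model's generate() call.
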
2 changes: 1 addition & 1 deletion tests/openvino/test_modeling.py
@@ -1883,7 +1883,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
         SUPPORTED_ARCHITECTURES += ["minicpmv", "internvl2", "phi3_v"]
     REMOTE_CODE_MODELS = ["minicpmv", "nanollava", "phi3_v"]
     TASK = "image-text-to-text"
-    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava"]
+    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava", "phi3_v"]
 
     IMAGE = Image.open(
         requests.get(
