Skip to content

Commit

Permalink
Fix image processing
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova committed Nov 5, 2024
1 parent d2c40ca commit b978df7
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions optimum/intel/openvino/modeling_visual_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,17 @@ def forward(self, image_feature, pos_embed, key_padding_mask):
return result


MODEL_PARTS_CLS_MAPPING = {"resampler": OVResampler}
class OVVisionProjection(OVModelPart):
    """Model part whose ``_model_name`` is ``"vision_projection"``."""

    _model_name = "vision_projection"

    def forward(self, img_features):
        """Run the request on ``img_features`` and return its first output."""
        outputs = self.request(img_features)
        return outputs[0]


# Lookup table from a model-part name to the class that wraps that part.
MODEL_PARTS_CLS_MAPPING = {
    "resampler": OVResampler,
    "vision_projection": OVVisionProjection,
}


class OVModelForVisualCausalLM(OVBaseModel, GenerationMixin):
Expand Down Expand Up @@ -1665,12 +1675,10 @@ def get_multimodal_embeddings(
input_ids = input_ids.clamp_min(0).clamp_max(self.config.vocab_size)
inputs_embeds = torch.from_numpy(self.get_text_embeddings(input_ids, **kwargs))
if has_image:
vision_embeds = self.get_vision_embeddings(
pixel_values, input_ids=input_ids, image_sizes=image_sizes, **kwargs
)
vision_embeds = self.get_vision_embeddings(pixel_values, input_ids=input_ids, image_sizes=image_sizes, **kwargs)
image_features_proj = torch.from_numpy(vision_embeds)
inputs_embeds = inputs_embeds.index_put(positions, image_features_proj, accumulate=False)

return inputs_embeds, attention_mask, position_ids

MODEL_TYPE_TO_CLS_MAPPING = {
Expand Down

0 comments on commit b978df7

Please sign in to comment.