Merge branch 'main' into ov-pipeline

huggingface · Jun 7, 2024 · ad87768 · ad87768
2 parents 45209de + ca2b1fa
commit ad87768
Show file tree

Hide file tree

Showing 34 changed files with 1,107 additions and 321 deletions.
diff --git a/.github/workflows/test_inc.yml b/.github/workflows/test_inc.yml
@@ -4,9 +4,12 @@ name: Intel Neural Compressor - Test
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - v*-release
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml
@@ -4,9 +4,12 @@ name: Intel IPEX - Test
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - v*-release
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -18,6 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.8, 3.9]
+        transformers-version: [4.39.0, 4.41.2]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
@@ -32,6 +36,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install torch torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
         pip install .[ipex,tests]
+        pip install transformers==${{ matrix.transformers-version }}
     - name: Test with Pytest
       run: |
         pytest tests/ipex/
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -4,9 +4,12 @@ name: OpenVINO - Test
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - v*-release
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -17,21 +20,23 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8, 3.11]
+        python-version: ["3.8", "3.12"]
+        transformers-version: ["4.36.0", "4.41.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+        pip install transformers==${{ matrix.transformers-version }}
         pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
     - name: Test with Pytest
       run: |
@@ -46,3 +51,4 @@ jobs:
         pip install openvino-nightly
         python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
         optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
+
diff --git a/.github/workflows/test_openvino_basic.yml b/.github/workflows/test_openvino_basic.yml
@@ -24,16 +24,16 @@ jobs:
       matrix:
         # Testing lower and upper bound of supported Python versions
         # This also ensures that the test fails if dependencies break for Python 3.7
-        python-version: ["3.8", "3.11"]
-        transformers: ['transformers']
+        python-version: ["3.8", "3.12"]
         optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git']
+        os: ["ubuntu-22.04", "windows-latest"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
@@ -43,12 +43,17 @@ jobs:
         # optimum or transformers to a specific version
         # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install .[tests] openvino onnx onnxruntime ${{ matrix.optimum}} ${{ matrix.transformers }}
+        pip install .[tests] openvino onnxruntime ${{ matrix.optimum}}
 
-    - name: Pip freeze        
+    - name: Pip freeze
       run: pip freeze
 
     - name: Test with Pytest
       run: |
         pytest tests/openvino/test_modeling_basic.py
-        RUN_SLOW=1 pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0
+
+    - name: Slow tests
+      run: |
+        pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0
+      env:
+        RUN_SLOW: 1
diff --git a/.github/workflows/test_openvino_examples.yml b/.github/workflows/test_openvino_examples.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.8", "3.12"]
 
     runs-on: ubuntu-22.04
 

diff --git a/.github/workflows/test_openvino_notebooks.yml b/.github/workflows/test_openvino_notebooks.yml
@@ -23,7 +23,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.8", "3.12"]
 
     runs-on: ubuntu-22.04
 

diff --git a/README.md b/README.md
@@ -239,3 +239,8 @@ Do not forget to install requirements for every example:
 cd <example-folder>
 pip install -r requirements.txt
 ```
+
+
+## Gaudi
+
+To train your model on [Intel Gaudi AI Accelerators (HPU)](https://docs.habana.ai/en/latest/index.html), check out [Optimum Habana](https://github.com/huggingface/optimum-habana) which provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks. After training your model, feel free to submit it to the Intel [leaderboard](https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard) which is designed to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel hardware. Models submitted to the leaderboard will be evaluated on the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness.
diff --git a/docs/source/inference.mdx b/docs/source/inference.mdx
@@ -28,8 +28,12 @@ As shown in the table below, each task is associated with a class enabling to au
 | `image-classification`               | `OVModelForImageClassification`      |
 | `feature-extraction`                 | `OVModelForFeatureExtraction`        |
 | `fill-mask`                          | `OVModelForMaskedLM`                 |
-| `text-generation`                    | `OVModelForCausalLM`                 |
-| `text2text-generation`               | `OVModelForSeq2SeqLM`                |
+| `image-classification`               | `OVModelForImageClassification`      |
+| `audio-classification`               | `OVModelForAudioClassification`      |
+| `text-generation-with-past`          | `OVModelForCausalLM`                 |
+| `text2text-generation-with-past`     | `OVModelForSeq2SeqLM`                |
+| `automatic-speech-recognition`       | `OVModelForSpeechSeq2Seq`            |
+| `image-to-text`                      | `OVModelForVision2Seq`               |
 
 
 ### Export
@@ -42,14 +46,20 @@ optimum-cli export openvino --model gpt2 ov_model
 
 The example above illustrates exporting a checkpoint from the 🤗 Hub. When exporting a local model, first make sure that you saved both the model’s weights and tokenizer files in the same directory (`local_path`).
 When using CLI, pass the `local_path` to the model argument instead of the checkpoint name of the model hosted on the Hub and provide the `--task` argument. You can review the list of supported tasks in the 🤗 [Optimum documentation](https://huggingface.co/docs/optimum/exporters/task_manager). If task argument is not provided, it will default to the model architecture without any task specific head.
-Here we set the `task` to `text-generation-with-past`, with the `-with-past` suffix enabling the re-use of the pre-computed key/values hidden-states `use_cache=True`.
+The `-with-past` suffix enables the re-use of the pre-computed key/values hidden-states and is the recommended option. To export the model without it (equivalent to `use_cache=False`), you will need to remove this suffix.
 
 ```bash
 optimum-cli export openvino --model local_path --task text-generation-with-past ov_model
 ```
 
 To export your model in fp16, you can add `--weight-format fp16` when exporting your model.
 
+<Tip warning={true}>
+
+Models larger than 1 billion parameters are exported to the OpenVINO format with 8-bit weights by default. You can disable it with `--weight-format fp32`.
+
+</Tip>
+
 Once the model is exported, you can load the OpenVINO model using :
 
 ```python
@@ -126,7 +136,7 @@ model = OVModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
 
 <Tip warning={true}>
 
-`load_in_8bit` is enabled by default for the models larger than 1 billion parameters. You can disable it with `load_in_8bit=False`.
+If not specified, `load_in_8bit` will be set to `True` by default when models larger than 1 billion parameters are exported to the OpenVINO format (with `export=True`). You can disable it with `load_in_8bit=False`.
 
 </Tip>
 

diff --git a/docs/source/optimization_ov.mdx b/docs/source/optimization_ov.mdx
@@ -44,7 +44,7 @@ model.save_pretrained(saving_directory)
 
 <Tip warning={true}>
 
-`load_in_8bit` is enabled by default for the models larger than 1 billion parameters. You can disable it with `load_in_8bit=False`.
+If not specified, `load_in_8bit` will be set to `True` by default when models larger than 1 billion parameters are exported to the OpenVINO format (with `export=True`). You can disable it with `load_in_8bit=False`.
 
 </Tip>
 

diff --git a/docs/source/reference_ov.mdx b/docs/source/reference_ov.mdx
@@ -14,56 +14,113 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->
 
-# Reference
+# Models
 
-## OVModelForFeatureExtraction
+## Natural Language Processing
 
-[[autodoc]] openvino.modeling.OVModelForFeatureExtraction
+The following classes are available for the following natural language processing tasks.
+
+### OVModelForCausalLM
+
+[[autodoc]] openvino.modeling_decoder.OVModelForCausalLM
+    - forward
+    - generate
 
-## OVModelForMaskedLM
+### OVModelForMaskedLM
 
 [[autodoc]] openvino.modeling.OVModelForMaskedLM
+    - forward
+
+### OVModelForSeq2SeqLM
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForSeq2SeqLM
+    - forward
 
-## OVModelForQuestionAnswering
+### OVModelForQuestionAnswering
 
 [[autodoc]] openvino.modeling.OVModelForQuestionAnswering
+    - forward
 
-## OVModelForSequenceClassification
+### OVModelForSequenceClassification
 
 [[autodoc]] openvino.modeling.OVModelForSequenceClassification
+    - forward
 
-## OVModelForTokenClassification
+### OVModelForTokenClassification
 
 [[autodoc]] openvino.modeling.OVModelForTokenClassification
+    - forward
 
-## OVModelForAudioClassification
+
+## Audio
+
+The following classes are available for the following audio tasks.
+
+### OVModelForAudioClassification
 
 [[autodoc]] openvino.modeling.OVModelForAudioClassification
+    - forward
 
-## OVModelForAudioFrameClassification
+### OVModelForAudioFrameClassification
 
 [[autodoc]] openvino.modeling.OVModelForAudioFrameClassification
+    - forward
 
-## OVModelForCTC
+### OVModelForCTC
 
 [[autodoc]] openvino.modeling.OVModelForCTC
+    - forward
 
-## OVModelForAudioXVector
+### OVModelForAudioXVector
 
 [[autodoc]] openvino.modeling.OVModelForAudioXVector
+    - forward
+
+### OVModelForSpeechSeq2Seq
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForSpeechSeq2Seq
+    - forward
+
+
+## Computer Vision
 
-## OVModelForImageClassification
+The following classes are available for the following computer vision tasks.
+
+### OVModelForImageClassification
 
 [[autodoc]] openvino.modeling.OVModelForImageClassification
+    - forward
 
-## OVModelForCausalLM
 
-[[autodoc]] openvino.modeling_decoder.OVModelForCausalLM
+## Multimodal
 
-## OVModelForSeq2SeqLM
+The following classes are available for the following multimodal tasks.
 
-[[autodoc]] openvino.modeling_seq2seq.OVModelForSeq2SeqLM
+### OVModelForVision2Seq
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForVision2Seq
+    - forward
+
+### OVModelForPix2Struct
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForPix2Struct
+    - forward
+
+## Custom Tasks
+
+### OVModelForCustomTasks
+
+[[autodoc]] openvino.modeling.OVModelForCustomTasks
+    - forward
+
+### OVModelForFeatureExtraction
+
+[[autodoc]] openvino.modeling.OVModelForFeatureExtraction
+    - forward
+
+
+# Quantization
 
-## OVQuantizer
+### OVQuantizer
 
 [[autodoc]] openvino.quantization.OVQuantizer
diff --git a/notebooks/openvino/quantized_generation_demo.ipynb b/notebooks/openvino/quantized_generation_demo.ipynb
@@ -32,7 +32,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install optimum[openvino,nncf] torch"
+    "# ! pip install optimum[openvino,nncf] torch==2.2.2"
    ]
   },
   {
-Original file line number
+Diff line change
@@ Expand Up / @@ -44,7 +44,7 @@ model.save_pretrained(saving_directory) @@
     <Tip warning={true}>
-    `load_in_8bit` is enabled by default for the models larger than 1 billion parameters. You can disable it with `load_in_8bit=False`.
+    If not specified, `load_in_8bit` will be set to `True` by default when models larger than 1 billion parameters are exported to the OpenVINO format (with `export=True`). You can disable it with `load_in_8bit=False`.
     </Tip>
@@ Expand Down @@