Commit 0d5d3ab

Merge branch 'huggingface:main' into kontext
2 parents: c160e7f + 3c8e4ba · commit 0d5d3ab


46 files changed: +3100, -987 lines

.github/workflows/build_documentation.yml (9 additions, 7 deletions)

@@ -8,6 +8,10 @@ on:
       - doc-builder*
       - v*-release

+env:
+  UV_SYSTEM_PYTHON: 1
+  UV_TORCH_BACKEND: auto
+
 jobs:
   build_documentation:
     runs-on: ubuntu-22.04
@@ -21,13 +25,13 @@ jobs:
       - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
         with:
-          node-version: '18'
+          node-version: "18"
           cache-dependency-path: "kit/package-lock.json"

       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
+          python-version: "3.11"

       - name: Set environment variables
         run: |
@@ -45,11 +49,9 @@ jobs:

       - name: Setup environment
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install --upgrade setuptools
-          python -m pip install git+https://github.com/huggingface/doc-builder
-          python -m pip install .[quality]
-          python -m pip install openvino nncf neural-compressor[pt] diffusers accelerate
+          pip install --upgrade pip uv
+          uv pip install git+https://github.com/huggingface/doc-builder
+          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate

       - name: Make documentation
         shell: bash

.github/workflows/build_pr_documentation.yml (22 additions, 27 deletions)

@@ -9,10 +9,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true

+env:
+  UV_SYSTEM_PYTHON: 1
+  UV_TORCH_BACKEND: auto
+
 jobs:
   build_documentation:
     runs-on: ubuntu-22.04
-
     env:
       COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
       PR_NUMBER: ${{ github.event.number }}
@@ -21,42 +24,34 @@ jobs:

     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
         with:
-          repository: "huggingface/doc-builder"
-          path: doc-builder
+          node-version: "18"
+          cache-dependency-path: "kit/package-lock.json"

-      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
         with:
-          repository: "huggingface/optimum-intel"
-          path: optimum-intel
-
-      - name: Setup Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: 3.9
+          python-version: "3.11"

       - name: Setup environment
         run: |
-          pip install --upgrade pip
-          pip uninstall -y doc-builder
-          cd doc-builder
-          git pull origin main
-          pip install .
-          pip install black
-          cd ..
+          pip install --upgrade pip uv
+          uv pip install git+https://github.com/huggingface/doc-builder
+          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate

       - name: Make documentation
+        shell: bash
         run: |
-          cd optimum-intel
-          make doc BUILD_DIR=intel-doc-build VERSION=pr_$PR_NUMBER COMMIT_SHA_SUBPACKAGE=$COMMIT_SHA CLONE_URL=$PR_CLONE_URL
-          cd ..
-
-      - name: Save commit_sha & pr_number
-        run: |
-          cd optimum-intel
-          sudo chmod -R ugo+rwx intel-doc-build
+          doc-builder build optimum.intel docs/source/ \
+            --repo_name optimum-intel \
+            --build_dir intel-doc-build/ \
+            --version pr_${{ env.PR_NUMBER }} \
+            --version_tag_suffix "" \
+            --html \
+            --clean
           cd intel-doc-build
-          sudo mv optimum.intel optimum-intel
+          mv optimum.intel optimum-intel
           echo ${{ env.COMMIT_SHA }} > ./commit_sha
           echo ${{ env.PR_NUMBER }} > ./pr_number

.github/workflows/test_inc.yml (1 addition, 2 deletions)

@@ -38,8 +38,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install torch==${{ matrix.torch-version }} torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu
-          pip install .[neural-compressor,tests] intel-extension-for-pytorch==${{ matrix.torch-version }}
-          pip install diffusers==0.32.2
+          pip install .[tests,neural-compressor] intel-extension-for-pytorch==${{ matrix.torch-version }} diffusers==0.32.2

       - name: Assert versions
         run: |

.github/workflows/test_openvino.yml (1 addition, 1 deletion)

@@ -41,7 +41,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install .[openvino,openvino-tokenizers,diffusers,tests]
+          pip install .[openvino,diffusers,tests]

       - if: ${{ matrix.transformers-version != 'latest' }}
         name: Install specific dependencies and versions required for older transformers

.github/workflows/test_openvino_full.yml (15 additions, 27 deletions)

@@ -23,43 +23,31 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - python-version: "3.9"
-            os: "ubuntu-22.04"
-            transformers-version: "latest"
-            openvino: "ov-stable"
-            nncf: "nncf-stable"
-          - python-version: "3.9"
-            os: "ubuntu-22.04"
-            transformers-version: "latest"
-            openvino: "ov-nightly"
-            nncf: "nncf-stable"
-          - python-version: "3.9"
-            os: "ubuntu-22.04"
-            transformers-version: "latest"
-            openvino: "ov-stable"
-            nncf: "nncf-develop"
-          - python-version: "3.9"
-            os: "ubuntu-22.04"
-            transformers-version: "latest"
-            openvino: "ov-nightly"
-            nncf: "nncf-develop"
-
-    runs-on: ${{ matrix.os }}
+        nncf: ["nncf-stable", "nncf-develop"]
+        openvino: ["ov-stable", "ov-nightly"]
+        transformers-version: ["latest"]
+
+    runs-on: ubuntu-22.04

     steps:
-      - uses: actions/checkout@v4
-      - name: Setup Python ${{ matrix.python-version }}
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: 3.9

       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
+          pip install --upgrade pip
           # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
           pip install .[tests,diffusers]
+          pip uninstall opencv-python -y && pip install opencv-python-headless

       - name: Install openvino-nightly
         if: ${{ matrix.openvino == 'ov-nightly' }}

.github/workflows/test_openvino_notebooks.yml (2 additions, 0 deletions)

@@ -29,6 +29,7 @@ jobs:
             "optimum_openvino_inference.ipynb",
             "question_answering_quantization.ipynb",
             "sentence_transformer_quantization.ipynb",
+            "vision_language_quantization.ipynb",
             # "stable_diffusion_hybrid_quantization.ipynb", TODO: update and ran on a powerful cpu
           ]

@@ -45,6 +46,7 @@ jobs:

       - name: Install packages
         run: |
+          sudo apt-get update
           sudo apt-get install -y ffmpeg

       - name: Install dependencies

.github/workflows/test_openvino_slow.yml (4 additions, 0 deletions)

@@ -40,6 +40,10 @@ jobs:
     runs-on: ${{ matrix.os }}

     steps:
+      - name: Free Disk Space (Ubuntu)
+        if: matrix.runs-on == 'ubuntu-22.04'
+        uses: jlumbroso/free-disk-space@main
+
       - name: Checkout code
         uses: actions/checkout@v4

Makefile (9 additions, 0 deletions)

@@ -59,3 +59,12 @@ doc: build_doc_docker_image
 		--version_tag_suffix "" \
 		--html \
 		--clean
+
+clean:
+	rm -rf build
+	rm -rf dist
+	rm -rf .pytest_cache
+	rm -rf .ruff_cache
+	rm -rf .mypy_cache
+	rm -rf optimum_intel.egg-info
+	rm -rf *__pycache__

README.md (16 additions, 54 deletions)

@@ -40,31 +40,22 @@ or to install from source including dependencies:
 python -m pip install "optimum-intel[extras]"@git+https://github.com/huggingface/optimum-intel.git
 ```

-where `extras` can be one or more of `ipex`, `neural-compressor`, `openvino`, `nncf`.
+where `extras` can be one or more of `ipex`, `neural-compressor`, `openvino`.

 # Quick tour

 ## Neural Compressor

-Dynamic quantization can be used through the Optimum command-line interface:
+Dynamic quantization can be used through the Optimum CLI:

 ```bash
 optimum-cli inc quantize --model distilbert-base-cased-distilled-squad --output ./quantized_distilbert
 ```
 Note that quantization is currently only supported for CPUs (only CPU backends are available), so we will not be utilizing GPUs / CUDA in this example.

-To load a quantized model hosted locally or on the 🤗 hub, you can do as follows :
-```python
-from optimum.intel import INCModelForSequenceClassification
-
-model_id = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-dynamic"
-model = INCModelForSequenceClassification.from_pretrained(model_id)
-```
-
 You can load many more quantized models hosted on the hub under the Intel organization [`here`](https://huggingface.co/Intel).

-For more details on the supported compression techniques, please refer to the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_inc).
-
+For more details on the supported compression techniques, please refer to the [documentation](https://huggingface.co/docs/optimum-intel/en/neural_compressor/optimization).

 ## OpenVINO

@@ -75,28 +66,27 @@ Below are examples of how to use OpenVINO and its [NNCF](https://docs.openvino.a
 It is also possible to export your model to the [OpenVINO IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) format with the CLI :

 ```plain
-optimum-cli export openvino --model gpt2 ov_model
+optimum-cli export openvino --model meta-llama/Meta-Llama-3-8B ov_llama/
 ```

 You can also apply 8-bit weight-only quantization when exporting your model : the model linear, embedding and convolution weights will be quantized to INT8, the activations will be kept in floating point precision.

 ```plain
-optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
+optimum-cli export openvino --model meta-llama/Meta-Llama-3-8B --weight-format int8 ov_llama_int8/
 ```

 Quantization in hybrid mode can be applied to Stable Diffusion pipeline during model export. This involves applying hybrid post-training quantization to the UNet model and weight-only quantization for the rest of the pipeline components. In the hybrid mode, weights in MatMul and Embedding layers are quantized, as well as activations of other layers.

 ```plain
-optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
+optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model_sd/
 ```

-To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
+To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum-intel/en/openvino/optimization).

 #### Inference:

 To load a model and run inference with OpenVINO Runtime, you can just replace your `AutoModelForXxx` class with the corresponding `OVModelForXxx` class.

-
 ```diff
 - from transformers import AutoModelForSeq2SeqLM
 + from optimum.intel import OVModelForSeq2SeqLM
@@ -112,50 +102,22 @@ To load a model and run inference with OpenVINO Runtime, you can just replace yo
 [{'translation_text': "Il n'est jamais sorti sans un livre sous son bras, et il est souvent revenu avec deux."}]
 ```

-If you want to load a PyTorch checkpoint, set `export=True` to convert your model to the OpenVINO IR.
+#### Quantization:

-```python
-from optimum.intel import OVModelForCausalLM
-
-model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
-model.save_pretrained("./ov_model")
-```
+Post-training static quantization can also be applied. Here is an example on how to apply static quantization on a Whisper model using the [LibriSpeech](https://huggingface.co/datasets/openslr/librispeech_asr) dataset for the calibration step.

+```python
+from optimum.intel import OVModelForSpeechSeq2Seq, OVQuantizationConfig

-#### Post-training static quantization:
-
-Post-training static quantization introduces an additional calibration step where data is fed through the network in order to compute the activations quantization parameters. Here is an example on how to apply static quantization on a fine-tuned DistilBERT.
+model_id = "openai/whisper-tiny"
+q_config = OVQuantizationConfig(dtype="int8", dataset="librispeech", num_samples=50)
+q_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, quantization_config=q_config)

-```python
-from functools import partial
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-model_id = "distilbert-base-uncased-finetuned-sst-2-english"
-model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-def preprocess_fn(examples, tokenizer):
-    return tokenizer(
-        examples["sentence"], padding=True, truncation=True, max_length=128
-    )
-
-quantizer = OVQuantizer.from_pretrained(model)
-calibration_dataset = quantizer.get_calibration_dataset(
-    "glue",
-    dataset_config_name="sst2",
-    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
-    num_samples=100,
-    dataset_split="train",
-    preprocess_batch=True,
-)
 # The directory where the quantized model will be saved
 save_dir = "nncf_results"
-# Apply static quantization and save the resulting model in the OpenVINO IR format
-ov_config = OVConfig(quantization_config=OVQuantizationConfig())
-quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
-# Load the quantized model
-optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
+q_model.save_pretrained(save_dir)
 ```
+You can find more information in the [documentation](https://huggingface.co/docs/optimum-intel/en/openvino/optimization).


 ## IPEX
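
The 8-bit weight-only export shown in the README hunk above also has a Python-API counterpart, which this commit does not touch. A minimal sketch, assuming `optimum-intel` is installed with the `openvino` extra; the model choice mirrors the CLI example and is illustrative only:

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Quantize linear, embedding and convolution weights to INT8 at load time;
# activations stay in floating point, matching the CLI `--weight-format int8` flow.
model = OVModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",
    quantization_config=OVWeightQuantizationConfig(bits=8),
)
model.save_pretrained("ov_llama_int8")
```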
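Likewise, the new Whisper static-quantization snippet ends at `q_model.save_pretrained(save_dir)`. A sketch of loading that model back for a quick smoke test, assuming the `datasets` package is available; the `hf-internal-testing/librispeech_asr_dummy` split and sample choice are illustrative:

```python
from datasets import load_dataset
from transformers import AutoProcessor
from optimum.intel import OVModelForSpeechSeq2Seq

# Reload the statically quantized Whisper model saved as "nncf_results" above.
model = OVModelForSpeechSeq2Seq.from_pretrained("nncf_results")
processor = AutoProcessor.from_pretrained("openai/whisper-tiny")

# Transcribe one short LibriSpeech utterance.
sample = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")[0]
inputs = processor(sample["audio"]["array"], sampling_rate=16000, return_tensors="pt")
generated_ids = model.generate(inputs.input_features)
print(processor.batch_decode(generated_ids, skip_special_tokens=True))
```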

docs/Dockerfile (0 additions, 28 deletions)

This file was deleted.
