Fix finish generation from streamer (#1554) #417

Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: GenAI tools
on:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - master
      - 'releases/**'
permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
concurrency:
  # github.ref is not unique in post-commit
  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-llm-bench-python
  cancel-in-progress: true
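# The `&& ||` expression in the group key acts as a conditional: push events get the unique run_id,
# so post-commit runs are never cancelled, while all other events fall back to github.ref, so a newer
# run for the same ref cancels the one still in progress.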
jobs:
  openvino_download:
    name: Download OpenVINO
    outputs:
      status: ${{ steps.openvino_download.outcome }}
      ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
      ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
      ov_version: ${{ steps.openvino_download.outputs.ov_version }}
    timeout-minutes: 10
    defaults:
      run:
        shell: bash
    runs-on: aks-linux-2-cores-8gb
    container:
      image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
      volumes:
        - /mount:/mount
        - ${{ github.workspace }}:${{ github.workspace }}
    steps:
      - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
        id: openvino_download
        with:
          platform: ubuntu22
          commit_packages_to_provide: wheels
          revision: latest_available_commit
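  # The provider action's outputs (artifact name, wheel install arguments, version) are re-exported
  # as job outputs above and consumed by the llm_bench and wwb jobs via needs.openvino_download.outputs.*.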

  llm_bench:
    name: 'LLM bench tests'
    defaults:
      run:
        shell: bash
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.11"]
    needs: [ openvino_download ]
    env:
      OV_INSTALL_DIR: ${{ github.workspace }}/ov
      SRC_DIR: ${{ github.workspace }}
      LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: recursive
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Lint with flake8
        run: |
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest black
          # stop the build if there are Python syntax errors or undefined names
          python -m flake8 ${{ env.LLM_BENCH_PYPATH }} --config=${{ env.LLM_BENCH_PYPATH }}/setup.cfg
      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.OV_INSTALL_DIR }}
          merge-multiple: true
      - name: Install dependencies
        run: |
          python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
          python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
          python -m pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
        working-directory: ${{ env.OV_INSTALL_DIR }}
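      # ov_wheel_source (used in the step above) expands to extra pip arguments produced by the
      # openvino_provider action, presumably a --find-links/--pre pointing at the wheels downloaded
      # into OV_INSTALL_DIR, so the freshly built OpenVINO is installed instead of the PyPI release.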
      - name: Test native pytorch model
        run: |
          huggingface-cli download katuni4ka/tiny-random-qwen --local-dir ./tiny-random-qwen
          python ./tools/llm_bench/benchmark.py -m ./tiny-random-qwen -d cpu -n 1 -f pt -ic 20
          rm -rf ./tiny-random-qwen
      - name: Test katuni4ka/tiny-random-baichuan2 Optimum Intel
        run: |
          optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/ -d cpu -n 1 --optimum -ic 10
          rm -rf ./ov_models/
      - name: Create prompt file for image generation
        run: |
          prompt="side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"
          json_template='{steps: $steps, width: $width, height: $height, guidance_scale: $guidance_scale, prompt: $prompt}'
          jq -n -c --arg steps "30" --arg width "64" --arg height "128" --arg guidance_scale "1.0" --arg prompt "$prompt" "$json_template" > ./image_generation.jsonl
          jq -n -c --arg steps "4" --arg width "64" --arg height "32" --arg guidance_scale "7.0" --arg prompt "$prompt" "$json_template" >> ./image_generation.jsonl
      - name: Test echarlaix/tiny-random-latent-consistency Optimum Intel
        run: |
          optimum-cli export openvino --model echarlaix/tiny-random-latent-consistency --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-latent-consistency
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --optimum --num_steps 4
      - name: Test echarlaix/tiny-random-latent-consistency with GenAI
        run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4
      - name: Test echarlaix/tiny-random-latent-consistency with GenAI and LoRA
        run: |
          huggingface-cli download katuni4ka/tiny-random-latent-consistency-lora --local-dir ./lora
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4 --lora ./lora/tiny-random-latent-consistency-lora.safetensors --lora_alphas 0.7
          rm -rf ./ov_models/ ./lora ./image_generation.jsonl
      - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding via GenAI
        run: |
          optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
          optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4 -ic 20
          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5 -ic 20
          rm -rf ov_models/TinyLlama-1.1B-Chat-v1.0
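      # The two benchmark runs above cover both speculative-decoding modes: a dynamic one where the
      # INT8 draft model keeps proposing tokens while its confidence stays above 0.4, and a static one
      # where it always proposes 5 candidate tokens for the FP16 main model to verify.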
      - name: Test openai/whisper-tiny via Optimum
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
          cd multilingual_librispeech
          git lfs pull -I /data/mls_polish/train/audio/3283_1447_000.tar.gz
          mkdir data/mls_polish/train/audio/3283_1447_000
          tar zxvf data/mls_polish/train/audio/3283_1447_000.tar.gz -C data/mls_polish/train/audio/3283_1447_000/
          cd ..
          optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
          python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 --optimum
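      # GIT_LFS_SKIP_SMUDGE=1 clones only LFS pointer files; the follow-up `git lfs pull -I ...` then
      # fetches just the one audio archive that gets unpacked and benchmarked. The exported whisper-tiny
      # model and the extracted .flac file are reused by the GenAI step below before being removed.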
      - name: Test openai/whisper-tiny via GenAI
        run: |
          python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1
          rm -rf ./ov_models/whisper-tiny
          rm -rf multilingual_librispeech
      - name: Test katuni4ka/tiny-random-llava via Optimum
        run: |
          optimum-cli export openvino --model katuni4ka/tiny-random-llava ./ov_models/tiny-random-llava --task image-text-to-text --trust-remote-code
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-llava --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20 --optimum
      - name: Test katuni4ka/tiny-random-llava via GenAI
        run: |
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-llava --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20
          rm -rf ./ov_models

  wwb:
    name: 'WWB tests'
    defaults:
      run:
        shell: bash
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.11"]
    needs: [ openvino_download ]
    env:
      OV_INSTALL_DIR: ${{ github.workspace }}/ov
      SRC_DIR: ${{ github.workspace }}
      WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: recursive
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Lint with flake8
        run: |
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest black
          # stop the build if there are Python syntax errors or undefined names
          python -m flake8 ${{ env.WWB_PATH }} --config=${{ env.WWB_PATH }}/setup.cfg
      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.OV_INSTALL_DIR }}
          merge-multiple: true
      - name: Install dependencies
        run: |
          python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
          python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
          python -m pip install -r ${{ env.WWB_PATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
          python -m pip install git+https://github.com/huggingface/optimum-intel.git@main#egg=optimum-intel
        working-directory: ${{ env.OV_INSTALL_DIR }}
      - name: WWB Tests
        run: |
          python -m pip install -v ${{ env.WWB_PATH }}
          python -m pytest -v ${{ env.WWB_PATH }}/tests

  Overall_Status:
    name: ci/gha_overall_status_llm_bench
    needs: [openvino_download, llm_bench, wwb]
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Check status of all jobs
        if: >-
          ${{
            contains(needs.*.result, 'failure') ||
            contains(needs.*.result, 'cancelled')
          }}
        run: exit 1