Merge pull request #1 from BlueLabelLabs/feat/add-hpu-support
Add HPU (Intel® Gaudi®) support
PiotrBLL authored Jan 16, 2025
2 parents b509747 + a03eacc commit 90c97df
Showing 13 changed files with 439 additions and 1 deletion.
65 changes: 65 additions & 0 deletions README_hpu.md
@@ -0,0 +1,65 @@
# Running HuggingFace on Intel Gaudi (HPU)

## Prerequisites

Before you begin, ensure you have Docker installed and can run Docker containers on your machine. You'll also need access to Intel Gaudi hardware (HPUs).

## Build the Docker Image

1. Build the Docker image using the provided Dockerfile. Run the build from the repository root, because the Dockerfile copies the entire project into the image:

```bash
docker build -f docker/gaudi/Dockerfile -t langchain-hpu .
```

This will create a Docker image called `langchain-hpu`, which includes all necessary dependencies for running HuggingFace on Intel Gaudi (HPU).

## Run the Docker Container

1. Start the Docker container with an interactive terminal, passing the standard Habana runtime flags so the container can see the Gaudi devices (depending on your setup, you may also need `--cap-add=sys_nice` and `--ipc=host`):

```bash
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all langchain-hpu
```

2. Once inside the container, navigate to the HuggingFace integration folder.

```bash
cd /workspace/langchain/libs/partners/huggingface
```

3. Now, you are ready to run any scripts or tests for HuggingFace models on HPU. For example, you can start a training script or load models for inference on the Intel Gaudi (HPU) device.
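
For instance, here is a minimal sketch of loading an embeddings model for inference on the HPU device (the model name is an illustrative choice, not one required by this setup):

```python
# Minimal sketch: load a sentence-embeddings model on the Gaudi device.
# The model name is illustrative; the {"device": "hpu"} kwarg mirrors the
# HPU tests added in this PR.
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "hpu"},
)

vector = embeddings.embed_query("Say foo:")
print(len(vector))  # 768 dimensions for this model
```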

### Running HPU-Specific Tests

To run HPU-specific tests, use the following command:

```bash
export RUN_HPU_TEST=1 && make hpu_tests
```

This will set the `RUN_HPU_TEST` environment variable and run all tests that require HPU (those files ending with `_hpu.py`).
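
Each of these tests guards itself with a `skipif` marker on that variable, following the pattern used throughout the new test files (the test body below is a placeholder):

```python
import os

import pytest


@pytest.mark.skipif(not os.getenv("RUN_HPU_TEST"), reason="RUN_HPU_TEST is not set")
def test_something_on_hpu() -> None:
    """Placeholder; the real tests exercise models on the HPU device."""
    ...
```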

### Example

To run a specific test file that requires HPU, use:

```bash
export RUN_HPU_TEST=1 && poetry run pytest tests/integration_tests/test_llms_hpu.py
```

Replace `test_llms_hpu.py` with the test file you want to run, and ensure that the environment is configured to use the HPU during model execution.

## Dependencies

The Dockerfile installs both general and HPU-specific dependencies. If you need to add or update dependencies for your HuggingFace integration, modify the `requirements_hpu.txt` file located in the `libs/partners/huggingface/` directory and rebuild the image.
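
For example, to add a package (here `optimum-habana`, named purely as an illustration; the actual contents of `requirements_hpu.txt` are not shown in this diff), append it to the file and rebuild:

```bash
echo "optimum-habana" >> libs/partners/huggingface/requirements_hpu.txt
docker build -f docker/gaudi/Dockerfile -t langchain-hpu .
```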

## Notes

- Ensure that the container has access to the Intel Gaudi hardware (see the `docker run` flags above); the scripts cannot execute on HPU without it.
- You may want to use `poetry` or `pip` for managing Python dependencies in the container, depending on your project's setup.
- If you're using `poetry`, install the dependencies by running `poetry install` inside the container.
27 changes: 27 additions & 0 deletions docker/gaudi/Dockerfile
@@ -0,0 +1,27 @@
# Use the official Gaudi Docker image with PyTorch
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest

# Set the working directory to where the project will be copied
WORKDIR /workspace

# Copy the entire project into the container
# (the build context must be the repository root)
COPY . /workspace/langchain

# Install general dependencies
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry for managing dependencies and put it on PATH
# (the installer places the binary under /root/.local/bin)
RUN curl -sSL https://install.python-poetry.org | python3 -
ENV PATH="/root/.local/bin:$PATH"

# Install the base dependencies of the HuggingFace partner package
WORKDIR /workspace/langchain/libs/partners/huggingface
RUN poetry install

# Install HPU-specific dependencies; requirements_hpu.txt is already in the
# image from the project copy above, so no separate COPY is needed
RUN poetry add $(cat requirements_hpu.txt)
9 changes: 8 additions & 1 deletion libs/community/Makefile
@@ -1,11 +1,12 @@
.PHONY: all format lint test tests test_watch integration_tests docker_tests help extended_tests
.PHONY: all format lint test tests test_watch integration_tests hpu_tests docker_tests help extended_tests

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
integration_tests: TEST_FILE = tests/integration_tests/
HPU_TEST_FILES=$(shell find tests -name '*_hpu.py')

# Run unit tests and generate a coverage report.
coverage:
@@ -27,6 +28,11 @@ test_watch:
check_imports: $(shell find langchain_community -name '*.py')
	poetry run python ./scripts/check_imports.py $^

# Run HPU-specific tests only if RUN_HPU_TEST is set.
hpu_tests:
@echo "Setting RUN_HPU_TEST to 1 to enable HPU-specific tests."
export RUN_HPU_TEST=1 && poetry run pytest --disable-socket --allow-unix-socket $(HPU_TEST_FILES)

extended_tests:
	poetry run pytest --disable-socket --allow-unix-socket --only-extended tests/unit_tests

@@ -74,3 +80,4 @@ help:
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
	@echo 'test_watch - run unit tests in watch mode'
	@echo 'hpu_tests - run all HPU-specific tests (files ending with *_hpu.py) if RUN_HPU_TEST is set'
@@ -34,6 +34,15 @@ def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0)

    client = INSTRUCTOR(model_id)

    if importlib.util.find_spec("habana_frameworks") is not None:
        import habana_frameworks.torch.hpu as hthpu

        if hthpu.is_available():
            import torch
            from habana_frameworks.torch.hpu import wrap_in_hpu_graph

            client = wrap_in_hpu_graph(client)
            return client.eval().to(torch.device("hpu"))

    if importlib.util.find_spec("torch") is not None:
        import torch
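For reference, `wrap_in_hpu_graph` comes from the Habana PyTorch bridge and wraps a module so its computation graph is captured once and replayed on the device. A minimal standalone sketch of the same pattern (the `SentenceTransformer` model stands in for the `INSTRUCTOR` client above and is an illustrative assumption):

```python
# Minimal sketch of the HPU-graph pattern used in the diff above. Assumes
# the habana_frameworks PyTorch bridge is installed (it ships with the
# Gaudi base image); the SentenceTransformer model is illustrative only.
import importlib.util

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

if importlib.util.find_spec("habana_frameworks") is not None:
    import habana_frameworks.torch.hpu as hthpu

    if hthpu.is_available():
        import torch
        from habana_frameworks.torch.hpu import wrap_in_hpu_graph

        model = wrap_in_hpu_graph(model)  # capture/replay the graph on HPU
        model = model.eval().to(torch.device("hpu"))
```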
4 changes: 4 additions & 0 deletions libs/community/tests/integration_tests/.env.example
@@ -66,3 +66,7 @@ UPSTASH_VECTOR_URL=your_upstash_vector_url
UPSTASH_VECTOR_TOKEN=your_upstash_vector_token
UPSTASH_VECTOR_URL_EMBEDDING=your_upstash_vector_embedding_url
UPSTASH_VECTOR_TOKEN_EMBEDDING=your_upstash_vector_embedding_token


# Intel® Gaudi®
RUN_HPU_TEST=0
@@ -0,0 +1,63 @@
"""Test huggingface embeddings."""
import os
import pytest
from langchain_community.embeddings.huggingface import (
HuggingFaceEmbeddings,
HuggingFaceInstructEmbeddings,
)


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_embedding_documents_on_hpu() -> None:
"""Test huggingface embeddings."""
documents = ["foo bar"]
embedding = HuggingFaceEmbeddings(model_kwargs={"device": "hpu"})
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_embedding_query_on_hpu() -> None:
"""Test huggingface embeddings."""
document = "foo bar"
embedding = HuggingFaceEmbeddings(encode_kwargs={"batch_size": 16}, model_kwargs={"device": "hpu"})
output = embedding.embed_query(document)
assert len(output) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_instructor_embedding_documents_on_hpu() -> None:
"""Test huggingface embeddings."""
documents = ["foo bar"]
model_name = "hkunlp/instructor-base"
embedding = HuggingFaceInstructEmbeddings(model_name=model_name, model_kwargs={"device": "hpu"})
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_instructor_embedding_query_on_hpu() -> None:
"""Test huggingface embeddings."""
query = "foo bar"
model_name = "hkunlp/instructor-base"
embedding = HuggingFaceInstructEmbeddings(model_name=model_name, model_kwargs={"device": "hpu"})
output = embedding.embed_query(query)
assert len(output) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_instructor_embedding_normalize_on_hpu() -> None:
"""Test huggingface embeddings."""
query = "foo bar"
model_name = "hkunlp/instructor-base"
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceInstructEmbeddings(
model_name=model_name, encode_kwargs=encode_kwargs, model_kwargs={"device": "hpu"}
)
output = embedding.embed_query(query)
assert len(output) == 768
eps = 1e-5
norm = sum([o ** 2 for o in output])
assert abs(1 - norm) <= eps
@@ -0,0 +1,42 @@
"""Test HuggingFaceHub embeddings."""
import os
import pytest
from langchain_community.embeddings import HuggingFaceHubEmbeddings


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingfacehub_embedding_documents_on_hpu() -> None:
"""Test huggingfacehub embeddings."""
documents = ["foo bar"]
embedding = HuggingFaceHubEmbeddings(model_kwargs={"device": "hpu"}) # type: ignore[call-arg]
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
async def test_huggingfacehub_embedding_async_documents_on_hpu() -> None:
"""Test huggingfacehub embeddings."""
documents = ["foo bar"]
embedding = HuggingFaceHubEmbeddings(model_kwargs={"device": "hpu"}) # type: ignore[call-arg]
output = await embedding.aembed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingfacehub_embedding_query_on_hpu() -> None:
"""Test huggingfacehub embeddings."""
document = "foo bar"
embedding = HuggingFaceHubEmbeddings(model_kwargs={"device": "hpu"}) # type: ignore[call-arg]
output = embedding.embed_query(document)
assert len(output) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
async def test_huggingfacehub_embedding_async_query_on_hpu() -> None:
"""Test huggingfacehub embeddings."""
document = "foo bar"
embedding = HuggingFaceHubEmbeddings(model_kwargs={"device": "hpu"}) # type: ignore[call-arg]
output = await embedding.aembed_query(document)
assert len(output) == 768
@@ -0,0 +1,61 @@
"""Test self-hosted embeddings."""
import os
import pytest
from langchain_community.embeddings import (
SelfHostedHuggingFaceEmbeddings,
SelfHostedHuggingFaceInstructEmbeddings,
)
from typing import Any


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def get_remote_instance() -> Any:
"""Get remote instance for testing using HPU."""
import runhouse as rh

# Intel Gaudi instance
hpu = rh.cluster(name="gaudi-instance", instance_type="dl1.24xlarge")
hpu.install_packages(["pip:./"])
return hpu


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_self_hosted_huggingface_embedding_documents_hpu() -> None:
"""Test self-hosted huggingface embeddings using HPU."""
documents = ["foo bar"]
hpu = get_remote_instance()
embedding = SelfHostedHuggingFaceEmbeddings(hardware=hpu)
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_self_hosted_huggingface_embedding_query_hpu() -> None:
"""Test self-hosted huggingface embeddings using HPU."""
document = "foo bar"
hpu = get_remote_instance()
embedding = SelfHostedHuggingFaceEmbeddings(hardware=hpu)
output = embedding.embed_query(document)
assert len(output) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_self_hosted_huggingface_instructor_embedding_documents_hpu() -> None:
"""Test self-hosted huggingface instruct embeddings using HPU."""
documents = ["foo bar"]
hpu = get_remote_instance()
embedding = SelfHostedHuggingFaceInstructEmbeddings(hardware=hpu)
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 768


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_self_hosted_huggingface_instructor_embedding_query_hpu() -> None:
"""Test self-hosted huggingface instruct embeddings using HPU."""
query = "foo bar"
hpu = get_remote_instance()
embedding = SelfHostedHuggingFaceInstructEmbeddings(hardware=hpu)
output = embedding.embed_query(query)
assert len(output) == 768
@@ -0,0 +1,54 @@
"""Test HuggingFace Pipeline wrapper."""
import os
import pytest
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_pipeline_text_generation_on_hpu() -> None:
"""Test valid call to HuggingFace text generation model."""
llm = HuggingFacePipeline.from_model_id(
model_id="gpt2",
task="text-generation",
pipeline_kwargs={"max_new_tokens": 10},
model_kwargs={"device": "hpu"},
)
output = llm.invoke("Say foo:")
assert isinstance(output, str)


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_pipeline_text2text_generation_on_hpu() -> None:
"""Test valid call to HuggingFace text2text generation model."""
llm = HuggingFacePipeline.from_model_id(
model_id="google/flan-t5-small",
task="text2text-generation",
model_kwargs={"device": "hpu"},
)
output = llm.invoke("Say foo:")
assert isinstance(output, str)


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_pipeline_invalid_hpu_and_openvino_backend() -> None:
"""Test invalid backend."""
try:
HuggingFacePipeline.from_model_id(
model_id="google/flan-t5-small",
task="text2text-generation",
model_kwargs={"device": "hpu", "backend": "openvino"},
)
except ValueError as e:
assert "Cannot specify `model_kwargs{'device': 'hpu'}` and `backend=openvino` at the same time." in str(e)


@pytest.mark.skipif(not os.getenv('RUN_HPU_TEST'), reason="RUN_HPU_TEST is not set")
def test_huggingface_pipeline_summarization_on_hpu() -> None:
"""Test valid call to HuggingFace summarization model."""
llm = HuggingFacePipeline.from_model_id(
model_id="facebook/bart-large-cnn",
task="summarization",
model_kwargs={"device": "hpu"},
)
output = llm.invoke("Say foo:")
assert isinstance(output, str)
@@ -24,6 +24,20 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )

    Example using HPU:
        .. code-block:: python

            from langchain_huggingface import HuggingFaceEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'hpu'}
            encode_kwargs = {'normalize_embeddings': False}
            hf = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    model_name: str = DEFAULT_MODEL_NAME