langchain-ai · bsbodden · Dec 29, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/libs/redis/langchain_redis/vectorstores.py b/libs/redis/langchain_redis/vectorstores.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Iterable, List, Optional, Tuple, Union, cast
+from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union, cast
 
 import numpy as np
 from langchain_core.documents import Document
@@ -1276,3 +1276,60 @@ def max_marginal_relevance_search(
         return self.max_marginal_relevance_search_by_vector(
             query_embedding, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult, **kwargs
         )
+
+    def get_by_ids(self, ids: Sequence[str]) -> List[Document]:
+        """Get documents by their IDs.
+
+        Each returned document has its ID field set to the ID that was requested,
+        i.e. the ID without the configured key prefix.
+
+        Fewer documents may be returned than requested: IDs with no matching key in
+        Redis are skipped and never raise an exception.
+
+        Users should not assume that the order of the returned documents matches
+        the order of the input IDs. Instead, users should rely on the ID field of the
+        returned documents.
+
+        Args:
+            ids: List of ids to retrieve.
+
+        Returns:
+            List of Documents. Stored fields other than the content field and
+            ``"embedding"`` are returned as metadata.
+
+        .. versionadded:: 0.1.2
+        """
+        if not ids:
+            # Nothing to fetch; also avoids sending JSON.MGET without any keys.
+            return []
+        redis = self.config.redis()
+        prefix = self.config.key_prefix
+        full_ids = [f"{prefix}:{id_}" for id_ in ids] if prefix else list(ids)
+        is_json = self.config.storage_type == StorageType.JSON.value
+        if is_json:
+            values = redis.json().mget(full_ids, ".")
+        else:
+            pipe = redis.pipeline()
+            for full_id in full_ids:
+                pipe.hgetall(full_id)
+            values = pipe.execute()
+        content_field = self.config.content_field
+        documents = []
+        for id_, value in zip(ids, values):
+            # A missing key yields None from JSON.MGET but an empty dict from
+            # HGETALL, so test truthiness rather than ``is None``.
+            if not value:
+                continue
+            doc = cast(dict, value) if is_json else convert_bytes(value)
+            documents.append(
+                Document(
+                    id=id_,
+                    page_content=doc[content_field],
+                    metadata={
+                        k: v
+                        for k, v in doc.items()
+                        if k != content_field and k != "embedding"
+                    },
+                )
+            )
+        return documents
diff --git a/libs/redis/tests/integration_tests/test_vectorstores_hash.py b/libs/redis/tests/integration_tests/test_vectorstores_hash.py
@@ -573,6 +573,38 @@ def test_similarity_search_with_scores(redis_url: str) -> None:
     vector_store.index.delete(drop=True)
 
 
+def test_get_by_ids(redis_url: str) -> None:
+    """Test end to end construction and getting by ids."""
+    embeddings = OpenAIEmbeddings()
+
+    # Create a unique index name for testing
+    index_name = f"test_index_{str(ULID())}"
+
+    doc_1_id = "doc-1"
+    doc_1_content = "foo"
+    documents = [
+        Document(page_content=doc_1_content, id=doc_1_id),
+    ]
+
+    vector_store = RedisVectorStore(
+        embeddings=embeddings,
+        index_name=index_name,
+        key_prefix="tst12",
+        redis_url=redis_url,
+    )
+    try:
+        vector_store.add_documents(documents, keys=[doc.id for doc in documents])
+
+        # Fetch by the bare id; the store is expected to apply the key prefix.
+        docs = vector_store.get_by_ids([doc_1_id])
+        assert docs == [
+            Document(page_content=doc_1_content, id=doc_1_id),
+        ]
+    finally:
+        # Clean up even when an assertion above fails.
+        vector_store.index.delete(drop=True)
+
+
 def test_add_texts(redis_url: str) -> None:
     """Test adding texts to an existing index."""
     embeddings = OpenAIEmbeddings()

diff --git a/libs/redis/tests/integration_tests/test_vectorstores_json.py b/libs/redis/tests/integration_tests/test_vectorstores_json.py
@@ -585,6 +585,39 @@ def test_similarity_search_with_scores(redis_url: str) -> None:
     vector_store.index.delete(drop=True)
 
 
+def test_get_by_ids(redis_url: str) -> None:
+    """Test end to end construction and getting by ids."""
+    embeddings = OpenAIEmbeddings()
+
+    # Create a unique index name for testing
+    index_name = f"test_index_{str(ULID())}"
+
+    doc_1_id = "doc-1"
+    doc_1_content = "foo"
+    documents = [
+        Document(page_content=doc_1_content, id=doc_1_id),
+    ]
+
+    vector_store = RedisVectorStore(
+        embeddings=embeddings,
+        index_name=index_name,
+        key_prefix="tst12",
+        redis_url=redis_url,
+        storage_type="json",
+    )
+    try:
+        vector_store.add_documents(documents, keys=[doc.id for doc in documents])
+
+        # Fetch by the bare id; the store is expected to apply the key prefix.
+        docs = vector_store.get_by_ids([doc_1_id])
+        assert docs == [
+            Document(page_content=doc_1_content, id=doc_1_id),
+        ]
+    finally:
+        # Clean up even when an assertion above fails.
+        vector_store.index.delete(drop=True)
+
+
 def test_add_texts(redis_url: str) -> None:
     """Test adding texts to an existing index."""
     embeddings = OpenAIEmbeddings()