From 70581a467570aa8927263c01fe5d8805481107b2 Mon Sep 17 00:00:00 2001 From: moon Date: Sat, 12 Aug 2023 18:02:43 -0700 Subject: [PATCH] 0.4.7 - fix postgres bug, get_memories returns embeddings --- agentmemory/persistence.py | 3 +++ agentmemory/postgres.py | 20 +++++++++++++++++++- agentmemory/tests/clustering.py | 7 +++---- setup.py | 2 +- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/agentmemory/persistence.py b/agentmemory/persistence.py index f2b2524..8246615 100644 --- a/agentmemory/persistence.py +++ b/agentmemory/persistence.py @@ -56,6 +56,9 @@ def export_memory_to_file(path="./memory.json", include_embeddings=True): # Export the database to a dictionary collections_dict = export_memory_to_json(include_embeddings) + print('collections_dict') + print(collections_dict) + # Write the dictionary to a JSON file with open(path, "w") as outfile: json.dump(collections_dict, outfile) diff --git a/agentmemory/postgres.py b/agentmemory/postgres.py index 2f45d28..8e3f1ee 100644 --- a/agentmemory/postgres.py +++ b/agentmemory/postgres.py @@ -86,6 +86,8 @@ def get( where_document=None, include=["metadatas", "documents"], ): + # TODO: Mirrors Chroma API, but could be optimized a lot + category = self.category table_name = self.client._table_name(category) conditions = [] @@ -152,12 +154,28 @@ def get( item["metadata"] = metadata result.append(item) - return { + output = { "ids": [row["id"] for row in result], "documents": [row["document"] for row in result], "metadatas": [row["metadata"] for row in result], } + if len(result) == 0 or include is None: + return output + + # embeddings is an array, check if include includes "embeddings" + if 'embeddings' in include and result[0].get("embedding", None) is not None: + output["embeddings"] = [row["embedding"] for row in result] + # transform from ndarray to list + output["embeddings"] = [emb.tolist() for emb in output["embeddings"]] + + if 'distances' in include and result[0].get("distance", None) is not None: + output["distances"] = [row["distances"] for row in result] + # transform to list + output["distances"] = [dist.tolist() for dist in output["distances"]] + + return output + def peek(self, limit=10): return self.get(limit=limit) diff --git a/agentmemory/tests/clustering.py b/agentmemory/tests/clustering.py index 776f474..7bfe21c 100644 --- a/agentmemory/tests/clustering.py +++ b/agentmemory/tests/clustering.py @@ -38,6 +38,8 @@ def test_cluster_no_neighbors(): memories_data = get_memories("numbers") + assert memories_data[0]['embedding'] is not None + # All memories should be marked as noise since they have no neighbors for memory in memories_data: assert memory["metadata"].get("cluster") == "noise" @@ -62,13 +64,10 @@ def test_cluster_insufficient_neighbors(): memories_data = get_memories("fruits") - # Print metadata for debugging - for memory in memories_data: - print(f"Memory Document: {memory['document']}, Metadata: {memory['metadata']}") - # Only 'banana' should be marked as noise since 'apple' has 2 neighbors but needs 3 for memory in memories_data: assert memory["metadata"].get("cluster") == "noise" + wipe_category("fruits") diff --git a/setup.py b/setup.py index a60e80a..49b1767 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name='agentmemory', - version='0.4.6', + version='0.4.7', description='Easy-to-use memory for agents, document search, knowledge graphing and more.', long_description=long_description, # added this line long_description_content_type="text/markdown", # and this line