From b093ec526d447c61f0f863f6f7017fdd44dd8ac1 Mon Sep 17 00:00:00 2001
From: Daniel Wiesmann
Date: Wed, 16 Oct 2024 18:47:38 +0100
Subject: [PATCH] Flatten path to mirror naip-analytic bucket

---
 embeddings/README.md | 2 ++
 embeddings/utils.py  | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/embeddings/README.md b/embeddings/README.md
index 302b0278..cc7685ee 100644
--- a/embeddings/README.md
+++ b/embeddings/README.md
@@ -34,6 +34,8 @@ For NAIP, we use the `naip-analytic` bucket. We leverage the manifest file
 that lists all files in the bucket. This list is parsed in the beginning and
 each job processes a section of the naip scenes.
 
+At the moment of processing there were 1'231'441 NAIP scenes.
+
 ### Sentinel-2
 
 For Sentinel-2 we use the `sentinel-cogs` bucket. Also here we use the manifest
diff --git a/embeddings/utils.py b/embeddings/utils.py
index 3dff287f..2c33dfb4 100644
--- a/embeddings/utils.py
+++ b/embeddings/utils.py
@@ -147,14 +147,12 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     if len(embeddings.shape) == EMBEDDING_SHAPE_CLASS:
         # Handle class embeddings
         index["embeddings"] = [np.ascontiguousarray(dat) for dat in np_embeddings]
-        embedding_level = "class"
     elif len(embeddings.shape) == EMBEDDING_SHAPE_PATCH:
         # Handle patch embeddings
         for i in range(embeddings.shape[1]):
             index[f"patch_embeddings_{i}"] = [
                 np.ascontiguousarray(dat) for dat in np_embeddings[:, i, :]
             ]
-        embedding_level = "patch"
 
     table = pa.table(
         index,
@@ -172,5 +170,5 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     s3_bucket = s3_resource.Bucket(name=destination_bucket)
     s3_bucket.put_object(
         Body=body,
-        Key=f"{embedding_level}/{path.parent}/{path.stem}.parquet",
+        Key=f"{path.parent}/{path.stem}.parquet",
    )
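
A minimal sketch of the effect of this key change, assuming a hypothetical NAIP scene path that follows the naip-analytic bucket layout (the path below is illustrative only, it is not taken from the patch or the manifest):

    from pathlib import Path

    # Hypothetical NAIP scene path mirroring the naip-analytic bucket layout
    # (illustrative only, not taken from the manifest).
    path = Path("mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.tif")

    # Before this patch, the embedding level ("class" or "patch") was prepended:
    #   class/mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.parquet
    old_key = f"class/{path.parent}/{path.stem}.parquet"

    # After this patch, the key mirrors the source bucket path directly:
    #   mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.parquet
    new_key = f"{path.parent}/{path.stem}.parquet"

With the prefix removed, the parquet objects sit at the same relative paths as the source scenes, so an embedding can be located from a scene path by swapping the bucket name and file extension.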