Flatten path to mirror naip-analytic bucket
yellowcap committed Nov 5, 2024
1 parent 9146bdd commit b093ec5
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 2 additions & 0 deletions embeddings/README.md
@@ -34,6 +34,8 @@ For NAIP, we use the `naip-analytic` bucket. We leverage the manifest file that
 lists all files in the bucket. This list is parsed at the beginning, and each
 job processes a section of the NAIP scenes.
+
+At the time of processing there were 1,231,441 NAIP scenes.
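The manifest-splitting step described above can be sketched as follows. This is a minimal illustration, not the repository's actual code: the function name `job_slice` and all variable names are hypothetical.

```python
# Hypothetical sketch: split the parsed manifest into contiguous
# sections so that each job processes one slice of the NAIP scenes.
# All names here are illustrative, not taken from the codebase.
def job_slice(manifest_lines, job_id, total_jobs):
    """Return the contiguous section of scene keys for one job."""
    chunk = -(-len(manifest_lines) // total_jobs)  # ceiling division
    return manifest_lines[job_id * chunk : (job_id + 1) * chunk]

scenes = [f"scene_{i}.tif" for i in range(10)]
print(job_slice(scenes, 0, 3))  # first job takes the first section
```

Ceiling division ensures every scene is covered even when the manifest length is not an exact multiple of the job count; the last job simply gets a shorter slice.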

### Sentinel-2

For Sentinel-2 we use the `sentinel-cogs` bucket. Also here we use the manifest
4 changes: 1 addition & 3 deletions embeddings/utils.py
@@ -147,14 +147,12 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     if len(embeddings.shape) == EMBEDDING_SHAPE_CLASS:
         # Handle class embeddings
         index["embeddings"] = [np.ascontiguousarray(dat) for dat in np_embeddings]
-        embedding_level = "class"
     elif len(embeddings.shape) == EMBEDDING_SHAPE_PATCH:
         # Handle patch embeddings
         for i in range(embeddings.shape[1]):
             index[f"patch_embeddings_{i}"] = [
                 np.ascontiguousarray(dat) for dat in np_embeddings[:, i, :]
             ]
-        embedding_level = "patch"

     table = pa.table(
         index,
@@ -172,5 +170,5 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     s3_bucket = s3_resource.Bucket(name=destination_bucket)
     s3_bucket.put_object(
         Body=body,
-        Key=f"{embedding_level}/{path.parent}/{path.stem}.parquet",
+        Key=f"{path.parent}/{path.stem}.parquet",
     )
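The effect of the flattened key can be shown in isolation with a short sketch; the source path below is hypothetical, chosen only to illustrate how `path.parent` and `path.stem` make the output key mirror the source bucket layout instead of nesting under an embedding-level prefix.

```python
from pathlib import Path

# Hypothetical source path; illustrates how the flattened key
# mirrors the bucket layout rather than nesting the output under
# a "class/" or "patch/" prefix.
path = Path("mt/2021/mt_060cm_2021/example_scene.tif")
key = f"{path.parent}/{path.stem}.parquet"
print(key)  # mt/2021/mt_060cm_2021/example_scene.parquet
```

Dropping the `embedding_level` prefix means a single source scene always maps to exactly one predictable output key, which is what lets the destination bucket mirror `naip-analytic`.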
