From b093ec526d447c61f0f863f6f7017fdd44dd8ac1 Mon Sep 17 00:00:00 2001
From: Daniel Wiesmann
Date: Wed, 16 Oct 2024 18:47:38 +0100
Subject: [PATCH] Flatten path to mirror naip-analytic bucket

---
 embeddings/README.md | 2 ++
 embeddings/utils.py  | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/embeddings/README.md b/embeddings/README.md
index 302b0278..cc7685ee 100644
--- a/embeddings/README.md
+++ b/embeddings/README.md
@@ -34,6 +34,8 @@ For NAIP, we use the `naip-analytic` bucket. We leverage the manifest file
 that lists all files in the bucket. This list is parsed in the beginning and
 each job processes a section of the naip scenes.
 
+At the moment of processing there were 1'231'441 NAIP scenes.
+
 ### Sentinel-2
 
 For Sentinel-2 we use the `sentinel-cogs` bucket. Also here we use the manifest
diff --git a/embeddings/utils.py b/embeddings/utils.py
index 3dff287f..2c33dfb4 100644
--- a/embeddings/utils.py
+++ b/embeddings/utils.py
@@ -147,14 +147,12 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     if len(embeddings.shape) == EMBEDDING_SHAPE_CLASS:
         # Handle class embeddings
         index["embeddings"] = [np.ascontiguousarray(dat) for dat in np_embeddings]
-        embedding_level = "class"
     elif len(embeddings.shape) == EMBEDDING_SHAPE_PATCH:
         # Handle patch embeddings
         for i in range(embeddings.shape[1]):
             index[f"patch_embeddings_{i}"] = [
                 np.ascontiguousarray(dat) for dat in np_embeddings[:, i, :]
             ]
-        embedding_level = "patch"
 
     table = pa.table(
         index,
@@ -172,5 +170,5 @@ def write_to_table(embeddings, bboxs, datestr, gsd, destination_bucket, path):
     s3_bucket = s3_resource.Bucket(name=destination_bucket)
     s3_bucket.put_object(
         Body=body,
-        Key=f"{embedding_level}/{path.parent}/{path.stem}.parquet",
+        Key=f"{path.parent}/{path.stem}.parquet",
    )
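
A minimal sketch of the effect of this key change, assuming a hypothetical NAIP scene path that follows the naip-analytic bucket layout (the path below is illustrative only, it is not taken from the patch or the manifest):

    from pathlib import Path

    # Hypothetical NAIP scene path mirroring the naip-analytic bucket layout
    # (illustrative only, not taken from the manifest).
    path = Path("mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.tif")

    # Before this patch, the embedding level ("class" or "patch") was prepended:
    #   class/mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.parquet
    old_key = f"class/{path.parent}/{path.stem}.parquet"

    # After this patch, the key mirrors the source bucket path directly:
    #   mn/2021/60cm/rgbir_cog/45093/m_4509301_ne_15_060_20210907.parquet
    new_key = f"{path.parent}/{path.stem}.parquet"

With the prefix removed, the parquet objects sit at the same relative paths as the source scenes, so an embedding can be located from a scene path by swapping the bucket name and file extension.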