Merge pull request #82 from IDEA-Research/dev
Release v0.7.0
imhuwq authored Sep 1, 2023
2 parents ab89fc8 + 35c388b commit 189a9e8
Showing 399 changed files with 13,589 additions and 14,572 deletions.
126 changes: 62 additions & 64 deletions deepdataspace/plugins/coco2017/importer.py
@@ -23,19 +23,19 @@ class COCO2017Importer(FileImporter):
Importer for coco2017 dataset.
"""

def __init__(self, dataset_path: str, media_dir: str = None, prediction_dir: str = None, enforce: bool = False):
def __init__(self, dataset_path: str, image_root: str = None, predictions: List[str] = None, enforce: bool = False):
"""
:param dataset_path: path to a json file of coco2017 dataset.
:param media_dir: an optional local directory containing image files of this dataset.
:param image_root: an optional local directory containing image files of this dataset.
If no media_dir is provided, the image files will be served from the original coco image urls.
:param prediction_dir: an optional local directory containing json files of predictions of this dataset.
:param predictions: an optional list containing json files of predictions of this dataset.
:param enforce: if True, the importer will re-import the dataset even if it is already imported.
"""

dataset_path = os.path.abspath(dataset_path)
self.dataset_path = dataset_path
self.media_dir = media_dir
self.prediction_dir = prediction_dir
self.image_root = image_root
self.predictions = predictions

super(COCO2017Importer, self).__init__(dataset_path, enforce=enforce)
self.dataset.type = DatasetType.COCO2017
@@ -123,13 +123,24 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:

# prepare image uri

uri = coco_image_data["coco_url"]
if self.media_dir:
image_path = os.path.join(self.media_dir, coco_image_data["file_name"])
if os.path.exists(image_path):
uri = f"file://{image_path}"
coco_image_data.pop("coco_url")
coco_image_data.pop("file_name")
uri = None

# trying to find the image file in local file system
if self.image_root and coco_image_data.get("file_name", None):
image_path = coco_image_data.get("file_name", None)
image_path = os.path.join(self.image_root, image_path)
uri = f"file://{image_path}"

# trying to find the image file in the original coco image urls
if uri is None:
uri = coco_image_data.get("coco_url", None)

if uri is None:
logger.warning(f"Cannot find image file for image {image_id}, skip it.")
continue

coco_image_data.pop("coco_url", None)
coco_image_data.pop("file_name", None)

# prepare other image data
width = coco_image_data.pop("width", None)
@@ -177,7 +188,7 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
is_group = anno_data.pop("is_group", None)

# prepare confidence
conf = anno_data.pop("conf", 1.0)
conf = anno_data.pop("score", 1.0)
if label_type == LabelType.GroundTruth:
conf = 1.0

@@ -199,37 +210,15 @@ def can_import(path: str):
if os.path.isdir(path):
return False

if not path.endswith(".json"):
return False

if os.path.basename(path).startswith("captions_"):
return False

return True
return not path.startswith(".") and path.endswith(".json")

def collect_files(self) -> dict:
files = super(COCO2017Importer, self).collect_files()
if not self.prediction_dir or not os.path.exists(self.prediction_dir):
return files

for item in os.listdir(self.prediction_dir):
if not item.endswith(".json"):
continue

pred_path = os.path.join(self.prediction_dir, item)
with open(pred_path, "r", encoding="utf8") as fp:
pred_data = json.load(fp)

gt_path = pred_data.get("info", {}).get("gt", None)
if gt_path is None:
continue

gt_name = os.path.splitext(os.path.basename(gt_path))[0]
if gt_name != self.dataset_name:
continue

pred_name = os.path.splitext(item)[0]
files[f"PRED/{pred_name}"] = pred_path
for pred in self.predictions:
pred_name = os.path.basename(pred)
pred_name = os.path.splitext(pred_name)[0]
files[f"PRED/{pred_name}"] = pred

return files

@@ -239,41 +228,50 @@ class COCO2017GroupImporter(FileGroupImporter):
Importer for COCO2017 dataset group.
"""

def choose_importer(self, path: str) -> FileImporter:
subset_name = os.path.basename(path).split("_")[-1]
subset_name = os.path.splitext(subset_name)[0]

media_dir = os.path.join(self.group_path, subset_name)
prediction_dir = os.path.join(self.group_path, "predictions")
def __init__(self, path: str, group_name: str = None, group_id: str = None, enforce: bool = False):
super().__init__(path, group_name, group_id, enforce=enforce)
self.coco2017_file = os.path.join(self.group_path, ".coco2017.json")
self.anno_files = {} # {"anno_file_path": {"annotation": "xxx", "image_root": "yyy", "predictions": ["a",]} }

if not os.path.exists(media_dir):
media_dir = None
if not os.path.exists(prediction_dir):
prediction_dir = None
def choose_importer(self, path: str) -> FileImporter:
anno_file_data = self.anno_files[path]

importer = COCO2017Importer(path, media_dir, prediction_dir, enforce=self.enforce)
image_root = anno_file_data.get("image_root", None)
predictions = anno_file_data.get("predictions", [])
importer = COCO2017Importer(path, image_root, predictions, enforce=self.enforce)
return importer

@staticmethod
def can_import(path: str) -> bool:
if os.path.isfile(path):
return False

dir_name = os.path.basename(path)
if dir_name.lower() == "coco2017":
return True

for item in os.listdir(path):
if item.lower() == ".coco2017":
return True
coco2017_file = os.path.join(path, ".coco2017.json")
if not os.path.exists(coco2017_file):
return False

return False
return True

def find_files(self) -> List[str]:
files = []
annotations_dir = os.path.join(self.group_path, "annotations")
for file in os.listdir(annotations_dir):
file = os.path.join(annotations_dir, file)
if COCO2017Importer.can_import(file):
files.append(file)
with open(self.coco2017_file, "r", encoding="utf8") as fp:
coco2017_data = json.load(fp)
for item in coco2017_data:
anno_path = os.path.join(self.group_path, item["annotation"])
anno_path = os.path.abspath(anno_path)

image_root = item.get("image_root", None)
if image_root:
image_root = os.path.join(self.group_path, image_root)
item["image_root"] = image_root
assert os.path.exists(image_root), f"Image root {image_root} does not exist."

predictions = item.get("predictions", [])
for idx, pred in enumerate(predictions):
pred = os.path.join(self.group_path, pred)
predictions[idx] = pred
assert os.path.exists(pred), f"Prediction file {pred} does not exist."

self.anno_files[anno_path] = item
files.append(anno_path)
return files
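
Note on the new group import flow: COCO2017GroupImporter now drives the whole group from a .coco2017.json manifest at the group root. can_import only checks that this file exists, find_files resolves each entry's annotation, image_root, and predictions relative to the group directory, and choose_importer passes them on to the rewritten COCO2017Importer. Below is a minimal sketch of such a manifest, generated from Python; only the key names come from the code above, while the directory layout and file names are illustrative assumptions.

# Hypothetical sketch: generate a ".coco2017.json" manifest that the new
# COCO2017GroupImporter.find_files above would accept. Only the key names
# ("annotation", "image_root", "predictions") come from the diff; the
# directory layout and file names below are illustrative assumptions.
import json
import os

group_dir = "/data/my_coco2017_group"  # assumed dataset group directory

manifest = [
    {
        # annotation json, resolved relative to the group directory
        "annotation": "annotations/instances_val2017.json",
        # optional local image directory; find_files asserts it exists
        "image_root": "val2017",
        # optional prediction json files; find_files asserts each exists
        "predictions": ["predictions/model_a_val2017.json"],
    },
]

with open(os.path.join(group_dir, ".coco2017.json"), "w", encoding="utf8") as fp:
    json.dump(manifest, fp, indent=2)

With a manifest like this in place, image_root is used during import to build file:// URIs for images that exist locally, and each prediction file is registered by collect_files under a PRED/<name> key.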
1 change: 0 additions & 1 deletion deepdataspace/scripts/__init__.py
@@ -26,6 +26,5 @@ def ddsop():
from deepdataspace.scripts.dataset_cmds import import_all
from deepdataspace.scripts.dataset_cmds import import_one
from deepdataspace.scripts.dataset_cmds import delete_one
from deepdataspace.scripts.dataset_cmds import delete_all
from deepdataspace.scripts.label_project_cmds import lp_export
from deepdataspace.scripts.migrate import migrate
23 changes: 2 additions & 21 deletions deepdataspace/scripts/dataset_cmds.py
@@ -43,25 +43,6 @@ def delete_one(dataset_dir):
print(f"dataset [{dataset_id}] is deleted.")


@ddsop.command("delete_all", help="Delete all datasets imported before.")
@click.option('--confirm', prompt="You are deleting all datasets, are you sure?[y/N]")
def delete_all(confirm):
if confirm.lower() != "y":
print("Abort.")
return

from deepdataspace.globals import MongoDB

collections = MongoDB.list_collection_names()
collections = sorted(collections)
print(f"found {len(collections)} to delete")

for collection in collections:
print(f"collection [{collection}] found, deleting...")
MongoDB.drop_collection(collection)
print(f"{len(collections)} collections deleted")


@ddsop.command("import_all", help="Trigger a background task of importing all datasets in a data dir.")
@click.option("--data_dir", "-d",
default=None,
@@ -78,7 +59,7 @@ def import_all(data_dir, force):
data_dir = os.path.abspath(data_dir)

import_and_process_data_dir.apply_async(args=(data_dir,), kwargs={"enforce": force})
print(f"task of importing dir[{data_dir}] is arranged, you can check the logs by command: ddsop logs -c")
print(f"task of importing dir[{data_dir}] is arranged")


@ddsop.command("import_one", help="Trigger a background task of importing one dataset.")
@@ -92,4 +73,4 @@ def import_one(dataset_dir, force):
dataset_dir = os.path.abspath(dataset_dir)

import_and_process_dataset.apply_async(args=(dataset_dir,), kwargs={"enforce": force})
print(f"task of importing dataset [{dataset_dir}] is arranged, you can check the logs by command: ddsop logs -c")
print(f"task of importing dataset [{dataset_dir}] is arranged")
Binary file removed deepdataspace/server/static/000000002299.jpg