Merge pull request #82 from IDEA-Research/dev
Release v0.7.0
imhuwq authored Sep 1, 2023
2 parents ab89fc8 + 35c388b commit 189a9e8
Showing 399 changed files with 13,589 additions and 14,572 deletions.
126 changes: 62 additions & 64 deletions deepdataspace/plugins/coco2017/importer.py
@@ -23,19 +23,19 @@ class COCO2017Importer(FileImporter):
Importer for coco2017 dataset.
"""

def __init__(self, dataset_path: str, media_dir: str = None, prediction_dir: str = None, enforce: bool = False):
def __init__(self, dataset_path: str, image_root: str = None, predictions: List[str] = None, enforce: bool = False):
"""
:param dataset_path: path to a json file of coco2017 dataset.
:param media_dir: an optional local directory containing image files of this dataset.
:param image_root: an optional local directory containing image files of this dataset.
If no media_dir is provided, the image files will be served from the original coco image urls.
:param prediction_dir: an optional local directory containing json files of predictions of this dataset.
:param predictions: an optional list containing json files of predictions of this dataset.
:param enforce: if True, the importer will re-import the dataset even if it is already imported.
"""

dataset_path = os.path.abspath(dataset_path)
self.dataset_path = dataset_path
self.media_dir = media_dir
self.prediction_dir = prediction_dir
self.image_root = image_root
self.predictions = predictions

super(COCO2017Importer, self).__init__(dataset_path, enforce=enforce)
self.dataset.type = DatasetType.COCO2017
@@ -123,13 +123,24 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:

# prepare image uri

uri = coco_image_data["coco_url"]
if self.media_dir:
image_path = os.path.join(self.media_dir, coco_image_data["file_name"])
if os.path.exists(image_path):
uri = f"file://{image_path}"
coco_image_data.pop("coco_url")
coco_image_data.pop("file_name")
uri = None

# trying to find the image file in local file system
if self.image_root and coco_image_data.get("file_name", None):
image_path = coco_image_data.get("file_name", None)
image_path = os.path.join(self.image_root, image_path)
uri = f"file://{image_path}"

# trying to find the image file in the original coco image urls
if uri is None:
uri = coco_image_data.get("coco_url", None)

if uri is None:
logger.warning(f"Cannot find image file for image {image_id}, skip it.")
continue

coco_image_data.pop("coco_url", None)
coco_image_data.pop("file_name", None)

# prepare other image data
width = coco_image_data.pop("width", None)
@@ -177,7 +188,7 @@ def __iter__(self) -> Tuple[Dict, List[Dict]]:
is_group = anno_data.pop("is_group", None)

# prepare confidence
conf = anno_data.pop("conf", 1.0)
conf = anno_data.pop("score", 1.0)
if label_type == LabelType.GroundTruth:
conf = 1.0

@@ -199,37 +210,15 @@ def can_import(path: str):
if os.path.isdir(path):
return False

if not path.endswith(".json"):
return False

if os.path.basename(path).startswith("captions_"):
return False

return True
return not path.startswith(".") and path.endswith(".json")

def collect_files(self) -> dict:
files = super(COCO2017Importer, self).collect_files()
if not self.prediction_dir or not os.path.exists(self.prediction_dir):
return files

for item in os.listdir(self.prediction_dir):
if not item.endswith(".json"):
continue

pred_path = os.path.join(self.prediction_dir, item)
with open(pred_path, "r", encoding="utf8") as fp:
pred_data = json.load(fp)

gt_path = pred_data.get("info", {}).get("gt", None)
if gt_path is None:
continue

gt_name = os.path.splitext(os.path.basename(gt_path))[0]
if gt_name != self.dataset_name:
continue

pred_name = os.path.splitext(item)[0]
files[f"PRED/{pred_name}"] = pred_path
for pred in self.predictions:
pred_name = os.path.basename(pred)
pred_name = os.path.splitext(pred_name)[0]
files[f"PRED/{pred_name}"] = pred

return files

@@ -239,41 +228,50 @@ class COCO2017GroupImporter(FileGroupImporter):
Importer for COCO2017 dataset group.
"""

def choose_importer(self, path: str) -> FileImporter:
subset_name = os.path.basename(path).split("_")[-1]
subset_name = os.path.splitext(subset_name)[0]

media_dir = os.path.join(self.group_path, subset_name)
prediction_dir = os.path.join(self.group_path, "predictions")
def __init__(self, path: str, group_name: str = None, group_id: str = None, enforce: bool = False):
super().__init__(path, group_name, group_id, enforce=enforce)
self.coco2017_file = os.path.join(self.group_path, ".coco2017.json")
self.anno_files = {} # {"anno_file_path": {"annotation": "xxx", "image_root": "yyy", "predictions": ["a",]} }

if not os.path.exists(media_dir):
media_dir = None
if not os.path.exists(prediction_dir):
prediction_dir = None
def choose_importer(self, path: str) -> FileImporter:
anno_file_data = self.anno_files[path]

importer = COCO2017Importer(path, media_dir, prediction_dir, enforce=self.enforce)
image_root = anno_file_data.get("image_root", None)
predictions = anno_file_data.get("predictions", [])
importer = COCO2017Importer(path, image_root, predictions, enforce=self.enforce)
return importer

@staticmethod
def can_import(path: str) -> bool:
if os.path.isfile(path):
return False

dir_name = os.path.basename(path)
if dir_name.lower() == "coco2017":
return True

for item in os.listdir(path):
if item.lower() == ".coco2017":
return True
coco2017_file = os.path.join(path, ".coco2017.json")
if not os.path.exists(coco2017_file):
return False

return False
return True

def find_files(self) -> List[str]:
files = []
annotations_dir = os.path.join(self.group_path, "annotations")
for file in os.listdir(annotations_dir):
file = os.path.join(annotations_dir, file)
if COCO2017Importer.can_import(file):
files.append(file)
with open(self.coco2017_file, "r", encoding="utf8") as fp:
coco2017_data = json.load(fp)
for item in coco2017_data:
anno_path = os.path.join(self.group_path, item["annotation"])
anno_path = os.path.abspath(anno_path)

image_root = item.get("image_root", None)
if image_root:
image_root = os.path.join(self.group_path, image_root)
item["image_root"] = image_root
assert os.path.exists(image_root), f"Image root {image_root} does not exist."

predictions = item.get("predictions", [])
for idx, pred in enumerate(predictions):
pred = os.path.join(self.group_path, pred)
predictions[idx] = pred
assert os.path.exists(pred), f"Prediction file {pred} does not exist."

self.anno_files[anno_path] = item
files.append(anno_path)
return files
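
Note on the new group import flow: COCO2017GroupImporter now drives the whole group from a .coco2017.json manifest at the group root. can_import only checks that this file exists, find_files resolves each entry's annotation, image_root, and predictions relative to the group directory, and choose_importer passes them on to the rewritten COCO2017Importer. Below is a minimal sketch of such a manifest, generated from Python; only the key names come from the code above, while the directory layout and file names are illustrative assumptions.

# Hypothetical sketch: generate a ".coco2017.json" manifest that the new
# COCO2017GroupImporter.find_files above would accept. Only the key names
# ("annotation", "image_root", "predictions") come from the diff; the
# directory layout and file names below are illustrative assumptions.
import json
import os

group_dir = "/data/my_coco2017_group"  # assumed dataset group directory

manifest = [
    {
        # annotation json, resolved relative to the group directory
        "annotation": "annotations/instances_val2017.json",
        # optional local image directory; find_files asserts it exists
        "image_root": "val2017",
        # optional prediction json files; find_files asserts each exists
        "predictions": ["predictions/model_a_val2017.json"],
    },
]

with open(os.path.join(group_dir, ".coco2017.json"), "w", encoding="utf8") as fp:
    json.dump(manifest, fp, indent=2)

With a manifest like this in place, image_root is used during import to build file:// URIs for images that exist locally, and each prediction file is registered by collect_files under a PRED/<name> key.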
1 change: 0 additions & 1 deletion deepdataspace/scripts/__init__.py
@@ -26,6 +26,5 @@ def ddsop():
from deepdataspace.scripts.dataset_cmds import import_all
from deepdataspace.scripts.dataset_cmds import import_one
from deepdataspace.scripts.dataset_cmds import delete_one
from deepdataspace.scripts.dataset_cmds import delete_all
from deepdataspace.scripts.label_project_cmds import lp_export
from deepdataspace.scripts.migrate import migrate
23 changes: 2 additions & 21 deletions deepdataspace/scripts/dataset_cmds.py
@@ -43,25 +43,6 @@ def delete_one(dataset_dir):
print(f"dataset [{dataset_id}] is deleted.")


@ddsop.command("delete_all", help="Delete all datasets imported before.")
@click.option('--confirm', prompt="You are deleting all datasets, are you sure?[y/N]")
def delete_all(confirm):
if confirm.lower() != "y":
print("Abort.")
return

from deepdataspace.globals import MongoDB

collections = MongoDB.list_collection_names()
collections = sorted(collections)
print(f"found {len(collections)} to delete")

for collection in collections:
print(f"collection [{collection}] found, deleting...")
MongoDB.drop_collection(collection)
print(f"{len(collections)} collections deleted")


@ddsop.command("import_all", help="Trigger a background task of importing all datasets in a data dir.")
@click.option("--data_dir", "-d",
default=None,
@@ -78,7 +59,7 @@ def import_all(data_dir, force):
data_dir = os.path.abspath(data_dir)

import_and_process_data_dir.apply_async(args=(data_dir,), kwargs={"enforce": force})
print(f"task of importing dir[{data_dir}] is arranged, you can check the logs by command: ddsop logs -c")
print(f"task of importing dir[{data_dir}] is arranged")


@ddsop.command("import_one", help="Trigger a background task of importing one dataset.")
@@ -92,4 +73,4 @@ def import_one(dataset_dir, force):
dataset_dir = os.path.abspath(dataset_dir)

import_and_process_dataset.apply_async(args=(dataset_dir,), kwargs={"enforce": force})
print(f"task of importing dataset [{dataset_dir}] is arranged, you can check the logs by command: ddsop logs -c")
print(f"task of importing dataset [{dataset_dir}] is arranged")
Binary file removed deepdataspace/server/static/000000002299.jpg