Skip to content

Commit

Permalink
Add option to create unique ID index when inserting from JSONL
Browse files: browse the repository at this point in the history
  • Loading branch information
ml-evs committed Nov 17, 2024
1 parent 43ef254 commit c266a3e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
6 changes: 3 additions & 3 deletions optimade/server/entry_collections/entry_collections.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -274,11 +274,11 @@ def all_fields(self) -> set[str]:

return self._all_fields

def create_index(self, fields: str | set[str], unique: bool = False) -> None:
"""Create an index on the given fields.
def create_index(self, field: str, unique: bool = False) -> None:
"""Create an index on the given field.
Arguments:
fields: The fields, or single field, to index.
field: The field to index.
unique: Whether or not the index should be unique.
"""
Expand Down
11 changes: 4 additions & 7 deletions optimade/server/entry_collections/mongo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,18 +106,15 @@ def insert(self, data: list[EntryResource]) -> None:
except Exception:
pass

def create_index(self, fields: str | set[str], unique: bool = False) -> None:
"""Create an index on the given fields.
def create_index(self, field: str, unique: bool = False) -> None:
"""Create an index on the given field.
Arguments:
fields: The fields, or single field, to index.
field: The field to index.
unique: Whether or not the index should be unique.
"""
if isinstance(fields, str):
fields = {fields}

self.collection.create_indexes(fields, unique=unique, background=True)
self.collection.create_index(field, unique=unique, background=True)

def handle_query_params(
self, params: EntryListingQueryParams | SingleEntryQueryParams
Expand Down
10 changes: 9 additions & 1 deletion optimade/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,11 +24,12 @@
)


def insert_from_jsonl(jsonl_path: Path) -> None:
def insert_from_jsonl(jsonl_path: Path, make_default_index: bool = False) -> None:
"""Insert OPTIMADE JSON lines data into the database.
Arguments:
jsonl_path: Path to the JSON lines file.
make_default_index: Whether to create a default index on the `id` field.
"""
from collections import defaultdict
Expand Down Expand Up @@ -100,6 +101,13 @@ def insert_from_jsonl(jsonl_path: Path) -> None:
ENTRY_COLLECTIONS[entry_type].insert(batch[entry_type])
batch[entry_type] = []

if make_default_index:
for entry_type in ENTRY_COLLECTIONS:
try:
ENTRY_COLLECTIONS[entry_type].create_index("id", unique=True)
except NotImplementedError:
pass

if bad_rows:
LOGGER.warning("Could not read %d rows from the JSONL file", bad_rows)

Expand Down

0 comments on commit c266a3e

Please sign in to comment.