Skip to content

Commit

Permalink
Merge pull request #93 from SciCatProject/fixes-for-deployment
Browse files Browse the repository at this point in the history
All the fixes introduced while testing the production deployments
  • Loading branch information
YooSunYoung authored Dec 4, 2024
2 parents b06d32c + 85e8ac2 commit b569fc1
Show file tree
Hide file tree
Showing 9 changed files with 311 additions and 20 deletions.
272 changes: 272 additions & 0 deletions resources/small-coda.imsc.json.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
{
"id" : "628b28d6-9c26-11ef-948d-0b2d405fc82f",
"name" : "Test Coda Metadata Schema",
"instrument": "coda",
"selector": "filename:starts_with:/ess/services/scicat-ingestor/software/test-data/small-coda",
"order": 110,
"variables" : {
"job_id": {
"source": "NXS",
"path": "/entry/entry_identifier_uuid",
"value_type": "string"
},
"pid": {
"source": "VALUE",
"value": "20.500.12269/<ingestor_run_id>",
"value_type": "string"
},
"proposal_id": {
"source": "NXS",
"path": "/entry/experiment_identifier",
"value_type": "string"
},
"proposal_data": {
"source": "SC",
"url": "proposals/<proposal_id>",
"field" : "",
"value_type": "dict"
},
"pi_firstname": {
"source": "VALUE",
"operator": "getitem",
"value": "<proposal_data>",
"field" : "pi_firstname",
"value_type": "string"
},
"pi_lastname": {
"source": "VALUE",
"operator": "getitem",
"value": "<proposal_data>",
"field" : "pi_lastname",
"value_type": "string"
},
"pi_email": {
"source": "VALUE",
"operator": "getitem",
"value": "<proposal_data>",
"field" : "pi_email",
"value_type": "string"
},
"dataset_original_name": {
"source": "NXS",
"path": "/entry/title",
"value_type": "string"
},
"dataset_name": {
"source": "VALUE",
"value" : "coda test - <dataset_original_name> - <ingestor_run_id>",
"value_type": "string"
},
"instrument_name": {
"source": "NXS",
"path": "/entry/instrument/name",
"value_type": "string"
},
"instruments_data": {
"source": "SC",
"url": "instruments?filter=%7B%22where%22%20%3A%20%7B%20%22name%22%20%3A%20%22coda%22%20%7D%20%7D",
"field": "",
"value_type": "list"
},
"instrument_data": {
"source": "VALUE",
"operator": "getitem",
"value": "<instruments_data>",
"field" : 0,
"value_type": "dict"
},
"instrument_id": {
"source": "VALUE",
"operator": "getitem",
"value": "<instrument_data>",
"field" : "id",
"value_type": "string"
},
"start_time": {
"source": "NXS",
"path": "/entry/start_time",
"value_type": "date"
},
"end_time": {
"source": "NXS",
"path": "/entry/end_time",
"value_type": "date"
},
"run_number": {
"source": "NXS",
"path": "/entry/entry_identifier",
"value_type": "integer"
},
"acquisition_team_members_list": {
"source": "NXS",
"path" : "/entry/user_*/name",
"value_type": "string[]"
},
"acquisition_team_members": {
"source": "VALUE",
"operator" : "join_with_space",
"value" : "<acquisition_team_members_list>",
"value_type": "string"
},
"owner_group": {
"source": "VALUE",
"value": "<proposal_id>",
"value_type": "string"
},
"access_groups": {
"source": "VALUE",
"value": ["scientific information management systems group"],
"value_type": "string[]"
},
"source_folder": {
"source": "VALUE",
"operator": "dirname",
"value": "<filepath>",
"value_type": "string"
},
"keywords" : {
"source": "VALUE",
"value": ["TEST CODA","Scicat Ingestor 05","TEST RUN","CODA","<instrument_name>","CODA <instrument_name>"],
"value_type": "string[]"
}
},
"schema": {
"pid": {
"field_type": "high_level",
"machine_name": "pid",
"value": "<pid>",
"type": "string"
},
"type" : {
"field_type": "high_level",
"machine_name": "type",
"value": "raw",
"type": "string"
},
"proposal_id": {
"field_type": "high_level",
"machine_name": "proposalId",
"value": "<proposal_id>",
"type": "string"
},
"dataset_name": {
"field_type": "high_level",
"machine_name": "datasetName",
"value": "<dataset_name>",
"type": "string"
},
"principal_investigator": {
"field_type": "high_level",
"machine_name": "principalInvestigator",
"value": "<pi_firstname> <pi_lastname>",
"type": "string"
},
"owner": {
"field_type": "high_level",
"machine_name": "owner",
"value": "<pi_firstname> <pi_lastname>",
"type": "string"
},
"owner_email": {
"field_type": "high_level",
"machine_name": "ownerEmail",
"value": "<pi_email>",
"type": "email"
},
"contact_email": {
"field_type": "high_level",
"machine_name": "contactEmail",
"value": "<pi_email>",
"type": "email"
},
"instrument_id": {
"field_type": "high_level",
"machine_name": "instrumentId",
"value": "<instrument_id>",
"type": "string"
},
"creation_location": {
"field_type": "high_level",
"machine_name": "creationLocation",
"value": "ESS:CODA:<instrument_name>",
"type": "string"
},
"start_time_hl": {
"field_type": "high_level",
"machine_name": "startTime",
"value": "<start_time>",
"type": "date"
},
"end_time_hl": {
"field_type": "high_level",
"machine_name": "endTime",
"value": "<end_time>",
"type": "date"
},
"start_time_sm": {
"field_type": "scientific_metadata",
"machine_name": "start_time",
"human_name": "Start Time",
"value": "<start_time>",
"type": "date"
},
"end_time_sm": {
"field_type": "scientific_metadata",
"machine_name": "end_time",
"human_name": "End Time",
"value": "<end_time>",
"type": "date"
},
"run_number_sm": {
"field_type": "scientific_metadata",
"machine_name": "run_number",
"human_name": "Run Number",
"value": "<run_number>",
"type": "integer"
},
"job_id": {
"field_type": "scientific_metadata",
"machine_name": "job_id",
"human_name": "ESS Data Collection Job Id",
"value": "<job_id>",
"type": "string"
},
"acquisition_team_members": {
"field_type": "scientific_metadata",
"machine_name": "acquisition_team_members",
"human_name": "Acquisition Team Members",
"value": "<acquisition_team_members>",
"type": "string"
},
"owner_group": {
"field_type": "high_level",
"machine_name": "ownerGroup",
"value": "<owner_group>",
"type": "string"
},
"access_groups": {
"field_type": "high_level",
"machine_name": "accessGroups",
"value": "<access_groups>",
"type": "string[]"
},
"source_folder": {
"field_type": "high_level",
"machine_name": "sourceFolder",
"value": "<source_folder>",
"type": "string"
},
"creation_time": {
"field_type": "high_level",
"machine_name": "creationTime",
"value": "<now>",
"type": "date"
},
"keywords": {
"field_type": "high_level",
"machine_name": "keywords",
"value": "<keywords>",
"type": "string[]"
}
}
}
13 changes: 6 additions & 7 deletions src/scicat_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,9 @@ def extract_variables_values(
) -> dict:
variable_map = {
"ingestor_run_id": str(uuid.uuid4()),
"filepath": pathlib.Path(config.nexus_file),
"data_file_path": pathlib.Path(config.nexus_file),
"now": datetime.datetime.now(tz=datetime.UTC).isoformat(),
"ingestor_files_directory": config.ingestion.file_handling.ingestor_files_directory,
}
for variable_name, variable_recipe in variables.items():
source = variable_recipe.source
Expand All @@ -187,11 +188,7 @@ def extract_variables_values(
)
elif isinstance(variable_recipe, ValueMetadataVariable):
value = variable_recipe.value
value = (
render_variable_value(value, variable_map)
if isinstance(value, str)
else value
)
value = render_variable_value(value, variable_map)
_operator = _get_operator(variable_recipe.operator)
if variable_recipe.field is not None:
value = _operator(value, variable_recipe.field)
Expand Down Expand Up @@ -265,6 +262,8 @@ class ScicatDataset:
accessGroups: list[str] | None = None
startTime: str | None = None
endTime: str | None = None
runNumber: str | None = None
keywords: list[str] | None = None


@dataclass(kw_only=True)
Expand Down Expand Up @@ -459,7 +458,7 @@ def _filter_by_field_type(
return [field for field in schemas if field.field_type == field_type]


def _render_variable_as_type(value: Any, variable_map: dict, dtype: str) -> Any:
    """Render ``value`` against ``variable_map`` and convert it to ``dtype``.

    ``value`` is annotated as ``Any`` (not only ``str``) because it is handed
    straight to ``render_variable_value``; the rendered result is then passed
    to ``convert_to_type`` together with ``dtype``.

    Parameters
    ----------
    value:
        Template value to render.
    variable_map:
        Registry of variable names and their values used for the rendering.
    dtype:
        Name of the target type, forwarded unchanged to ``convert_to_type``.

    Returns
    -------
    Any
        Whatever ``convert_to_type`` returns for the rendered value.
    """
    rendered = render_variable_value(value, variable_map)
    return convert_to_type(rendered, dtype)


Expand Down
2 changes: 1 addition & 1 deletion src/scicat_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def build_logger(

# Add graylog handler
if logging_options.graylog:
graylog_handler = graypy.GELFTCPHandler(
graylog_handler = graypy.GELFUDPHandler(
logging_options.graylog_host,
int(logging_options.graylog_port),
facility=logging_options.graylog_facility,
Expand Down
19 changes: 15 additions & 4 deletions src/scicat_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from collections.abc import Callable
from dataclasses import dataclass
from importlib.metadata import entry_points
from typing import Any

SCIENTIFIC_METADATA_TYPE = "scientific_metadata"
HIGH_LEVEL_METADATA_TYPE = "high_level"
Expand Down Expand Up @@ -140,20 +141,30 @@ def from_file(cls, schema_file_name: pathlib.Path) -> "MetadataSchema":
return cls.from_dict(_load_json_schema(schema_file_name))


def render_variable_value(var_value: Any, variable_registry: dict) -> Any:
    """Replace ``<name>`` placeholders in ``var_value`` with registry values.

    Parameters
    ----------
    var_value:
        A string template, or any JSON-serializable value (e.g. a list of
        string templates). Non-string input is rendered through its JSON text
        and decoded back afterwards, so its structure is preserved.
    variable_registry:
        Mapping of variable name to value.

    Returns
    -------
    Any
        - If the whole input is a single registered variable, the registry
          value itself is returned, with its original type.
        - If the input is a string, the string with every ``<name>`` replaced
          by ``str(value)``.
        - Otherwise the decoded JSON value with the placeholders replaced.

    Raises
    ------
    ValueError
        If the rendered text still contains both ``<`` and ``>``, which is
        treated as an unresolved placeholder. ``ValueError`` is a subclass of
        ``Exception``, so callers catching ``Exception`` are unaffected.
    """
    is_text = isinstance(var_value, str)
    # If input is not a string it is converted to (JSON) text so that the same
    # textual substitution can be applied. ``ensure_ascii=False`` keeps
    # non-ASCII placeholder names literally present in the text.
    output_value = var_value if is_text else json.dumps(var_value, ensure_ascii=False)

    # If it is only one variable, then it is a simple replacement
    var_key = output_value.removesuffix(">").removeprefix("<")
    if var_key in variable_registry:
        return variable_registry[var_key]

    # If it is a complex variable, then it is a combination of variables
    # similar to f-string in python
    for reg_var_name, reg_var_value in variable_registry.items():
        replacement = str(reg_var_value)
        if not is_text:
            # The text is JSON here: escape quotes/backslashes/control
            # characters so the substituted value cannot corrupt the document
            # that is decoded below.
            replacement = json.dumps(replacement, ensure_ascii=False)[1:-1]
        output_value = output_value.replace("<" + reg_var_name + ">", replacement)

    # The check must look at the *rendered* text: the untouched input always
    # still contains its placeholders.
    if "<" in output_value and ">" in output_value:
        raise ValueError(f"Unresolved variable: {var_value}")

    return output_value if is_text else json.loads(output_value)


def collect_schemas(dir_path: pathlib.Path) -> OrderedDict[str, MetadataSchema]:
Expand Down
5 changes: 5 additions & 0 deletions src/scicat_offline_ingestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ def main() -> None:
with h5py.File(nexus_file_path) as h5file:
# load instrument metadata configuration
metadata_schema = select_applicable_schema(nexus_file_path, schemas)
logger.info(
"Metadata Schema selected : %s (Id: %s)",
metadata_schema.name,
metadata_schema.id,
)

# define variables values
variable_map = extract_variables_values(
Expand Down
4 changes: 2 additions & 2 deletions test-data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ with h5py.File('copied_coda.hdf', 'r+') as f:
del instrument_gr[name]

# Copy the rest of the file
with h5py.File('small_coda.hdf', 'w') as new_f:
with h5py.File('small-coda.hdf', 'w') as new_f:
# copy everything
f.copy('entry', new_f)

Expand All @@ -39,7 +39,7 @@ with h5py.File('copied_ymir.hdf', 'r+') as f:
del instrument_gr[name]

# Copy the rest of the file
with h5py.File('small_ymir.hdf', 'w') as new_f:
with h5py.File('small-ymir.hdf', 'w') as new_f:
# copy everything
f.copy('entry', new_f)

Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit b569fc1

Please sign in to comment.