From c65e2f66244453f32c5f76f0e672171b199c0f3a Mon Sep 17 00:00:00 2001 From: Jessica Gadling Date: Mon, 7 Oct 2024 08:10:00 -0700 Subject: [PATCH] fix: keep both `tomogram.is_canonical` and `tomogram.is_portal_standard` (#303) * fix: keep both `tomogram.is_canonical` and `tomogram.is_portal_standard` to allow the frontend queries to migrate. --- .../versions/20241002_152402_autogenerated.py | 7 +- apiv2/database/models/tomogram.py | 1 + apiv2/db_import/importers/tomogram.py | 2 +- apiv2/db_import/tests/populate_db.py | 8 +- apiv2/db_import/tests/test_db_tomo_import.py | 2 + apiv2/graphql_api/helpers/tomogram.py | 1 + apiv2/graphql_api/schema.graphql | 13 + apiv2/graphql_api/schema.json | 57 ++ apiv2/graphql_api/types/tomogram.py | 12 + apiv2/schema/README.md | 1 + apiv2/schema/schema.yaml | 555 +++++++++--------- apiv2/test_infra/factories/tomogram.py | 1 + apiv2/validators/tomogram.py | 2 + 13 files changed, 395 insertions(+), 267 deletions(-) diff --git a/apiv2/database/migrations/versions/20241002_152402_autogenerated.py b/apiv2/database/migrations/versions/20241002_152402_autogenerated.py index 66a5b8bce..6d145d9bb 100644 --- a/apiv2/database/migrations/versions/20241002_152402_autogenerated.py +++ b/apiv2/database/migrations/versions/20241002_152402_autogenerated.py @@ -115,13 +115,16 @@ def upgrade() -> None: op.add_column("tomogram", sa.Column("deposition_date", sa.DateTime(timezone=True), nullable=True)) op.add_column("tomogram", sa.Column("release_date", sa.DateTime(timezone=True), nullable=True)) op.add_column("tomogram", sa.Column("last_modified_date", sa.DateTime(timezone=True), nullable=True)) - op.drop_column("tomogram", "is_canonical") + + conn = op.get_bind() + backfill_sql = sa.sql.text("UPDATE tomogram SET is_portal_standard = is_canonical") + conn.execute(backfill_sql) + # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column("tomogram", sa.Column("is_canonical", sa.BOOLEAN(), autoincrement=False, nullable=True)) op.drop_column("tomogram", "last_modified_date") op.drop_column("tomogram", "release_date") op.drop_column("tomogram", "deposition_date") diff --git a/apiv2/database/models/tomogram.py b/apiv2/database/models/tomogram.py index 59b2c93cf..267d7f562 100644 --- a/apiv2/database/models/tomogram.py +++ b/apiv2/database/models/tomogram.py @@ -89,6 +89,7 @@ class Tomogram(Base): tomogram_version: Mapped[float] = mapped_column(Float, nullable=True) processing_software: Mapped[str] = mapped_column(String, nullable=True) reconstruction_software: Mapped[str] = mapped_column(String, nullable=False) + is_canonical: Mapped[bool] = mapped_column(Boolean, nullable=True) is_portal_standard: Mapped[bool] = mapped_column(Boolean, nullable=True) is_author_submitted: Mapped[bool] = mapped_column(Boolean, nullable=True) is_visualization_default: Mapped[bool] = mapped_column(Boolean, nullable=True) diff --git a/apiv2/db_import/importers/tomogram.py b/apiv2/db_import/importers/tomogram.py index d577ca5ba..9e3d1916b 100644 --- a/apiv2/db_import/importers/tomogram.py +++ b/apiv2/db_import/importers/tomogram.py @@ -98,7 +98,6 @@ def get_computed_fields(self) -> dict[str, Any]: "fiducial_alignment_status": normalize_fiducial_alignment(self.metadata.get("fiducial_alignment_status")), "reconstruction_method": self.normalize_to_unknown_str(self.metadata.get("reconstruction_method")), "reconstruction_software": self.normalize_to_unknown_str(self.metadata.get("reconstruction_software")), - "is_canonical": True, # TODO: mark this for deprecation "s3_omezarr_dir": self.join_path(s3_prefix, self.dir_prefix, self.metadata["omezarr_dir"]), "https_omezarr_dir": self.join_path(https_prefix, self.dir_prefix, self.metadata["omezarr_dir"]), "s3_mrc_file": self.join_path(s3_prefix, self.dir_prefix, self.metadata["mrc_files"][0]), @@ -107,6 +106,7 @@ def get_computed_fields(self) -> dict[str, Any]: "key_photo_thumbnail_url": None, "neuroglancer_config": self.generate_neuroglancer_data(), "type": self.get_tomogram_type(), + "is_canonical": self.metadata.get("is_standardized") or False, "is_portal_standard": self.metadata.get("is_standardized") or False, } date_fields = ["deposition_date", "release_date", "last_modified_date"] diff --git a/apiv2/db_import/tests/populate_db.py b/apiv2/db_import/tests/populate_db.py index d5022f38d..3c680d478 100644 --- a/apiv2/db_import/tests/populate_db.py +++ b/apiv2/db_import/tests/populate_db.py @@ -236,6 +236,7 @@ def populate_stale_tomogram_voxel_spacing(session: sa.orm.Session, run_id: int = offset_y=0, offset_z=0, is_portal_standard=True, + is_canonical=True, deposition_date=datetime.min, release_date=datetime.min, last_modified_date=datetime.min, @@ -290,6 +291,7 @@ def populate_tomograms(session: sa.orm.Session) -> Tomogram: offset_y=0, offset_z=0, is_portal_standard=True, + is_canonical=True, deposition_date=datetime.min, release_date=datetime.min, last_modified_date=datetime.min, @@ -323,6 +325,7 @@ def populate_stale_tomograms(session: sa.orm.Session) -> Tomogram: offset_y=0, offset_z=0, is_portal_standard=True, + is_canonical=True, deposition_date=datetime.min, release_date=datetime.min, last_modified_date=datetime.min, @@ -546,7 +549,10 @@ def populate_stale_annotation_method_links(session: sa.orm.Session) -> None: populate_stale_annotations(session) session.add( AnnotationMethodLink( - annotation_id=STALE_ANNOTATION_ID, name="Stale Link 0", link_type="other", link="https://some-link.com", + annotation_id=STALE_ANNOTATION_ID, + name="Stale Link 0", + link_type="other", + link="https://some-link.com", ), ) session.add( diff --git a/apiv2/db_import/tests/test_db_tomo_import.py b/apiv2/db_import/tests/test_db_tomo_import.py index b9fffcc75..c33c52e90 100644 --- a/apiv2/db_import/tests/test_db_tomo_import.py +++ b/apiv2/db_import/tests/test_db_tomo_import.py @@ -87,6 +87,7 @@ def expected_tomograms_by_run(http_prefix: str) -> dict[str, dict[float, list[di "neuroglancer_config": '{"foo":"bar","baz":"test"}', "deposition_id": 301, "is_portal_standard": False, + "is_canonical": False, "deposition_date": date(2023, 4, 2), "release_date": date(2024, 6, 1), "last_modified_date": date(2023, 9, 2), @@ -116,6 +117,7 @@ def expected_tomograms_by_run(http_prefix: str) -> dict[str, dict[float, list[di "neuroglancer_config": "{}", "deposition_id": 300, "is_portal_standard": False, + "is_canonical": False, "deposition_date": date(2022, 4, 2), "release_date": date(2022, 6, 1), "last_modified_date": date(2022, 9, 2), diff --git a/apiv2/graphql_api/helpers/tomogram.py b/apiv2/graphql_api/helpers/tomogram.py index 05a5a7d97..42dcccf82 100644 --- a/apiv2/graphql_api/helpers/tomogram.py +++ b/apiv2/graphql_api/helpers/tomogram.py @@ -59,6 +59,7 @@ class TomogramGroupByOptions: tomogram_version: Optional[float] = None processing_software: Optional[str] = None reconstruction_software: Optional[str] = None + is_canonical: Optional[bool] = None is_portal_standard: Optional[bool] = None is_author_submitted: Optional[bool] = None is_visualization_default: Optional[bool] = None diff --git a/apiv2/graphql_api/schema.graphql b/apiv2/graphql_api/schema.graphql index 9649492ca..69a0c89d4 100644 --- a/apiv2/graphql_api/schema.graphql +++ b/apiv2/graphql_api/schema.graphql @@ -4999,6 +4999,9 @@ type Tomogram implements EntityInterface & Node { """Name of software used for reconstruction""" reconstructionSoftware: String! + """whether this tomogram adheres to portal standards""" + isCanonical: Boolean + """whether this tomogram adheres to portal standards""" isPortalStandard: Boolean @@ -5339,6 +5342,7 @@ enum TomogramCountColumns { tomogramVersion processingSoftware reconstructionSoftware + isCanonical isPortalStandard isAuthorSubmitted isVisualizationDefault @@ -5412,6 +5416,9 @@ input TomogramCreateInput { """Name of software used for reconstruction""" reconstructionSoftware: String! + """whether this tomogram adheres to portal standards""" + isCanonical: Boolean = null + """whether this tomogram adheres to portal standards""" isPortalStandard: Boolean = null @@ -5515,6 +5522,7 @@ type TomogramGroupByOptions { tomogramVersion: Float processingSoftware: String reconstructionSoftware: String + isCanonical: Boolean isPortalStandard: Boolean isAuthorSubmitted: Boolean isVisualizationDefault: Boolean @@ -5598,6 +5606,7 @@ input TomogramOrderByClause { tomogramVersion: orderBy processingSoftware: orderBy reconstructionSoftware: orderBy + isCanonical: orderBy isPortalStandard: orderBy isAuthorSubmitted: orderBy isVisualizationDefault: orderBy @@ -5671,6 +5680,9 @@ input TomogramUpdateInput { """Name of software used for reconstruction""" reconstructionSoftware: String + """whether this tomogram adheres to portal standards""" + isCanonical: Boolean = null + """whether this tomogram adheres to portal standards""" isPortalStandard: Boolean = null @@ -5956,6 +5968,7 @@ input TomogramWhereClause { tomogramVersion: FloatComparators processingSoftware: StrComparators reconstructionSoftware: StrComparators + isCanonical: BoolComparators isPortalStandard: BoolComparators isAuthorSubmitted: BoolComparators isVisualizationDefault: BoolComparators diff --git a/apiv2/graphql_api/schema.json b/apiv2/graphql_api/schema.json index 9d1912279..3261179a3 100644 --- a/apiv2/graphql_api/schema.json +++ b/apiv2/graphql_api/schema.json @@ -6743,6 +6743,15 @@ "ofType": null } }, + { + "defaultValue": null, + "name": "isCanonical", + "type": { + "kind": "INPUT_OBJECT", + "name": "BoolComparators", + "ofType": null + } + }, { "defaultValue": null, "name": "isPortalStandard", @@ -21870,6 +21879,15 @@ } } }, + { + "args": [], + "name": "isCanonical", + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, { "args": [], "name": "isPortalStandard", @@ -22498,6 +22516,15 @@ "ofType": null } }, + { + "defaultValue": null, + "name": "isCanonical", + "type": { + "kind": "ENUM", + "name": "orderBy", + "ofType": null + } + }, { "defaultValue": null, "name": "isPortalStandard", @@ -23360,6 +23387,15 @@ "ofType": null } }, + { + "args": [], + "name": "isCanonical", + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, { "args": [], "name": "isPortalStandard", @@ -24927,6 +24963,9 @@ { "name": "reconstructionSoftware" }, + { + "name": "isCanonical" + }, { "name": "isPortalStandard" }, @@ -35075,6 +35114,15 @@ } } }, + { + "defaultValue": "null", + "name": "isCanonical", + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, { "defaultValue": "null", "name": "isPortalStandard", @@ -35443,6 +35491,15 @@ "ofType": null } }, + { + "defaultValue": "null", + "name": "isCanonical", + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, { "defaultValue": "null", "name": "isPortalStandard", diff --git a/apiv2/graphql_api/types/tomogram.py b/apiv2/graphql_api/types/tomogram.py index 3d3a389b1..897f37119 100644 --- a/apiv2/graphql_api/types/tomogram.py +++ b/apiv2/graphql_api/types/tomogram.py @@ -235,6 +235,7 @@ class TomogramWhereClause(TypedDict): tomogram_version: Optional[FloatComparators] | None processing_software: Optional[StrComparators] | None reconstruction_software: Optional[StrComparators] | None + is_canonical: Optional[BoolComparators] | None is_portal_standard: Optional[BoolComparators] | None is_author_submitted: Optional[BoolComparators] | None is_visualization_default: Optional[BoolComparators] | None @@ -287,6 +288,7 @@ class TomogramOrderByClause(TypedDict): tomogram_version: Optional[orderBy] | None processing_software: Optional[orderBy] | None reconstruction_software: Optional[orderBy] | None + is_canonical: Optional[orderBy] | None is_portal_standard: Optional[orderBy] | None is_author_submitted: Optional[orderBy] | None is_visualization_default: Optional[orderBy] | None @@ -360,6 +362,9 @@ class Tomogram(EntityInterface): description="Processing software used to derive the tomogram", default=None, ) reconstruction_software: str = strawberry.field(description="Name of software used for reconstruction") + is_canonical: Optional[bool] = strawberry.field( + description="whether this tomogram adheres to portal standards", default=None, + ) is_portal_standard: Optional[bool] = strawberry.field( description="whether this tomogram adheres to portal standards", default=None, ) @@ -511,6 +516,7 @@ class TomogramCountColumns(enum.Enum): tomogramVersion = "tomogram_version" processingSoftware = "processing_software" reconstructionSoftware = "reconstruction_software" + isCanonical = "is_canonical" isPortalStandard = "is_portal_standard" isAuthorSubmitted = "is_author_submitted" isVisualizationDefault = "is_visualization_default" @@ -606,6 +612,9 @@ class TomogramCreateInput: description="Processing software used to derive the tomogram", default=None, ) reconstruction_software: str = strawberry.field(description="Name of software used for reconstruction") + is_canonical: Optional[bool] = strawberry.field( + description="whether this tomogram adheres to portal standards", default=None, + ) is_portal_standard: Optional[bool] = strawberry.field( description="whether this tomogram adheres to portal standards", default=None, ) @@ -698,6 +707,9 @@ class TomogramUpdateInput: description="Processing software used to derive the tomogram", default=None, ) reconstruction_software: Optional[str] = strawberry.field(description="Name of software used for reconstruction") + is_canonical: Optional[bool] = strawberry.field( + description="whether this tomogram adheres to portal standards", default=None, + ) is_portal_standard: Optional[bool] = strawberry.field( description="whether this tomogram adheres to portal standards", default=None, ) diff --git a/apiv2/schema/README.md b/apiv2/schema/README.md index 9f00aa86e..143076f83 100644 --- a/apiv2/schema/README.md +++ b/apiv2/schema/README.md @@ -12,6 +12,7 @@ Tomogram { float tomogram_version string processing_software string reconstruction_software + boolean is_canonical boolean is_portal_standard boolean is_author_submitted boolean is_visualization_default diff --git a/apiv2/schema/schema.yaml b/apiv2/schema/schema.yaml index 7d4dcff99..e2de33632 100644 --- a/apiv2/schema/schema.yaml +++ b/apiv2/schema/schema.yaml @@ -3,7 +3,7 @@ name: cdp-dataset-config description: Schema for dataset configs version: 1.1.0 imports: -- linkml:types + - linkml:types prefixes: linkml: prefix_prefix: linkml @@ -18,34 +18,34 @@ types: name: string description: A character string notes: - - In RDF serializations, a slot with range of string is treated as a literal or - type xsd:string. If you are authoring schemas in LinkML YAML, the type is - referenced with the lower case "string". + - In RDF serializations, a slot with range of string is treated as a literal or + type xsd:string. If you are authoring schemas in LinkML YAML, the type is + referenced with the lower case "string". from_schema: cdp-dataset-config exact_mappings: - - schema:Text + - schema:Text base: string uri: xsd:string integer: name: integer description: An integer notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "integer". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "integer". from_schema: cdp-dataset-config exact_mappings: - - schema:Integer + - schema:Integer base: integer uri: xsd:integer boolean: name: boolean description: A binary (true or false) value notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "boolean". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "boolean". from_schema: cdp-dataset-config exact_mappings: - - schema:Boolean + - schema:Boolean base: Bool uri: xsd:boolean repr: bool @@ -53,47 +53,49 @@ types: name: float description: A real number that conforms to the xsd:float specification notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "float". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "float". from_schema: cdp-dataset-config exact_mappings: - - schema:Float + - schema:Float base: float uri: xsd:float double: name: double description: A real number that conforms to the xsd:double specification notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "double". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "double". from_schema: cdp-dataset-config close_mappings: - - schema:Float + - schema:Float base: float uri: xsd:double decimal: name: decimal - description: A real number with arbitrary precision that conforms to the xsd:decimal + description: + A real number with arbitrary precision that conforms to the xsd:decimal specification notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "decimal". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "decimal". from_schema: cdp-dataset-config broad_mappings: - - schema:Number + - schema:Number base: Decimal uri: xsd:decimal time: name: time - description: A time object represents a (local) time of day, independent of any + description: + A time object represents a (local) time of day, independent of any particular day notes: - - URI is dateTime because OWL reasoners do not work with straight date or time - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "time". + - URI is dateTime because OWL reasoners do not work with straight date or time + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "time". from_schema: cdp-dataset-config exact_mappings: - - schema:Time + - schema:Time base: XSDTime uri: xsd:time repr: string @@ -101,12 +103,12 @@ types: name: date description: a date (year, month and day) in an idealized calendar notes: - - URI is dateTime because OWL reasoners don't work with straight date or time - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "date". + - URI is dateTime because OWL reasoners don't work with straight date or time + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "date". from_schema: cdp-dataset-config exact_mappings: - - schema:Date + - schema:Date base: XSDDate uri: xsd:date repr: string @@ -114,11 +116,11 @@ types: name: datetime description: The combination of a date and time notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "datetime". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "datetime". from_schema: cdp-dataset-config exact_mappings: - - schema:DateTime + - schema:DateTime base: XSDDateTime uri: xsd:dateTime repr: string @@ -126,8 +128,8 @@ types: name: date_or_datetime description: Either a date or a datetime notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "date_or_datetime". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "date_or_datetime". from_schema: cdp-dataset-config base: string uri: linkml:DateOrDatetime @@ -136,8 +138,8 @@ types: name: uriorcurie description: a URI or a CURIE notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "uriorcurie". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "uriorcurie". from_schema: cdp-dataset-config base: URIorCURIE uri: xsd:anyURI @@ -147,11 +149,11 @@ types: conforms_to: https://www.w3.org/TR/curie/ description: a compact URI notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "curie". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "curie". comments: - - in RDF serializations this MUST be expanded to a URI - - in non-RDF serializations MAY be serialized as the compact representation + - in RDF serializations this MUST be expanded to a URI + - in non-RDF serializations MAY be serialized as the compact representation from_schema: cdp-dataset-config base: Curie uri: xsd:string @@ -161,15 +163,15 @@ types: conforms_to: https://www.ietf.org/rfc/rfc3987.txt description: a complete URI notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "uri". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "uri". comments: - - in RDF serializations a slot with range of uri is treated as a literal or type - xsd:anyURI unless it is an identifier or a reference to an identifier, in which - case it is translated directly to a node + - in RDF serializations a slot with range of uri is treated as a literal or type + xsd:anyURI unless it is an identifier or a reference to an identifier, in which + case it is translated directly to a node from_schema: cdp-dataset-config close_mappings: - - schema:URL + - schema:URL base: URI uri: xsd:anyURI repr: string @@ -177,8 +179,8 @@ types: name: ncname description: Prefix part of CURIE notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "ncname". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "ncname". from_schema: cdp-dataset-config base: NCName uri: xsd:string @@ -187,10 +189,10 @@ types: name: objectidentifier description: A URI or CURIE that represents an object in the model. notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "objectidentifier". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "objectidentifier". comments: - - Used for inheritance and type checking + - Used for inheritance and type checking from_schema: cdp-dataset-config base: ElementIdentifier uri: shex:iri @@ -199,8 +201,8 @@ types: name: nodeidentifier description: A URI, CURIE or BNODE that represents a node in a model. notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "nodeidentifier". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "nodeidentifier". from_schema: cdp-dataset-config base: NodeIdentifier uri: shex:nonLiteral @@ -208,12 +210,13 @@ types: jsonpointer: name: jsonpointer conforms_to: https://datatracker.ietf.org/doc/html/rfc6901 - description: A string encoding a JSON Pointer. The value of the string MUST conform + description: + A string encoding a JSON Pointer. The value of the string MUST conform to JSON Point syntax and SHOULD dereference to a valid object within the current instance document when encoded in tree form. notes: - - If you are authoring schemas in LinkML YAML, the type is referenced with the - lower case "jsonpointer". + - If you are authoring schemas in LinkML YAML, the type is referenced with the + lower case "jsonpointer". from_schema: cdp-dataset-config base: string uri: xsd:string @@ -238,7 +241,7 @@ enums: permissible_values: projection_matching: description: Alignment was generated via AreTomo - patch_tracking : + patch_tracking: description: Alignment was generated via IMOD without fiducials fiducial_based: description: Alignment was generated based on fiducials @@ -289,7 +292,8 @@ enums: description: Links to the documentation related to the method. models_weights: text: models_weights - description: Links to the weights that the models used for generating annotations + description: + Links to the weights that the models used for generating annotations were trained with. other: text: other @@ -299,7 +303,8 @@ enums: description: Links to the source code of the method. website: text: website - description: Links to a website of the method or tool used to generate the + description: + Links to a website of the method or tool used to generate the annotation. deposition_types_enum: name: deposition_types_enum @@ -337,7 +342,8 @@ enums: description: Tomographic data of purified viruses or VLPs. in_vitro: text: in_vitro - description: Tomographic data of in vitro reconstituted systems or mixtures + description: + Tomographic data of in vitro reconstituted systems or mixtures of proteins. in_silico: text: in_silico @@ -447,7 +453,7 @@ classes: description: Numeric identifier (May change!) from_schema: cdp-dataset-config exact_mappings: - - api_sequential_identifier + - api_sequential_identifier range: integer required: true annotations: @@ -460,20 +466,22 @@ classes: attributes: s3_prefix: name: s3_prefix - description: Path to a directory containing data for this entity as an S3 + description: + Path to a directory containing data for this entity as an S3 url from_schema: cdp-dataset-config exact_mappings: - - api_s3_prefix + - api_s3_prefix range: string required: true https_prefix: name: https_prefix - description: Path to a directory containing data for this entity as an HTTPS + description: + Path to a directory containing data for this entity as an HTTPS url from_schema: cdp-dataset-config exact_mappings: - - api_https_prefix + - api_https_prefix range: string required: true AuthorEntityMixin: @@ -482,14 +490,14 @@ classes: from_schema: cdp-dataset-config mixin: true mixins: - - AuthorMixin + - AuthorMixin attributes: author_list_order: name: author_list_order description: The order in which the author appears in the publication from_schema: cdp-dataset-config exact_mappings: - - api_author_list_order + - api_author_list_order range: integer required: true orcid: @@ -497,16 +505,16 @@ classes: description: A unique, persistent identifier for researchers, provided by ORCID. from_schema: cdp-dataset-config exact_mappings: - - author_orcid + - author_orcid range: string recommended: true - pattern: '[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$' + pattern: "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$" name: name: name description: Full name of an author (e.g. Jane Doe). from_schema: cdp-dataset-config exact_mappings: - - author_name + - author_name range: string required: true email: @@ -514,28 +522,28 @@ classes: description: Email address for this author from_schema: cdp-dataset-config exact_mappings: - - author_email + - author_email range: string affiliation_name: name: affiliation_name description: Name of the institutions an author is affiliated with. Comma separated from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_name + - author_affiliation_name range: string affiliation_address: name: affiliation_address description: Address of the institution an author is affiliated with. from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_address + - author_affiliation_address range: string affiliation_identifier: name: affiliation_identifier description: A unique identifier assigned to the affiliated institution by The Research Organization Registry (ROR). from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_identifier + - author_affiliation_identifier range: string recommended: true corresponding_author_status: @@ -543,7 +551,7 @@ classes: description: Indicates whether an author is the corresponding author from_schema: cdp-dataset-config exact_mappings: - - author_corresponding_author_status + - author_corresponding_author_status ifabsent: false range: boolean primary_author_status: @@ -551,7 +559,7 @@ classes: description: Whether the author is a primary author. from_schema: cdp-dataset-config exact_mappings: - - author_primary_author_status + - author_primary_author_status ifabsent: false range: boolean GainFile: @@ -561,7 +569,7 @@ classes: value: GainFiles description: Gain values for frames in this run mixins: - - IDMixin + - IDMixin attributes: run: inverse: Run.gain_files @@ -581,7 +589,7 @@ classes: value: FrameAcquisitionFiles description: References to files containing more information about frame acquisition mixins: - - IDMixin + - IDMixin attributes: run: inverse: Run.frame_acquisition_files @@ -603,7 +611,7 @@ classes: description: Tiltseries Alignment from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: annotation_files: name: annotation_files @@ -652,7 +660,7 @@ classes: description: Whether this a LOCAL or GLOBAL alignment from_schema: cdp-dataset-config exact_mappings: - - alignment_alignment_type + - alignment_alignment_type range: alignment_type_enum pattern: (^LOCAL$)|(^GLOBAL$) alignment_method: @@ -663,7 +671,7 @@ classes: description: X dimension of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_x_dimension + - alignment_volume_x_dimension range: float unit: symbol: Å @@ -673,7 +681,7 @@ classes: description: Y dimension of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_y_dimension + - alignment_volume_y_dimension range: float unit: symbol: Å @@ -683,7 +691,7 @@ classes: description: Z dimension of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_z_dimension + - alignment_volume_z_dimension range: float unit: symbol: Å @@ -693,7 +701,7 @@ classes: description: X shift of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_x_offset + - alignment_volume_x_offset range: float unit: symbol: Å @@ -703,7 +711,7 @@ classes: description: Y shift of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_y_offset + - alignment_volume_y_offset range: float unit: symbol: Å @@ -713,7 +721,7 @@ classes: description: Z shift of the reconstruction volume in angstrom from_schema: cdp-dataset-config exact_mappings: - - alignment_volume_z_offset + - alignment_volume_z_offset range: float unit: symbol: Å @@ -723,7 +731,7 @@ classes: description: Additional X rotation of the reconstruction volume in degrees from_schema: cdp-dataset-config exact_mappings: - - alignment_x_rotation_offset + - alignment_x_rotation_offset range: float unit: symbol: ° @@ -733,7 +741,7 @@ classes: description: Additional tilt offset in degrees from_schema: cdp-dataset-config exact_mappings: - - alignment_tilt_offset + - alignment_tilt_offset range: float unit: symbol: ° @@ -745,8 +753,8 @@ classes: array: exact_number_dimensions: 2 dimensions: - - exact_cardinality: 4 - - exact_cardinality: 4 + - exact_cardinality: 4 + - exact_cardinality: 4 range: string s3_alignment_metadata: description: S3 path to the metadata file for this alignment @@ -768,8 +776,8 @@ classes: description: Metadata for an annotation's authors from_schema: cdp-dataset-config mixins: - - IDMixin - - AuthorEntityMixin + - IDMixin + - AuthorEntityMixin attributes: annotation: name: annotation @@ -797,7 +805,7 @@ classes: description: Metadata for files associated with an annotation from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: alignment: name: alignment @@ -821,7 +829,7 @@ classes: description: File format for this file from_schema: cdp-dataset-config exact_mappings: - - api_file_format + - api_file_format range: string required: true s3_path: @@ -829,7 +837,7 @@ classes: description: s3 path of the annotation file from_schema: cdp-dataset-config exact_mappings: - - api_s3_path + - api_s3_path range: string required: true https_path: @@ -837,7 +845,7 @@ classes: description: HTTPS path for this annotation file from_schema: cdp-dataset-config exact_mappings: - - api_https_path + - api_https_path range: string required: true is_visualization_default: @@ -845,7 +853,7 @@ classes: description: Data curator’s subjective choice of default annotation to display in visualization for an object from_schema: cdp-dataset-config exact_mappings: - - annotation_source_file_is_visualization_default + - annotation_source_file_is_visualization_default ifabsent: false range: boolean source: @@ -853,7 +861,7 @@ classes: description: The source type for the annotation file (dataset_author, community, or portal_standard) from_schema: cdp-dataset-config exact_mappings: - - api_annotation_file_source + - api_annotation_file_source range: annotation_file_source_enum pattern: (^dataset_author$)|(^community$)|(^portal_standard$) AnnotationShape: @@ -865,7 +873,7 @@ classes: description: Shapes associated with an annotation from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: annotation: name: annotation @@ -897,8 +905,8 @@ classes: description: Metadata for an annotation from_schema: cdp-dataset-config mixins: - - IDMixin - - DateStampedEntityMixin + - IDMixin + - DateStampedEntityMixin attributes: run: name: run @@ -940,7 +948,7 @@ classes: description: S3 path for the metadata json file for this annotation from_schema: cdp-dataset-config exact_mappings: - - api_s3_path + - api_s3_path range: string required: true https_metadata_path: @@ -948,7 +956,7 @@ classes: description: HTTPS path for the metadata json file for this annotation from_schema: cdp-dataset-config exact_mappings: - - api_https_path + - api_https_path range: string required: true annotation_publication: @@ -956,25 +964,27 @@ classes: description: DOIs for publications that describe the dataset. Use a comma to separate multiple DOIs. from_schema: cdp-dataset-config exact_mappings: - - annotation_publications + - annotation_publications range: string pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8}))*$ annotation_method: name: annotation_method - description: Describe how the annotation is made (e.g. Manual, crYoLO, Positive + description: + Describe how the annotation is made (e.g. Manual, crYoLO, Positive Unlabeled Learning, template matching) from_schema: cdp-dataset-config exact_mappings: - - annotation_method + - annotation_method range: string required: true ground_truth_status: name: ground_truth_status - description: Whether an annotation is considered ground truth, as determined + description: + Whether an annotation is considered ground truth, as determined by the annotator. from_schema: cdp-dataset-config exact_mappings: - - annotation_ground_truth_status + - annotation_ground_truth_status ifabsent: false range: boolean recommended: true @@ -983,81 +993,85 @@ classes: description: Gene Ontology Cellular Component identifier or UniProtKB accession for the annotation object. from_schema: cdp-dataset-config exact_mappings: - - annotation_object_id + - annotation_object_id range: string required: true pattern: ^GO:[0-9]{7}$ object_name: name: object_name - description: Name of the object being annotated (e.g. ribosome, nuclear pore + description: + Name of the object being annotated (e.g. ribosome, nuclear pore complex, actin filament, membrane) from_schema: cdp-dataset-config exact_mappings: - - annotation_object_name + - annotation_object_name range: string required: true object_description: name: object_description - description: A textual description of the annotation object, can be a longer + description: + A textual description of the annotation object, can be a longer description to include additional information not covered by the Annotation object name and state. from_schema: cdp-dataset-config exact_mappings: - - annotation_object_description + - annotation_object_description range: string object_state: name: object_state description: Molecule state annotated (e.g. open, closed) from_schema: cdp-dataset-config exact_mappings: - - annotation_object_state + - annotation_object_state range: string object_count: name: object_count description: Number of objects identified from_schema: cdp-dataset-config exact_mappings: - - annotation_object_count + - annotation_object_count range: integer confidence_precision: name: confidence_precision - description: Describe the confidence level of the annotation. Precision is + description: + Describe the confidence level of the annotation. Precision is defined as the % of annotation objects being true positive from_schema: cdp-dataset-config exact_mappings: - - annotation_confidence_precision + - annotation_confidence_precision range: float minimum_value: 0 maximum_value: 100 unit: - symbol: '%' + symbol: "%" descriptive_name: percentage confidence_recall: name: confidence_recall - description: Describe the confidence level of the annotation. Recall is defined + description: + Describe the confidence level of the annotation. Recall is defined as the % of true positives being annotated correctly from_schema: cdp-dataset-config exact_mappings: - - annotation_confidence_recall + - annotation_confidence_recall range: float minimum_value: 0 maximum_value: 100 unit: - symbol: '%' + symbol: "%" descriptive_name: percentage ground_truth_used: name: ground_truth_used description: Annotation filename used as ground truth for precision and recall from_schema: cdp-dataset-config exact_mappings: - - annotation_ground_truth_used + - annotation_ground_truth_used range: string annotation_software: name: annotation_software description: Software used for generating this annotation from_schema: cdp-dataset-config exact_mappings: - - annotation_software + - annotation_software range: string recommended: true is_curator_recommended: @@ -1065,7 +1079,7 @@ classes: description: Data curator’s subjective choice as the best annotation of the same annotation object ID from_schema: cdp-dataset-config exact_mappings: - - annotation_is_curator_recommended + - annotation_is_curator_recommended ifabsent: false range: boolean method_type: @@ -1073,7 +1087,7 @@ classes: description: The method type for generating the annotation (e.g. manual, hybrid, automated) from_schema: cdp-dataset-config exact_mappings: - - annotation_method_type + - annotation_method_type range: annotation_method_type_enum required: true pattern: (^manual$)|(^automated$)|(^hybrid$) @@ -1082,7 +1096,7 @@ classes: description: Date when an annotation set is initially received by the Data Portal. from_schema: cdp-dataset-config exact_mappings: - - deposition_date + - deposition_date range: date required: true release_date: @@ -1090,7 +1104,7 @@ classes: description: Date when annotation data is made public by the Data Portal. from_schema: cdp-dataset-config exact_mappings: - - release_date + - release_date range: date required: true last_modified_date: @@ -1098,7 +1112,7 @@ classes: description: Date when an annotation was last modified in the Data Portal from_schema: cdp-dataset-config exact_mappings: - - last_modified_date + - last_modified_date range: date required: true DatasetAuthor: @@ -1110,8 +1124,8 @@ classes: description: An author of a dataset from_schema: cdp-dataset-config mixins: - - IDMixin - - AuthorEntityMixin + - IDMixin + - AuthorEntityMixin attributes: dataset: name: dataset @@ -1133,7 +1147,7 @@ classes: description: Metadata for a dataset's funding sources from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: dataset: name: dataset @@ -1146,7 +1160,7 @@ classes: description: Name of the funding agency. from_schema: cdp-dataset-config exact_mappings: - - funding_agency_name + - funding_agency_name range: string recommended: true grant_id: @@ -1154,7 +1168,7 @@ classes: description: Grant identifier provided by the funding agency. from_schema: cdp-dataset-config exact_mappings: - - funding_grant_id + - funding_grant_id range: string recommended: true Dataset: @@ -1166,9 +1180,9 @@ classes: description: A collection of imaging experiments on the same organism from_schema: cdp-dataset-config mixins: - - IDMixin - - DateStampedEntityMixin - - S3PrefixedEntityMixin + - IDMixin + - DateStampedEntityMixin + - S3PrefixedEntityMixin attributes: deposition: name: deposition @@ -1208,16 +1222,17 @@ classes: description: Title of a CryoET dataset from_schema: cdp-dataset-config exact_mappings: - - dataset_title + - dataset_title range: string required: true description: name: description - description: A short description of a CryoET dataset, similar to an abstract + description: + A short description of a CryoET dataset, similar to an abstract for a journal article or dataset. from_schema: cdp-dataset-config exact_mappings: - - dataset_description + - dataset_description range: string required: true organism_name: @@ -1225,7 +1240,7 @@ classes: description: Name of the organism from which a biological sample used in a CryoET study is derived from, e.g. homo sapiens from_schema: cdp-dataset-config exact_mappings: - - organism_name + - organism_name range: string recommended: true required: false @@ -1234,12 +1249,13 @@ classes: description: NCBI taxonomy identifier for the organism, e.g. 9606 from_schema: cdp-dataset-config exact_mappings: - - organism_taxid + - organism_taxid range: integer recommended: true minimum_value: 1 tissue_name: - description: Name of the tissue from which a biological sample used in a CryoET + description: + Name of the tissue from which a biological sample used in a CryoET study is derived from. range: string # TODO THIS SHOULD BE REQUIRED @@ -1249,7 +1265,7 @@ classes: description: UBERON identifier for the tissue from_schema: cdp-dataset-config exact_mappings: - - tissue_id + - tissue_id range: string recommended: true pattern: ^BTO:[0-9]{7}$ @@ -1264,7 +1280,7 @@ classes: description: Cell Ontology identifier for the cell type from_schema: cdp-dataset-config exact_mappings: - - cell_type_id + - cell_type_id range: string recommended: true pattern: ^CL:[0-9]{7}$ @@ -1278,7 +1294,7 @@ classes: description: Link to more information about the cell strain from_schema: cdp-dataset-config exact_mappings: - - cell_strain_id + - cell_strain_id recommended: true pattern: (WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$) range: string @@ -1292,7 +1308,7 @@ classes: description: Describe how the sample was prepared. from_schema: cdp-dataset-config exact_mappings: - - sample_preparation + - sample_preparation range: string recommended: true grid_preparation: @@ -1300,7 +1316,7 @@ classes: description: Describe Cryo-ET grid preparation. from_schema: cdp-dataset-config exact_mappings: - - grid_preparation + - grid_preparation range: string recommended: true other_setup: @@ -1308,7 +1324,7 @@ classes: description: Describe other setup not covered by sample preparation or grid preparation that may make this dataset unique in the same publication from_schema: cdp-dataset-config exact_mappings: - - preparation_other_setup + - preparation_other_setup range: string recommended: true key_photo_url: @@ -1316,14 +1332,14 @@ classes: description: URL for the dataset preview image. from_schema: cdp-dataset-config exact_mappings: - - dataset_key_photo_url + - dataset_key_photo_url range: string key_photo_thumbnail_url: name: key_photo_thumbnail_url description: URL for the thumbnail of preview image. from_schema: cdp-dataset-config exact_mappings: - - dataset_key_photo_thumbnail_url + - dataset_key_photo_thumbnail_url range: string cell_component_name: name: cell_component_name @@ -1342,7 +1358,7 @@ classes: description: Date when a dataset is initially received by the Data Portal. from_schema: cdp-dataset-config exact_mappings: - - deposition_date + - deposition_date range: date required: true release_date: @@ -1350,7 +1366,7 @@ classes: description: Date when a dataset is made available on the Data Portal. from_schema: cdp-dataset-config exact_mappings: - - release_date + - release_date range: date required: true last_modified_date: @@ -1358,12 +1374,13 @@ classes: description: Date when a released dataset is last modified. from_schema: cdp-dataset-config exact_mappings: - - last_modified_date + - last_modified_date range: date required: true dataset_publications: name: dataset_publications - description: Comma-separated list of DOIs for publications associated with + description: + Comma-separated list of DOIs for publications associated with the dataset. from_schema: cdp-dataset-config range: string @@ -1381,7 +1398,7 @@ classes: description: The S3 public bucket path where this dataset is contained from_schema: cdp-dataset-config exact_mappings: - - api_s3_prefix + - api_s3_prefix range: string required: true https_prefix: @@ -1389,7 +1406,7 @@ classes: description: The https directory path where this dataset is contained from_schema: cdp-dataset-config exact_mappings: - - api_https_prefix + - api_https_prefix range: string required: true slot_usage: @@ -1404,8 +1421,8 @@ classes: description: Authors for a deposition from_schema: cdp-dataset-config mixins: - - IDMixin - - AuthorEntityMixin + - IDMixin + - AuthorEntityMixin attributes: deposition: name: deposition @@ -1427,8 +1444,8 @@ classes: value: Depositions from_schema: cdp-dataset-config mixins: - - IDMixin - - DateStampedEntityMixin + - IDMixin + - DateStampedEntityMixin attributes: authors: name: authors @@ -1492,10 +1509,10 @@ classes: cascade_delete: true title: name: title - description: Title for the deposition + description: Title for the deposition from_schema: cdp-dataset-config exact_mappings: - - deposition_title + - deposition_title range: string required: true description: @@ -1503,7 +1520,7 @@ classes: description: Description for the deposition from_schema: cdp-dataset-config exact_mappings: - - deposition_description + - deposition_description range: string required: true deposition_types: @@ -1534,7 +1551,7 @@ classes: description: The date the deposition was deposited from_schema: cdp-dataset-config exact_mappings: - - deposition_date + - deposition_date range: date required: true release_date: @@ -1542,7 +1559,7 @@ classes: description: The date the deposition was released from_schema: cdp-dataset-config exact_mappings: - - release_date + - release_date range: date required: true last_modified_date: @@ -1550,7 +1567,7 @@ classes: description: The date the deposition was last modified from_schema: cdp-dataset-config exact_mappings: - - last_modified_date + - last_modified_date range: date required: true key_photo_url: @@ -1569,7 +1586,7 @@ classes: value: DepositionTypes from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: deposition: name: deposition @@ -1590,8 +1607,8 @@ classes: value: Frames from_schema: cdp-dataset-config mixins: - - IDMixin - - S3PrefixedEntityMixin + - IDMixin + - S3PrefixedEntityMixin attributes: deposition: name: deposition @@ -1608,7 +1625,7 @@ classes: description: Camera angle for a frame from_schema: cdp-dataset-config exact_mappings: - - api_frame_raw_angle + - api_frame_raw_angle range: float required: true minimum_value: -90 @@ -1618,7 +1635,7 @@ classes: description: Frame's acquistion order within a tilt experiment from_schema: cdp-dataset-config exact_mappings: - - api_frame_acquisition_order + - api_frame_acquisition_order range: integer required: false dose: @@ -1626,7 +1643,7 @@ classes: description: The raw camera angle for a frame from_schema: cdp-dataset-config exact_mappings: - - api_frame_dose + - api_frame_dose range: float required: true unit: @@ -1637,24 +1654,26 @@ classes: description: Whether this frame has been gain corrected from_schema: cdp-dataset-config exact_mappings: - - api_frame_is_gain_corrected + - api_frame_is_gain_corrected range: boolean s3_prefix: name: s3_prefix - description: Path to a directory containing data for this entity as an S3 + description: + Path to a directory containing data for this entity as an S3 url from_schema: cdp-dataset-config exact_mappings: - - api_s3_prefix + - api_s3_prefix range: string required: true https_prefix: name: https_prefix - description: Path to a directory containing data for this entity as an HTTPS + description: + Path to a directory containing data for this entity as an HTTPS url from_schema: cdp-dataset-config exact_mappings: - - api_https_prefix + - api_https_prefix range: string required: true PerSectionAlignmentParameters: @@ -1666,7 +1685,7 @@ classes: description: Map alignment parameters to tilt series frames from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: alignment: name: alignment @@ -1680,7 +1699,7 @@ classes: description: z-index of the frame in the tiltseries from_schema: cdp-dataset-config exact_mappings: - - per_section_z_index + - per_section_z_index range: integer required: true minimum_value: 0 @@ -1689,7 +1708,7 @@ classes: description: In-plane X-shift of the projection in angstrom from_schema: cdp-dataset-config exact_mappings: - - per_section_alignment_x_offset + - per_section_alignment_x_offset range: float unit: symbol: Å @@ -1699,7 +1718,7 @@ classes: description: In-plane Y-shift of the projection in angstrom from_schema: cdp-dataset-config exact_mappings: - - per_section_alignment_y_offset + - per_section_alignment_y_offset range: float unit: symbol: Å @@ -1718,7 +1737,7 @@ classes: description: Tilt angle of the projection in degrees from_schema: cdp-dataset-config exact_mappings: - - per_section_alignment_tilt_angle + - per_section_alignment_tilt_angle range: float unit: symbol: ° @@ -1731,8 +1750,8 @@ classes: value: Runs from_schema: cdp-dataset-config mixins: - - IDMixin - - S3PrefixedEntityMixin + - IDMixin + - S3PrefixedEntityMixin attributes: alignments: name: alignments @@ -1810,7 +1829,7 @@ classes: description: Short name for this experiment run from_schema: cdp-dataset-config exact_mappings: - - api_run_name + - api_run_name range: string required: true s3_prefix: @@ -1818,7 +1837,7 @@ classes: description: The S3 public bucket path where this run is contained from_schema: cdp-dataset-config exact_mappings: - - api_s3_prefix + - api_s3_prefix range: string required: true https_prefix: @@ -1827,7 +1846,7 @@ classes: url from_schema: cdp-dataset-config exact_mappings: - - api_https_prefix + - api_https_prefix range: string required: true Tiltseries: @@ -1838,7 +1857,7 @@ classes: value: Tiltseries from_schema: cdp-dataset-config mixins: - - IDMixin + - IDMixin attributes: alignments: description: Tiltseries Alignment @@ -1865,49 +1884,49 @@ classes: description: S3 path to this tiltseries in multiscale OME-Zarr format from_schema: cdp-dataset-config exact_mappings: - - tiltseries_s3_omezarr_dir + - tiltseries_s3_omezarr_dir range: string s3_mrc_file: name: s3_mrc_file description: S3 path to this tiltseries in MRC format (no scaling) from_schema: cdp-dataset-config exact_mappings: - - tiltseries_s3_mrc_file + - tiltseries_s3_mrc_file range: string https_omezarr_dir: name: https_omezarr_dir description: HTTPS path to this tiltseries in multiscale OME-Zarr format from_schema: cdp-dataset-config exact_mappings: - - tiltseries_https_omezarr_dir + - tiltseries_https_omezarr_dir range: string https_mrc_file: name: https_mrc_file description: HTTPS path to this tiltseries in MRC format (no scaling) from_schema: cdp-dataset-config exact_mappings: - - tiltseries_https_mrc_file + - tiltseries_https_mrc_file range: string s3_angle_list: name: s3_angle_list description: S3 path to the angle list file for this tiltseries from_schema: cdp-dataset-config exact_mappings: - - tiltseries_s3_angle_list + - tiltseries_s3_angle_list range: string https_angle_list: name: https_angle_list description: HTTPS path to the angle list file for this tiltseries from_schema: cdp-dataset-config exact_mappings: - - tiltseries_https_angle_list + - tiltseries_https_angle_list range: string acceleration_voltage: name: acceleration_voltage description: Electron Microscope Accelerator voltage in volts from_schema: cdp-dataset-config exact_mappings: - - tiltseries_acceleration_voltage + - tiltseries_acceleration_voltage range: integer required: true minimum_value: 20000 @@ -1927,7 +1946,7 @@ classes: description: Name of the microscope manufacturer (FEI, TFS, JEOL) from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_manufacturer + - tiltseries_microscope_manufacturer range: tiltseries_microscope_manufacturer_enum required: true pattern: (^FEI$)|(^TFS$)|(^JEOL$) @@ -1936,7 +1955,7 @@ classes: description: Microscope model name from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_model + - tiltseries_microscope_model range: string required: true microscope_energy_filter: @@ -1944,7 +1963,7 @@ classes: description: Energy filter setup used from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_energy_filter + - tiltseries_microscope_energy_filter range: string required: true microscope_phase_plate: @@ -1952,29 +1971,30 @@ classes: description: Phase plate configuration from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_phase_plate + - tiltseries_microscope_phase_plate range: string microscope_image_corrector: name: microscope_image_corrector description: Image corrector setup from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_image_corrector + - tiltseries_microscope_image_corrector range: string microscope_additional_info: name: microscope_additional_info - description: Other microscope optical setup information, in addition to energy + description: + Other microscope optical setup information, in addition to energy filter, phase plate and image corrector from_schema: cdp-dataset-config exact_mappings: - - tiltseries_microscope_additional_info + - tiltseries_microscope_additional_info range: string camera_manufacturer: name: camera_manufacturer description: Name of the camera manufacturer from_schema: cdp-dataset-config exact_mappings: - - tiltseries_camera_manufacturer + - tiltseries_camera_manufacturer range: string required: true camera_model: @@ -1982,7 +2002,7 @@ classes: description: Camera model name from_schema: cdp-dataset-config exact_mappings: - - tiltseries_camera_model + - tiltseries_camera_model range: string required: true tilt_min: @@ -1990,7 +2010,7 @@ classes: description: Minimal tilt angle in degrees from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilt_min + - tiltseries_tilt_min range: float required: true minimum_value: -90 @@ -2003,7 +2023,7 @@ classes: description: Maximal tilt angle in degrees from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilt_max + - tiltseries_tilt_max range: float required: true minimum_value: -90 @@ -2016,7 +2036,7 @@ classes: description: Total tilt range in degrees from_schema: cdp-dataset-config exact_mappings: - - api_tiltseries_tilt_range + - api_tiltseries_tilt_range range: float required: true minimum_value: 0 @@ -2029,7 +2049,7 @@ classes: description: Tilt step in degrees from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilt_step + - tiltseries_tilt_step range: float required: true minimum_value: 0 @@ -2042,7 +2062,7 @@ classes: description: The order of stage tilting during acquisition of the data from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilting_scheme + - tiltseries_tilting_scheme range: string required: true tilt_axis: @@ -2050,7 +2070,7 @@ classes: description: Rotation angle in degrees from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilt_axis + - tiltseries_tilt_axis range: float required: true minimum_value: -360 @@ -2060,11 +2080,12 @@ classes: descriptive_name: degrees total_flux: name: total_flux - description: Number of Electrons reaching the specimen in a square Angstrom + description: + Number of Electrons reaching the specimen in a square Angstrom area for the entire tilt series from_schema: cdp-dataset-config exact_mappings: - - tiltseries_total_flux + - tiltseries_total_flux range: float required: true minimum_value: 0 @@ -2076,16 +2097,17 @@ classes: description: Software used to collect data from_schema: cdp-dataset-config exact_mappings: - - tiltseries_data_acquisition_software + - tiltseries_data_acquisition_software range: string required: true related_empiar_entry: name: related_empiar_entry - description: If a tilt series is deposited into EMPIAR, enter the EMPIAR dataset + description: + If a tilt series is deposited into EMPIAR, enter the EMPIAR dataset identifier from_schema: cdp-dataset-config exact_mappings: - - tiltseries_related_empiar_entry + - tiltseries_related_empiar_entry range: string pattern: ^EMPIAR-[0-9]+$ binning_from_frames: @@ -2093,16 +2115,17 @@ classes: description: Describes the binning factor from frames to tilt series file from_schema: cdp-dataset-config exact_mappings: - - tiltseries_binning_from_frames + - tiltseries_binning_from_frames range: float minimum_value: 0 tilt_series_quality: name: tilt_series_quality - description: Author assessment of tilt series quality within the dataset (1-5, + description: + Author assessment of tilt series quality within the dataset (1-5, 5 is best) from_schema: cdp-dataset-config exact_mappings: - - tiltseries_tilt_series_quality + - tiltseries_tilt_series_quality range: integer required: true minimum_value: 1 @@ -2112,7 +2135,7 @@ classes: description: Whether this tilt series is aligned from_schema: cdp-dataset-config exact_mappings: - - tiltseries_is_aligned + - tiltseries_is_aligned range: boolean required: true pixel_spacing: @@ -2120,7 +2143,7 @@ classes: description: Pixel spacing equal in both axes in angstroms from_schema: cdp-dataset-config exact_mappings: - - tiltseries_pixel_spacing + - tiltseries_pixel_spacing range: float required: true minimum_value: 0.001 @@ -2132,7 +2155,7 @@ classes: description: Binning factor of the aligned tilt series from_schema: cdp-dataset-config exact_mappings: - - tiltseries_aligned_tiltseries_binning + - tiltseries_aligned_tiltseries_binning range: integer minimum_value: 0 TomogramAuthor: @@ -2144,8 +2167,8 @@ classes: description: Author of a tomogram from_schema: cdp-dataset-config mixins: - - IDMixin - - AuthorEntityMixin + - IDMixin + - AuthorEntityMixin attributes: tomogram: name: tomogram @@ -2165,8 +2188,8 @@ classes: description: Voxel spacings for a run from_schema: cdp-dataset-config mixins: - - IDMixin - - S3PrefixedEntityMixin + - IDMixin + - S3PrefixedEntityMixin attributes: annotation_files: name: annotation_files @@ -2197,7 +2220,7 @@ classes: description: The voxel spacing for the tomograms in this set in angstroms from_schema: cdp-dataset-config exact_mappings: - - tomogram_voxel_spacing + - tomogram_voxel_spacing range: float required: true minimum_value: 0.001 @@ -2209,7 +2232,7 @@ classes: description: The S3 public bucket path where this tomogram voxel spacing is contained from_schema: cdp-dataset-config exact_mappings: - - api_s3_prefix + - api_s3_prefix range: string required: true https_prefix: @@ -2217,7 +2240,7 @@ classes: description: The HTTPS directory path where this tomogram voxel spacing is contained from_schema: cdp-dataset-config exact_mappings: - - api_https_prefix + - api_https_prefix range: string required: true Tomogram: @@ -2229,8 +2252,8 @@ classes: description: Metadata describing a tomogram. from_schema: cdp-dataset-config mixins: - - IDMixin - - DateStampedEntityMixin + - IDMixin + - DateStampedEntityMixin slot_usage: deposition_date: required: False @@ -2277,14 +2300,14 @@ classes: description: Short name for this tomogram from_schema: cdp-dataset-config exact_mappings: - - tomogram_name + - tomogram_name range: string size_x: name: size_x description: Number of pixels in the 3D data fast axis from_schema: cdp-dataset-config exact_mappings: - - api_tomogram_size_x + - api_tomogram_size_x range: integer required: true minimum_value: 0 @@ -2296,7 +2319,7 @@ classes: description: Number of pixels in the 3D data medium axis from_schema: cdp-dataset-config exact_mappings: - - api_tomogram_size_y + - api_tomogram_size_y range: integer required: true minimum_value: 0 @@ -2308,7 +2331,7 @@ classes: description: Number of pixels in the 3D data slow axis. This is the image projection direction at zero stage tilt from_schema: cdp-dataset-config exact_mappings: - - api_tomogram_size_z + - api_tomogram_size_z range: integer required: true minimum_value: 0 @@ -2320,7 +2343,7 @@ classes: description: Voxel spacing equal in all three axes in angstroms from_schema: cdp-dataset-config exact_mappings: - - tomogram_voxel_spacing + - tomogram_voxel_spacing range: float required: true minimum_value: 0.001 @@ -2329,10 +2352,10 @@ classes: descriptive_name: Angstroms per voxel fiducial_alignment_status: name: fiducial_alignment_status - description: 'Fiducial Alignment status: True = aligned with fiducial False = aligned without fiducial' + description: "Fiducial Alignment status: True = aligned with fiducial False = aligned without fiducial" from_schema: cdp-dataset-config exact_mappings: - - tomogram_fiducial_alignment_status + - tomogram_fiducial_alignment_status range: fiducial_alignment_status_enum required: true pattern: (^FIDUCIAL$)|(^NON_FIDUCIAL$) @@ -2345,7 +2368,7 @@ classes: description: Describe additional processing used to derive the tomogram from_schema: cdp-dataset-config exact_mappings: - - tomogram_processing + - tomogram_processing range: tomogram_processing_enum required: true pattern: (^denoised$)|(^filtered$)|(^raw$) @@ -2360,6 +2383,10 @@ classes: range: string required: true description: Name of software used for reconstruction + is_canonical: + description: whether this tomogram adheres to portal standards + range: boolean + ifabsent: false is_portal_standard: description: whether this tomogram adheres to portal standards range: boolean @@ -2377,56 +2404,56 @@ classes: description: S3 path to this tomogram in multiscale OME-Zarr format from_schema: cdp-dataset-config exact_mappings: - - tomogram_s3_omezarr_dir + - tomogram_s3_omezarr_dir range: string https_omezarr_dir: name: https_omezarr_dir description: HTTPS path to this tomogram in multiscale OME-Zarr format from_schema: cdp-dataset-config exact_mappings: - - tomogram_https_omezarr_dir + - tomogram_https_omezarr_dir range: string s3_mrc_file: name: s3_mrc_file description: S3 path to this tomogram in MRC format (no scaling) from_schema: cdp-dataset-config exact_mappings: - - tomogram_s3_mrc_file + - tomogram_s3_mrc_file range: string https_mrc_file: name: https_mrc_file description: HTTPS path to this tomogram in MRC format (no scaling) from_schema: cdp-dataset-config exact_mappings: - - tomogram_https_mrc_file + - tomogram_https_mrc_file range: string scale0_dimensions: name: scale0_dimensions description: comma separated x,y,z dimensions of the unscaled tomogram from_schema: cdp-dataset-config exact_mappings: - - tomogram_scale0_dimensions + - tomogram_scale0_dimensions range: string scale1_dimensions: name: scale1_dimensions description: comma separated x,y,z dimensions of the scale1 tomogram from_schema: cdp-dataset-config exact_mappings: - - tomogram_scale1_dimensions + - tomogram_scale1_dimensions range: string scale2_dimensions: name: scale2_dimensions description: comma separated x,y,z dimensions of the scale2 tomogram from_schema: cdp-dataset-config exact_mappings: - - tomogram_scale2_dimensions + - tomogram_scale2_dimensions range: string ctf_corrected: name: ctf_corrected description: Whether this tomogram is CTF corrected from_schema: cdp-dataset-config exact_mappings: - - tomogram_ctf_corrected + - tomogram_ctf_corrected range: boolean recommended: true offset_x: @@ -2461,21 +2488,21 @@ classes: description: URL for the key photo from_schema: cdp-dataset-config exact_mappings: - - tomogram_key_photo_url + - tomogram_key_photo_url range: string key_photo_thumbnail_url: name: key_photo_thumbnail_url description: URL for the thumbnail of key photo from_schema: cdp-dataset-config exact_mappings: - - tomogram_key_photo_thumbnail_url + - tomogram_key_photo_thumbnail_url range: string neuroglancer_config: name: neuroglancer_config description: the compact json of neuroglancer config from_schema: cdp-dataset-config exact_mappings: - - tomogram_neuroglancer_config + - tomogram_neuroglancer_config range: string publications: description: Comma-separated list of DOIs for publications associated with the tomogram. @@ -2489,7 +2516,8 @@ classes: pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$) DateStampedEntityMixin: name: DateStampedEntityMixin - description: A set of dates at which a data item was deposited, published and + description: + A set of dates at which a data item was deposited, published and last modified. from_schema: cdp-dataset-config mixin: true @@ -2499,7 +2527,7 @@ classes: description: The date a data item was received by the cryoET data portal. from_schema: cdp-dataset-config exact_mappings: - - deposition_date + - deposition_date range: date required: true release_date: @@ -2507,16 +2535,17 @@ classes: description: The date a data item was received by the cryoET data portal. from_schema: cdp-dataset-config exact_mappings: - - release_date + - release_date range: date required: true last_modified_date: name: last_modified_date - description: The date a piece of data was last modified on the cryoET data + description: + The date a piece of data was last modified on the cryoET data portal. from_schema: cdp-dataset-config exact_mappings: - - last_modified_date + - last_modified_date range: date required: true AuthorMixin: @@ -2530,7 +2559,7 @@ classes: description: Full name of a deposition author (e.g. Jane Doe). from_schema: cdp-dataset-config exact_mappings: - - author_name + - author_name range: string required: true email: @@ -2538,28 +2567,28 @@ classes: description: Email address for this author from_schema: cdp-dataset-config exact_mappings: - - author_email + - author_email range: string affiliation_name: name: affiliation_name description: Name of the institutions an author is affiliated with. Comma separated from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_name + - author_affiliation_name range: string affiliation_address: name: affiliation_address description: Address of the institution an author is affiliated with. from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_address + - author_affiliation_address range: string affiliation_identifier: name: affiliation_identifier description: A unique identifier assigned to the affiliated institution by The Research Organization Registry (ROR). from_schema: cdp-dataset-config exact_mappings: - - author_affiliation_identifier + - author_affiliation_identifier range: string recommended: true corresponding_author_status: @@ -2567,7 +2596,7 @@ classes: description: Whether the author is a corresponding author. from_schema: cdp-dataset-config exact_mappings: - - author_corresponding_author_status + - author_corresponding_author_status ifabsent: false range: boolean primary_author_status: @@ -2575,7 +2604,7 @@ classes: description: Whether the author is a primary author. from_schema: cdp-dataset-config exact_mappings: - - author_primary_author_status + - author_primary_author_status ifabsent: false range: boolean AnnotationMethodLink: @@ -2585,7 +2614,7 @@ classes: value: AnnotationMethodLinks description: A set of links to models, source code, documentation, etc referenced by annotation method mixins: - - IDMixin + - IDMixin attributes: annotation: name: annotation diff --git a/apiv2/test_infra/factories/tomogram.py b/apiv2/test_infra/factories/tomogram.py index 76d433c50..ee5dc312d 100644 --- a/apiv2/test_infra/factories/tomogram.py +++ b/apiv2/test_infra/factories/tomogram.py @@ -55,6 +55,7 @@ class Meta: tomogram_version = fuzzy.FuzzyFloat(1, 100) processing_software = fuzzy.FuzzyText() reconstruction_software = fuzzy.FuzzyText() + is_canonical = factory.Faker("boolean") is_portal_standard = factory.Faker("boolean") is_author_submitted = factory.Faker("boolean") is_visualization_default = factory.Faker("boolean") diff --git a/apiv2/validators/tomogram.py b/apiv2/validators/tomogram.py index a30f3f138..fd383f31a 100644 --- a/apiv2/validators/tomogram.py +++ b/apiv2/validators/tomogram.py @@ -69,6 +69,7 @@ class TomogramCreateInputValidator(BaseModel): strip_whitespace=True, ), ] + is_canonical: Annotated[bool | None, Field()] is_portal_standard: Annotated[bool | None, Field()] is_author_submitted: Annotated[bool | None, Field()] is_visualization_default: Annotated[bool | None, Field()] @@ -209,6 +210,7 @@ class TomogramUpdateInputValidator(BaseModel): strip_whitespace=True, ), ] + is_canonical: Annotated[bool | None, Field()] is_portal_standard: Annotated[bool | None, Field()] is_author_submitted: Annotated[bool | None, Field()] is_visualization_default: Annotated[bool | None, Field()]