diff --git a/linkml_runtime/utils/compile_python.py b/linkml_runtime/utils/compile_python.py
index 906d30d0..341a748d 100644
--- a/linkml_runtime/utils/compile_python.py
+++ b/linkml_runtime/utils/compile_python.py
@@ -30,11 +30,15 @@ def compile_python(text_or_fn: str, package_path: str = None) -> ModuleType:
     spec = compile(python_txt, 'test', 'exec')
     module = ModuleType('test')
     if package_path:
+        package_path_abs = os.path.join(os.getcwd(), package_path)
         # We have to calculate the path to expected path relative to the current working directory
         for path in sys.path:
             if package_path.startswith(path):
                 path_from_tests_parent = os.path.relpath(package_path, path)
                 break
+            if package_path_abs.startswith(path):
+                path_from_tests_parent = os.path.relpath(package_path_abs, path)
+                break
         else:
             warning(f"There is no established path to {package_path} - compile_python may or may not work")
             path_from_tests_parent = os.path.relpath(package_path, os.path.join(os.getcwd(), '..'))
diff --git a/linkml_runtime/utils/schemaview.py b/linkml_runtime/utils/schemaview.py
new file mode 100644
index 00000000..4a5fc084
--- /dev/null
+++ b/linkml_runtime/utils/schemaview.py
@@ -0,0 +1,782 @@
+import os
+import uuid
+import logging
+from functools import lru_cache
+from copy import copy
+from collections import defaultdict
+from typing import List, Any, Dict, Optional, Union, Mapping
+from dataclasses import dataclass
+from linkml_runtime.utils.namespaces import Namespaces
+from linkml_runtime.utils.formatutils import camelcase, underscore
+
+from linkml_runtime.loaders.yaml_loader import YAMLLoader
+from linkml_runtime.utils.context_utils import parse_import_map
+from linkml_runtime.linkml_model.meta import *
+from linkml_runtime.linkml_model.annotations import Annotation, Annotatable
+
+yaml_loader = YAMLLoader()
+
+CACHE_SIZE = 1024
+
+
+SLOTS = 'slots'
+CLASSES = 'classes'
+ENUMS = 'enums'
+SUBSETS = 'subsets'
+TYPES = 'types'
+
+CLASS_NAME = Union[ClassDefinitionName, str]
+SLOT_NAME = Union[SlotDefinitionName, str]
+SUBSET_NAME = Union[SubsetDefinitionName, str]
+TYPE_NAME = Union[TypeDefinitionName, str]
+ENUM_NAME = Union[EnumDefinitionName, str]
+
+def _closure(f, x, reflexive=True, **kwargs):  # closure of f starting at x; extra kwargs are accepted but unused
+    if reflexive:
+        rv = [x]
+    else:
+        rv = []
+    visited = []
+    todo = [x]
+    while len(todo) > 0:
+        i = todo.pop()
+        visited.append(i)
+        vals = f(i)
+        for v in vals:
+            if v not in visited and v not in todo:  # a queued node is already in rv; re-adding it produced duplicates
+                todo.append(v)
+                rv.append(v)
+    return rv
+
+def load_schema_wrap(path: str, **kwargs):  # load a SchemaDefinition from YAML and record where it came from
+    schema: SchemaDefinition
+    schema = yaml_loader.load(path, target_class=SchemaDefinition, **kwargs)
+    schema.source_file = path
+    return schema
+
+@dataclass
+class SchemaUsage():
+    """
+    A usage of an element of a schema
+    """
+    used_by: ElementName
+    slot: SlotDefinitionName
+    metaslot: SlotDefinitionName
+    used: ElementName
+
+
+@dataclass
+class SchemaView(object):
+    """
+    A SchemaView provides a virtual schema layered on top of a schema plus its import closure
+
+    Most operations are parameterized by `imports`. If this is set to True (default), then the full
+    import closure is considered when answering
+
+    This class utilizes caching for efficient lookup operations.
+
+    TODO: decide how to use this in conjunction with the existing schemaloader, which injects
+    into the schema rather than providing dynamic methods.
+
+    See:
+     - https://github.com/linkml/linkml/issues/59
+     - https://github.com/linkml/linkml/discussions/144
+     - https://github.com/linkml/linkml/issues/48
+     - https://github.com/linkml/linkml/issues/270
+    """
+
+    schema: SchemaDefinition = None
+    schema_map: Dict[SchemaDefinitionName, SchemaDefinition] = None
+    importmap: Optional[Mapping[str, str]] = None
+    modifications: int = 0
+    uuid: str = None
+
+
+    def __init__(self, schema: Union[str, SchemaDefinition],
+                 importmap: Optional[Mapping[str, str]] = None):
+        if isinstance(schema, str):
+            schema = load_schema_wrap(schema)
+        self.schema = schema
+        self.schema_map = {schema.name: schema}
+        self.importmap = parse_import_map(importmap, os.path.dirname(schema.source_file or '')) if importmap is not None else dict()  # test the argument, not the class default
+        self.uuid = str(uuid.uuid4())
+
+    def __key(self):  # identity used for equality, hashing and therefore lru_cache keys
+        return (self.schema.id, self.uuid, self.modifications)
+
+    def __eq__(self, other):
+        if isinstance(other, SchemaView):
+            return self.__key() == other.__key()
+        return NotImplemented
+
+    def __hash__(self):  # changes whenever set_modified() bumps `modifications`, so cached results are not reused
+        return hash(self.__key())
+
+    @lru_cache()
+    def namespaces(self) -> Namespaces:  # prefix map built from every loaded schema plus the root schema's default curi maps
+        namespaces = Namespaces()
+        for s in self.schema_map.values():
+            for prefix in s.prefixes.values():
+                namespaces[prefix.prefix_prefix] = prefix.prefix_reference
+            for cmap in self.schema.default_curi_maps:
+                namespaces.add_prefixmap(cmap, include_defaults=False)
+        return namespaces
+
+
+    def load_import(self, imp: str, from_schema: SchemaDefinition = None):  # resolve an import name and load that schema
+        if from_schema is None:
+            from_schema = self.schema
+        # TODO: this code is copied from linkml.utils.schemaloader; put this somewhere reusable
+        sname = self.importmap.get(str(imp), imp)               # Import map may use CURIE
+        sname = self.namespaces().uri_for(sname) if ':' in sname else sname  # NOTE(review): namespaces() is cached and schema_map grows without set_modified() - confirm staleness is acceptable
+        sname = self.importmap.get(str(sname), sname)           # It may also use URI or other forms
+        logging.info(f'Loading schema {sname} from {from_schema.source_file}')
+        schema = load_schema_wrap(sname + '.yaml',
+                                  base_dir=os.path.dirname(from_schema.source_file) if from_schema.source_file else None)
+        return schema
+
+    @lru_cache()
+    def imports_closure(self, traverse=True) -> List[SchemaDefinitionName]:
+        """
+        Return all imports
+
+        :param traverse: if true, traverse recursively
+        :return: all schema names in the transitive reflexive imports closure
+        """
+        if self.schema_map is None:
+            self.schema_map = {self.schema.name: self.schema}
+        closure = []
+        visited = set()
+        todo = [self.schema.name]
+        if not traverse:
+            return todo
+        while len(todo) > 0:
+            sn = todo.pop()
+            visited.add(sn)
+            if sn not in self.schema_map:
+                imported_schema = self.load_import(sn)
+                self.schema_map[sn] = imported_schema
+            s = self.schema_map[sn]
+            if sn not in closure:
+                closure.append(sn)
+            for i in s.imports:
+                if i not in visited:
+                    todo.append(i)
+        return closure
+
+    @lru_cache()
+    def all_schema(self, imports=True) -> List[SchemaDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all schemas
+        """
+        m = self.schema_map
+        return [m[sn] for sn in self.imports_closure(imports)]
+
+    @lru_cache()
+    def all_class(self, imports=True) -> Dict[ClassDefinitionName, ClassDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all classes in schema view
+        """
+        return self._get_dict(CLASSES, imports)
+
+    @lru_cache()
+    def all_slot(self, imports=True) -> Dict[SlotDefinitionName, SlotDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all slots in schema view
+        """
+        return self._get_dict(SLOTS, imports)
+
+    @lru_cache()
+    def all_enum(self, imports=True) -> Dict[EnumDefinitionName, EnumDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all enums in schema view
+        """
+        return self._get_dict(ENUMS, imports)
+
+    @lru_cache()
+    def all_type(self, imports=True) -> Dict[TypeDefinitionName, TypeDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all types in schema view
+        """
+        return self._get_dict(TYPES, imports)
+
+    @lru_cache()
+    def all_subset(self, imports=True) -> Dict[SubsetDefinitionName, SubsetDefinition]:
+        """
+        :param imports: include imports closure
+        :return: all subsets in schema view
+        """
+        return self._get_dict(SUBSETS, imports)
+
+    @lru_cache()
+    def all_element(self, imports=True) -> Dict[ElementName, Element]:
+        """
+        :param imports: include imports closure
+        :return: all elements in schema view
+        """
+        all_c = self.all_class(imports)
+        all_s = self.all_slot(imports)
+        all_e = self.all_enum(imports)
+        all_t = self.all_type(imports)
+        all_v = self.all_subset(imports)
+        return {**all_c, **all_s, **all_e, **all_t, **all_v}
+
+    def _get_dict(self, slot_name: str, imports=True) -> Dict:  # merge one schema-level dict (e.g. 'classes') across schemas
+        schemas = self.all_schema(imports)
+        d = {}
+        for s in schemas:
+            d1 = getattr(s, slot_name, {})
+            d.update(d1)  # later schemas win on key clashes, as before, without rebuilding d each time
+        return d
+
+    @lru_cache()
+    def in_schema(self, element_name: ElementName) -> SchemaDefinitionName:
+        """
+        :param element_name: name of the element to look up
+        :return: name of schema in which element is defined
+        """
+        ix = self.element_by_schema_map()
+        return ix[element_name]
+
+    @lru_cache()
+    def element_by_schema_map(self) -> Dict[ElementName, SchemaDefinitionName]:  # element name -> name of the defining schema
+        ix = {}
+        schemas = self.all_schema(True)
+        for schema in schemas:
+            for type_key in [CLASSES, SLOTS, TYPES, ENUMS, SUBSETS]:
+                for k, v in getattr(schema, type_key, {}).items():
+                    ix[k] = schema.name
+        return ix
+
+    @lru_cache()
+    def get_class(self, class_name: CLASS_NAME, imports=True) -> ClassDefinition:
+        """
+        :param class_name: name of the class to be retrieved
+        :param imports: include import closure
+        :return: class definition
+        """
+        return self.all_class(imports).get(class_name, None)
+
+    @lru_cache()
+    def get_slot(self, slot_name: SLOT_NAME, imports=True, attributes=False) -> SlotDefinition:
+        """
+        :param slot_name: name of the slot to be retrieved
+        :param imports: include import closure
+        :return: slot definition
+        """
+        slot = self.all_slot(imports).get(slot_name, None)
+        if slot is None and attributes:
+            for c in self.all_class(imports).values():
+                if slot_name in c.attributes:
+                    if slot is not None:
+                        # slot name is ambiguous, no results
+                        return None
+                    slot = c.attributes[slot_name]
+        return slot
+
+    @lru_cache()
+    def get_subset(self, subset_name: SUBSET_NAME, imports=True) -> SubsetDefinition:
+        """
+        :param subset_name: name of the subset to be retrieved
+        :param imports: include import closure
+        :return: subset definition
+        """
+        return self.all_subset(imports).get(subset_name, None)
+
+    @lru_cache()
+    def get_enum(self, enum_name: ENUM_NAME, imports=True) -> EnumDefinition:
+        """
+        :param enum_name: name of the enum to be retrieved
+        :param imports: include import closure
+        :return: enum definition
+        """
+        return self.all_enum(imports).get(enum_name, None)
+
+    @lru_cache()
+    def get_type(self, type_name: TYPE_NAME, imports=True) -> TypeDefinition:
+        """
+        :param type_name: name of the type to be retrieved
+        :param imports: include import closure
+        :return: type definition
+        """
+        return self.all_type(imports).get(type_name, None)
+
+    def _parents(self, e: Element, imports=True, mixins=True) -> List[ElementName]:  # direct parents: mixins (optional) then is_a
+        if mixins:
+            parents = copy(e.mixins)
+        else:
+            parents = []
+        if e.is_a is not None:
+            parents.append(e.is_a)
+        return parents
+
+    @lru_cache()
+    def class_parents(self, class_name: CLASS_NAME, imports=True, mixins=True) -> List[ClassDefinitionName]:
+        """
+        :param class_name: child class name
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :return: all direct parent class names (is_a and mixins)
+        """
+        cls = self.get_class(class_name, imports)
+        return self._parents(cls, imports, mixins)
+
+    @lru_cache()
+    def slot_parents(self, slot_name: SLOT_NAME, imports=True, mixins=True) -> List[SlotDefinitionName]:
+        """
+        :param slot_name: child slot name
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :return: all direct parent slot names (is_a and mixins)
+        """
+        s = self.get_slot(slot_name, imports)
+        return self._parents(s, imports, mixins)
+
+    @lru_cache()
+    def class_children(self, class_name: CLASS_NAME, imports=True, mixins=True) -> List[ClassDefinitionName]:
+        """
+        :param class_name: parent class name
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :return: all direct child class names (is_a and mixins)
+        """
+        elts = [self.get_class(x, imports) for x in self.all_class(imports)]
+        return [x.name for x in elts if x.is_a == class_name or (mixins and class_name in x.mixins)]
+
+    @lru_cache()
+    def slot_children(self, slot_name: SLOT_NAME, imports=True, mixins=True) -> List[SlotDefinitionName]:
+        """
+        :param slot_name: parent slot name
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :return: all direct child slot names (is_a and mixins)
+        """
+        elts = [self.get_slot(x, imports) for x in self.all_slot(imports)]
+        return [x.name for x in elts if x.is_a == slot_name or (mixins and slot_name in x.mixins)]
+
+    @lru_cache()
+    def class_ancestors(self, class_name: CLASS_NAME, imports=True, mixins=True, reflexive=True) -> List[ClassDefinitionName]:
+        """
+        Closure of class_parents method
+
+        :param class_name: query class
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :param reflexive: include self in set of ancestors
+        :return: ancestor class names
+        """
+        return _closure(lambda x: self.class_parents(x, imports=imports, mixins=mixins), class_name, reflexive=reflexive)
+
+    @lru_cache()
+    def slot_ancestors(self, slot_name: SLOT_NAME, imports=True, mixins=True, reflexive=True) -> List[SlotDefinitionName]:
+        """
+        Closure of slot_parents method
+
+        :param slot_name: query slot
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :param reflexive: include self in set of ancestors
+        :return: ancestor slot names
+        """
+        return _closure(lambda x: self.slot_parents(x, imports=imports, mixins=mixins), slot_name, reflexive=reflexive)
+
+    @lru_cache()
+    def class_descendants(self, class_name: CLASS_NAME, imports=True, mixins=True, reflexive=True) -> List[ClassDefinitionName]:
+        """
+        Closure of class_children method
+
+        :param class_name: query class
+        :param imports: include import closure
+        :param mixins: include mixins (default is True)
+        :param reflexive: include self in set of descendants
+        :return: descendants class names
+        """
+        return _closure(lambda x: self.class_children(x, imports=imports, mixins=mixins), class_name, reflexive=reflexive)
+
+
+    @lru_cache()
+    def class_roots(self, class_name: CLASS_NAME, imports=True, mixins=True) -> List[ClassDefinitionName]:
+        """
+        All classes that have no parents
+        :param class_name: not used by the lookup; kept so the signature is unchanged
+        :param imports: include imports closure
+        :param mixins: count mixins as parents (default is True)
+        :return: names of all classes without parents
+        """
+        return [c
+                for c in self.all_class(imports=imports)
+                if self.class_parents(c, mixins=mixins, imports=imports) == []]
+
+    @lru_cache()
+    def class_leaves(self, class_name: CLASS_NAME, imports=True, mixins=True) -> List[ClassDefinitionName]:
+        """
+        All classes that have no children
+        :param class_name: not used by the lookup; kept so the signature is unchanged
+        :param imports: include imports closure
+        :param mixins: count mixin users as children (default is True)
+        :return: names of all classes without children
+        """
+        return [c
+                for c in self.all_class(imports=imports)
+                if self.class_children(c, mixins=mixins, imports=imports) == []]
+
+
+    @lru_cache()
+    def slot_roots(self, slot_name: SLOT_NAME, imports=True, mixins=True) -> List[SlotDefinitionName]:
+        """
+        All slots that have no parents
+        :param slot_name: not used by the lookup; kept so the signature is unchanged
+        :param imports: include imports closure
+        :param mixins: count mixins as parents (default is True)
+        :return: names of all slots without parents
+        """
+        return [c
+                for c in self.all_slot(imports=imports)
+                if self.slot_parents(c, mixins=mixins, imports=imports) == []]
+
+    @lru_cache()
+    def slot_leaves(self, slot_name: SLOT_NAME, imports=True, mixins=True) -> List[SlotDefinitionName]:
+        """
+        All slots that have no children
+        :param slot_name: not used by the lookup; kept so the signature is unchanged
+        :param imports: include imports closure
+        :param mixins: count mixin users as children (default is True)
+        :return: names of all slots without children
+        """
+        return [c
+                for c in self.all_slot(imports=imports)
+                if self.slot_children(c, mixins=mixins, imports=imports) == []]
+
+
+    def get_element(self, element: Union[ElementName, Element], imports=True) -> Element:  # tries class, slot, type, enum, subset in that order
+        if isinstance(element, Element):
+            return element
+        e = self.get_class(element, imports=imports)
+        if e is None:
+            e = self.get_slot(element, imports=imports)
+        if e is None:
+            e = self.get_type(element, imports=imports)
+        if e is None:
+            e = self.get_enum(element, imports=imports)
+        if e is None:
+            e = self.get_subset(element, imports=imports)
+        return e
+
+
+    def get_uri(self, element: Union[ElementName, Element], imports=True, expand=False, native=False) -> str:
+        """
+        Return the CURIE or URI for a schema element. If the schema defines a specific URI, this is
+        used, otherwise this is constructed from the default prefix combined with the element name
+
+        :param element: name of schema element, or the element itself
+        :param imports: include imports closure
+        :param native: return the native CURIE or URI rather than what is declared in the uri slot
+        :param expand: expand the CURIE to a URI; defaults to False
+        :return: URI or CURIE as a string
+        """
+        e = self.get_element(element, imports=imports)
+        e_name = e.name
+        if isinstance(e, ClassDefinition):
+            uri = e.class_uri
+            e_name = camelcase(e.name)
+        elif isinstance(e, SlotDefinition):
+            uri = e.slot_uri
+            e_name = underscore(e.name)
+        elif isinstance(e, TypeDefinition):
+            uri = e.uri
+            e_name = underscore(e.name)
+        else:
+            raise Exception(f'Must be class or slot or type: {e}')
+        if uri is None or native:
+            schema = self.schema_map[self.in_schema(e.name)]
+            # no declared URI (or native requested): build a CURIE from the defining schema's default prefix
+            pfx = schema.default_prefix
+            uri = f'{pfx}:{e_name}'
+        if expand:
+            return self.expand_curie(uri)
+        else:
+            return uri
+
+    def expand_curie(self, uri: str) -> str:
+        """
+        Expands a URI or CURIE to a full URI
+        :param uri: URI or CURIE; returned unchanged when its prefix is not a known namespace
+        :return: URI as a string
+        """
+        if ':' in uri:
+            parts = uri.split(':')
+            if len(parts) == 2:
+                [pfx, local_id] = parts
+                ns = self.namespaces()
+                if pfx in ns:
+                    return ns[pfx] + local_id
+        return uri
+
+    @lru_cache()
+    def get_mappings(self, element_name: ElementName = None, imports=True, expand=False) -> Dict[str, List[URIorCURIE]]:  # mapping lists keyed by mapping kind
+        e = self.get_element(element_name, imports=imports)
+        m_dict = {
+            'self': [self.get_uri(element_name, imports=imports, expand=False)],
+            'native': [self.get_uri(element_name, imports=imports, expand=False, native=True)],
+            'exact': e.exact_mappings,
+            'narrow': e.narrow_mappings,
+            'broad': e.broad_mappings,
+            'related': e.related_mappings,
+            'close': e.close_mappings,
+            'undefined': e.mappings
+        }
+        if expand:
+            for k, vs in m_dict.items():
+                m_dict[k] = [self.expand_curie(v) for v in vs]
+
+        return m_dict
+
+
+    @lru_cache()
+    def is_relationship(self, class_name: CLASS_NAME = None, imports=True) -> bool:
+        """
+        Tests if a class represents a relationship or reified statement
+
+        :param class_name: class to test
+        :param imports: include imports closure
+        :return: true if the class represents a relationship
+        """
+        STMT_TYPES = ['rdf:Statement', 'owl:Axiom']
+        for an in self.class_ancestors(class_name, imports=imports):
+            if self.get_uri(an, imports=imports) in STMT_TYPES:
+                return True
+            a = self.get_class(an, imports=imports)
+            for m in a.exact_mappings:
+                if m in STMT_TYPES:
+                    return True
+        return False
+
+    @lru_cache()
+    def annotation_dict(self, element_name: ElementName, imports=True) -> Dict[URIorCURIE, Any]:
+        """
+        Return a dictionary where keys are annotation tags and values are annotation values for any given element.
+
+        Note this will not include higher-order annotations
+
+        :param element_name: name of the element whose annotations are returned
+        :param imports: include imports closure
+        :return: annotation dictionary
+        """
+        e = self.get_element(element_name, imports=imports)
+        return {k: v.value for k, v in e.annotations.items()}
+
+
+    @lru_cache()
+    def class_slots(self, class_name: CLASS_NAME = None, imports=True, direct=False, attributes=True) -> List[SlotDefinitionName]:
+        """
+        :param class_name: class whose slots are listed
+        :param imports: include imports closure
+        :param direct: only returns slots directly associated with a class (default is False)
+        :param attributes: include attribute declarations as well as slots (default is True)
+        :return: all slot names applicable for a class
+        """
+        if direct:
+            ancs = [class_name]
+        else:
+            ancs = self.class_ancestors(class_name, imports=imports)
+        slots = []
+        for an in ancs:
+            a = self.get_class(an, imports)
+            slots += a.slots
+            if attributes:
+                slots += a.attributes.keys()
+        slots_nr = []
+        for s in slots:
+            if s not in slots_nr:
+                slots_nr.append(s)
+        return slots_nr
+
+    @lru_cache()
+    def induced_slot(self, slot_name: SLOT_NAME, class_name: CLASS_NAME = None, imports=True) -> SlotDefinition:
+        """
+        Given a slot, in the context of a particular class, yield a dynamic SlotDefinition that
+        has all properties materialized.
+
+        This makes use of schema slots, such as attributes, slot_usage. It also uses ancestor relationships
+        to infer missing values
+
+        :param slot_name: slot to be queried
+        :param class_name: class used as context
+        :param imports: include imports closure
+        :return: dynamic slot constructed by inference
+        """
+        slot = self.get_slot(slot_name, imports)
+        ancs = self.class_ancestors(class_name, imports=imports) if class_name is not None else []  # no class context: nothing to inherit from
+        islot = None
+        if slot is not None:
+            islot = copy(slot)
+        else:
+            for an in ancs:
+                a = self.get_class(an, imports)
+                if slot_name in a.attributes:
+                    islot = copy(a.attributes[slot_name])
+                    break
+        if islot is None:
+            raise Exception(f'No such slot: {slot_name} and no attribute by that name in ancestors of {class_name}')
+
+        COMBINE = {
+            'maximum_value': lambda x, y: min(x, y),
+            'minimum_value': lambda x, y: max(x, y),
+        }
+        for metaslot_name in SlotDefinition._inherited_slots:
+            v = getattr(islot, metaslot_name, None)
+            for an in ancs:
+                a = self.get_class(an, imports)
+                anc_slot_usage = a.slot_usage.get(slot_name, {})
+                v2 = getattr(anc_slot_usage, metaslot_name, None)
+                if v is None:
+                    v = v2
+                else:
+                    if metaslot_name in COMBINE:
+                        if v2 is not None:
+                            v = COMBINE[metaslot_name](v, v2)
+                    else:
+                        break
+            if v is None:
+                if metaslot_name == 'range':
+                    v = self.schema.default_range
+            if v is not None:
+                setattr(islot, metaslot_name, v)
+        return islot
+
+    @lru_cache()
+    def usage_index(self) -> Dict[ElementName, List[SchemaUsage]]:
+        """
+        :return: dictionary keyed by used elements
+        """
+        ROLES = ['domain', 'range']
+        ix = defaultdict(list)
+        for cn, c in self.all_class().items():
+            for sn in self.class_slots(cn):
+                s = self.induced_slot(sn, cn)
+                for k in ROLES:
+                    v = getattr(s, k)
+                    if isinstance(v, list):
+                        vl = v
+                    else:
+                        vl = [v]
+                    for x in vl:
+                        u = SchemaUsage(used_by=cn, slot=sn, metaslot=k, used=x)
+                        ix[x].append(u)
+        return ix
+
+    # MUTATION OPERATIONS
+
+    def add_class(self, cls: ClassDefinition) -> None:
+        """
+        :param cls: class to be added
+        :return:
+        """
+        self.schema.classes[cls.name] = cls
+        self.set_modified()
+
+    def add_slot(self, slot: SlotDefinition) -> None:
+        """
+        :param slot: slot to be added
+        :return:
+        """
+        self.schema.slots[slot.name] = slot
+        self.set_modified()
+
+    def add_enum(self, enum: EnumDefinition) -> None:
+        """
+        :param enum: enum to be added
+        :return:
+        """
+        self.schema.enums[enum.name] = enum
+        self.set_modified()
+
+    def add_type(self, type: TypeDefinition) -> None:
+        """
+        :param type: type to be added
+        :return:
+        """
+        self.schema.types[type.name] = type
+        self.set_modified()
+
+    def add_subset(self, subset: SubsetDefinition) -> None:
+        """
+        :param subset: subset to be added
+        :return:
+        """
+        self.schema.subsets[subset.name] = subset  # was `type`, which stored the builtin instead of the subset
+        self.set_modified()
+
+    def delete_class(self, class_name: ClassDefinitionName) -> None:
+        """
+        :param class_name: class to be deleted
+        :return:
+        """
+        del self.schema.classes[class_name]
+        self.set_modified()
+
+    def delete_slot(self, slot_name: SlotDefinitionName) -> None:
+        """
+        :param slot_name: slot to be deleted
+        :return:
+        """
+        del self.schema.slots[slot_name]
+        self.set_modified()
+
+    def delete_enum(self, enum_name: EnumDefinitionName) -> None:
+        """
+        :param enum_name: enum to be deleted
+        :return:
+        """
+        del self.schema.enums[enum_name]
+        self.set_modified()
+
+    def delete_type(self, type_name: TypeDefinitionName) -> None:
+        """
+        :param type_name: type to be deleted
+        :return:
+        """
+        del self.schema.types[type_name]
+        self.set_modified()
+
+    def delete_subset(self, subset_name: SubsetDefinitionName) -> None:
+        """
+        :param subset_name: subset to be deleted
+        :return:
+        """
+        del self.schema.subsets[subset_name]
+        self.set_modified()
+
+    def merge_schema(self, schema: SchemaDefinition) -> None:
+        """
+        merges another schema into this one; entries already present in this schema are kept
+        :param schema: schema to be merged
+        """
+        dest = self.schema
+        for k, v in schema.prefixes.items():
+            if k not in dest.prefixes:
+                dest.prefixes[k] = copy(v)
+        for k, v in schema.classes.items():
+            if k not in dest.classes:
+                dest.classes[k] = copy(v)
+        for k, v in schema.slots.items():
+            if k not in dest.slots:
+                dest.slots[k] = copy(v)
+        for k, v in schema.types.items():
+            if k not in dest.types:
+                dest.types[k] = copy(v)
+        for k, v in schema.enums.items():
+            if k not in dest.enums:
+                dest.enums[k] = copy(v)
+        for k, v in schema.subsets.items():
+            if k not in dest.subsets:
+                dest.subsets[k] = copy(v)
+        self.set_modified()
+
+    def set_modified(self) -> None:  # bumps the counter that feeds __hash__, so lru_cache entries are not reused
+        self.modifications += 1
diff --git a/notebooks/SchemaView_BioLink.ipynb b/notebooks/SchemaView_BioLink.ipynb new file mode 100644 index 00000000..90cfbab0 --- /dev/null +++ b/notebooks/SchemaView_BioLink.ipynb @@ -0,0 +1,575 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_runtime.utils.schemaview import SchemaView" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "view = SchemaView(\"../tests/test_utils/input/biolink-model.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading schema https://w3id.org/linkml/types from ../tests/test_utils/input/biolink-model.yaml\n" + ] + }, + { + "data": { + "text/plain": [ + "['Biolink-Model', 'linkml:types']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.imports_closure()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(257, 421, 4)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(view.all_class()), len(view.all_slot()), len(view.all_subset())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['gene',\n", + " 'gene or gene product',\n", + " 'genomic entity',\n", + " 
'chemical entity or gene or gene product',\n", + " 'physical essence',\n", + " 'ontology class',\n", + " 'biological entity',\n", + " 'named thing',\n", + " 'entity',\n", + " 'physical essence or occurrent',\n", + " 'thing with taxon',\n", + " 'macromolecular machine mixin']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.class_ancestors('gene')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['biolink:Gene',\n", + " 'biolink:GeneOrGeneProduct',\n", + " 'biolink:GenomicEntity',\n", + " 'biolink:ChemicalEntityOrGeneOrGeneProduct',\n", + " 'biolink:PhysicalEssence',\n", + " 'biolink:OntologyClass',\n", + " 'biolink:BiologicalEntity',\n", + " 'biolink:NamedThing',\n", + " 'biolink:Entity',\n", + " 'biolink:PhysicalEssenceOrOccurrent',\n", + " 'biolink:ThingWithTaxon',\n", + " 'biolink:MacromolecularMachineMixin']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[view.get_uri(c) for c in view.class_ancestors('gene')]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['https://w3id.org/biolink/vocab/Gene',\n", + " 'https://w3id.org/biolink/vocab/GeneOrGeneProduct',\n", + " 'https://w3id.org/biolink/vocab/GenomicEntity',\n", + " 'https://w3id.org/biolink/vocab/ChemicalEntityOrGeneOrGeneProduct',\n", + " 'https://w3id.org/biolink/vocab/PhysicalEssence',\n", + " 'https://w3id.org/biolink/vocab/OntologyClass',\n", + " 'https://w3id.org/biolink/vocab/BiologicalEntity',\n", + " 'https://w3id.org/biolink/vocab/NamedThing',\n", + " 'https://w3id.org/biolink/vocab/Entity',\n", + " 'https://w3id.org/biolink/vocab/PhysicalEssenceOrOccurrent',\n", + " 'https://w3id.org/biolink/vocab/ThingWithTaxon',\n", + " 'https://w3id.org/biolink/vocab/MacromolecularMachineMixin']" + ] + }, + 
"execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[view.get_uri(c, expand=True) for c in view.class_ancestors('gene')]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['gene', 'biological entity', 'named thing', 'entity']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.class_ancestors('gene', mixins=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['affects', 'related to']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.slot_ancestors('affects')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['affects abundance of',\n", + " 'abundance affected by',\n", + " 'affects activity of',\n", + " 'activity affected by',\n", + " 'activity affects',\n", + " 'affects expression of',\n", + " 'expression affected by',\n", + " 'affects folding of',\n", + " 'folding affected by',\n", + " 'affects localization of',\n", + " 'localization affected by',\n", + " 'affects metabolic processing of',\n", + " 'metabolic processing affected by',\n", + " 'affects molecular modification of',\n", + " 'molecular modification affected by',\n", + " 'affects synthesis of',\n", + " 'sythesis affected by',\n", + " 'affects degradation of',\n", + " 'degradation affected by',\n", + " 'affects mutation rate of',\n", + " 'mutation rate affected by',\n", + " 'affects response to',\n", + " 'response affected by',\n", + " 'affects splicing of',\n", + " 'splicing affected by',\n", + " 'affects stability of',\n", + " 'stability affected by',\n", + " 'affects transport of',\n", + " 'transport affected by',\n", + " 'affects secretion of',\n", + " 'secretion affected 
by',\n", + " 'affects uptake of',\n", + " 'uptake affected by',\n", + " 'process regulates process',\n", + " 'entity regulates entity',\n", + " 'disrupts',\n", + " 'ameliorates',\n", + " 'exacerbates',\n", + " 'affects expression in']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.slot_children('affects')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "affects = view.get_slot('affects')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SEMMEDDB:AFFECTS', 'SEMMEDDB:affects', 'DGIdb:affects', 'RTXKG1:affects']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "affects.exact_mappings" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'self': ['biolink:affects'],\n", + " 'native': ['biolink:affects'],\n", + " 'exact': ['SEMMEDDB:AFFECTS',\n", + " 'SEMMEDDB:affects',\n", + " 'DGIdb:affects',\n", + " 'RTXKG1:affects'],\n", + " 'narrow': ['SEMMEDDB:administered_to',\n", + " 'CTD:prediction_hypothesis',\n", + " 'GOREL:0001006',\n", + " 'CTD:inferred',\n", + " 'UPHENO:0000001',\n", + " 'RO:0002263',\n", + " 'RO:0002264',\n", + " 'NCIT:R158',\n", + " 'NCIT:R160',\n", + " 'NCIT:R30',\n", + " 'NCIT:R150',\n", + " 'NCIT:R72',\n", + " 'NCIT:R146',\n", + " 'NCIT:R124',\n", + " 'NCIT:R173',\n", + " 'NCIT:R100',\n", + " 'NCIT:R102',\n", + " 'NCIT:R101',\n", + " 'NCIT:R113',\n", + " 'NCIT:R23',\n", + " 'NCIT:R25',\n", + " 'NCIT:gene_mapped_to_disease',\n", + " 'NCIT:R133',\n", + " 'RO:0002343',\n", + " 'RO:0002355',\n", + " 'RO:0002591',\n", + " 'RO:0002592',\n", + " 'RO:0012003',\n", + " 'SNOMED:has_pathological_process'],\n", + " 'broad': [],\n", + " 'related': ['DRUGBANK:pathway'],\n", + " 'close': [],\n", + " 
'undefined': []}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.get_mappings(affects.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'self': ['https://w3id.org/biolink/vocab/affects'],\n", + " 'native': ['https://w3id.org/biolink/vocab/affects'],\n", + " 'exact': ['https://skr3.nlm.nih.gov/SemMedDBAFFECTS',\n", + " 'https://skr3.nlm.nih.gov/SemMedDBaffects',\n", + " 'https://www.dgidb.org/interaction_typesaffects',\n", + " 'http://kg1endpoint.rtx.ai/affects'],\n", + " 'narrow': ['https://skr3.nlm.nih.gov/SemMedDBadministered_to',\n", + " 'http://translator.ncats.nih.gov/CTD_prediction_hypothesis',\n", + " 'http://purl.obolibrary.org/obo/GOREL_0001006',\n", + " 'http://translator.ncats.nih.gov/CTD_inferred',\n", + " 'http://purl.obolibrary.org/obo/UPHENO_0000001',\n", + " 'http://purl.obolibrary.org/obo/RO_0002263',\n", + " 'http://purl.obolibrary.org/obo/RO_0002264',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R158',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R160',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R30',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R150',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R72',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R146',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R124',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R173',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R100',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R102',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R101',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R113',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R23',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R25',\n", + " 'http://purl.obolibrary.org/obo/NCIT_gene_mapped_to_disease',\n", + " 'http://purl.obolibrary.org/obo/NCIT_R133',\n", + " 'http://purl.obolibrary.org/obo/RO_0002343',\n", + " 'http://purl.obolibrary.org/obo/RO_0002355',\n", + 
" 'http://purl.obolibrary.org/obo/RO_0002591',\n", + " 'http://purl.obolibrary.org/obo/RO_0002592',\n", + " 'http://purl.obolibrary.org/obo/RO_0012003',\n", + " 'http://purl.obolibrary.org/obo/SNOMED_has_pathological_process'],\n", + " 'broad': [],\n", + " 'related': ['http://identifiers.org/drugbank/pathway'],\n", + " 'close': [],\n", + " 'undefined': []}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.get_mappings(affects.name, expand=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['association',\n", + " 'contributor association',\n", + " 'genotype to genotype part association',\n", + " 'genotype to gene association',\n", + " 'genotype to variant association',\n", + " 'gene to gene association',\n", + " 'gene to gene homology association',\n", + " 'gene to gene coexpression association',\n", + " 'pairwise gene to gene interaction',\n", + " 'pairwise molecular interaction',\n", + " 'cell line to disease or phenotypic feature association',\n", + " 'chemical to chemical association',\n", + " 'reaction to participant association',\n", + " 'reaction to catalyst association',\n", + " 'chemical to chemical derivation association',\n", + " 'chemical to disease or phenotypic feature association',\n", + " 'chemical to pathway association',\n", + " 'chemical to gene association',\n", + " 'drug to gene association',\n", + " 'material sample derivation association']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[c for c in view.all_class().keys() if view.is_relationship(c)][0:20]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"view.annotation_dict(affects.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'biolink:canonical_predicate': Annotation(tag='biolink:canonical_predicate', value='True', extensions={}, annotations={})}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "affects.annotations" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from linkml_runtime.linkml_model.annotations import Annotation, Annotatable\n", + "\n", + "isinstance(affects, Annotatable)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SlotDefinition(name='affects', id_prefixes=[], definition_uri=None, aliases=[], local_names={}, mappings=[], exact_mappings=['SEMMEDDB:AFFECTS', 'SEMMEDDB:affects', 'DGIdb:affects', 'RTXKG1:affects'], close_mappings=[], related_mappings=['DRUGBANK:pathway'], narrow_mappings=['SEMMEDDB:administered_to', 'CTD:prediction_hypothesis', 'GOREL:0001006', 'CTD:inferred', 'UPHENO:0000001', 'RO:0002263', 'RO:0002264', 'NCIT:R158', 'NCIT:R160', 'NCIT:R30', 'NCIT:R150', 'NCIT:R72', 'NCIT:R146', 'NCIT:R124', 'NCIT:R173', 'NCIT:R100', 'NCIT:R102', 'NCIT:R101', 'NCIT:R113', 'NCIT:R23', 'NCIT:R25', 'NCIT:gene_mapped_to_disease', 'NCIT:R133', 'RO:0002343', 'RO:0002355', 'RO:0002591', 'RO:0002592', 'RO:0012003', 'SNOMED:has_pathological_process'], broad_mappings=[], extensions={}, annotations={'biolink:canonical_predicate': Annotation(tag='biolink:canonical_predicate', value='True', extensions={}, annotations={})}, description=\"describes an entity that has a direct affect on the state or quality of another existing entity. 
Use of the 'affects' predicate implies that the affected entity already exists, unlike predicates such as 'affects risk for' and 'prevents, where the outcome is something that may or may not come to be.\", alt_descriptions={}, deprecated=None, todos=[], notes=[], comments=[], examples=[], in_subset=['translator_minimal'], from_schema=None, imported_from=None, see_also=[], deprecated_element_has_exact_replacement=None, deprecated_element_has_possible_replacement=None, is_a='related to', abstract=None, mixin=None, mixins=[], apply_to=[], values_from=[], created_by=None, created_on=None, last_updated_on=None, modified_by=None, status=None, string_serialization=None, singular_name=None, domain=None, range=None, slot_uri=None, multivalued=None, inherited=None, readonly=None, ifabsent=None, required=None, recommended=None, inlined=None, inlined_as_list=None, key=None, identifier=None, alias=None, owner=None, domain_of=[], subproperty_of=None, symmetric=None, inverse=None, is_class_field=None, role=None, is_usage_slot=None, usage_slot_name=None, minimum_value=None, maximum_value=None, pattern=None)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "affects" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SlotDefinition(name='affects', id_prefixes=[], definition_uri=None, aliases=[], local_names={}, mappings=[], exact_mappings=['SEMMEDDB:AFFECTS', 'SEMMEDDB:affects', 'DGIdb:affects', 'RTXKG1:affects'], close_mappings=[], related_mappings=['DRUGBANK:pathway'], narrow_mappings=['SEMMEDDB:administered_to', 'CTD:prediction_hypothesis', 'GOREL:0001006', 'CTD:inferred', 'UPHENO:0000001', 'RO:0002263', 'RO:0002264', 'NCIT:R158', 'NCIT:R160', 'NCIT:R30', 'NCIT:R150', 'NCIT:R72', 'NCIT:R146', 'NCIT:R124', 'NCIT:R173', 'NCIT:R100', 'NCIT:R102', 'NCIT:R101', 'NCIT:R113', 'NCIT:R23', 'NCIT:R25', 'NCIT:gene_mapped_to_disease', 'NCIT:R133', 
'RO:0002343', 'RO:0002355', 'RO:0002591', 'RO:0002592', 'RO:0012003', 'SNOMED:has_pathological_process'], broad_mappings=[], extensions={}, annotations={'biolink:canonical_predicate': Annotation(tag='biolink:canonical_predicate', value='True', extensions={}, annotations={})}, description=\"describes an entity that has a direct affect on the state or quality of another existing entity. Use of the 'affects' predicate implies that the affected entity already exists, unlike predicates such as 'affects risk for' and 'prevents, where the outcome is something that may or may not come to be.\", alt_descriptions={}, deprecated=None, todos=[], notes=[], comments=[], examples=[], in_subset=['translator_minimal'], from_schema=None, imported_from=None, see_also=[], deprecated_element_has_exact_replacement=None, deprecated_element_has_possible_replacement=None, is_a='related to', abstract=None, mixin=None, mixins=[], apply_to=[], values_from=[], created_by=None, created_on=None, last_updated_on=None, modified_by=None, status=None, string_serialization=None, singular_name=None, domain=None, range=None, slot_uri=None, multivalued=None, inherited=None, readonly=None, ifabsent=None, required=None, recommended=None, inlined=None, inlined_as_list=None, key=None, identifier=None, alias=None, owner=None, domain_of=[], subproperty_of=None, symmetric=None, inverse=None, is_class_field=None, role=None, is_usage_slot=None, usage_slot_name=None, minimum_value=None, maximum_value=None, pattern=None)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e = view.get_element('affects')\n", + "e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/SchemaView_MIxS.ipynb b/notebooks/SchemaView_MIxS.ipynb new file mode 100644 index 00000000..717b6775 --- /dev/null +++ b/notebooks/SchemaView_MIxS.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d82643aa", + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_runtime.utils.schemaview import SchemaView" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "850e4b34", + "metadata": {}, + "outputs": [], + "source": [ + "view = SchemaView(\"../tests/test_utils/input/mixs/mixs.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c0e9ccd4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading schema water from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema terms from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema ranges from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema https://w3id.org/linkml/types from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema wastewater_sludge from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema soil from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema sediment from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema plant_associated from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema miscellaneous_natural_or_artificial_environment from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema microbial_mat_biofilm from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema hydrocarbon_resources_fluids_swabs from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema hydrocarbon_resources_cores from 
../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema human_vaginal from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema human_skin from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema human_oral from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema human_gut from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema human_associated from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema host_associated from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema built_environment from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema air from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema core from ../tests/test_utils/input/mixs/mixs.yaml\n", + "Loading schema checklists from ../tests/test_utils/input/mixs/mixs.yaml\n" + ] + }, + { + "data": { + "text/plain": [ + "609" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(view.all_slot())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "98f7305b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['water',\n", + " 'quantity value',\n", + " 'wastewater_sludge',\n", + " 'soil',\n", + " 'sediment',\n", + " 'plant-associated',\n", + " 'miscellaneous natural or artificial environment',\n", + " 'microbial mat_biofilm',\n", + " 'hydrocarbon resources-fluids_swabs',\n", + " 'hydrocarbon resources-cores']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(view.all_class().keys())[0:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "84e6ff27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.get_slot('elev').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": 
20, + "id": "cf892380", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.induced_slot('elev', 'soil').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "735859e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.induced_slot('elev', 'human-gut').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "ff33091c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Minimum Information About an Uncultivated Virus Genome'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.get_class('MIUVIG').description" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "11a0226e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['MIUVIG',\n", + " 'air MIUVIG',\n", + " 'built environment MIUVIG',\n", + " 'host-associated MIUVIG',\n", + " 'human-associated MIUVIG',\n", + " 'human-gut MIUVIG',\n", + " 'human-oral MIUVIG',\n", + " 'human-skin MIUVIG',\n", + " 'human-vaginal MIUVIG',\n", + " 'hydrocarbon resources-cores MIUVIG',\n", + " 'hydrocarbon resources-fluids_swabs MIUVIG',\n", + " 'microbial mat_biofilm MIUVIG',\n", + " 'miscellaneous natural or artificial environment MIUVIG',\n", + " 'plant-associated MIUVIG',\n", + " 'sediment MIUVIG',\n", + " 'soil MIUVIG',\n", + " 'wastewater_sludge MIUVIG',\n", + " 'water MIUVIG']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.class_descendants('MIUVIG')" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4656e62b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": 
[ + "True" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.induced_slot('vir_ident_software', 'MIUVIG').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2e103d35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.induced_slot('vir_ident_software', 'soil MIUVIG').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "1b9d4bd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "view.induced_slot('vir_ident_software', 'soil').required is True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b8dcfa2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index 1e205f39..ac98c179 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -1,3 +1,9 @@ +import os + METAMODEL_CONTEXT_URI = "https://w3id.org/linkml/meta.context.jsonld" META_BASE_URI = "https://w3id.org/linkml/" +TESTING_DIR = os.path.abspath(os.path.dirname(__file__)) + +INPUT_DIR = os.path.join(TESTING_DIR, 'input') +OUTPUT_DIR = os.path.join(TESTING_DIR, 'output') diff --git a/tests/test_utils/input/core.yaml b/tests/test_utils/input/core.yaml new 
file mode 100644 index 00000000..1d0d2f7c --- /dev/null +++ b/tests/test_utils/input/core.yaml @@ -0,0 +1,95 @@ +id: https://w3id.org/linkml/tests/core +name: core +description: |- + core schema imported by kitchen_sink +default_curi_maps: + - semweb_context +imports: + - linkml:types +prefixes: + pav: http://purl.org/pav/ + dce: http://purl.org/dc/elements/1.1/ + linkml: https://w3id.org/linkml/ + biolink: https://w3id.org/biolink/ + core: https://w3id.org/linkml/tests/core/ + prov: http://www.w3.org/ns/prov# +default_prefix: core +license: https://creativecommons.org/publicdomain/zero/1.0/ + +types: + + +classes: + + activity: + description: "a provence-generating activity" + slots: + - id + - started at time + - ended at time + - was informed by + - was associated with + - used + - description + exact_mappings: + - prov:Activity + + agent: + description: "a provence-generating agent" + slots: + - id + - acted on behalf of + - was informed by + class_uri: prov:Agent + + TestClass: + +slots: + + id: + identifier: true + + name: + required: false + + description: + range: NarrativeText + + started at time: + slot_uri: prov:startedAtTime + range: date + + ended at time: + slot_uri: prov:endedAtTime + range: date + + was informed by: + range: activity + slot_uri: prov:wasInformedBy + + was associated with: + range: agent + slot_uri: prov:wasAssociatedWith + inlined: false + + acted on behalf of: + range: agent + slot_uri: prov:actedOnBehalfOf + + was generated by: + range: activity + slot_uri: prov:wasGeneratedBy + + used: + domain: activity + slot_uri: prov:used + + activity set: + range: activity + multivalued: true + inlined_as_list: true + + agent set: + range: agent + multivalued: true + inlined_as_list: true diff --git a/tests/test_utils/input/kitchen_sink.yaml b/tests/test_utils/input/kitchen_sink.yaml new file mode 100644 index 00000000..12bfd87e --- /dev/null +++ b/tests/test_utils/input/kitchen_sink.yaml @@ -0,0 +1,242 @@ +id: 
https://w3id.org/linkml/tests/kitchen_sink +name: kitchen_sink +description: |- + Kitchen Sink Schema + + This schema does not do anything useful. It exists to test all features of linkml. + + This particular text field exists to demonstrate markdown within a text field: + + Lists: + + * a + * b + * c + + And links, e.g to [Person](Person.md) + +default_curi_maps: + - semweb_context +imports: + - linkml:types + - core +prefixes: + pav: http://purl.org/pav/ + dce: http://purl.org/dc/elements/1.1/ + lego: http://geneontology.org/lego/ + linkml: https://w3id.org/linkml/ + biolink: https://w3id.org/biolink/ + ks: https://w3id.org/linkml/tests/kitchen_sink/ + RO: http://purl.obolibrary.org/obo/RO_ + BFO: http://purl.obolibrary.org/obo/BFO_ +default_prefix: ks +default_range: string +see_also: + - https://example.org/ + +subsets: + + subset A: + description: >- + test subset A + comments: + - this subset is meaningless, it is just here for testing + aliases: + - A + subset B: + description: >- + test subset B + aliases: + - B + +classes: + + HasAliases: + mixin: true + attributes: + aliases: + multivalued: true + + Thing: + slots: + - id + - name + + Person: + is_a: Thing + in_subset: + - subset A + mixins: + - HasAliases + slots: + - has employment history + - has familial relationships + - has medical history + - age in years + - addresses + - has birth event + slot_usage: + name: + pattern: "^\\S+ \\S+" ## do not do this in a real schema, people have all kinds of names + + Adult: + is_a: Person + slot_usage: + age in years: + minimum_value: 16 + + Organization: + is_a: Thing + mixins: + - HasAliases + + Place: + mixins: + - HasAliases + slots: + - id + - name + Address: + slots: + - street + - city + + Event: + slots: + - started at time + - ended at time + - is current + + Relationship: + slots: + - started at time + - ended at time + - related to + - type + + FamilialRelationship: + is_a: Relationship + slot_usage: + type: + range: FamilialRelationshipType + 
required: true + related to: + range: Person + required: true + + BirthEvent: + is_a: Event + slots: + - in location + + EmploymentEvent: + is_a: Event + slots: + - employed at + + MedicalEvent: + is_a: Event + + WithLocation: + mixin: true + slots: + - in location + + MarriageEvent: + is_a: Event + mixins: + - WithLocation + slots: + - married to + + Company: + is_a: Organization + attributes: + ceo: + range: Person + + Dataset: + attributes: + persons: + range: Person + inlined: true + inlined_as_list: true + multivalued: true + companies: + range: Company + inlined_as_list: true + inlined: true + multivalued: true + activities: + range: activity + inlined_as_list: true + inlined: true + multivalued: true + +slots: + employed at: + range: Company + in_subset: + - subset A + is current: + range: boolean + annotations: + tag: ks:foo + value: bar + + has employment history: + range: EmploymentEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + has marriage history: + range: MarriageEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + has medical history: + range: MedicalEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + has familial relationships: + range: FamilialRelationship + multivalued: true + inlined_as_list: true + in_subset: + - subset B + married to: + range: Person + in location: + range: Place + addresses: + range: Address + multivalued: True + age in years: + range: integer + minimum_value: 0 + maximum_value: 999 + in_subset: + - subset A + - subset B + related to: + type: + street: + city: + has birth event: + range: BirthEvent + + + + + +enums: + FamilialRelationshipType: + permissible_values: + SIBLING_OF: + PARENT_OF: + CHILD_OF: + DiagnosisType: diff --git a/tests/test_utils/input/kitchen_sink_noimports.yaml b/tests/test_utils/input/kitchen_sink_noimports.yaml new file mode 100644 index 00000000..299f3449 --- /dev/null +++ 
b/tests/test_utils/input/kitchen_sink_noimports.yaml @@ -0,0 +1,313 @@ +id: https://w3id.org/linkml/tests/kitchen_sink +name: kitchen_sink +description: |- + Kitchen Sink Schema (no imports version) + + This schema does not do anything useful. It exists to test all features of linkml. + + This particular text field exists to demonstrate markdown within a text field: + + Lists: + + * a + * b + * c + + And links, e.g to [Person](Person.md) + +default_curi_maps: + - semweb_context +prefixes: + pav: http://purl.org/pav/ + dce: http://purl.org/dc/elements/1.1/ + lego: http://geneontology.org/lego/ + linkml: https://w3id.org/linkml/ + biolink: https://w3id.org/biolink/ + ks: https://w3id.org/linkml/tests/kitchen_sink/ + RO: http://purl.obolibrary.org/obo/RO_ + BFO: http://purl.obolibrary.org/obo/BFO_ +default_prefix: ks +default_range: string +see_also: + - https://example.org/ + +subsets: + + subset A: + description: >- + test subset A + comments: + - this subset is meaningless, it is just here for testing + aliases: + - A + subset B: + description: >- + test subset B + aliases: + - B + +classes: + + HasAliases: + mixin: true + attributes: + aliases: + multivalued: true + + Thing: + slots: + - id + - name + + Person: + is_a: Thing + in_subset: + - subset A + mixins: + - HasAliases + slots: + - has employment history + - has familial relationships + - has medical history + - age in years + - addresses + - has birth event + slot_usage: + name: + pattern: "^\\S+ \\S+" ## do not do this in a real schema, people have all kinds of names + + Adult: + is_a: Person + slot_usage: + age in years: + minimum_value: 16 + + Organization: + is_a: Thing + mixins: + - HasAliases + + Place: + mixins: + - HasAliases + slots: + - id + - name + Address: + slots: + - street + - city + + Event: + slots: + - started at time + - ended at time + - is current + + Relationship: + slots: + - started at time + - ended at time + - related to + - type + + FamilialRelationship: + is_a: Relationship + 
slot_usage: + type: + range: FamilialRelationshipType + required: true + related to: + range: Person + required: true + + BirthEvent: + is_a: Event + slots: + - in location + + EmploymentEvent: + is_a: Event + slots: + - employed at + + MedicalEvent: + is_a: Event + + WithLocation: + mixin: true + slots: + - in location + + MarriageEvent: + is_a: Event + mixins: + - WithLocation + slots: + - married to + + Company: + is_a: Organization + attributes: + ceo: + range: Person + + Dataset: + attributes: + persons: + range: Person + inlined: true + inlined_as_list: true + multivalued: true + companies: + range: Company + inlined_as_list: true + inlined: true + multivalued: true + activities: + range: activity + inlined_as_list: true + inlined: true + multivalued: true + + activity: + description: "a provence-generating activity" + slots: + - id + - started at time + - ended at time + - was informed by + - was associated with + - used + - description + exact_mappings: + - prov:Activity + + agent: + description: "a provence-generating agent" + slots: + - id + - acted on behalf of + - was informed by + class_uri: prov:Agent + +slots: + employed at: + range: Company + in_subset: + - subset A + annotations: + - tag: "ks:a1" + value: [1,2,3] + - tag: "ks:a2" + value: ["v1", "v2", "v3"] + - tag: "ks:a3" + value: 'a3.1' + - tag: "ks:a3" + value: 'v3.2' + is current: + range: boolean + annotations: + "ks:foo": bar + has employment history: + range: EmploymentEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + annotations: + "ks:mv": 1 + has marriage history: + range: MarriageEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + has medical history: + range: MedicalEvent + multivalued: true + inlined_as_list: true + in_subset: + - subset B + has familial relationships: + range: FamilialRelationship + multivalued: true + inlined_as_list: true + in_subset: + - subset B + married to: + range: Person + in location: + range: Place + 
addresses: + range: Address + multivalued: True + age in years: + range: integer + minimum_value: 0 + maximum_value: 999 + in_subset: + - subset A + - subset B + related to: + type: + street: + city: + has birth event: + range: BirthEvent + + id: + identifier: true + + name: + required: false + + description: + + started at time: + slot_uri: prov:startedAtTime + range: date + + ended at time: + slot_uri: prov:endedAtTime + range: date + + was informed by: + range: activity + slot_uri: prov:wasInformedBy + + was associated with: + range: agent + slot_uri: prov:wasAssociatedWith + inlined: false + + acted on behalf of: + range: agent + slot_uri: prov:actedOnBehalfOf + + was generated by: + range: activity + slot_uri: prov:wasGeneratedBy + + used: + domain: activity + slot_uri: prov:used + + activity set: + range: activity + multivalued: true + inlined_as_list: true + + agent set: + range: agent + multivalued: true + inlined_as_list: true + + +enums: + FamilialRelationshipType: + permissible_values: + SIBLING_OF: + PARENT_OF: + CHILD_OF: + DiagnosisType: diff --git a/tests/test_utils/test_schemaview.py b/tests/test_utils/test_schemaview.py new file mode 100644 index 00000000..c886869f --- /dev/null +++ b/tests/test_utils/test_schemaview.py @@ -0,0 +1,170 @@ +import os +import json +import unittest +from typing import List, Tuple, Any + +from linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition +from linkml_runtime.loaders.yaml_loader import YAMLLoader +from linkml_runtime.utils.schemaview import SchemaView + +from tests.test_utils import INPUT_DIR + +SCHEMA_NO_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink_noimports.yaml') +SCHEMA_WITH_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink.yaml') + +yaml_loader = YAMLLoader() + +class SchemaViewTestCase(unittest.TestCase): + + def test_schemaview(self): + # no import schema + view = SchemaView(SCHEMA_NO_IMPORTS) + print(view.imports_closure()) + assert len(view.imports_closure()) == 1 + all_cls = 
view.all_class() + print(f'n_cls = {len(all_cls)}') + + e = view.get_element('is current') + assert list(view.annotation_dict('is current').values()) == ['bar'] + print(view.annotation_dict('employed at')) + e = view.get_element('employed at') + print(e.annotations) + e = view.get_element('has employment history') + print(e.annotations) + #assert list(view.annotation_dict('employed at')[] + + if True: + # this section is mostly for debugging + for cn in all_cls.keys(): + print(f'{cn} PARENTS = {view.class_parents(cn)}') + print(f'{cn} ANCS = {view.class_ancestors(cn)}') + print(f'{cn} CHILDREN = {view.class_children(cn)}') + print(f'{cn} DESCS = {view.class_descendants(cn)}') + print(f'{cn} SCHEMA = {view.in_schema(cn)}') + print(f' SLOTS = {view.class_slots(cn)}') + for sn in view.class_slots(cn): + slot = view.get_slot(sn) + if slot is None: + print(f'NO SLOT: {sn}') + else: + print(f' SLOT {sn} R: {slot.range} U: {view.get_uri(sn)} ANCS: {view.slot_ancestors(sn)}') + induced_slot = view.induced_slot(sn, cn) + print(f' INDUCED {sn}={induced_slot}') + + print(f'ALL = {view.all_element().keys()}') + + # -- TEST ANCESTOR/DESCENDANTS FUNCTIONS -- + + self.assertCountEqual(['Company', 'Organization', 'HasAliases', 'Thing'], + view.class_ancestors('Company')) + self.assertCountEqual(['Organization', 'HasAliases', 'Thing'], + view.class_ancestors('Company', reflexive=False)) + self.assertCountEqual(['Thing', 'Person', 'Organization', 'Company', 'Adult'], + view.class_descendants('Thing')) + + # -- TEST CLASS SLOTS -- + + + self.assertCountEqual(['id', 'name', ## From Thing + 'has employment history', 'has familial relationships', 'has medical history', + 'age in years', 'addresses', 'has birth event', ## From Person + 'aliases' ## From HasAliases + ], + view.class_slots('Person')) + self.assertCountEqual(view.class_slots('Person'), view.class_slots('Adult')) + self.assertCountEqual(['id', 'name', ## From Thing + 'ceo', ## From Company + 'aliases' ## From HasAliases + ], 
+ view.class_slots('Company')) + + assert view.get_class('agent').class_uri == 'prov:Agent' + assert view.get_uri('agent') == 'prov:Agent' + print(view.get_class('Company').class_uri) + #assert view.get_class('Company').class_uri == 'prov:Agent' + assert view.get_uri('Company') == 'ks:Company' + + for c in ['Company', 'Person', 'Organization',]: + assert view.induced_slot('aliases', c).multivalued is True + + for c in ['Company', 'Person', 'Organization', 'Thing']: + assert view.induced_slot('id', c).identifier is True + assert view.induced_slot('name', c).identifier is not True + assert view.induced_slot('name', c).required is False + assert view.induced_slot('name', c).range == 'string' + for c in ['Event', 'EmploymentEvent', 'MedicalEvent']: + s = view.induced_slot('started at time', c) + print(f's={s.range} // c = {c}') + assert s.range == 'date' + assert s.slot_uri == 'prov:startedAtTime' + assert view.induced_slot('age in years', 'Person').minimum_value == 0 + assert view.induced_slot('age in years', 'Adult').minimum_value == 16 + assert view.induced_slot('type', 'FamilialRelationship').range == 'FamilialRelationshipType' + assert view.induced_slot('related to', 'FamilialRelationship').range == 'Person' + + u = view.usage_index() + for k, v in u.items(): + print(f' {k} = {v}') + + #for e in view.all_element(imports=True): + # print(view.annotation_dict(e)) + #print(u) + + + def test_caching(self): + s = SchemaDefinition(id='test', name='test') + view = SchemaView(s) + self.assertCountEqual([], view.all_class()) + view.add_class(ClassDefinition('X')) + self.assertCountEqual(['X'], view.all_class()) + view.add_class(ClassDefinition('Y')) + self.assertCountEqual(['X', 'Y'], view.all_class()) + # bypass view method + view.schema.classes['Z'] = ClassDefinition('Z') + self.assertCountEqual(['X', 'Y'], view.all_class()) + view.set_modified() + self.assertCountEqual(['X', 'Y', 'Z'], view.all_class()) + view.delete_class('X') + self.assertCountEqual(['Y', 'Z'], 
view.all_class()) + + def test_imports(self): + view = SchemaView(SCHEMA_WITH_IMPORTS) + print(view.imports_closure()) + self.assertCountEqual(['kitchen_sink', 'core', 'linkml:types'], view.imports_closure()) + for t in view.all_type().keys(): + print(f'T={t} in={view.in_schema(t)}') + assert view.in_schema('Person') == 'kitchen_sink' + assert view.in_schema('id') == 'core' + assert view.in_schema('name') == 'core' + assert view.in_schema('activity') == 'core' + assert view.in_schema('string') == 'types' + + for c in ['Company', 'Person', 'Organization', 'Thing']: + assert view.induced_slot('id', c).identifier is True + assert view.induced_slot('name', c).identifier is not True + assert view.induced_slot('name', c).required is False + assert view.induced_slot('name', c).range == 'string' + for c in ['Event', 'EmploymentEvent', 'MedicalEvent']: + s = view.induced_slot('started at time', c) + print(f's={s.range} // c = {c}') + assert s.range == 'date' + assert s.slot_uri == 'prov:startedAtTime' + assert view.induced_slot('age in years', 'Person').minimum_value == 0 + assert view.induced_slot('age in years', 'Adult').minimum_value == 16 + + + assert view.get_class('agent').class_uri == 'prov:Agent' + assert view.get_uri('agent') == 'prov:Agent' + print(view.get_class('Company').class_uri) + #assert view.get_class('Company').class_uri == 'prov:Agent' + assert view.get_uri('Company') == 'ks:Company' + assert view.get_uri('Company', expand=True) == 'https://w3id.org/linkml/tests/kitchen_sink/Company' + print(view.get_uri("TestClass")) + assert view.get_uri('TestClass') == 'core:TestClass' + assert view.get_uri('TestClass', expand=True) == 'https://w3id.org/linkml/tests/core/TestClass' + + assert view.get_uri('string') == 'xsd:string' + + +if __name__ == '__main__': + unittest.main()