From 1a553349943e24287500a319c97b2c6849035c36 Mon Sep 17 00:00:00 2001 From: cmungall Date: Wed, 6 Jul 2022 16:31:00 -0700 Subject: [PATCH 1/4] Adding Docker. Fixes #76 --- Dockerfile | 29 +++++++++++++++++++++++++++++ Makefile | 41 +++++++++++++++++++++++++++++++++++++++++ docs/install.rst | 19 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..9a2600c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +# set base image (host OS) +FROM python:3.9 + +# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker +ENV YOUR_ENV=${YOUR_ENV} \ + PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + POETRY_VERSION=1.1.13 + +# System deps: +RUN pip install "poetry==$POETRY_VERSION" + +# set the working directory in the container +WORKDIR /work + +RUN pip install schema-automator + +#COPY poetry.lock pyproject.toml /code/ + +# Project initialization: +#RUN poetry install + + +# command to run on container start +CMD [ "bash" ] diff --git a/Makefile b/Makefile index 7cccf94..35172b8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,10 @@ +VERSION = $(shell git tag | tail -1) + .PHONY: all clean test all: clean test target/soil_meanings.yaml + clean: rm -rf target/soil_meanings.yaml rm -rf target/soil_meanings_generated.yaml @@ -63,3 +66,41 @@ target/availabilities_g_s_strain_202112151116_org_meanings_curated.yaml: target/ # this can be used outside the poetry environment bin/schemauto: echo `poetry run which schemauto` '"$$@"' > $@ && chmod +x $@ + + +################################################ +#### Commands for building the Docker image #### +################################################ + +IM=linkml/schema-automator + +docker-build-no-cache: + @docker build --no-cache -t $(IM):$(VERSION) . 
\ + && docker tag $(IM):$(VERSION) $(IM):latest + +docker-build: + @docker build -t $(IM):$(VERSION) . \ + && docker tag $(IM):$(VERSION) $(IM):latest + +docker-build-use-cache-dev: + @docker build -t $(DEV):$(VERSION) . \ + && docker tag $(DEV):$(VERSION) $(DEV):latest + +docker-clean: + docker kill $(IM) || echo not running ; + docker rm $(IM) || echo not made + +docker-publish-no-build: + @docker push $(IM):$(VERSION) \ + && docker push $(IM):latest + +docker-publish-dev-no-build: + @docker push $(DEV):$(VERSION) \ + && docker push $(DEV):latest + +docker-publish: docker-build + @docker push $(IM):$(VERSION) \ + && docker push $(IM):latest + +docker-run: + @docker run -v $(PWD):/work -w /work -ti $(IM):$(VERSION) diff --git a/docs/install.rst b/docs/install.rst index c247ecd..db4c120 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,4 +1,7 @@ Installation +============ + +Direct Installation ------------ ``schema-automator`` and its components require Python 3.9 or greater. @@ -13,3 +16,19 @@ To check this works: schemauto --help +Running via Docker +------------------ + +You can use the `Schema Automator Docker Container `_ + +To start a shell + +.. code:: bash + + docker run -v $PWD:/work -w /work -ti linkml/schema-automator + +Within the shell you should see all your files, and you should have access to ``schemauto``: + +.. 
code:: bash + + schemauto --help From e3e2d1626a4eb5588bd1b8175b58630f4ef93b45 Mon Sep 17 00:00:00 2001 From: cmungall Date: Wed, 6 Jul 2022 16:31:21 -0700 Subject: [PATCH 2/4] fixing authors --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 4304a8c..4bc31b9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,8 +20,8 @@ # -- Project information ----------------------------------------------------- project = 'Schema Automator' -copyright = '2022, Chris Mungall' -author = 'Chris Mungall, Harshad Hegde' +copyright = '2022, LinkML Developers' +author = 'Chris Mungall, Harshad Hegde, Mark Miller' # The full version, including alpha/beta/rc tags # release = '0.1.4' From 292c4a54c57f2c14d2c14bf49e2a18e53f206e3e Mon Sep 17 00:00:00 2001 From: cmungall Date: Wed, 6 Jul 2022 16:31:31 -0700 Subject: [PATCH 3/4] Improving docs --- docs/index.rst | 19 ++++++++++++++++++- docs/introduction.rst | 16 +++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index dbabeeb..dfd4006 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,12 +1,29 @@ LinkML Schema Automator ============================================ -Schema Automator is a toolkit for bootstrapping and automatically enhancing LinkML schemas from a variety of sources +Schema Automator is a toolkit for bootstrapping and automatically enhancing schemas from a variety of sources. + +Use cases include: + +1. Inferring an initial schema or data dictionary from a dataset that is a collection of TSVs +2. Automatically annotating schema elements and enumerations using the BioPortal annotator +3. Importing from a language like RDFS/OWL + +The primary output of Schema Automator is a `LinkML Schema `_. This can be converted to other +schema frameworks, including: + +* JSON-Schema +* SQL DDL +* SHACL +* ShEx +* RDFS/OWL +* Python dataclasses or Pydantic .. 
toctree:: :maxdepth: 3 :caption: Contents: + index introduction install cli diff --git a/docs/introduction.rst b/docs/introduction.rst index 18f923e..c37a821 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -1,4 +1,6 @@ -LinkML Schema Automator +.. _introduction: + +Introduction ======================= This is a toolkit that assists with generating and enhancing schemas and data models from a variety @@ -17,17 +19,17 @@ See :ref:`generalizers` Generalizers allow you to *bootstrap* a schema by generalizing from existing data files -- TSVs and spreadsheets -- SQLite databases -- RDF instance graphs +* TSVs and spreadsheets +* SQLite databases +* RDF instance graphs -Importing from alternative modeling framework +Importing from alternative modeling frameworks --------------------------------- See :ref:`importers` -- OWL (but this only works for schema-style OWL) -- JSON-Schema +* OWL (but this only works for schema-style OWL) +* JSON-Schema In future other frameworks will be supported From c0295e4097fe9d59ebc93a80152ed6500d6c6af5 Mon Sep 17 00:00:00 2001 From: cmungall Date: Wed, 6 Jul 2022 16:31:38 -0700 Subject: [PATCH 4/4] Adding schema enricher --- .../annotators/schema_annotator.py | 19 +++++++++++ schema_automator/cli.py | 17 ++++++++++ tests/resources/so-mini.obo | 4 +++ tests/test_annotators/test_schema_enricher.py | 32 +++++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 tests/resources/so-mini.obo create mode 100644 tests/test_annotators/test_schema_enricher.py diff --git a/schema_automator/annotators/schema_annotator.py b/schema_automator/annotators/schema_annotator.py index 9b897d7..882105a 100644 --- a/schema_automator/annotators/schema_annotator.py +++ b/schema_automator/annotators/schema_annotator.py @@ -90,6 +90,25 @@ def annotate_schema(self, schema: Union[SchemaDefinition, str], curie_only=True) return sv.schema + def enrich(self, schema: Union[SchemaDefinition, str]) -> SchemaDefinition: + sv = SchemaView(schema) 
+ oi = self.ontology_implementation + for elt_name, elt in sv.all_elements().items(): + curies = [sv.get_uri(elt)] + for rel, ms in sv.get_mappings(elt_name).items(): + curies += ms + for x in curies: + # first definition found wins; never overwrite an existing description + if elt.description: + break + try: + defn = oi.get_definition_by_curie(x) + if defn: + elt.description = defn + except Exception: + pass + return sv.schema + @click.command() @click.argument('schema') diff --git a/schema_automator/cli.py b/schema_automator/cli.py index 82685c4..13f965e 100644 --- a/schema_automator/cli.py +++ b/schema_automator/cli.py @@ -245,6 +245,23 @@ def annotate_schema(schema: str, input: str, output: str, curie_only: bool, **ar write_schema(schema, output) +@main.command() +@click.argument('schema') +@click.option('--input', '-i', help="OAK input ontology selector") +@output_option +def enrich_schema(schema: str, input: str, output: str, **args): + """ + Enrich a schema with descriptions looked up in an ontology + + The ontology is chosen with --input (an OAK selector) + """ + impl = get_implementation_from_shorthand(input) + logging.basicConfig(level=logging.INFO) + annr = SchemaAnnotator(impl) + schema = annr.enrich(schema) + write_schema(schema, output) + + @main.command() @click.argument('schema') @output_option diff --git a/tests/resources/so-mini.obo b/tests/resources/so-mini.obo new file mode 100644 index 0000000..fb3f959 --- /dev/null +++ b/tests/resources/so-mini.obo @@ -0,0 +1,4 @@ +[Term] +id: SO:0000704 +name: gene +def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." 
[] diff --git a/tests/test_annotators/test_schema_enricher.py b/tests/test_annotators/test_schema_enricher.py new file mode 100644 index 0000000..11fa8ab --- /dev/null +++ b/tests/test_annotators/test_schema_enricher.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +import logging +import os +import unittest +from linkml.utils.schema_builder import SchemaBuilder +from linkml_runtime.dumpers import yaml_dumper +from linkml_runtime.linkml_model import SchemaDefinition, EnumDefinition, PermissibleValue +from oaklib.implementations import BioportalImplementation +from oaklib.selector import get_implementation_from_shorthand + +from schema_automator.annotators.schema_annotator import SchemaAnnotator +from linkml.generators.yamlgen import YAMLGenerator +from tests import INPUT_DIR, OUTPUT_DIR + + +class SchemaEnricherTestCase(unittest.TestCase): + + def setUp(self) -> None: + impl = get_implementation_from_shorthand(os.path.join(INPUT_DIR, "so-mini.obo")) + self.annotator = SchemaAnnotator(impl) + + def test_enrich(self): + s = SchemaDefinition(id='test', name='test') + sb = SchemaBuilder(s) + sb.add_class('Gene', class_uri="SO:0000704").add_slot('part_of') + s = self.annotator.enrich(sb.schema) + #print(yaml_dumper.dumps(s)) + assert s.classes['Gene'].description.startswith("A region") + + +