From 5bed746515d81d201f08e657201c21d7344f3c0e Mon Sep 17 00:00:00 2001 From: wrznr Date: Thu, 25 Jul 2019 17:18:23 +0200 Subject: [PATCH 1/3] [WIP] Start compiling the magic element "bibl" The main title is now extracted from the logical struct map (thanks to @henning-gerhardt for the hint) along with the publication type. --- mets_mods2teiHeader/api/mets.py | 7 +++++-- mets_mods2teiHeader/api/tei.py | 13 +++++++++++++ mets_mods2teiHeader/scripts/mets_mods2teiHeader.py | 3 +++ tests/test_tei.py | 4 ++++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/mets_mods2teiHeader/api/mets.py b/mets_mods2teiHeader/api/mets.py index a2d77ec..389e443 100644 --- a/mets_mods2teiHeader/api/mets.py +++ b/mets_mods2teiHeader/api/mets.py @@ -108,8 +108,11 @@ def __spur(self): # alternatively identify the corresponding dmdSec via # - # main title - self.title = self.tree.xpath("//mets:dmdSec[1]//mods:mods/mods:titleInfo/mods:title", namespaces=ns)[0].text + # main title and manuscript type + title = self.tree.xpath('//mets:structMap[@TYPE="LOGICAL"]/mets:div', namespaces=ns) + if title: + self.title = title[0].get("LABEL") + self.type = title[0].get("TYPE") # # sub titles diff --git a/mets_mods2teiHeader/api/tei.py b/mets_mods2teiHeader/api/tei.py index d1a6fcb..f481a44 100644 --- a/mets_mods2teiHeader/api/tei.py +++ b/mets_mods2teiHeader/api/tei.py @@ -35,6 +35,13 @@ def main_title(self): by the TEI Header. """ return self.tree.xpath('//tei:titleStmt/tei:title[@type="main"]', namespaces=ns)[0].text + + @property + def publication_level(self): + """ + Returns the level of publication ('monographic' vs. 
'analytic') + """ + return self.tree.xpath('//tei:sourceDesc/tei:biblFull/tei:titleStmt/tei:title[@type="main"]', namespaces=ns)[0].get("level") @property def subtitles(self): @@ -126,6 +133,12 @@ def set_main_title(self, string): for main_title in self.tree.xpath('//tei:titleStmt/tei:title[@type="main"]', namespaces=ns): main_title.text = string + def set_publication_level(self, level): + """ + Sets the level of publication ('monographic' vs. 'analytic') + """ + self.tree.xpath('//tei:sourceDesc/tei:biblFull/tei:titleStmt/tei:title[@type="main"]', namespaces=ns)[0].set("level", level) + def add_sub_title(self, string): """ Adds a sub title to the title statements. diff --git a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py index a2122d6..aeec7e3 100644 --- a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py +++ b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py @@ -36,6 +36,9 @@ def cli(mets): # main title tei.set_main_title(mets.get_main_title()) + # publication level + tei.set_publication_level(mets.type) + # sub titles for sub_title in mets.get_sub_titles(): tei.add_sub_title(sub_title) diff --git a/tests/test_tei.py b/tests/test_tei.py index 2a273da..cdc7323 100644 --- a/tests/test_tei.py +++ b/tests/test_tei.py @@ -23,6 +23,10 @@ def test_data_assignment(subtests): tei.set_main_title("Testbuch") assert(tei.main_title == "Testbuch") + with subtests.test("Check publication level"): + tei.set_publication_level("m") + assert(tei.publication_level == "m") + with subtests.test("Check first subtitle"): tei.add_sub_title("Untertitel 1") assert(tei.subtitles == ["Untertitel 1"]) From 561a8935ac87d8fb14f1c3771a48cbdccbcec40a Mon Sep 17 00:00:00 2001 From: wrznr Date: Tue, 30 Jul 2019 13:40:53 +0200 Subject: [PATCH 2/3] Add first working version for small sample --- mets_mods2teiHeader/api/tei.py | 48 ++++++++++++++++++- mets_mods2teiHeader/data/tei_skeleton.xml | 3 -- .../scripts/mets_mods2teiHeader.py | 3 ++ 
tests/test_tei.py | 12 ++++- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/mets_mods2teiHeader/api/tei.py b/mets_mods2teiHeader/api/tei.py index f481a44..df25fbc 100644 --- a/mets_mods2teiHeader/api/tei.py +++ b/mets_mods2teiHeader/api/tei.py @@ -59,7 +59,7 @@ def authors(self): """ authors = [] for author in self.tree.xpath('//tei:fileDesc/tei:titleStmt/tei:author', namespaces=ns): - authors.append(" ".join(author.xpath('descendant-or-self::*/text()'))) + authors.append(", ".join(author.xpath('descendant-or-self::*/text()'))) return authors @property @@ -126,6 +126,22 @@ def extents(self): """ return [extent.text for extent in self.tree.xpath('//tei:msDesc/tei:physDesc/tei:objectDesc/tei:supportDesc/tei:extent', namespaces=ns)] + @property + def series(self): + """ + Returns information on the series of the work represented + by the TEI Header. + """ + return [series.text for series in self.tree.xpath('//tei:profileDesc/tei:creation', namespaces=ns)] + + @property + def bibl(self): + """ + Returns the short citation of the work represented + by the TEI Header. + """ + return self.tree.xpath("//tei:fileDesc/tei:sourceDesc/tei:bibl", namespaces=ns)[0] + def set_main_title(self, string): """ Sets the main title of the title statements. 
@@ -335,3 +351,33 @@ def add_extent(self, extent): support_desc = phys_desc.xpath('/tei:objectDesc/tei:supportDesc', namespaces=ns)[0] extent_elem = etree.SubElement(support_desc, "%sextent" % TEI) extent_elem.text = extent + + def add_series(self, series): + """ + Adds a (free-text) series of the digital document + """ + profile_desc = self.tree.xpath('//tei:profileDesc', namespaces=ns)[0] + creation = etree.SubElement(profile_desc, "%screation" % TEI) + creation.text = series + + def compile_bibl(self): + """ + Compile the content of the short citation element 'bibl' based on the current state + """ + if self.publication_level: + self.bibl.set("type", self.publication_level) + bibl_text = "" + if self.authors: + bibl_text += "; ".join(self.authors) + ": " + elif self.publication_level == "monograph": + bibl_text = "[N. N.], " + bibl_text += self.main_title + "." + if self.places: + bibl_text += " " + self.places[0].split(":")[1] + if len(self.places) > 1: + bibl_text += "u. a." + if self.dates: + if self.places: + bibl_text += "," + bibl_text += " " + self.dates[0] + "." 
+ self.bibl.text = bibl_text diff --git a/mets_mods2teiHeader/data/tei_skeleton.xml b/mets_mods2teiHeader/data/tei_skeleton.xml index 430c3d4..fcc73a9 100644 --- a/mets_mods2teiHeader/data/tei_skeleton.xml +++ b/mets_mods2teiHeader/data/tei_skeleton.xml @@ -33,9 +33,6 @@ - - dtae - diff --git a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py index aeec7e3..508c009 100644 --- a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py +++ b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py @@ -104,6 +104,9 @@ def cli(mets): for extent in mets.extents: tei.add_extent(extent) + # citation + tei.compile_bibl() + click.echo(tei.tostring()) diff --git a/tests/test_tei.py b/tests/test_tei.py index cdc7323..c07e4a0 100644 --- a/tests/test_tei.py +++ b/tests/test_tei.py @@ -37,11 +37,11 @@ def test_data_assignment(subtests): with subtests.test("Check first author"): tei.add_author({'family': 'Mustermann', 'given': 'Max', 'date': '12.10.1956', 'title': 'Dr.'}, "personal") - assert(tei.authors == ["Mustermann Max Dr."]) + assert(tei.authors == ["Mustermann, Max, Dr."]) with subtests.test("Check further author (organisation)"): tei.add_author({'family': 'Mustermann', 'given': 'Max', 'date': '12.10.1956', 'title': 'Dr.'}, "corporate") - assert(tei.authors == ["Mustermann Max Dr.", "Mustermann Max 12.10.1956 Dr."]) + assert(tei.authors == ["Mustermann, Max, Dr.", "Mustermann Max 12.10.1956 Dr."]) with subtests.test("Check date(s)"): tei.add_date({"from": "01.01.1823", "to": "25.01.1823"}) @@ -78,3 +78,11 @@ def test_data_assignment(subtests): with subtests.test("Check further extent"): tei.add_extent("5 Abb.") assert(tei.extents == ["32 S.", "5 Abb."]) + + with subtests.test("Check series"): + tei.add_series("LDP") + assert(tei.series == ["LDP"]) + + with subtests.test("Check bibl"): + tei.compile_bibl() + assert(tei.bibl.text == "Mustermann, Max, Dr.; Mustermann Max 12.10.1956 Dr.: Testbuch. 
Dresden, 01.01.1823.") From ce49e02ca4899bbca0b179a8f137c26d7cd3a4ff Mon Sep 17 00:00:00 2001 From: wrznr Date: Wed, 31 Jul 2019 13:50:04 +0200 Subject: [PATCH 3/3] Improve collection handling Digital collections from `mods:relatedItem` are now added to the profile description. --- mets_mods2teiHeader/api/mets.py | 3 ++- mets_mods2teiHeader/api/tei.py | 12 ++++++------ mets_mods2teiHeader/scripts/mets_mods2teiHeader.py | 4 ++++ tests/test_mets.py | 3 +++ tests/test_tei.py | 6 +++--- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/mets_mods2teiHeader/api/mets.py b/mets_mods2teiHeader/api/mets.py index 389e443..7e60d78 100644 --- a/mets_mods2teiHeader/api/mets.py +++ b/mets_mods2teiHeader/api/mets.py @@ -70,6 +70,7 @@ def __init__(self): self.collections = None self.languages = None self.extents = None + self.series = None @classmethod def read(cls, source): @@ -243,7 +244,7 @@ def __spur(self): collections = self.tree.xpath("//mets:dmdSec[1]//mods:mods/mods:relatedItem[@type='series']", namespaces=ns) self.collections = [] for collection in collections: - title = collection.xpath("//mods:titleinfo/mods:title", namespaces=ns) + title = collection.xpath("./mods:titleInfo/mods:title", namespaces=ns) if title: self.collections.append(title[0].text) diff --git a/mets_mods2teiHeader/api/tei.py b/mets_mods2teiHeader/api/tei.py index df25fbc..7fd3e00 100644 --- a/mets_mods2teiHeader/api/tei.py +++ b/mets_mods2teiHeader/api/tei.py @@ -127,12 +127,12 @@ def extents(self): return [extent.text for extent in self.tree.xpath('//tei:msDesc/tei:physDesc/tei:objectDesc/tei:supportDesc/tei:extent', namespaces=ns)] @property - def series(self): + def collections(self): """ - Returns information on the series of the work represented + Returns information on the collections of the work represented by the TEI Header. 
""" - return [series.text for series in self.tree.xpath('//tei:profileDesc/tei:creation', namespaces=ns)] + return [collection.text for collection in self.tree.xpath('//tei:profileDesc/tei:creation', namespaces=ns)] @property def bibl(self): @@ -352,13 +352,13 @@ def add_extent(self, extent): extent_elem = etree.SubElement(support_desc, "%sextent" % TEI) extent_elem.text = extent - def add_series(self, series): + def add_collection(self, collection): """ - Adds a (free-text) series of the digital document + Adds a (free-text) collection of the digital document """ profile_desc = self.tree.xpath('//tei:profileDesc', namespaces=ns)[0] creation = etree.SubElement(profile_desc, "%screation" % TEI) - creation.text = series + creation.text = collection def compile_bibl(self): """ diff --git a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py index 508c009..c7b8ab0 100644 --- a/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py +++ b/mets_mods2teiHeader/scripts/mets_mods2teiHeader.py @@ -104,6 +104,10 @@ def cli(mets): for extent in mets.extents: tei.add_extent(extent) + # collection + for collection in mets.collections: + tei.add_collection(collection) + # citation tei.compile_bibl() diff --git a/tests/test_mets.py b/tests/test_mets.py index 5e46bd1..fa6c69e 100644 --- a/tests/test_mets.py +++ b/tests/test_mets.py @@ -74,3 +74,6 @@ def test_data_assignment(subtests, datadir): with subtests.test("Check manuscript extent"): assert(mets.extents == ['[8] Bl., 783 S., [1] Bl.']) + + with subtests.test("Check collections"): + assert(mets.collections == ['Drucke des 18. 
Jahrhunderts', 'Saxonica']) diff --git a/tests/test_tei.py b/tests/test_tei.py index c07e4a0..e694dae 100644 --- a/tests/test_tei.py +++ b/tests/test_tei.py @@ -79,9 +79,9 @@ def test_data_assignment(subtests): tei.add_extent("5 Abb.") assert(tei.extents == ["32 S.", "5 Abb."]) - with subtests.test("Check series"): - tei.add_series("LDP") - assert(tei.series == ["LDP"]) + with subtests.test("Check collections"): + tei.add_collection("LDP") + assert(tei.collections == ["LDP"]) with subtests.test("Check bibl"): tei.compile_bibl()