From 7a7894620a961198c4d05d66f9ac710df2212761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonah=20Br=C3=BCchert?= Date: Sat, 17 Feb 2024 12:43:19 +0100 Subject: [PATCH] Add script to generate license summary --- feeds/de-berlin.json | 6 +++- feeds/lv.json | 15 +++++++-- src/generate-attribution.py | 67 +++++++++++++++++++++++++++++++++++++ src/metadata.py | 15 ++++++++- src/transitland.py | 9 +++++ 5 files changed, 107 insertions(+), 5 deletions(-) create mode 100755 src/generate-attribution.py diff --git a/feeds/de-berlin.json b/feeds/de-berlin.json index 47b3a300..b59c71a5 100644 --- a/feeds/de-berlin.json +++ b/feeds/de-berlin.json @@ -9,7 +9,11 @@ { "name": "vbb", "type": "http", - "url": "https://www.vbb.de/fileadmin/user_upload/VBB/Dokumente/API-Datensaetze/gtfs-mastscharf/GTFS.zip" + "url": "https://www.vbb.de/fileadmin/user_upload/VBB/Dokumente/API-Datensaetze/gtfs-mastscharf/GTFS.zip", + "license": { + "spdx-identifier": "CC-BY-4.0", + "url": "https://www.vbb.de/vbb-services/api-open-data/datensaetze/" + } } ] } diff --git a/feeds/lv.json b/feeds/lv.json index 6271a22f..3d0fd5a1 100644 --- a/feeds/lv.json +++ b/feeds/lv.json @@ -14,18 +14,27 @@ { "name": "rigas-satiksme", "type": "http", - "url": "https://gtfs.pro/files/uran/improved-gtfs-satiksme.zip" + "url": "https://gtfs.pro/files/uran/improved-gtfs-satiksme.zip", + "license": { + "spdx-identifier": "CC0-1.0" + } }, { "name": "rigas-saraksti", "type": "http", - "url": "https://gtfs.pro/files/uran/improved-gtfs-saraksti.zip" + "url": "https://gtfs.pro/files/uran/improved-gtfs-saraksti.zip", + "license": { + "spdx-identifier": "CC0-1.0" + } }, { "name": "valsts-sia-autotransporta", "type": "http", "url": "https://www.atd.lv/sites/default/files/GTFS/gtfs-latvia-lv.zip", - "fix": true + "fix": true, + "license": { + "spdx-identifier": "CC0-1.0" + } } ] } diff --git a/src/generate-attribution.py b/src/generate-attribution.py new file mode 100755 index 00000000..3dc7b2e2 --- /dev/null +++ 
b/src/generate-attribution.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2024 Jonah Brüchert
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Write out/license.json, a license summary of the generated GTFS feeds.
+
+For every source listed in feeds/*.json whose feed exists as
+out/<region>_<source>.gtfs.zip, one entry is emitted with the feed's file
+name, its license (SPDX identifier and URL, where known) and the agency
+names found in the feed's agency.txt as copyright holders.
+"""
+
+import csv
+import io
+import json
+from pathlib import Path
+from typing import List
+from zipfile import ZipFile
+
+import transitland
+from metadata import Region, TransitlandSource
+
+
+def read_agency_names(feed_path: Path) -> List[str]:
+    """Return the agency_name of every row in the feed's agency.txt."""
+    with ZipFile(feed_path) as archive, archive.open("agency.txt") as raw:
+        # GTFS text files are UTF-8 and may begin with a byte order mark;
+        # "utf-8-sig" drops it so the first column name is not corrupted.
+        with io.TextIOWrapper(raw, encoding="utf-8-sig", newline="") as text:
+            return [row["agency_name"] for row in csv.DictReader(text)]
+
+
+def build_attribution(source, feed_path: Path) -> dict:
+    """Describe the license and copyright holders of one generated feed.
+
+    source must provide a license attribute as set up by metadata.Source;
+    feed_path is the generated GTFS zip belonging to it.
+    """
+    attribution = {}
+
+    if source.license:
+        if source.license.spdx_identifier:
+            attribution["spdx_identifier"] = source.license.spdx_identifier
+        if source.license.url:
+            attribution["url"] = source.license.url
+
+    attribution["copyright_holders"] = read_agency_names(feed_path)
+    attribution["filename"] = feed_path.name
+    return attribution
+
+
+def main() -> None:
+    """Write the attribution of all generated feeds to out/license.json."""
+    transitland_atlas = transitland.Atlas.load(Path("transitland-atlas/"))
+
+    attributions = []
+
+    # Sorted so that the output does not depend on directory listing order.
+    for feed in sorted(Path("feeds/").glob("*.json")):
+        with open(feed, "r", encoding="utf-8") as f:
+            region = Region(json.load(f))
+
+        for source in region.sources:
+            if isinstance(source, TransitlandSource):
+                source = transitland_atlas.source_by_id(source)
+
+            # feed.stem is the region name, i.e. the file name without ".json"
+            feed_path = Path(f"out/{feed.stem}_{source.name}.gtfs.zip")
+
+            if not feed_path.exists():
+                print(f"Info: {feed_path} does not exist, skipping…")
+                continue
+
+            attributions.append(build_attribution(source, feed_path))
+
+    # ensure_ascii=False emits non-ASCII names verbatim, so the file must be
+    # written as UTF-8 regardless of the locale's default encoding.
+    with open("out/license.json", "w", encoding="utf-8") as outfile:
+        json.dump(attributions, outfile, indent=4, ensure_ascii=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/metadata.py b/src/metadata.py
index b8f881b7..6588a187 100644
--- a/src/metadata.py
+++ b/src/metadata.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
-from typing import List
+from typing import List, Optional
 
 
 class Maintainer:
@@ -14,12 +14,25 @@
def __init__(self, parsed: dict):
         self.email = parsed["email"]
 
 
+class License:
+    """License of a feed; a field is None when it is not known."""
+    spdx_identifier: Optional[str] = None
+    url: Optional[str] = None
+
+
 class Source:
     name: str
     fix: bool = False
+    license: License
 
     def __init__(self, parsed: dict = None):
+        self.license = License()
         if parsed:
+            # Both license fields are optional; an absent one reads as None.
+            declared = parsed.get("license", {})
+            self.license.spdx_identifier = declared.get("spdx-identifier")
+            self.license.url = declared.get("url")
+
             self.name = parsed["name"]
             if "fix" in parsed:
                 self.fix = bool(parsed["fix"])
diff --git a/src/transitland.py b/src/transitland.py
index c11592c6..8fef2c87 100644
--- a/src/transitland.py
+++ b/src/transitland.py
@@ -26,5 +26,14 @@ def load(path: Path):
     def source_by_id(self, source: TransitlandSource) -> HttpSource:
         feed = self.by_id[source.transitland_atlas_id]
         http_source = HttpSource()
+        http_source.name = source.name
         http_source.url = feed["urls"]["static_current"]
+
+        # Copy whichever license fields the feed record declares.
+        if "license" in feed:
+            http_source.license = License()
+            for field in ("spdx_identifier", "url"):
+                if field in feed["license"]:
+                    setattr(http_source.license, field, feed["license"][field])
+
         return http_source