From 80a65ad0aaf61d5063e2d800bd4f02e25ae6f26b Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 14 Dec 2023 15:58:53 +0100 Subject: [PATCH] defaults for mets_basename and mets_server_url --- ocrd/ocrd/cli/bashlib.py | 3 ++- ocrd/ocrd/cli/process.py | 4 ++-- ocrd/ocrd/cli/validate.py | 4 ++-- ocrd/ocrd/cli/workspace.py | 4 ++-- ocrd/ocrd/cli/zip.py | 6 +++--- ocrd/ocrd/decorators/ocrd_cli_options.py | 3 ++- ocrd/ocrd/resolver.py | 7 ++++--- ocrd/ocrd/workspace.py | 3 ++- ocrd/ocrd/workspace_backup.py | 8 ++++---- ocrd/ocrd/workspace_bagger.py | 10 +++++----- ocrd_network/ocrd_network/cli/client.py | 4 +++- ocrd_utils/ocrd_utils/__init__.py | 1 + ocrd_utils/ocrd_utils/constants.py | 2 ++ ocrd_validators/ocrd_validators/workspace_validator.py | 4 ++-- 14 files changed, 36 insertions(+), 27 deletions(-) diff --git a/ocrd/ocrd/cli/bashlib.py b/ocrd/ocrd/cli/bashlib.py index b387c4b5a..5746151c7 100644 --- a/ocrd/ocrd/cli/bashlib.py +++ b/ocrd/ocrd/cli/bashlib.py @@ -14,6 +14,7 @@ from ocrd.constants import BASHLIB_FILENAME import ocrd.constants import ocrd_utils.constants +from ocrd_utils.constants import DEFAULT_METS_BASENAME import ocrd_models.constants import ocrd_validators.constants from ocrd.decorators import ( @@ -78,7 +79,7 @@ def bashlib_constants(name): print(val) @bashlib_cli.command('input-files') -@click.option('-m', '--mets', help="METS to process", default="mets.xml") +@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME) @click.option('-w', '--working-dir', help="Working Directory") @click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT') @click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT') diff --git a/ocrd/ocrd/cli/process.py b/ocrd/ocrd/cli/process.py index 0e0129439..b71b74d09 100644 --- a/ocrd/ocrd/cli/process.py +++ b/ocrd/ocrd/cli/process.py @@ -8,7 +8,7 @@ """ import click -from ocrd_utils import getLogger, initLogging +from ocrd_utils import getLogger, initLogging, DEFAULT_METS_BASENAME from ocrd.task_sequence import run_tasks from ..decorators import ocrd_loglevel @@ -18,7 +18,7 @@ # ---------------------------------------------------------------------- @click.command('process') @ocrd_loglevel -@click.option('-m', '--mets', help="METS to process", default="mets.xml") +@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME) @click.option('-g', '--page-id', help="ID(s) of the pages to process") @click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist") @click.argument('tasks', nargs=-1, required=True) diff --git a/ocrd/ocrd/cli/validate.py b/ocrd/ocrd/cli/validate.py index 668e1f8b7..b26803d05 100644 --- a/ocrd/ocrd/cli/validate.py +++ b/ocrd/ocrd/cli/validate.py @@ -14,7 +14,7 @@ from ocrd import Resolver, Workspace from ocrd.task_sequence import ProcessorTask, validate_tasks -from ocrd_utils import initLogging, parse_json_string_or_file +from ocrd_utils import initLogging, parse_json_string_or_file, DEFAULT_METS_BASENAME from ocrd_validators import ( OcrdToolValidator, OcrdZipValidator, @@ -101,7 +101,7 @@ def validate_page(page, **kwargs): @validate_cli.command('tasks') @click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax') -@click.option('-M', '--mets-basename', nargs=1, default='mets.xml', help='Basename of the METS file, used in conjunction with --workspace') +@click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME, help='Basename of the METS file, used in conjunction with --workspace') @click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.') @click.option('-g', '--page-id', help="ID(s) of the pages to process") @click.argument('tasks', nargs=-1, required=True) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index 910151699..318c1e971 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -20,14 +20,14 @@ from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager from ocrd.mets_server import OcrdMetsServer -from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list +from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list, DEFAULT_METS_BASENAME from ocrd.decorators import mets_find_options from . import command_with_replaced_help class WorkspaceCtx(): - def __init__(self, directory, mets_url, mets_basename, mets_server_url, automatic_backup): + def __init__(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, mets_server_url=None, automatic_backup=False): self.log = getLogger('ocrd.cli.workspace') if mets_basename: self.log.warning(DeprecationWarning('--mets-basename is deprecated. Use --mets/--directory instead.')) diff --git a/ocrd/ocrd/cli/zip.py b/ocrd/ocrd/cli/zip.py index d36bfc85b..7db18b09c 100644 --- a/ocrd/ocrd/cli/zip.py +++ b/ocrd/ocrd/cli/zip.py @@ -9,7 +9,7 @@ import click -from ocrd_utils import initLogging +from ocrd_utils import initLogging, DEFAULT_METS_BASENAME from ocrd_validators import OcrdZipValidator from ..resolver import Resolver @@ -35,13 +35,13 @@ def zip_cli(): help='Workspace folder location.', show_default=True) @click.option('-M', '--mets-basename', - default="mets.xml", + default=DEFAULT_METS_BASENAME, help='Basename of the METS file.', show_default=True) @click.option('-q', '--include-file-grps', 'include_fileGrp', help="fileGrps to include", default=[], multiple=True) @click.option('-Q', '--exclude-file-grps', 'exclude_fileGrp', help="fileGrps to exclude", default=[], multiple=True) @click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True) -@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default="mets.xml") +@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default=DEFAULT_METS_BASENAME) @click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum") @click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True) @click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False) diff --git a/ocrd/ocrd/decorators/ocrd_cli_options.py b/ocrd/ocrd/decorators/ocrd_cli_options.py index c4cd7e34b..d1d3a9624 100644 --- a/ocrd/ocrd/decorators/ocrd_cli_options.py +++ b/ocrd/ocrd/decorators/ocrd_cli_options.py @@ -1,5 +1,6 @@ import click from click import option, Path, group, command, argument +from ocrd_utils import DEFAULT_METS_BASENAME from ocrd_network import NETWORK_AGENT_SERVER, NETWORK_AGENT_WORKER from .parameter_option import parameter_option, parameter_override_option from .loglevel_option import loglevel_option @@ -25,7 +26,7 @@ def cli(mets_url): """ # XXX Note that the `--help` output is statically generate_processor_help params = [ - option('-m', '--mets', help="METS to process", default="mets.xml"), + option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME), option('-w', '--working-dir', help="Working Directory"), option('-U', '--mets-server-url', help="METS server URL. Starts with http:// then TCP, otherwise unix socket path"), # TODO OCR-D/core#274 diff --git a/ocrd/ocrd/resolver.py b/ocrd/ocrd/resolver.py index 25f7507f1..4b8fe6b21 100644 --- a/ocrd/ocrd/resolver.py +++ b/ocrd/ocrd/resolver.py @@ -8,6 +8,7 @@ from ocrd.constants import TMP_PREFIX from ocrd_utils import ( config, + DEFAULT_METS_BASENAME, getLogger, is_local_filename, get_local_filename, @@ -224,7 +225,7 @@ def workspace_from_url( return workspace - def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False): + def workspace_from_nothing(self, directory, mets_basename=DEFAULT_METS_BASENAME, clobber_mets=False): """ Create an empty workspace. @@ -252,7 +253,7 @@ def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_me return Workspace(self, directory, mets, mets_basename=mets_basename) - def resolve_mets_arguments(self, directory, mets_url, mets_basename, mets_server_url): + def resolve_mets_arguments(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, mets_server_url=None): """ Resolve the ``--mets``, ``--mets-basename``, `--directory``, ``--mets-server-url``, arguments into a coherent set of arguments @@ -275,7 +276,7 @@ def resolve_mets_arguments(self, directory, mets_url, mets_basename, mets_server if not mets_basename and mets_url: mets_basename = Path(mets_url).name elif not mets_basename and not mets_url: - mets_basename = 'mets.xml' + mets_basename = DEFAULT_METS_BASENAME elif mets_basename and mets_url: raise ValueError("Use either --mets or --mets-basename, not both") else: diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index c56c49622..7772c54d7 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -35,6 +35,7 @@ pushd_popd, is_local_filename, deprecated_alias, + DEFAULT_METS_BASENAME, MIME_TO_EXT, MIME_TO_PIL, MIMETYPE_PAGE, @@ -70,7 +71,7 @@ class Workspace(): baseurl (string) : Base URL to prefix to relative URL. """ - def __init__(self, resolver, directory, mets=None, mets_basename='mets.xml', automatic_backup=False, baseurl=None, mets_server_url=None): + def __init__(self, resolver, directory, mets=None, mets_basename=DEFAULT_METS_BASENAME, automatic_backup=False, baseurl=None, mets_server_url=None): self.resolver = resolver self.directory = directory self.mets_target = str(Path(directory, mets_basename)) diff --git a/ocrd/ocrd/workspace_backup.py b/ocrd/ocrd/workspace_backup.py index a23223b54..6cc3f1530 100644 --- a/ocrd/ocrd/workspace_backup.py +++ b/ocrd/ocrd/workspace_backup.py @@ -6,7 +6,7 @@ import hashlib from ocrd_models import OcrdMets -from ocrd_utils import getLogger, atomic_write +from ocrd_utils import getLogger, atomic_write, DEFAULT_METS_BASENAME from .constants import BACKUP_DIR @@ -17,7 +17,7 @@ class WorkspaceBackup(): @classmethod def from_path(cls, d): - mets_file = join(d, 'mets.xml') + mets_file = join(d, DEFAULT_METS_BASENAME) (chksum, lastmod) = basename(d).split('.', maxsplit=1) size = getsize(mets_file) mets_xml = OcrdMets(filename=mets_file) @@ -61,7 +61,7 @@ def restore(self, chksum, choose_first=False): bak = candidates[0] self.add() log.info("Restoring from %s/mets.xml" % bak) - src = join(bak, 'mets.xml') + src = join(bak, DEFAULT_METS_BASENAME) dest = self.workspace.mets_target log.debug('cp "%s" "%s"', src, dest) copy(src, dest) @@ -80,7 +80,7 @@ def add(self): else: timestamp = datetime.now().timestamp() d = join(self.backup_directory, '%s.%s' % (chksum, timestamp)) - mets_file = join(d, 'mets.xml') + mets_file = join(d, DEFAULT_METS_BASENAME) log.info("Backing up to %s" % mets_file) makedirs(d) with atomic_write(mets_file) as f: diff --git a/ocrd/ocrd/workspace_bagger.py b/ocrd/ocrd/workspace_bagger.py index af6519f01..f838a6589 100644 --- a/ocrd/ocrd/workspace_bagger.py +++ b/ocrd/ocrd/workspace_bagger.py @@ -16,7 +16,7 @@ getLogger, MIME_TO_EXT, unzip_file_to_dir, - + DEFAULT_METS_BASENAME, MIMETYPE_PAGE, VERSION, ) @@ -113,7 +113,7 @@ def _bag_mets_files( log.info("New vs. old: %s" % changed_local_filenames) return total_bytes, total_files - def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets='mets.xml'): + def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_base_version_checksum, ocrd_mets=DEFAULT_METS_BASENAME): bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % ( VERSION, # TODO @@ -126,14 +126,14 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas bag.info['Ocrd-Base-Version-Checksum'] = ocrd_base_version_checksum bag.info['Bagging-Date'] = str(datetime.now()) bag.info['Payload-Oxum'] = '%s.%s' % (total_bytes, total_files) - if ocrd_mets != 'mets.xml': + if ocrd_mets != DEFAULT_METS_BASENAME: bag.info['Ocrd-Mets'] = ocrd_mets def bag(self, workspace, ocrd_identifier, dest=None, - ocrd_mets='mets.xml', + ocrd_mets=DEFAULT_METS_BASENAME, ocrd_base_version_checksum=None, processes=1, skip_zip=False, @@ -245,7 +245,7 @@ def spill(self, src, dest): rmtree(bagdir) # Create workspace - mets_basename = bag_info.get("Ocrd-Mets", "mets.xml") + mets_basename = bag_info.get("Ocrd-Mets", DEFAULT_METS_BASENAME) workspace = Workspace(self.resolver, directory=dest, mets_basename=mets_basename) # TODO validate workspace diff --git a/ocrd_network/ocrd_network/cli/client.py b/ocrd_network/ocrd_network/cli/client.py index 0af070992..5c62ac44e 100644 --- a/ocrd_network/ocrd_network/cli/client.py +++ b/ocrd_network/ocrd_network/cli/client.py @@ -1,6 +1,8 @@ import click from typing import Optional +from ocrd_utils import DEFAULT_METS_BASENAME + from ocrd.decorators import ( parameter_option, parameter_override_option @@ -36,7 +38,7 @@ def processing_cli(): @processing_cli.command('processor') @click.argument('processor_name', required=True, type=click.STRING) @click.option('--address') -@click.option('-m', '--mets', required=True, default="mets.xml") +@click.option('-m', '--mets', required=True, default=DEFAULT_METS_BASENAME) @click.option('-I', '--input-file-grp', default='OCR-D-INPUT') @click.option('-O', '--output-file-grp', default='OCR-D-OUTPUT') @click.option('-g', '--page-id') diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 1e7565afe..90cd55477 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -106,6 +106,7 @@ """ from .constants import ( + DEFAULT_METS_BASENAME, EXT_TO_MIME, MIMETYPE_PAGE, MIME_TO_EXT, diff --git a/ocrd_utils/ocrd_utils/constants.py b/ocrd_utils/ocrd_utils/constants.py index 4327f4217..1fbd9bf42 100644 --- a/ocrd_utils/ocrd_utils/constants.py +++ b/ocrd_utils/ocrd_utils/constants.py @@ -108,3 +108,5 @@ LOG_TIMEFMT = r'%H:%M:%S' RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module'] + +DEFAULT_METS_BASENAME = 'mets.xml' diff --git a/ocrd_validators/ocrd_validators/workspace_validator.py b/ocrd_validators/ocrd_validators/workspace_validator.py index 4061cd887..60014e221 100644 --- a/ocrd_validators/ocrd_validators/workspace_validator.py +++ b/ocrd_validators/ocrd_validators/workspace_validator.py @@ -5,7 +5,7 @@ from traceback import format_exc from pathlib import Path -from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename +from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename, DEFAULT_METS_BASENAME from ocrd_models import ValidationReport from ocrd_modelfactory import page_from_file @@ -91,7 +91,7 @@ def __init__(self, resolver, mets_url, src_dir=None, skip=None, download=False, self.log.debug('resolver=%s mets_url=%s src_dir=%s', resolver, mets_url, src_dir) self.resolver = resolver if mets_url is None and src_dir is not None: - mets_url = '%s/mets.xml' % src_dir + mets_url = f'{src_dir}/{DEFAULT_METS_BASENAME}' self.mets_url = mets_url self.download = download self.page_strictness = page_strictness