diff --git a/Classes/Command/BaseCommand.php b/Classes/Command/BaseCommand.php
index 76e1a51d44..c6bd8d4435 100644
--- a/Classes/Command/BaseCommand.php
+++ b/Classes/Command/BaseCommand.php
@@ -22,6 +22,7 @@
use Kitodo\Dlf\Domain\Model\Collection;
use Kitodo\Dlf\Domain\Model\Document;
use Kitodo\Dlf\Domain\Model\Library;
+use Kitodo\Dlf\Validation\DocumentValidator;
use Symfony\Component\Console\Command\Command;
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
use TYPO3\CMS\Core\Utility\GeneralUtility;
@@ -213,71 +214,76 @@ protected function saveToDatabase(Document $document): bool
$doc->cPid = $this->storagePid;
$metadata = $doc->getToplevelMetadata($this->storagePid);
+ $validator = new DocumentValidator($metadata, explode(',', $this->extConf['requiredMetadataFields']));
+
+ if ($validator->hasAllMandatoryMetadataFields()) {
+ // set title data
+ $document->setTitle($metadata['title'][0] ? : '');
+ $document->setTitleSorting($metadata['title_sorting'][0] ? : '');
+ $document->setPlace(implode('; ', $metadata['place']));
+ $document->setYear(implode('; ', $metadata['year']));
+
+ // Remove appended "valueURI" from authors' names for storing in database.
+ foreach ($metadata['author'] as $i => $author) {
+ $splitName = explode(pack('C', 31), $author);
+ $metadata['author'][$i] = $splitName[0];
+ }
+ $document->setAuthor(implode('; ', $metadata['author']));
+ $document->setThumbnail($doc->thumbnail ? : '');
+ $document->setMetsLabel($metadata['mets_label'][0] ? : '');
+ $document->setMetsOrderlabel($metadata['mets_orderlabel'][0] ? : '');
- // set title data
- $document->setTitle($metadata['title'][0] ? : '');
- $document->setTitleSorting($metadata['title_sorting'][0] ? : '');
- $document->setPlace(implode('; ', $metadata['place']));
- $document->setYear(implode('; ', $metadata['year']));
+ $structure = $this->structureRepository->findOneByIndexName($metadata['type'][0]);
+ $document->setStructure($structure);
- // Remove appended "valueURI" from authors' names for storing in database.
- foreach ($metadata['author'] as $i => $author) {
- $splitName = explode(chr(31), $author);
- $metadata['author'][$i] = $splitName[0];
- }
- $document->setAuthor($this->getAuthors($metadata['author']));
- $document->setThumbnail($doc->thumbnail ? : '');
- $document->setMetsLabel($metadata['mets_label'][0] ? : '');
- $document->setMetsOrderlabel($metadata['mets_orderlabel'][0] ? : '');
+ if (is_array($metadata['collection'])) {
+ $this->addCollections($document, $metadata['collection']);
+ }
- $structure = $this->structureRepository->findOneByIndexName($metadata['type'][0]);
- $document->setStructure($structure);
+ // set identifiers
+ $document->setProdId($metadata['prod_id'][0] ? : '');
+ $document->setOpacId($metadata['opac_id'][0] ? : '');
+ $document->setUnionId($metadata['union_id'][0] ? : '');
+
+ $document->setRecordId($metadata['record_id'][0]);
+ $document->setUrn($metadata['urn'][0] ? : '');
+ $document->setPurl($metadata['purl'][0] ? : '');
+ $document->setDocumentFormat($metadata['document_format'][0] ? : '');
+
+ // set access
+ $document->setLicense($metadata['license'][0] ? : '');
+ $document->setTerms($metadata['terms'][0] ? : '');
+ $document->setRestrictions($metadata['restrictions'][0] ? : '');
+ $document->setOutOfPrint($metadata['out_of_print'][0] ? : '');
+ $document->setRightsInfo($metadata['rights_info'][0] ? : '');
+ $document->setStatus(0);
+
+ $this->setOwner($metadata['owner'][0]);
+ $document->setOwner($this->owner);
+
+ // set volume data
+ $document->setVolume($metadata['volume'][0] ? : '');
+ $document->setVolumeSorting($metadata['volume_sorting'][0] ? : $metadata['mets_order'][0] ? : '');
+
+ // Get UID of parent document.
+ if ($document->getDocumentFormat() === 'METS') {
+ $document->setPartof($this->getParentDocumentUidForSaving($document));
+ }
- if (is_array($metadata['collection'])) {
- $this->addCollections($document, $metadata['collection']);
- }
+ if ($document->getUid() === null) {
+ // new document
+ $this->documentRepository->add($document);
+ } else {
+ // update of existing document
+ $this->documentRepository->update($document);
+ }
- // set identifiers
- $document->setProdId($metadata['prod_id'][0] ? : '');
- $document->setOpacId($metadata['opac_id'][0] ? : '');
- $document->setUnionId($metadata['union_id'][0] ? : '');
-
- $document->setRecordId($metadata['record_id'][0] ? : ''); // (?) $doc->recordId
- $document->setUrn($metadata['urn'][0] ? : '');
- $document->setPurl($metadata['purl'][0] ? : '');
- $document->setDocumentFormat($metadata['document_format'][0] ? : '');
-
- // set access
- $document->setLicense($metadata['license'][0] ? : '');
- $document->setTerms($metadata['terms'][0] ? : '');
- $document->setRestrictions($metadata['restrictions'][0] ? : '');
- $document->setOutOfPrint($metadata['out_of_print'][0] ? : '');
- $document->setRightsInfo($metadata['rights_info'][0] ? : '');
- $document->setStatus(0);
-
- $this->setOwner($metadata['owner'][0]);
- $document->setOwner($this->owner);
-
- // set volume data
- $document->setVolume($metadata['volume'][0] ? : '');
- $document->setVolumeSorting($metadata['volume_sorting'][0] ? : $metadata['mets_order'][0] ? : '');
-
- // Get UID of parent document.
- if ($document->getDocumentFormat() === 'METS') {
- $document->setPartof($this->getParentDocumentUidForSaving($document));
- }
+ $this->persistenceManager->persistAll();
- if ($document->getUid() === null) {
- // new document
- $this->documentRepository->add($document);
- } else {
- // update of existing document
- $this->documentRepository->update($document);
+ return true;
}
- $this->persistenceManager->persistAll();
-
- return true;
+ return false;
}
/**
diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php
index c8dbd4ce65..746c3b2589 100644
--- a/Classes/Command/IndexCommand.php
+++ b/Classes/Command/IndexCommand.php
@@ -180,20 +180,29 @@ protected function execute(InputInterface $input, OutputInterface $output): int
if ($dryRun) {
$io->section('DRY RUN: Would index ' . $document->getUid() . ' ("' . $document->getLocation() . '") on PID ' . $this->storagePid . ' and Solr core ' . $solrCoreUid . '.');
+ $io->success('All done!');
+ return BaseCommand::SUCCESS;
} else {
+ $document->setCurrentDocument($doc);
+
if ($io->isVerbose()) {
- $io->section('Indexing ' . $document->getUid() . ' ("' . $document->getLocation() . '") on PID ' . $this->storagePid . ' and Solr core ' . $solrCoreUid . '.');
+ $io->section('Indexing ' . $document->getUid() . ' ("' . $document->getLocation() . '") on PID ' . $this->storagePid . '.');
}
- $document->setCurrentDocument($doc);
- // save to database
- $this->saveToDatabase($document);
- // add to index
- Indexer::add($document, $this->documentRepository);
- }
+ $isSaved = $this->saveToDatabase($document);
- $io->success('All done!');
+ if ($isSaved) {
+ if ($io->isVerbose()) {
+ $io->section('Indexing ' . $document->getUid() . ' ("' . $document->getLocation() . '") on Solr core ' . $solrCoreUid . '.');
+ }
+ Indexer::add($document, $this->documentRepository);
- return BaseCommand::SUCCESS;
+ $io->success('All done!');
+ return BaseCommand::SUCCESS;
+ }
+
+ $io->error('ERROR: Document with UID "' . $document->getUid() . '" could not be indexed on PID ' . $this->storagePid . ' . There are missing mandatory fields (document format or record identifier) in this document.');
+ return BaseCommand::FAILURE;
+ }
}
/**
diff --git a/Classes/Validation/DocumentValidator.php b/Classes/Validation/DocumentValidator.php
new file mode 100644
index 0000000000..82d3f0f378
--- /dev/null
+++ b/Classes/Validation/DocumentValidator.php
@@ -0,0 +1,141 @@
+
+ *
+ * This file is part of the Kitodo and TYPO3 projects.
+ *
+ * @license GNU General Public License version 3 or later.
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ */
+
+namespace Kitodo\Dlf\Validation;
+
+use TYPO3\CMS\Core\Log\Logger;
+use TYPO3\CMS\Core\Log\LogManager;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+
+/**
+ * Class for document validation. Checks that required metadata fields are
+ * present and that the METS XML contains the expected structure maps.
+ *
+ * @package TYPO3
+ * @subpackage dlf
+ *
+ * @access public
+ */
+class DocumentValidator
+{
+    /**
+     * @access protected
+     * @var Logger This holds the logger
+     */
+    protected Logger $logger;
+
+    /**
+     * @access private
+     * @var array Metadata to validate, indexed by field name
+     */
+    private array $metadata;
+
+    /**
+     * @access private
+     * @var array Names of the fields which must not be empty (trimmed, no blanks)
+     */
+    private array $requiredMetadataFields;
+
+    /**
+     * @access private
+     * @var ?\SimpleXMLElement XML to validate; null if only metadata is checked
+     */
+    private ?\SimpleXMLElement $xml;
+
+    /**
+     * Constructs DocumentValidator instance. Required field names are trimmed
+     * and blank ones dropped, so an empty configuration value requires nothing.
+     *
+     * @access public
+     *
+     * @param array $metadata Metadata indexed by field name; the first value of a field is checked
+     * @param array $requiredMetadataFields Names of the fields which must have a non-empty first value
+     * @param ?\SimpleXMLElement $xml XML used by the structure checks; may be omitted for metadata checks
+     */
+    public function __construct(array $metadata = [], array $requiredMetadataFields = [], ?\SimpleXMLElement $xml = null)
+    {
+        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
+        $this->metadata = $metadata;
+        $this->requiredMetadataFields = array_filter(array_map('trim', $requiredMetadataFields), 'strlen');
+        $this->xml = $xml;
+    }
+
+    /**
+     * Check if metadata array contains all mandatory fields before save.
+     *
+     * @access public
+     *
+     * @return bool false (after logging the field name) as soon as a field's first value is empty(), "0" included
+     */
+    public function hasAllMandatoryMetadataFields(): bool
+    {
+        foreach ($this->requiredMetadataFields as $requiredMetadataField) {
+            if (empty($this->metadata[$requiredMetadataField][0])) {
+                $this->logger->error('Missing required metadata field "' . $requiredMetadataField . '".');
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Check if xml contains at least one logical structure with given type.
+     *
+     * @access public
+     *
+     * @param string $type e.g. documentary, newspaper or object
+     *
+     * @return bool false (with a logged error) if the type is missing or no XML was given
+     */
+    public function hasCorrectLogicalStructure(string $type): bool
+    {
+        $logicalDivs = './mets:structMap[@TYPE="LOGICAL"]/mets:div';
+        // Without XML both queries are skipped and the document is reported as having no structure.
+        if ($this->xml !== null && $this->xml->xpath($logicalDivs . '[@TYPE="' . $type . '"]')) {
+            return true;
+        }
+
+        if ($this->xml !== null && $this->xml->xpath($logicalDivs)) {
+            $this->logger->error('Document contains logical structure but @TYPE="' . $type . '" is missing.');
+            return false;
+        }
+
+        $this->logger->error('Document does not contain logical structure.');
+        return false;
+    }
+
+    /**
+     * Check if xml contains at least one physical structure with type 'physSequence'.
+     *
+     * @access public
+     *
+     * @return bool false (with a logged error) if 'physSequence' is missing or no XML was given
+     */
+    public function hasCorrectPhysicalStructure(): bool
+    {
+        $physicalDivs = './mets:structMap[@TYPE="PHYSICAL"]/mets:div';
+        // Without XML both queries are skipped and the document is reported as having no structure.
+        if ($this->xml !== null && $this->xml->xpath($physicalDivs . '[@TYPE="physSequence"]')) {
+            return true;
+        }
+
+        if ($this->xml !== null && $this->xml->xpath($physicalDivs)) {
+            $this->logger->error('Document contains physical structure but @TYPE="physSequence" is missing.');
+            return false;
+        }
+
+        $this->logger->error('Document does not contain physical structure.');
+        return false;
+    }
+}
diff --git a/Resources/Private/Language/de.locallang_labels.xlf b/Resources/Private/Language/de.locallang_labels.xlf
index 0d3adafdad..7b11028234 100644
--- a/Resources/Private/Language/de.locallang_labels.xlf
+++ b/Resources/Private/Language/de.locallang_labels.xlf
@@ -665,6 +665,10 @@
Verwende externe APIs zum Abrufen von Metadaten?: (Standard ist "FALSE")
+
+ Für die Indizierung von Dokumenten erforderliche Metadatenfelder
+
+
Seiten fileGrps: Komma-getrennte Liste der @USE Attributwerte der Seitenansichten nach aufsteigender Größe sortiert (Standard ist "DEFAULT,MAX")
diff --git a/Resources/Private/Language/locallang_labels.xlf b/Resources/Private/Language/locallang_labels.xlf
index 89938425c9..802dc413d7 100644
--- a/Resources/Private/Language/locallang_labels.xlf
+++ b/Resources/Private/Language/locallang_labels.xlf
@@ -500,6 +500,9 @@
+
+
+
diff --git a/ext_conf_template.txt b/ext_conf_template.txt
index 7f96ebe0e0..8fc469c425 100644
--- a/ext_conf_template.txt
+++ b/ext_conf_template.txt
@@ -14,6 +14,8 @@ publishNewCollections = 1
unhideOnIndex = 0
# cat=Basic; type=boolean; label=LLL:EXT:dlf/Resources/Private/Language/locallang_labels.xlf:config.useExternalApisForMetadata
useExternalApisForMetadata = 0
+# cat=Document; type=string; label=LLL:EXT:dlf/Resources/Private/Language/locallang_labels.xlf:config.requiredMetadataFields
+requiredMetadataFields = document_format,record_id
# cat=Files; type=string; label=LLL:EXT:dlf/Resources/Private/Language/locallang_labels.xlf:config.fileGrpImages
fileGrpImages = DEFAULT,MAX
# cat=Files; type=string; label=LLL:EXT:dlf/Resources/Private/Language/locallang_labels.xlf:config.fileGrpThumbs