From fad4b8cc2e83a9bda94dbcf809d57706bbdaddb8 Mon Sep 17 00:00:00 2001
From: Tim Allison
Date: Thu, 21 Mar 2024 10:08:05 -0400
Subject: [PATCH] TIKA-4216 (#1673)

* TIKA-4216 -- Avoid checking for imagemagick if image processing is disabled

(cherry picked from commit 237e73f18f46af8322a910178fa8ed99e3710d8f)
---
 .../tika/parser/ocr/TesseractOCRParser.java | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index a79e05b1d1..a28ae8951f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -126,6 +126,8 @@ public class TesseractOCRParser extends AbstractExternalProcessParser implements
             })));
     private static volatile boolean HAS_WARNED = false;
 
+    private static volatile boolean HAS_CHECKED_FOR_IMAGE_MAGICK = false;
+
     //if a user specifies a custom tess path or tessdata path
     //load the available languages at initialization time
     private final Set langs = new HashSet<>();
@@ -190,7 +192,10 @@ public boolean hasTesseract() throws TikaConfigException {
         return hasTesseract;
     }
 
-    boolean hasImageMagick() throws TikaConfigException {
+    synchronized boolean hasImageMagick() throws TikaConfigException {
+        if (HAS_CHECKED_FOR_IMAGE_MAGICK) {
+            return hasImageMagick;
+        }
         // Fetch where the config says to find ImageMagick Program
         String fullImageMagickPath = imageMagickPath + getImageMagickProg();
 
@@ -208,7 +213,7 @@ boolean hasImageMagick() throws TikaConfigException {
             LOG.debug("ImageMagick does not appear to be installed " +
                     "(commandline: " + fullImageMagickPath + ")");
         }
-
+        HAS_CHECKED_FOR_IMAGE_MAGICK = true;
         return hasImageMagick;
     }
 
@@ -245,6 +250,11 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
             return;
         }
 
+        //if you haven't checked yet, and a per file config requests imagemagick
+        //and if the default is not to use image processing
+        if (! HAS_CHECKED_FOR_IMAGE_MAGICK && config.isEnableImagePreprocessing()) {
+            hasImageMagick = hasImageMagick();
+        }
         try (TemporaryResources tmp = new TemporaryResources()) {
             TikaInputStream tikaStream = TikaInputStream.get(stream, tmp, metadata);
 
@@ -528,7 +538,11 @@ private Thread logStream(final InputStream stream, final StringBuilder out) {
     @Override
     public void initialize(Map params) throws TikaConfigException {
         hasTesseract = hasTesseract();
-        hasImageMagick = hasImageMagick();
+        if (isEnableImagePreprocessing()) {
+            hasImageMagick = hasImageMagick();
+        } else {
+            hasImageMagick = false;
+        }
         if (preloadLangs) {
             preloadLangs();
             if (!StringUtils.isBlank(defaultConfig.getLanguage())) {