diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java index 14bb07b1d7..a79e942e87 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java @@ -123,6 +123,9 @@ void extract(InputStream xfaIs, XHTMLContentHandler xhtml, Metadata m, ParseCont (field.toolTip == null || field.toolTip.trim().length() == 0) ? fieldName : field.toolTip; String[] fieldValues = pdfObjRToValues.getValues(fieldName); + if (fieldValues.length == 0) { + fieldValues = new String[]{""}; + } for (String fieldValue : fieldValues) { AttributesImpl attrs = new AttributesImpl(); attrs.addAttribute("", "fieldName", "fieldName", "CDATA", fieldName); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 6e9167f375..6eb0b4a0ae 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -986,7 +986,7 @@ public void testXFAExtractionBasic() throws Exception { while (matcher.find()) { listItems++; } - assertEquals(24, listItems); + assertEquals(27, listItems); } @Test