From 70b2e71124c24f2076d6175a93960a88ea4c0b8f Mon Sep 17 00:00:00 2001 From: Jeronimo Backes Date: Sat, 15 Aug 2020 16:45:47 +0930 Subject: [PATCH] Fixed error in auto-detection of CSV where quote escapes were detected as line endings. --- .../parsers/csv/CsvFormatDetector.java | 2 +- .../parsers/issues/github/Github_409.java | 81 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/univocity/parsers/issues/github/Github_409.java diff --git a/src/main/java/com/univocity/parsers/csv/CsvFormatDetector.java b/src/main/java/com/univocity/parsers/csv/CsvFormatDetector.java index dbdc7043..5e395bca 100644 --- a/src/main/java/com/univocity/parsers/csv/CsvFormatDetector.java +++ b/src/main/java/com/univocity/parsers/csv/CsvFormatDetector.java @@ -128,7 +128,7 @@ public void execute(char[] characters, int length) { if (Character.isLetterOrDigit(next) || (next <= ' ' && whitespaceRangeStart < next && next != '\n' && next != '\r')) { //no special characters after quote, might be escaping //special character before (potentially) closing quote, might be an escape char prev = characters[i - 1]; - if (!Character.isLetterOrDigit(prev)) { + if (!Character.isLetterOrDigit(prev) && prev != '\n' && prev != '\r') { increment(escape, prev); } } diff --git a/src/test/java/com/univocity/parsers/issues/github/Github_409.java b/src/test/java/com/univocity/parsers/issues/github/Github_409.java new file mode 100644 index 00000000..511ff76a --- /dev/null +++ b/src/test/java/com/univocity/parsers/issues/github/Github_409.java @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright 2020 Univocity Software Pty Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package com.univocity.parsers.issues.github;
+
+import com.univocity.parsers.common.*;
+import com.univocity.parsers.common.processor.*;
+import com.univocity.parsers.csv.*;
+import org.testng.annotations.*;
+
+import java.io.*;
+import java.util.*;
+
+import static org.testng.Assert.*;
+
+/**
+ * From: https://github.com/univocity/univocity-parsers/issues/409
+ * Regression test: a line ending directly before a quote must not be detected as the quote escape character.
+ *
+ * @author Univocity Software Pty Ltd - dev@univocity.com
+ */
+public class Github_409 {
+	// NOTE(review): the method name looks copy-pasted - nothing below involves fixed-width parsing.
+	/** Auto-detects the format of tab-separated input with multi-line quoted values, then checks the quote escape and first row. */
+	@Test
+	public void testPaddingOnFixedWidth() {
+		String rawData = "A\tB\tC.\t\"G\n" +
+				"I\n" +
+				"\"\t\"J\n" +
+				"M\"\n" +
+				"\n";
+
+		final List<String[]> rows = new ArrayList<String[]>();
+		final CsvParserSettings settings = new CsvParserSettings();
+		settings.detectFormatAutomatically('\t');
+		settings.setIgnoreLeadingWhitespaces(false);
+		settings.setIgnoreTrailingWhitespaces(false);
+		settings.setSkipEmptyLines(false);
+
+		settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.RAISE_ERROR);
+
+		// Otherwise empty lines become null values and cause errors.
+		settings.setNullValue("");
+
+		settings.setProcessor(new AbstractRowProcessor() {
+			@Override
+			public void rowProcessed(final String[] row, final ParsingContext context) {
+				if (row != null) {
+					rows.add(row);
+				}
+			}
+		});
+
+		final CsvParser parser = new CsvParser(settings);
+		// First pass: begin parsing only to inspect the auto-detected format, then stop.
+		parser.beginParsing(new StringReader(rawData));
+		assertEquals(parser.getDetectedFormat().getQuoteEscape(), '\"');
+		parser.stopParsing();
+		// Second pass: parse the whole input so the processor collects the rows.
+		parser.parse(new StringReader(rawData));
+
+		String[] row = rows.get(0);
+		assertEquals(row[0], "A");
+		assertEquals(row[1], "B");
+		assertEquals(row[2], "C.");
+		assertEquals(row[3], "G\nI\n");
+		assertEquals(row[4], "J\nM");
+	}
+}