From c76103c292fd671acc73a5c188226c84423c13a0 Mon Sep 17 00:00:00 2001 From: Jeronimo Backes Date: Fri, 14 Aug 2020 08:05:52 +0930 Subject: [PATCH] Implemented enhancement to allow configuring the number of sample rows to use when trying to detect the CSV format (#408) --- .../com/univocity/parsers/csv/CsvParser.java | 5 +++-- .../parsers/csv/CsvParserSettings.java | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/univocity/parsers/csv/CsvParser.java b/src/main/java/com/univocity/parsers/csv/CsvParser.java index 3c1b8b81..1fea4198 100755 --- a/src/main/java/com/univocity/parsers/csv/CsvParser.java +++ b/src/main/java/com/univocity/parsers/csv/CsvParser.java @@ -59,6 +59,7 @@ public final class CsvParser extends AbstractParser { private final boolean trimQuotedTrailing; private char[] delimiters; private int match = 0; + private int formatDetectorRowSampleCount; /** * The CsvParser supports all settings provided by {@link CsvParserSettings}, and requires this configuration to be properly initialized. 
@@ -78,7 +79,7 @@ public CsvParser(CsvParserSettings settings) { maxColumnLength = settings.getMaxCharsPerColumn(); trimQuotedTrailing = settings.getIgnoreTrailingWhitespacesInQuotes(); trimQuotedLeading = settings.getIgnoreLeadingWhitespacesInQuotes(); - + formatDetectorRowSampleCount = settings.getFormatDetectorRowSampleCount(); updateFormat(settings.getFormat()); whitespaceAppender = new ExpandingCharAppender(10, "", whitespaceRangeStart); @@ -463,7 +464,7 @@ private void parseQuotedValue() { @Override protected final InputAnalysisProcess getInputAnalysisProcess() { if (settings.isDelimiterDetectionEnabled() || settings.isQuoteDetectionEnabled()) { - return new CsvFormatDetector(20, settings, whitespaceRangeStart) { + return new CsvFormatDetector(formatDetectorRowSampleCount, settings, whitespaceRangeStart) { @Override void apply(char delimiter, char quote, char quoteEscape) { if (settings.isDelimiterDetectionEnabled()) { diff --git a/src/main/java/com/univocity/parsers/csv/CsvParserSettings.java b/src/main/java/com/univocity/parsers/csv/CsvParserSettings.java index e3a7e5fc..0063e87c 100755 --- a/src/main/java/com/univocity/parsers/csv/CsvParserSettings.java +++ b/src/main/java/com/univocity/parsers/csv/CsvParserSettings.java @@ -52,6 +52,7 @@ public class CsvParserSettings extends CommonParserSettings { private boolean quoteDetectionEnabled = false; private UnescapedQuoteHandling unescapedQuoteHandling = null; private char[] delimitersForDetection = null; + private int formatDetectorRowSampleCount = 20; /** * Returns the String representation of an empty value (defaults to null) @@ -487,4 +488,22 @@ public final void trimQuotedValues(boolean trim) { setIgnoreTrailingWhitespacesInQuotes(trim); setIgnoreLeadingWhitespacesInQuotes(trim); } + + /** + * Returns the number of sample rows used in the CSV format auto-detection process (defaults to 20) + * + * @return the number of sample rows used in the CSV format auto-detection process + */ + public int 
getFormatDetectorRowSampleCount() { + return formatDetectorRowSampleCount; + } + + /** + * Updates the number of sample rows used in the CSV format auto-detection process. Zero or negative values are replaced with the default of 20. + * + * @param formatDetectorRowSampleCount the number of sample rows used in the CSV format auto-detection process; values less than or equal to zero select the default of 20 + */ + public void setFormatDetectorRowSampleCount(int formatDetectorRowSampleCount) { + this.formatDetectorRowSampleCount = formatDetectorRowSampleCount <= 0 ? 20 : formatDetectorRowSampleCount; + } }