Skip to content

Commit

Permalink
Implemented enhancement to allow configuring the number of sample row…
Browse files Browse the repository at this point in the history
…s to use when trying to detect the CSV format (#408)
  • Loading branch information
jbax committed Aug 13, 2020
1 parent 952da13 commit c76103c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/main/java/com/univocity/parsers/csv/CsvParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public final class CsvParser extends AbstractParser<CsvParserSettings> {
private final boolean trimQuotedTrailing;
private char[] delimiters;
private int match = 0;
private int formatDetectorRowSampleCount;

/**
* The CsvParser supports all settings provided by {@link CsvParserSettings}, and requires this configuration to be properly initialized.
Expand All @@ -78,7 +79,7 @@ public CsvParser(CsvParserSettings settings) {
maxColumnLength = settings.getMaxCharsPerColumn();
trimQuotedTrailing = settings.getIgnoreTrailingWhitespacesInQuotes();
trimQuotedLeading = settings.getIgnoreLeadingWhitespacesInQuotes();

formatDetectorRowSampleCount = settings.getFormatDetectorRowSampleCount();
updateFormat(settings.getFormat());

whitespaceAppender = new ExpandingCharAppender(10, "", whitespaceRangeStart);
Expand Down Expand Up @@ -463,7 +464,7 @@ private void parseQuotedValue() {
@Override
protected final InputAnalysisProcess getInputAnalysisProcess() {
if (settings.isDelimiterDetectionEnabled() || settings.isQuoteDetectionEnabled()) {
return new CsvFormatDetector(20, settings, whitespaceRangeStart) {
return new CsvFormatDetector(formatDetectorRowSampleCount, settings, whitespaceRangeStart) {
@Override
void apply(char delimiter, char quote, char quoteEscape) {
if (settings.isDelimiterDetectionEnabled()) {
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/com/univocity/parsers/csv/CsvParserSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class CsvParserSettings extends CommonParserSettings<CsvFormat> {
private boolean quoteDetectionEnabled = false;
private UnescapedQuoteHandling unescapedQuoteHandling = null;
private char[] delimitersForDetection = null;
private int formatDetectorRowSampleCount = 20;

/**
* Returns the String representation of an empty value (defaults to null)
Expand Down Expand Up @@ -487,4 +488,22 @@ public final void trimQuotedValues(boolean trim) {
setIgnoreTrailingWhitespacesInQuotes(trim);
setIgnoreLeadingWhitespacesInQuotes(trim);
}

/**
 * Tells how many sample rows the CSV format auto-detection process will use.
 * Unless changed through {@link #setFormatDetectorRowSampleCount(int)}, the value is 20.
 *
 * @return the configured number of sample rows for CSV format auto-detection
 */
public int getFormatDetectorRowSampleCount() {
    return this.formatDetectorRowSampleCount;
}

/**
 * Defines how many sample rows the CSV format auto-detection process will use (defaults to 20).
 * A value of zero or less is not stored as given; the setting falls back to 20 instead.
 *
 * @param formatDetectorRowSampleCount the number of sample rows to use in the CSV format
 *                                     auto-detection process; non-positive values select 20
 */
public void setFormatDetectorRowSampleCount(int formatDetectorRowSampleCount) {
    if (formatDetectorRowSampleCount > 0) {
        this.formatDetectorRowSampleCount = formatDetectorRowSampleCount;
    } else {
        // Non-positive sample counts are replaced with the default of 20 rows.
        this.formatDetectorRowSampleCount = 20;
    }
}
}

0 comments on commit c76103c

Please sign in to comment.