Skip to content

Commit

Permalink
fixed github #305 error determining column separator when input CSV h…
Browse files Browse the repository at this point in the history
…as comments.
  • Loading branch information
jbax committed Jan 22, 2019
1 parent 147f906 commit 154b2be
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/main/java/com/univocity/parsers/csv/CsvFormatDetector.java
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ private static void increment(Map<Character, Integer> map, char symbol, int incr
*
* @return the character with the lowest number associated.
*/
private static char min(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar) {
private char min(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar) {
return getChar(map, totals, defaultChar, true);
}

Expand All @@ -283,7 +283,7 @@ private static char min(Map<Character, Integer> map, Map<Character, Integer> tot
*
* @return the character with the highest number associated.
*/
private static char max(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar) {
private char max(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar) {
return getChar(map, totals, defaultChar, false);
}

Expand All @@ -297,7 +297,7 @@ private static char max(Map<Character, Integer> map, Map<Character, Integer> tot
*
* @return the character with the highest/lowest number associated.
*/
private static char getChar(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar, boolean min) {
private char getChar(Map<Character, Integer> map, Map<Character, Integer> totals, char defaultChar, boolean min) {
int val = min ? Integer.MAX_VALUE : Integer.MIN_VALUE;
for (Entry<Character, Integer> e : map.entrySet()) {
int sum = e.getValue();
Expand All @@ -324,8 +324,8 @@ private static char getChar(Map<Character, Integer> map, Map<Character, Integer>
return defaultChar;
}

private static boolean isSymbol(char ch) {
return !Character.isLetterOrDigit(ch) && (ch == '\t' || ch > ' ');
private boolean isSymbol(char ch) {
return ch != comment && !Character.isLetterOrDigit(ch) && (ch == '\t' || ch > ' ');
}

/**
Expand Down
48 changes: 48 additions & 0 deletions src/test/java/com/univocity/parsers/issues/github/Github_305.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2019 Univocity Software Pty Ltd. All rights reserved.
* This file is subject to the terms and conditions defined in file
* 'LICENSE.txt', which is part of this source code package.
*/
package com.univocity.parsers.issues.github;


import com.univocity.parsers.csv.*;
import org.testng.annotations.*;

import java.io.*;

import static org.testng.Assert.*;

/**
* From: https://github.com/univocity/univocity-parsers/issues/305
*
* @author Univocity Software Pty Ltd - <a href="mailto:[email protected]">[email protected]</a>
*/
public class Github_305 {

	/**
	 * Sample input: two leading lines starting with '#', followed by a header row
	 * and three data rows. Every non-'#' line has five ';'-separated values and
	 * every line ends with CR+LF.
	 */
	private static final String INPUT = "#created at 2019-01-22T11:39:43.312Z\r\n" +
			"#CSV export\r\n" +
			"Timestamp;Value;Metric;Entity;host\r\n" +
			"2019-01-21T11:39:53.763Z;160527072;jvm_memory_used;dev;LOCALHOST\r\n" +
			"2019-01-21T11:40:08.765Z;1.6270228E+8;jvm_memory_used;dev;LOCALHOST\r\n" +
			"2019-01-21T11:40:23.765Z;454336496;jvm_memory_used;dev;LOCALHOST\r\n";

	/**
	 * Builds parser settings with quote, line separator and delimiter detection
	 * switched on, reading the input on the calling thread.
	 *
	 * @return the settings used by the test below.
	 */
	private static CsvParserSettings detectionSettings() {
		final CsvParserSettings config = new CsvParserSettings();
		config.setReadInputOnSeparateThread(false);
		config.setQuoteDetectionEnabled(true);
		config.setLineSeparatorDetectionEnabled(true);
		// Candidate delimiters offered to the detection process.
		config.setDelimiterDetectionEnabled(true, ',', ';', '\t', '|', ' ');
		return config;
	}

	/**
	 * Verifies that ';' is reported as the detected delimiter for {@link #INPUT},
	 * and that every row returned by the parser has exactly five columns.
	 */
	@Test
	public void testSeparatorDetectionCrLfAndMultilineComment() {
		final CsvParser parser = new CsvParser(detectionSettings());

		// Format detection happens once parsing has begun, so start before querying it.
		parser.beginParsing(new StringReader(INPUT));

		final String detectedDelimiter = parser.getDetectedFormat().getDelimiterString();
		assertEquals(detectedDelimiter, ";");

		// Consume the remaining rows and check the width of each one.
		for (String[] row : parser.parseAll()) {
			final int columnCount = row.length;
			assertEquals(columnCount, 5);
		}
	}

}

0 comments on commit 154b2be

Please sign in to comment.