-
-
Notifications
You must be signed in to change notification settings - Fork 252
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed slowness issue caused by BomInput - happens when user doesn't provide a character encoding. #176
- Loading branch information
Showing
8 changed files
with
168 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,16 +23,16 @@ | |
* A default CharInputReader which only loads batches of characters when requested by the {@link AbstractCharInputReader} through the {@link DefaultCharInputReader#reloadBuffer} method. | ||
* | ||
* @author uniVocity Software Pty Ltd - <a href="mailto:[email protected]">[email protected]</a> | ||
* | ||
*/ | ||
public class DefaultCharInputReader extends AbstractCharInputReader { | ||
|
||
private Reader reader; | ||
|
||
/** | ||
* Creates a new instance with the mandatory characters for handling newlines transparently. Line separators will be detected automatically. | ||
* | ||
* @param normalizedLineSeparator the normalized newline character (as defined in {@link Format#getNormalizedNewline()}) that is used to replace any lineSeparator sequence found in the input. | ||
* @param bufferSize the buffer size used to store characters read from the input. | ||
* @param bufferSize the buffer size used to store characters read from the input. | ||
* @param whitespaceRangeStart starting range of characters considered to be whitespace. | ||
*/ | ||
public DefaultCharInputReader(char normalizedLineSeparator, int bufferSize, int whitespaceRangeStart) { | ||
|
@@ -42,9 +42,10 @@ public DefaultCharInputReader(char normalizedLineSeparator, int bufferSize, int | |
|
||
/** | ||
* Creates a new instance with the mandatory characters for handling newlines transparently. | ||
* @param lineSeparator the sequence of characters that represent a newline, as defined in {@link Format#getLineSeparator()} | ||
* | ||
* @param lineSeparator the sequence of characters that represent a newline, as defined in {@link Format#getLineSeparator()} | ||
* @param normalizedLineSeparator the normalized newline character (as defined in {@link Format#getNormalizedNewline()}) that is used to replace any lineSeparator sequence found in the input. | ||
* @param bufferSize the buffer size used to store characters read from the input. | ||
* @param bufferSize the buffer size used to store characters read from the input. | ||
* @param whitespaceRangeStart starting range of characters considered to be whitespace. | ||
*/ | ||
public DefaultCharInputReader(char[] lineSeparator, char normalizedLineSeparator, int bufferSize, int whitespaceRangeStart) { | ||
|
@@ -77,6 +78,9 @@ public void reloadBuffer() { | |
super.length = reader.read(buffer, 0, buffer.length); | ||
} catch (IOException e) { | ||
throw new IllegalStateException("Error reading from input", e); | ||
} catch (BomInput.BytesProcessedNotification notification) { | ||
stop(); | ||
unwrapInputStream(notification); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,18 +24,16 @@ | |
* A concurrent CharInputReader that loads batches of characters in a separate thread and assigns them to buffer in {@link AbstractCharInputReader} when requested. | ||
* | ||
* <p> This class loads "buckets" of characters in the background and provides them sequentially to the {@link ConcurrentCharInputReader#buffer} | ||
* attribute in {@link AbstractCharInputReader}. | ||
* attribute in {@link AbstractCharInputReader}. | ||
* <p> The bucket loading process will block and wait while all buckets are full. | ||
* <p> Similarly, the reader will block while all buckets are empty. | ||
* | ||
* This CharInputReader implementation provides a better throughput than {@link DefaultCharInputReader} when reading large inputs ({@code > 100 mb}). | ||
* | ||
* @author uniVocity Software Pty Ltd - <a href="mailto:[email protected]">[email protected]</a> | ||
* @see CharInputReader | ||
* @see ConcurrentCharLoader | ||
* @see CharBucket | ||
* | ||
* @author uniVocity Software Pty Ltd - <a href="mailto:[email protected]">[email protected]</a> | ||
* | ||
*/ | ||
public class ConcurrentCharInputReader extends AbstractCharInputReader { | ||
|
||
|
@@ -45,10 +43,11 @@ public class ConcurrentCharInputReader extends AbstractCharInputReader { | |
|
||
/** | ||
* Creates a new instance with the mandatory characters for handling newlines transparently. Line separators will be detected automatically. | ||
* | ||
* @param normalizedLineSeparator the normalized newline character (as defined in {@link Format#getNormalizedNewline()}) | ||
* that is used to replace any lineSeparator sequence found in the input. | ||
* @param bucketSize the size of each individual "bucket" used to store characters read from the input. | ||
* @param bucketQuantity the number of "buckets" to load in memory. Note the reader will stop if all buckets are full. | ||
* that is used to replace any lineSeparator sequence found in the input. | ||
* @param bucketSize the size of each individual "bucket" used to store characters read from the input. | ||
* @param bucketQuantity the number of "buckets" to load in memory. Note the reader will stop if all buckets are full. | ||
* @param whitespaceRangeStart starting range of characters considered to be whitespace. | ||
*/ | ||
public ConcurrentCharInputReader(char normalizedLineSeparator, int bucketSize, int bucketQuantity, int whitespaceRangeStart) { | ||
|
@@ -59,11 +58,12 @@ public ConcurrentCharInputReader(char normalizedLineSeparator, int bucketSize, i | |
|
||
/** | ||
* Creates a new instance with the mandatory characters for handling newlines transparently. | ||
* @param lineSeparator the sequence of characters that represent a newline, as defined in {@link Format#getLineSeparator()} | ||
* | ||
* @param lineSeparator the sequence of characters that represent a newline, as defined in {@link Format#getLineSeparator()} | ||
* @param normalizedLineSeparator the normalized newline character (as defined in {@link Format#getNormalizedNewline()}) | ||
* that is used to replace any lineSeparator sequence found in the input. | ||
* @param bucketSize the size of each individual "bucket" used to store characters read from the input. | ||
* @param bucketQuantity the number of "buckets" to load in memory. Note the reader will stop if all buckets are full. | ||
* that is used to replace any lineSeparator sequence found in the input. | ||
* @param bucketSize the size of each individual "bucket" used to store characters read from the input. | ||
* @param bucketQuantity the number of "buckets" to load in memory. Note the reader will stop if all buckets are full. | ||
* @param whitespaceRangeStart starting range of characters considered to be whitespace. | ||
*/ | ||
public ConcurrentCharInputReader(char[] lineSeparator, char normalizedLineSeparator, int bucketSize, int bucketQuantity, int whitespaceRangeStart) { | ||
|
@@ -81,6 +81,12 @@ public void stop() { | |
if (bucketLoader != null) { | ||
bucketLoader.stopReading(); | ||
bucketLoader.reportError(); | ||
|
||
if(bucketLoader.notification != null){ | ||
BomInput.BytesProcessedNotification notification = bucketLoader.notification; | ||
bucketLoader = null; | ||
unwrapInputStream(notification); | ||
} | ||
} | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,6 +80,7 @@ public void readWithBom(boolean extractFromBom, String encoding, byte[] prepend) | |
parserSettings.setLineSeparatorDetectionEnabled(true); | ||
parserSettings.setHeaderExtractionEnabled(true); | ||
parserSettings.setSkipEmptyLines(false); | ||
parserSettings.setReadInputOnSeparateThread(false); | ||
|
||
final CsvParser parser = new CsvParser(parserSettings); | ||
|
||
|
@@ -94,9 +95,19 @@ public void readWithBom(boolean extractFromBom, String encoding, byte[] prepend) | |
|
||
bytes = newBytes; | ||
} | ||
parser.parse(new ByteArrayInputStream(bytes), encoding); | ||
final List<User> actual = rowProcessor.getBeans(); | ||
|
||
assertEquals(actual.get(0).email, "[email protected]"); | ||
parser.beginParsing(new ByteArrayInputStream(bytes), encoding); | ||
String[] row = parser.parseNext(); | ||
parser.stopParsing(); | ||
|
||
if(prepend != null && prepend[prepend.length -1] == ' '){ | ||
assertEquals(parser.getContext().headers()[0], " Email"); | ||
assertEquals(row[0], "[email protected]"); | ||
|
||
} else { | ||
assertEquals(parser.getContext().headers()[0], "Email"); | ||
assertEquals(row[0], "[email protected]"); | ||
final List<User> actual = rowProcessor.getBeans(); | ||
assertEquals(actual.get(0).email, "[email protected]"); | ||
} | ||
} | ||
} |
Oops, something went wrong.