From 914ceaa948a550dbbd83aebbee89620ab4778def Mon Sep 17 00:00:00 2001 From: BartChris Date: Fri, 22 Mar 2024 16:47:41 +0100 Subject: [PATCH] allow quotes in imported csv and make mass import more resilient --- .../forms/massimport/MassImportForm.java | 10 +++-- .../services/data/MassImportService.java | 39 +++++++++---------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/Kitodo/src/main/java/org/kitodo/production/forms/massimport/MassImportForm.java b/Kitodo/src/main/java/org/kitodo/production/forms/massimport/MassImportForm.java index d7dbed06d75..91458f16fc9 100644 --- a/Kitodo/src/main/java/org/kitodo/production/forms/massimport/MassImportForm.java +++ b/Kitodo/src/main/java/org/kitodo/production/forms/massimport/MassImportForm.java @@ -57,6 +57,8 @@ public class MassImportForm extends BaseForm { private String previousCsvSeparator = null; private List metadataKeys = new LinkedList<>(Collections.singletonList("ID")); private List records = new LinkedList<>(); + private String importedCsvHeaderLine = ""; + private List importedCsvLines = new LinkedList<>(); private final List csvSeparatorCharacters = Arrays.asList(',', ';'); private final MassImportService massImportService = ServiceManager.getMassImportService(); private final AddMetadataDialog addMetadataDialog = new AddMetadataDialog(this); @@ -106,9 +108,11 @@ public void handleFileUpload(FileUploadEvent event) { List csvLines = massImportService.getLines(file); resetValues(); if (!csvLines.isEmpty()) { + importedCsvHeaderLine = csvLines.get(0); metadataKeys = new LinkedList<>(Arrays.asList(csvLines.get(0).split(csvSeparator, -1))); if (csvLines.size() > 1) { - records = massImportService.parseLines(csvLines.subList(1, csvLines.size()), csvSeparator); + importedCsvLines = csvLines.subList(1, csvLines.size()); + records = massImportService.parseLines(importedCsvLines, csvSeparator); } } } catch (IOException e) { @@ -126,8 +130,8 @@ private void resetValues() { * Event listender function 
called when user switches CSV separator character used to split text lines into cells. */ public void changeSeparator() { - metadataKeys = List.of(String.join(previousCsvSeparator, metadataKeys).split(csvSeparator)); - records = massImportService.updateSeparator(records, previousCsvSeparator, csvSeparator); + metadataKeys = new LinkedList<>(Arrays.asList(importedCsvHeaderLine.split(csvSeparator, -1))); + records = massImportService.parseLines(importedCsvLines, csvSeparator); } /** diff --git a/Kitodo/src/main/java/org/kitodo/production/services/data/MassImportService.java b/Kitodo/src/main/java/org/kitodo/production/services/data/MassImportService.java index 522fed00b9c..e0c804ffb75 100644 --- a/Kitodo/src/main/java/org/kitodo/production/services/data/MassImportService.java +++ b/Kitodo/src/main/java/org/kitodo/production/services/data/MassImportService.java @@ -73,25 +73,10 @@ public List getLines(UploadedFile file) throws IOException { } } - /** - * Creates and returns new list of records from given list of records by applying a different - * CSV separator character to the lines obtained by joining the CSV cell values of the existing - * CSV records with the previous CSV separator character. - * @param records CSV records created using previous CSV separator character - * @param oldSeparator previous CSV separator character - * @param newSeparator new CSV separator character - * @return list of CSV records using new CSV separator character - */ - public List updateSeparator(List records, String oldSeparator, String newSeparator) { - List lines = records.stream().map(record -> record.getCsvCells() - .stream().map(CsvCell::getValue) - .collect(Collectors.joining(oldSeparator))) - .collect(Collectors.toList()); - return parseLines(lines, newSeparator); - } - /** * Split provided lines by given 'separator'-String and return list of CsvRecord. + * Cells enclosed in single or double quotes may contain the separator (comma or semicolon), + * which allows CSV separators inside CSV cells; the quote characters themselves are dropped.
* @param lines lines to parse * @param separator String used to split lines into individual parts * @return list of CsvRecord @@ -100,10 +85,24 @@ public List parseLines(List lines, String separator) { List records = new LinkedList<>(); for (String line : lines) { List cells = new LinkedList<>(); - for (String value : line.split(separator, -1)) { - cells.add(new CsvCell(value)); + if (!Objects.isNull(line) && !line.isBlank()) { + StringBuilder currentCell = new StringBuilder(); + boolean inQuotes = false; + for (char c : line.toCharArray()) { + if ((c == '\"' || c == '\'') && !inQuotes) { + inQuotes = true; + } else if (c == '\"' || c == '\'') { + inQuotes = false; + } else if (String.valueOf(c).equals(separator) && !inQuotes) { + cells.add(new CsvCell(currentCell.toString())); + currentCell.setLength(0); // Reset currentCell + } else { + currentCell.append(c); + } + } + cells.add(new CsvCell(currentCell.toString())); + records.add(new CsvRecord(cells)); } - records.add(new CsvRecord(cells)); } return records; }