/** * Parses a CSV header line and creates the corresponding resulting schema in the section (i.e.: a * set of fields with positions) */ protected List<Field> parseHeaders(List<String[]> csvLines) { Section section = sectionInstance.getSection(); String sectionId = section.getId(); String headers[] = csvLines.get(0); log.trace("Headers for section " + sectionId + ": " + Arrays.toString(headers)); int nlines = csvLines.size(); for (int j = 0; j < headers.length; j++) { String header = StringUtils.trimToNull(headers[j]); if (header == null) { // Empty header, let's remove all! // log.warn(i18n.msg("empty_column", j)); for (int i = 0; i < nlines; i++) { String[] line = csvLines.get(i); if (j == line.length) // This happens sometime: the header has one extra-tab and the lines are OK // we need to recraft the header only in that case { continue; } csvLines.set(i, (String[]) ArrayUtils.remove(line, j)); } headers = csvLines.get(0); j--; // Restart from this point, which is now the next column continue; } log.trace("Working on header " + j + ":" + header); Field field = section.getFieldByHeader(header, false); if (field == null) { throw new TabValidationException( i18n.msg("unexpected_field_in_section_error", header, sectionId)); } // Let's add a new real field on the basis of the header. The new field created gets its id // from the // original one in the schema, so it will have the "canonical" form, independently on what we // found // on the input (e.g.: upper case). // field = field.parseHeader(header, j, false); sectionInstance.addField(field); } return sectionInstance.getFields(); }
public SectionInstance parseCsvLines( List<String[]> csvLines, int fromRow, int toRow, int fromCol, int toCol) { Section section = sectionInstance.getSection(); String sectionId = section.getId(); String fileId = sectionInstance.getParent().getFileId(); log.trace( String.format( "___ Parsing 'lines' of Section: %s ('%s', %d, %d, %d, %d) ___\n", sectionId, fileId, fromRow, toRow, fromCol, toCol)); if (csvLines == null || csvLines.size() == 0) { log.trace("WARNING, The CSV lines are empty, section: " + sectionId); return sectionInstance; } if (toCol == -1) { toCol = csvLines.get(0).length - 1; } if (fromRow == 0) { fromRow = 1; } if (toRow - fromRow < 0 || toCol - fromCol < 0) { log.trace("WARNING, The specified window is empty, section " + sectionId); return sectionInstance; } List<Field> fields = parseHeaders(csvLines); toCol = Math.min(toCol, fields.size() - 1); // Let's go over the lines // for (int i = fromRow; i <= toRow; i++) { String[] line = csvLines.get(i); if (line == null) { log.debug("Line #" + i + " is null, skipping"); continue; } // Let's go along the headers // Record record = new Record(sectionInstance); for (int icol = fromCol; icol <= toCol; icol++) { // Set up the current record's value // Field field = sectionInstance.getField(icol); if (field == null) { throw new TabInternalErrorException( i18n.msg("generic_field_error", icol, section.getId(), i)); } if (icol >= line.length) { log.debug("The (logical) line #" + i + " is too short, ignoring all from col #" + icol); break; } String value = line[icol]; record.set(icol, value); } // Let's add the record, provided that it is not empty if (record.isNull()) { log.trace("Parsing records for section " + sectionId + ": empty record"); } else { if (log.isTraceEnabled()) { log.trace("adding the record:" + record + " to the section " + sectionId); } sectionInstance.addRecord(record); } } // loop on the lines log.trace("___ /end of parsing section " + sectionId + " ___"); return sectionInstance; }