@Test public void testWithHeader() throws Exception { final String[] header = new String[] {"one", "two", "three"}; // withHeader() makes a copy of the header array. final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); assertArrayEquals(header, formatWithHeader.getHeader()); assertNotSame(header, formatWithHeader.getHeader()); }
@Test public void testGetHeader() throws Exception { final String[] header = new String[] {"one", "two", "three"}; final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); // getHeader() makes a copy of the header array. final String[] headerCopy = formatWithHeader.getHeader(); headerCopy[0] = "A"; headerCopy[1] = "B"; headerCopy[2] = "C"; assertFalse(Arrays.equals(formatWithHeader.getHeader(), headerCopy)); assertNotSame(formatWithHeader.getHeader(), headerCopy); }
/**
 * Maps the single CSV line {@code foo,bar,15,true}, parsed with the header
 * {@code firstName,lastName,age,married}, to a {@code Foo} and verifies each mapped property.
 */
@Test
public void testApacheCommonCsvMapping() throws Exception {
    CSVFormat headerFormat = CSVFormat.DEFAULT.withHeader("firstName", "lastName", "age", "married");
    StringReader csvLine = new StringReader("foo,bar,15,true");

    Foo mapped = mapper.mapRecord(getApacheCommonCsvRecord(csvLine, headerFormat));

    assertThat(mapped).isNotNull();
    assertThat(mapped.getFirstName()).isEqualTo("foo");
    assertThat(mapped.getLastName()).isEqualTo("bar");
    assertThat(mapped.getAge()).isEqualTo(15);
    assertThat(mapped.isMarried()).isTrue();
}
/**
 * Reads a delimited text file into a matrix of doubles.
 *
 * <p>The file is decoded as UTF-8 and parsed with {@code CSVFormat.DEFAULT.withHeader()} using the
 * given delimiter. In Apache Commons CSV an empty {@code withHeader()} means the first record is
 * consumed as the header, so it does not appear in the result (NOTE(review): confirm against the
 * library version in use). The matrix has one row per parsed record and as many columns as the
 * first record.
 *
 * @param filename path of the file to read
 * @param delimiter character separating the values of a record
 * @return the parsed values; a matrix with zero rows and zero columns when the file has no records
 *     (assumes {@code DoubleMatrix} accepts zero dimensions - TODO confirm)
 * @throws IOException if the file cannot be opened or read
 * @throws NumberFormatException if a value cannot be parsed as a double
 */
public static DoubleMatrix readFileIntoMatrix(String filename, char delimiter) throws IOException {
    File csvData = new File(filename);
    CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter);

    // try-with-resources so the parser (and the file reader it opened) is released even when
    // parsing or number conversion fails.
    try (CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, format)) {
        List<CSVRecord> records = parser.getRecords();

        // The first record decides the column count; guard against a file without records
        // instead of failing on records.get(0).
        int columnCount = records.isEmpty() ? 0 : records.get(0).size();
        DoubleMatrix x = new DoubleMatrix(records.size(), columnCount);

        int row = 0;
        for (CSVRecord csvRecord : records) {
            for (int column = 0; column < csvRecord.size(); column++) {
                x.put(row, column, Double.parseDouble(csvRecord.get(column)));
            }
            row++;
        }
        return x;
    }
}
/**
 * Expects {@code withHeader(String...)} to throw {@link IllegalArgumentException} when the same
 * column name is supplied twice.
 */
@Test(expected = IllegalArgumentException.class)
public void testDuplicateHeaderElements() {
    final String repeatedName = "A";
    CSVFormat.DEFAULT.withHeader(repeatedName, repeatedName);
}
/**
 * Checks that building a format from an enum class (here {@code EmptyEnum}) yields a header of
 * length zero.
 */
@Test
public void testWithEmptyEnum() throws Exception {
    final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class);
    // assertEquals (rather than assertTrue on a comparison) reports the actual length on failure.
    Assert.assertEquals(0, formatWithHeader.getHeader().length);
}
/**
 * Checks that building a format from the {@code Header} enum class produces the header
 * {@code Name, Email, Phone}.
 */
@Test
public void testWithHeaderEnum() throws Exception {
    final CSVFormat enumFormat = CSVFormat.DEFAULT.withHeader(Header.class);
    final String[] expectedNames = {"Name", "Email", "Phone"};
    assertArrayEquals(expectedNames, enumFormat.getHeader());
}
/** * Parse the GTFS file. Reads in the header info and then each line. Calls the abstract * handleRecord() method for each record. Adds each resulting GTFS object to the _gtfsObjecgts * array. */ private void parse() { CSVRecord record = null; try { IntervalTimer timer = new IntervalTimer(); logger.debug("Parsing CSV file {} ...", fileName); // Open the file for reading. Use UTF-8 format since that will work // for both regular ASCII format and UTF-8 extended format files // since UTF-8 was designed to be backwards compatible with ASCII. // This way will work for Chinese and other character sets. Use // InputStreamReader so can specify that using UTF-8 format. Use // BufferedReader so that can determine if first character is an // optional BOM (Byte Order Mark) character used to indicate that // file is in UTF-8 format. BufferedReader allows us to read in // first character and then discard if it is a BOM character or // reset the reader to back to the beginning if it is not. This // way the CSV parser will process the file starting with the first // true character. Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); // Deal with the possible BOM character at the beginning of the file in.mark(1); int firstRead = in.read(); final int BOM_CHARACTER = 0xFEFF; if (firstRead != BOM_CHARACTER) in.reset(); // Get ready to parse the CSV file. // Allow lines to be comments if they start with "-" so that can // easily comment out problems and also test what happens when // certain data is missing. Using the '-' character so can // comment out line that starts with "--", which is what is // used for SQL. CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentStart('-'); // Parse the file Iterable<CSVRecord> records = formatter.parse(in); logger.debug("Finished CSV parsing of file {}. 
Took {} msec.", fileName, timer.elapsedMsec()); int lineNumberWhenLogged = 0; timer = new IntervalTimer(); IntervalTimer loggingTimer = new IntervalTimer(); Iterator<CSVRecord> iterator = records.iterator(); while (iterator.hasNext()) { // Determine the record to process record = iterator.next(); // Process the record using appropriate handler // and create the corresponding GTFS object T gtfsObject; try { gtfsObject = handleRecord(record, supplemental); } catch (ParseException e) { logger.error( "ParseException occurred on line {} for filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } catch (NumberFormatException e) { logger.error( "NumberFormatException occurred on line {} for filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } // Add the newly created GTFS object to the object list gtfsObjects.add(gtfsObject); // Log info if it has been a while. Check only every 20,000 lines // to see if the 10 seconds has gone by. If so, then log number // of lines. By only looking at timer every 20,000 lines not slowing // things down by for every line doing system call for to get current time. final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000; final long SECONDS_ELSAPSED_UNTIL_SHOULD_LOG = 5; if (record.getRecordNumber() >= lineNumberWhenLogged + LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG) { lineNumberWhenLogged = (int) record.getRecordNumber(); if (loggingTimer.elapsedMsec() > SECONDS_ELSAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) { logger.info( " Processed {} lines. Took {} msec...", lineNumberWhenLogged, timer.elapsedMsec()); loggingTimer = new IntervalTimer(); } } } // End of while iterating over records // Close up the file reader in.close(); logger.debug("Finished parsing file {} . 
Took {} msec.", fileName, timer.elapsedMsec()); } catch (FileNotFoundException e) { if (required) logger.error("Required GTFS file {} not found.", fileName); else logger.info("GTFS file {} not found but OK because this file not required.", fileName); } catch (IOException e) { logger.error("IOException occurred when reading in filename {}.", fileName, e); } }