Example #1
0
 /** Checks that the header reported by the format equals the supplied array but is not that array. */
 @Test
 public void testWithHeader() throws Exception {
   final String[] supplied = new String[] {"one", "two", "three"};
   final CSVFormat format = CSVFormat.DEFAULT.withHeader(supplied);

   // The contents must match the names that were passed in ...
   assertArrayEquals(supplied, format.getHeader());
   // ... yet the array handed back must be a different instance than the caller's array.
   assertNotSame(supplied, format.getHeader());
 }
Example #2
0
 /** Checks that writing into the array returned by getHeader() leaves the format's header intact. */
 @Test
 public void testGetHeader() throws Exception {
   final CSVFormat format = CSVFormat.DEFAULT.withHeader(new String[] {"one", "two", "three"});

   // Overwrite every element of the array that getHeader() handed out.
   final String[] returned = format.getHeader();
   final String[] replacements = {"A", "B", "C"};
   System.arraycopy(replacements, 0, returned, 0, replacements.length);

   // A fresh getHeader() call must neither equal nor be the modified array.
   assertFalse(Arrays.equals(format.getHeader(), returned));
   assertNotSame(format.getHeader(), returned);
 }
  /** Maps one four-column CSV line onto a Foo and checks each mapped property. */
  @Test
  public void testApacheCommonCsvMapping() throws Exception {
    // Column names line up, in order, with the values of the single input line.
    CSVFormat format = CSVFormat.DEFAULT.withHeader("firstName", "lastName", "age", "married");
    ApacheCommonCsvRecord csvRecord =
        getApacheCommonCsvRecord(new StringReader("foo,bar,15,true"), format);

    Foo mapped = mapper.mapRecord(csvRecord);

    assertThat(mapped).isNotNull();
    assertThat(mapped.getFirstName()).isEqualTo("foo");
    assertThat(mapped.getLastName()).isEqualTo("bar");
    assertThat(mapped.getAge()).isEqualTo(15);
    assertThat(mapped.isMarried()).isTrue();
  }
Example #4
0
 /**
  * Reads a delimited text file into a matrix of doubles, one matrix row per CSV record.
  *
  * <p>The file is decoded as UTF-8. The format is built with an argument-less
  * {@code withHeader()}, which in Commons CSV means the first record of the file supplies the
  * column names and is not returned as data (NOTE(review): confirm against the Commons CSV
  * version in use). The matrix width is taken from the first data record.
  *
  * @param filename path of the file to read
  * @param delimiter character separating the values within a record
  * @return a matrix with as many rows as there are data records
  * @throws IOException if the file cannot be opened or read
  * @throws IndexOutOfBoundsException if the file contains no data records
  * @throws NumberFormatException if a value cannot be parsed as a double
  */
 public static DoubleMatrix readFileIntoMatrix(String filename, char delimiter)
     throws IOException {
   CSVFormat format = CSVFormat.DEFAULT.withHeader().withDelimiter(delimiter);

   // The parser keeps the underlying file open; close it as soon as all records are loaded
   // so the file handle is released even if reading fails part-way through.
   final List<CSVRecord> records;
   try (CSVParser parser = CSVParser.parse(new File(filename), StandardCharsets.UTF_8, format)) {
     records = parser.getRecords();
   }

   DoubleMatrix x = new DoubleMatrix(records.size(), records.get(0).size());
   int row = 0;
   for (CSVRecord csvRecord : records) {
     for (int column = 0; column < csvRecord.size(); column++) {
       // parseDouble yields the primitive directly, avoiding a throw-away Double box.
       x.put(row, column, Double.parseDouble(csvRecord.get(column)));
     }
     row++;
   }
   return x;
 }
Example #5
0
 /** Expects withHeader() to reject a header in which the same column name occurs twice. */
 @Test(expected = IllegalArgumentException.class)
 public void testDuplicateHeaderElements() {
   final String repeatedName = "A";
   CSVFormat.DEFAULT.withHeader(repeatedName, repeatedName);
 }
Example #6
0
 /** Checks that a header built from the EmptyEnum class is a zero-length array. */
 @Test
 public void testWithEmptyEnum() throws Exception {
   final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class);
   // assertEquals reports the actual length on failure, which assertTrue(length == 0) cannot.
   Assert.assertEquals(0, formatWithHeader.getHeader().length);
 }
Example #7
0
 /** Checks that a header built from the Header enum is "Name", "Email", "Phone", in that order. */
 @Test
 public void testWithHeaderEnum() throws Exception {
   final String[] expectedNames = {"Name", "Email", "Phone"};
   final String[] actualNames = CSVFormat.DEFAULT.withHeader(Header.class).getHeader();
   assertArrayEquals(expectedNames, actualNames);
 }
Example #8
0
  /**
   * Parses the GTFS file. Reads in the header info and then each line. Calls the abstract
   * handleRecord() method for each record and adds each resulting GTFS object to the gtfsObjects
   * collection.
   *
   * <p>A record for which handleRecord() throws a ParseException or NumberFormatException is
   * logged and skipped so that all such errors in the file are reported in a single run. A missing
   * file is logged as an error only when the file is required; any other IOException is logged as
   * an error. The reader is always closed, including when parsing fails part-way through.
   */
  private void parse() {
    // Only look at the logging timer every this many records so that the current time
    // does not have to be fetched for every single record.
    final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000;
    // Minimum time between two progress log messages.
    final long SECONDS_ELAPSED_UNTIL_SHOULD_LOG = 5;
    final int BOM_CHARACTER = 0xFEFF;

    IntervalTimer timer = new IntervalTimer();
    logger.debug("Parsing CSV file {} ...", fileName);

    // Open the file for reading. Use UTF-8 format since that will work
    // for both regular ASCII format and UTF-8 extended format files
    // since UTF-8 was designed to be backwards compatible with ASCII.
    // This way will work for Chinese and other character sets. Use
    // InputStreamReader so can specify that using UTF-8 format. Use
    // BufferedReader so that can determine if first character is an
    // optional BOM (Byte Order Mark) character used to indicate that
    // file is in UTF-8 format. BufferedReader allows us to read in
    // first character and then discard if it is a BOM character or
    // reset the reader back to the beginning if it is not. This
    // way the CSV parser will process the file starting with the first
    // true character.
    //
    // try-with-resources guarantees the reader is closed even if an
    // exception is thrown while records are being processed.
    try (Reader in =
        new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8"))) {
      // Deal with the possible BOM character at the beginning of the file
      in.mark(1);
      if (in.read() != BOM_CHARACTER) {
        in.reset();
      }

      // Get ready to parse the CSV file.
      // Allow lines to be comments if they start with "-" so that can
      // easily comment out problems and also test what happens when
      // certain data is missing. Using the '-' character so can
      // comment out line that starts with "--", which is what is
      // used for SQL.
      CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentStart('-');

      // Parse the file
      Iterable<CSVRecord> records = formatter.parse(in);

      logger.debug("Finished CSV parsing of file {}. Took {} msec.", fileName, timer.elapsedMsec());

      int lineNumberWhenLogged = 0;
      timer = new IntervalTimer();
      IntervalTimer loggingTimer = new IntervalTimer();

      for (CSVRecord record : records) {
        // Process the record using appropriate handler
        // and create the corresponding GTFS object
        T gtfsObject;
        try {
          gtfsObject = handleRecord(record, supplemental);
        } catch (ParseException e) {
          logger.error(
              "ParseException occurred on line {} for filename {} . {}",
              record.getRecordNumber(),
              fileName,
              e.getMessage());

          // Continue even though there was an error so that all errors
          // logged at once.
          continue;
        } catch (NumberFormatException e) {
          logger.error(
              "NumberFormatException occurred on line {} for filename {} . {}",
              record.getRecordNumber(),
              fileName,
              e.getMessage());

          // Continue even though there was an error so that all errors
          // logged at once.
          continue;
        }

        // Add the newly created GTFS object to the object list
        gtfsObjects.add(gtfsObject);

        // Log info if it has been a while. Check only every 20,000 lines
        // to see if the 5 seconds has gone by. If so, then log number
        // of lines.
        if (record.getRecordNumber()
            >= lineNumberWhenLogged + LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG) {
          lineNumberWhenLogged = (int) record.getRecordNumber();
          if (loggingTimer.elapsedMsec() > SECONDS_ELAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) {
            logger.info(
                "  Processed {} lines. Took {} msec...", lineNumberWhenLogged, timer.elapsedMsec());
            loggingTimer = new IntervalTimer();
          }
        }
      } // End of iterating over records

      logger.debug("Finished parsing file {} . Took {} msec.", fileName, timer.elapsedMsec());
    } catch (FileNotFoundException e) {
      if (required) {
        logger.error("Required GTFS file {} not found.", fileName);
      } else {
        logger.info("GTFS file {} not found but OK because this file not required.", fileName);
      }
    } catch (IOException e) {
      logger.error("IOException occurred when reading in filename {}.", fileName, e);
    }
  }