/**
   * Checks how many line samples are taken when statistics are computed for a single file.
   *
   * <p>The number of streams opened, as counted by {@code TestFileSystem}, is used as the measure
   * for the number of samples: it must equal {@code DEFAULT_NUM_SAMPLES} for a format with
   * untouched settings and {@code 8} after {@code setNumLineSamples(8)}.
   */
  @Test
  public void testNumSamplesOneFile() {
    try {
      final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
      // Swap only the leading scheme. A plain replace("file", "test") would also rewrite any
      // "file" occurring in the directory or file name and point at a path that does not exist.
      // NOTE(review): assumes createTempFile returns a path starting with "file" - confirm.
      final String testPath = tempFile.replaceFirst("^file", "test");
      final Configuration conf = new Configuration();

      // First pass: default number of samples.
      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(testPath);
      format.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      format.getStatistics(null);
      Assert.assertEquals(
          "Wrong number of samples taken.",
          DEFAULT_NUM_SAMPLES,
          TestFileSystem.getNumtimeStreamOpened());

      // Second pass: explicitly configured number of samples.
      final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat();
      format2.setFilePath(testPath);
      format2.setNumLineSamples(8);
      format2.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      format2.getStatistics(null);
      Assert.assertEquals(
          "Wrong number of samples taken.", 8, TestFileSystem.getNumtimeStreamOpened());

    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Checks that statistics passed in as cached statistics are reused instead of being recomputed.
   *
   * <p>A first format computes statistics from scratch (opening {@code DEFAULT_NUM_SAMPLES}
   * streams). A second format on the same path receives those statistics as the cached argument;
   * it must open no stream at all and must return the very same statistics object.
   */
  @Test
  public void testCachedStatistics() {
    try {
      final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath("test://" + tempFile);
      format.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      final BaseStatistics stats = format.getStatistics(null);
      Assert.assertEquals(
          "Wrong number of samples taken.",
          DEFAULT_NUM_SAMPLES,
          TestFileSystem.getNumtimeStreamOpened());

      final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat();
      format2.setFilePath("test://" + tempFile);
      format2.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      final BaseStatistics stats2 = format2.getStatistics(stats);
      // assertEquals / assertSame report the offending values on failure, which a bare
      // assertTrue on a comparison does not.
      Assert.assertEquals(
          "Using cached statistics should circumvent sampling.",
          0,
          TestFileSystem.getNumtimeStreamOpened());
      Assert.assertSame("Using cached statistics should circumvent sampling.", stats, stats2);

    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Checks the sampled statistics when records are separated by a custom, multi-character
   * delimiter instead of a newline.
   *
   * <p>The test data is {@code TEST_DATA1} with every {@code "\n"} replaced by the delimiter. The
   * reported record count must lie strictly within one of {@code TEST_DATA_1_LINES}, and the
   * reported average record width strictly within one of the total data length divided by that
   * line count.
   */
  @Test
  public void testDifferentDelimiter() {
    try {
      final String delimiter = "12345678-";
      final String testData = TEST_DATA1.replace("\n", delimiter);

      final String tempFile = TestFileUtils.createTempFile(testData);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.setDelimiter(delimiter);
      format.configure(conf);

      final BaseStatistics stats = format.getStatistics(null);
      final int numLines = TEST_DATA_1_LINES;
      final float avgWidth = ((float) testData.length()) / numLines;

      // Short-circuit && is the idiomatic operator for combining boolean conditions.
      Assert.assertTrue(
          "Wrong record count.",
          stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
      Assert.assertTrue(
          "Wrong avg record size.",
          stats.getAverageRecordWidth() < avgWidth + 1
              && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Checks that no statistics are produced for a file created with twice
   * {@code PactConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN} as its argument.
   *
   * <p>The format is expected to return {@code null} from {@code getStatistics}; any exception
   * escaping the call makes the test fail.
   */
  @Test
  public void testSamplingOverlyLongRecord() {
    try {
      // NOTE(review): presumably creates a file of the given length holding a single record
      // longer than the maximum sample length - confirm against TestFileUtils.
      final String tempFile =
          TestFileUtils.createTempFile(
              2 * PactConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.configure(conf);

      // The message describes what is actually asserted: a null result, not a thrown exception.
      Assert.assertNull(
          "Expected no statistics (null) due to overly long record.", format.getStatistics(null));
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }