/**
   * Checks how often the input is opened while statistics are gathered for a single file.
   *
   * <p>The number of stream opens counted by {@code TestFileSystem} is compared against the
   * expected number of samples: {@code DEFAULT_NUM_SAMPLES} for an untouched format, and 8 once
   * {@code setNumLineSamples(8)} has been called. Any {@code Exception} raised along the way is
   * printed and turned into a test failure.
   */
  @Test
  public void testNumSamplesOneFile() {
    try {
      final int customSampleCount = 8;
      final String sampleMessage = "Wrong number of samples taken.";

      final String sourceFile = TestFileUtils.createTempFile(TEST_DATA1);
      // Swap "file" for "test" in the path; presumably this routes reads through TestFileSystem
      // so that its open counter sees them -- confirm against TestFileSystem.
      final String countedPath = sourceFile.replace("file", "test");
      final Configuration config = new Configuration();

      // First pass: format with its default sampling settings.
      final TestDelimitedInputFormat defaultFormat = new TestDelimitedInputFormat();
      defaultFormat.setFilePath(countedPath);
      defaultFormat.configure(config);

      TestFileSystem.resetStreamOpenCounter();
      defaultFormat.getStatistics(null);
      Assert.assertEquals(
          sampleMessage, DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

      // Second pass: same file, but with an explicitly configured number of line samples.
      final TestDelimitedInputFormat customFormat = new TestDelimitedInputFormat();
      customFormat.setFilePath(countedPath);
      customFormat.setNumLineSamples(customSampleCount);
      customFormat.configure(config);

      TestFileSystem.resetStreamOpenCounter();
      customFormat.getStatistics(null);
      Assert.assertEquals(
          sampleMessage, customSampleCount, TestFileSystem.getNumtimeStreamOpened());

    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
  }
  /**
   * Verifies that previously computed statistics are reused instead of sampling the file again.
   *
   * <p>A first format gathers statistics from scratch, which is expected to open
   * {@code DEFAULT_NUM_SAMPLES} streams. A second format for the same path is then handed those
   * statistics as the cached value; it must open no stream at all and hand back the very same
   * object. Any {@code Exception} raised along the way is printed and turned into a test failure.
   */
  @Test
  public void testCachedStatistics() {
    try {
      final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath("test://" + tempFile);
      format.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      final BaseStatistics stats = format.getStatistics(null);
      Assert.assertEquals(
          "Wrong number of samples taken.",
          DEFAULT_NUM_SAMPLES,
          TestFileSystem.getNumtimeStreamOpened());

      final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat();
      format2.setFilePath("test://" + tempFile);
      format2.configure(conf);

      TestFileSystem.resetStreamOpenCounter();
      final BaseStatistics stats2 = format2.getStatistics(stats);

      // assertEquals / assertSame report the offending values when they fail, which
      // assertTrue on an already-evaluated boolean cannot do.
      Assert.assertEquals(
          "Using cached statistics should cicumvent sampling.",
          0,
          TestFileSystem.getNumtimeStreamOpened());
      // Identity, not equality: the cached instance itself must be returned.
      Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);

    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Verifies record count and average record width when records are separated by a
   * multi-character delimiter instead of a newline.
   *
   * <p>{@code TEST_DATA1} is rewritten so that every {@code "\n"} becomes {@code "12345678-"},
   * and the format is configured with that delimiter. The resulting statistics must report a
   * record count strictly within one of {@code TEST_DATA_1_LINES} and an average record width
   * strictly within one of {@code testData.length() / TEST_DATA_1_LINES}. Any {@code Exception}
   * raised along the way is printed and turned into a test failure.
   */
  @Test
  public void testDifferentDelimiter() {
    try {
      final String delimiter = "12345678-";
      final String testData = TEST_DATA1.replace("\n", delimiter);

      final String tempFile = TestFileUtils.createTempFile(testData);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.setDelimiter(delimiter);
      format.configure(conf);

      final BaseStatistics stats = format.getStatistics(null);
      // getStatistics can return null (testSamplingOverlyLongRecord relies on that); fail with a
      // readable message here rather than with a NullPointerException further down.
      Assert.assertNotNull("Statistics could not be computed.", stats);

      final int numLines = TEST_DATA_1_LINES;
      final float avgWidth = ((float) testData.length()) / TEST_DATA_1_LINES;

      Assert.assertTrue(
          "Wrong record count.",
          stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
      Assert.assertTrue(
          "Wrong avg record size.",
          stats.getAverageRecordWidth() < avgWidth + 1
              && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Samples a temp file created with twice
   * {@code PactConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN} as its argument and
   * expects statistics gathering to yield {@code null}.
   *
   * <p>Any {@code Exception} that escapes is printed and turned into a test failure.
   */
  @Test
  public void testSamplingOverlyLongRecord() {
    try {
      final String oversizedFile =
          TestFileUtils.createTempFile(
              2 * PactConfigConstants.DEFAULT_DELIMITED_FORMAT_MAX_SAMPLE_LEN);
      final Configuration config = new Configuration();

      final TestDelimitedInputFormat inputFormat = new TestDelimitedInputFormat();
      inputFormat.setFilePath(oversizedFile);
      inputFormat.configure(config);

      // No cached statistics are supplied, so the format has to sample the file itself.
      final BaseStatistics sampled = inputFormat.getStatistics(null);
      Assert.assertNull("Expected exception due to overly long record.", sampled);
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
  }
  /**
   * Verifies that statistics sampled from the path returned by
   * {@code TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA2)} fall inside bounds derived
   * from the two line widths.
   *
   * <p>The record count must lie in {@code [minNumLines, maxNumLines]}, where the upper bound is
   * {@code TOTAL_SIZE} divided by the smaller line width (rounded up) and the lower bound is
   * {@code TOTAL_SIZE} divided by the larger line width (truncated). The average record width must
   * lie between {@code TOTAL_SIZE / maxNumLines} and {@code TOTAL_SIZE / minNumLines}, inclusive.
   * Any {@code Exception} raised along the way is printed and turned into a test failure.
   */
  @Test
  public void testSamplingDirectory() {
    try {
      final String tempFile = TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA2);
      final Configuration conf = new Configuration();

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.configure(conf);
      final BaseStatistics stats = format.getStatistics(null);
      // getStatistics can return null (testSamplingOverlyLongRecord relies on that); fail with a
      // readable message here rather than with a NullPointerException further down.
      Assert.assertNotNull("Statistics could not be computed.", stats);

      // Most records possible: everything is as narrow as the narrower data set.
      final int maxNumLines =
          (int)
              Math.ceil(
                  TOTAL_SIZE / ((double) Math.min(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
      // Fewest records possible: everything is as wide as the wider data set.
      final int minNumLines =
          (int) (TOTAL_SIZE / ((double) Math.max(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
      final float maxAvgWidth = ((float) (TOTAL_SIZE)) / minNumLines;
      final float minAvgWidth = ((float) (TOTAL_SIZE)) / maxNumLines;

      final boolean recordCountInRange =
          stats.getNumberOfRecords() <= maxNumLines && stats.getNumberOfRecords() >= minNumLines;
      if (!recordCountInRange) {
        System.err.println(
            "Records: "
                + stats.getNumberOfRecords()
                + " out of ("
                + minNumLines
                + ", "
                + maxNumLines
                + ").");
        Assert.fail("Wrong record count.");
      }

      final boolean avgWidthInRange =
          stats.getAverageRecordWidth() <= maxAvgWidth
              && stats.getAverageRecordWidth() >= minAvgWidth;
      if (!avgWidthInRange) {
        Assert.fail("Wrong avg record size.");
      }
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }