/**
 * Checks how many streams are opened while {@code getStatistics} runs on an input created from
 * four copies of {@code TEST_DATA1}, using the stream-open counter of {@code TestFileSystem}
 * as the measure of samples taken.
 *
 * <p>Two cases are covered: a format left at its defaults must open {@code
 * DEFAULT_NUM_SAMPLES} streams, and a format on which {@code setNumLineSamples(8)} was called
 * must open exactly 8.
 */
@Test
public void testNumSamplesMultipleFiles() {
    try {
        final String tempFile =
                TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA1, TEST_DATA1, TEST_DATA1);
        // NOTE(review): presumably this swaps the URI scheme so the path is served by
        // TestFileSystem - confirm. String.replace rewrites every occurrence of "file",
        // not only a leading scheme.
        final String testPath = tempFile.replace("file", "test");
        final Configuration conf = new Configuration();

        // Case 1: no explicit sample count configured.
        final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
        format.setFilePath(testPath);
        format.configure(conf);

        // Reset right before gathering statistics so only opens made by that call count.
        TestFileSystem.resetStreamOpenCounter();
        format.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.",
                DEFAULT_NUM_SAMPLES,
                TestFileSystem.getNumtimeStreamOpened());

        // Case 2: sample count explicitly set to 8 before configure().
        final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat();
        format2.setFilePath(testPath);
        format2.setNumLineSamples(8);
        format2.configure(conf);

        TestFileSystem.resetStreamOpenCounter();
        format2.getStatistics(null);
        Assert.assertEquals(
                "Wrong number of samples taken.", 8, TestFileSystem.getNumtimeStreamOpened());
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report carries its stack
        // trace; a bare fail(e.getMessage()) loses it and may report a null message.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
/**
 * Checks that the statistics returned by {@code getStatistics} for an input created from
 * {@code TEST_DATA1} and {@code TEST_DATA2} fall inside bounds derived from {@code TOTAL_SIZE}
 * and the two line-width constants.
 *
 * <p>The record count must lie between {@code TOTAL_SIZE / max(lineWidth)} (truncated) and
 * {@code TOTAL_SIZE / min(lineWidth)} (rounded up). The average record width must lie between
 * {@code TOTAL_SIZE / maxNumLines} and {@code TOTAL_SIZE / minNumLines}.
 */
@Test
public void testSamplingDirectory() {
    try {
        final String tempFile = TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA2);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
        format.setFilePath(tempFile);
        format.configure(conf);
        final BaseStatistics stats = format.getStatistics(null);

        // Most lines possible: every line as narrow as the narrower data set.
        final int maxNumLines =
                (int)
                        Math.ceil(
                                TOTAL_SIZE
                                        / ((double)
                                                Math.min(
                                                        TEST_DATA_1_LINEWIDTH,
                                                        TEST_DATA_2_LINEWIDTH)));
        // Fewest lines possible: every line as wide as the wider data set.
        final int minNumLines =
                (int)
                        (TOTAL_SIZE
                                / ((double)
                                        Math.max(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
        final float maxAvgWidth = ((float) (TOTAL_SIZE)) / minNumLines;
        final float minAvgWidth = ((float) (TOTAL_SIZE)) / maxNumLines;

        // Read each statistic once and put the observed values into the failure message
        // instead of printing them to stderr.
        final long numRecords = stats.getNumberOfRecords();
        Assert.assertTrue(
                "Wrong record count. Records: "
                        + numRecords
                        + " out of ("
                        + minNumLines
                        + ", "
                        + maxNumLines
                        + ").",
                numRecords <= maxNumLines && numRecords >= minNumLines);

        // Written as a positive range check so that a NaN width still fails the test.
        final float avgWidth = stats.getAverageRecordWidth();
        Assert.assertTrue(
                "Wrong avg record size. Width: "
                        + avgWidth
                        + " out of ("
                        + minAvgWidth
                        + ", "
                        + maxAvgWidth
                        + ").",
                avgWidth <= maxAvgWidth && avgWidth >= minAvgWidth);
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report carries its stack
        // trace; a bare fail(e.getMessage()) loses it and may report a null message.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}