  private void validateFooters(final List<Footer> metadata) {
    logger.debug(metadata.toString());
    assertEquals(3, metadata.size());
    for (Footer footer : metadata) {
      final File file = new File(footer.getFile().toUri());
      assertTrue(file.getName(), file.getName().startsWith("part"));
      assertTrue(file.getPath(), file.exists());
      final ParquetMetadata parquetMetadata = footer.getParquetMetadata();
      assertEquals(2, parquetMetadata.getBlocks().size());
      final Map<String, String> keyValueMetaData = parquetMetadata.getFileMetaData().getKeyValueMetaData();
      assertEquals("bar", keyValueMetaData.get("foo"));
      assertEquals(footer.getFile().getName(), keyValueMetaData.get(footer.getFile().getName()));
    }
  }
  @Test
  @Ignore
  public void testPerformance(@Injectable final DrillbitContext bitContext,
                              @Injectable UserServer.UserClientConnection connection) throws Exception {
    DrillConfig c = DrillConfig.create();
    FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
    FragmentContext context = new FragmentContext(bitContext, BitControl.PlanFragment.getDefaultInstance(), connection, registry);

//    new NonStrictExpectations() {
//      {
//        context.getAllocator(); result = BufferAllocator.getAllocator(DrillConfig.create());
//      }
//    };

    final String fileName = "/tmp/parquet_test_performance.parquet";
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(1, 20 * 1000 * 1000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    //generateParquetFile(fileName, props);

    // Read the footer of the pre-generated test file and select the columns to scan.
    Configuration dfsConfig = new Configuration();
    List<Footer> footers = ParquetFileReader.readFooters(dfsConfig, new Path(fileName));
    Footer f = footers.iterator().next();

    List<SchemaPath> columns = Lists.newArrayList();
    columns.add(new SchemaPath("_MAP.integer", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bigInt", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.f", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.d", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.b", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bin", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bin2", ExpressionPosition.UNKNOWN));
    int totalRowCount = 0;

    FileSystem fs = new CachedSingleFileSystem(fileName);
    BufferAllocator allocator = new TopLevelAllocator();
    // Scan the same file repeatedly, timing each full pass through the reader.
    for (int i = 0; i < 25; i++) {
      ParquetRecordReader rr = new ParquetRecordReader(context, 256000, fileName, 0, fs,
          new CodecFactoryExposer(dfsConfig), f.getParquetMetadata(), columns);
      TestOutputMutator mutator = new TestOutputMutator(allocator);
      rr.setup(mutator);
      Stopwatch watch = new Stopwatch();
      watch.start();

      int rowCount = 0;
      while ((rowCount = rr.next()) > 0) {
        totalRowCount += rowCount;
      }
      System.out.println(String.format("Time completed: %s. ", watch.elapsed(TimeUnit.MILLISECONDS)));
      rr.cleanup();
    }

    allocator.close();
    System.out.println(String.format("Total row count %s", totalRowCount));
  }