  private void testFull(
      QueryType type,
      String planText,
      String filename,
      int numberOfTimesRead /* specified in json plan */,
      int numberOfRowGroups,
      int recordsPerRowGroup,
      boolean testValues)
      throws Exception {

    // RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props =
        new ParquetTestProperties(
            numberRowGroups, recordsPerRowGroup, DEFAULT_BYTES_PER_PAGE, fields);
    TestFileGenerator.populateFieldInfoMap(props);
    ParquetResultListener resultListener =
        new ParquetResultListener(getAllocator(), props, numberOfTimesRead, testValues);
    Stopwatch watch = new Stopwatch().start();
    testWithListener(type, planText, resultListener);
    resultListener.getResults();
    // batchLoader.clear();
    System.out.println(
        String.format("Took %d ms to run query", watch.elapsed(TimeUnit.MILLISECONDS)));
  }
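  // Hypothetical usage sketch for the helper above (the argument values are illustrative
  // assumptions; planText would normally be read from a JSON plan resource, e.g. with
  // Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8) as done elsewhere in this
  // class):
  //
  //   testFull(QueryType.PHYSICAL, planText, "/tmp/test.parquet", 1, 1, 20000, true);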
  @Test
  public void testMultipleRowGroupsEvent() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(2, 300, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    testParquetFullEngineEventBased(
        true, "/parquet/parquet_scan_screen.json", "/tmp/test.parquet", 1, props);
  }
  @BeforeClass
  public static void generateFile() throws Exception {
    File f = new File(fileName);
    ParquetTestProperties props =
        new ParquetTestProperties(
            numberRowGroups,
            recordsPerRowGroup,
            DEFAULT_BYTES_PER_PAGE,
            new HashMap<String, FieldInfo>());
    populateFieldInfoMap(props);
    if (!f.exists()) {
      TestFileGenerator.generateParquetFile(fileName, props);
    }
  }
  /**
   * Tests the attribute in a scan node that limits the columns read by a scan.
   *
   * <p>The functionality of selecting all columns is tested in all of the other tests that leave
   * out the attribute.
   *
   * @throws Exception
   */
  @Test
  public void testSelectColumnRead() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    // generate metadata for a series of test columns; these columns are all generated in the
    // test file
    populateFieldInfoMap(props);
    TestFileGenerator.generateParquetFile("/tmp/test.parquet", props);
    fields.clear();
    // create a new object to describe the dataset expected out of the scan operation;
    // the fields added below match those requested in the plan specified in
    // parquet_selective_column_read.json that is used below in the test query
    props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    props.fields.put(
        "integer",
        new FieldInfo(
            "int32", "integer", 32, TestFileGenerator.intVals, TypeProtos.MinorType.INT, props));
    props.fields.put(
        "bigInt",
        new FieldInfo(
            "int64", "bigInt", 64, TestFileGenerator.longVals, TypeProtos.MinorType.BIGINT, props));
    props.fields.put(
        "bin",
        new FieldInfo(
            "binary", "bin", -1, TestFileGenerator.binVals, TypeProtos.MinorType.VARBINARY, props));
    props.fields.put(
        "bin2",
        new FieldInfo(
            "binary", "bin2", -1, TestFileGenerator.bin2Vals, TypeProtos.MinorType.VARBINARY, props));
    testParquetFullEngineEventBased(
        true,
        false,
        "/parquet/parquet_selective_column_read.json",
        null,
        "/tmp/test.parquet",
        1,
        props,
        QueryType.PHYSICAL);
  }
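  // The exact contents of parquet_selective_column_read.json are not reproduced here. As a
  // hypothetical sketch (the attribute name and syntax below are assumptions, not taken from the
  // actual plan), a scan node limited to the four columns the test above expects back might carry
  // an attribute along the lines of:
  //
  //   "columns" : [ "integer", "bigInt", "bin", "bin2" ]
  //
  // Consult the plan resource itself for the real attribute name and format.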
  public void testParquetFullEngineRemote(
      String plan,
      String filename,
      int numberOfTimesRead /* specified in json plan */,
      int numberOfRowGroups,
      int recordsPerRowGroup)
      throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props =
        new ParquetTestProperties(
            numberRowGroups, recordsPerRowGroup, DEFAULT_BYTES_PER_PAGE, fields);
    TestFileGenerator.populateFieldInfoMap(props);
    ParquetResultListener resultListener =
        new ParquetResultListener(getAllocator(), props, numberOfTimesRead, true);
    testWithListener(
        QueryType.PHYSICAL,
        Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8),
        resultListener);
    resultListener.getResults();
  }
  @Test
  public void testMultipleRowGroupsAndReads() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    String readEntries = "";
    // number of times to read the file
    int i = 3;
    for (int j = 0; j < i; j++) {
      readEntries += "\"/tmp/test.parquet\"";
      if (j < i - 1) {
        readEntries += ",";
      }
    }
    // readEntries is now a comma-separated list of quoted paths, e.g.
    // "/tmp/test.parquet","/tmp/test.parquet","/tmp/test.parquet", which is presumably substituted
    // for the read-entries placeholder in the plan below so the same file is read i times
    testParquetFullEngineEventBased(
        true,
        "/parquet/parquet_scan_screen_read_entry_replace.json",
        readEntries,
        "/tmp/test.parquet",
        i,
        props);
  }
  @Test
  @Ignore
  public void testPerformance(
      @Injectable final DrillbitContext bitContext,
      @Injectable UserServer.UserClientConnection connection)
      throws Exception {
    DrillConfig c = DrillConfig.create();
    FunctionImplementationRegistry registry = new FunctionImplementationRegistry(c);
    FragmentContext context =
        new FragmentContext(
            bitContext, BitControl.PlanFragment.getDefaultInstance(), connection, registry);

    // new NonStrictExpectations() {
    //   {
    //     context.getAllocator(); result = BufferAllocator.getAllocator(DrillConfig.create());
    //   }
    // };

    final String fileName = "/tmp/parquet_test_performance.parquet";
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props =
        new ParquetTestProperties(1, 20 * 1000 * 1000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    // generateParquetFile(fileName, props);

    Configuration dfsConfig = new Configuration();
    List<Footer> footers = ParquetFileReader.readFooters(dfsConfig, new Path(fileName));
    Footer f = footers.iterator().next();

    List<SchemaPath> columns = Lists.newArrayList();
    columns.add(new SchemaPath("_MAP.integer", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bigInt", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.f", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.d", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.b", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bin", ExpressionPosition.UNKNOWN));
    columns.add(new SchemaPath("_MAP.bin2", ExpressionPosition.UNKNOWN));

    int totalRowCount = 0;
    FileSystem fs = new CachedSingleFileSystem(fileName);
    BufferAllocator allocator = new TopLevelAllocator();
    for (int i = 0; i < 25; i++) {
      ParquetRecordReader rr =
          new ParquetRecordReader(
              context,
              256000,
              fileName,
              0,
              fs,
              new CodecFactoryExposer(dfsConfig),
              f.getParquetMetadata(),
              columns);
      TestOutputMutator mutator = new TestOutputMutator(allocator);
      rr.setup(mutator);
      Stopwatch watch = new Stopwatch();
      watch.start();

      int rowCount = 0;
      while ((rowCount = rr.next()) > 0) {
        totalRowCount += rowCount;
      }
      System.out.println(
          String.format("Time completed: %s. ", watch.elapsed(TimeUnit.MILLISECONDS)));
      rr.cleanup();
    }

    allocator.close();
    System.out.println(String.format("Total row count %s", totalRowCount));
  }