// requires binary file generated by pig from TPCH data, also have to disable assert where data is // coming in @Ignore @Test public void testMultipleRowGroupsAndReadsPigError() throws Exception { HashMap<String, FieldInfo> fields = new HashMap<>(); ParquetTestProperties props = new ParquetTestProperties(1, 1500000, DEFAULT_BYTES_PER_PAGE, fields); TestFileGenerator.populatePigTPCHCustomerFields(props); String readEntries = "\"/tmp/tpc-h/customer\""; testParquetFullEngineEventBased( false, false, "/parquet/parquet_scan_screen_read_entry_replace.json", readEntries, "unused, no file is generated", 1, props, QueryType.LOGICAL); fields = new HashMap(); props = new ParquetTestProperties(1, 100000, DEFAULT_BYTES_PER_PAGE, fields); TestFileGenerator.populatePigTPCHSupplierFields(props); readEntries = "\"/tmp/tpc-h/supplier\""; testParquetFullEngineEventBased( false, false, "/parquet/parquet_scan_screen_read_entry_replace.json", readEntries, "unused, no file is generated", 1, props, QueryType.LOGICAL); }
// specific tests should call this method, but it is not marked as a test itself intentionally public void testParquetFullEngineEventBased( boolean testValues, boolean generateNew, String plan, String readEntries, String filename, int numberOfTimesRead /* specified in json plan */, ParquetTestProperties props, QueryType queryType) throws Exception { if (generateNew) TestFileGenerator.generateParquetFile(filename, props); ParquetResultListener resultListener = new ParquetResultListener(getAllocator(), props, numberOfTimesRead, testValues); long C = System.nanoTime(); String planText = Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8); // substitute in the string for the read entries, allows reuse of the plan file for several // tests if (readEntries != null) { planText = planText.replaceFirst("&REPLACED_IN_PARQUET_TEST&", readEntries); } this.testWithListener(queryType, planText, resultListener); resultListener.getResults(); long D = System.nanoTime(); System.out.println(String.format("Took %f s to run query", (float) (D - C) / 1E9)); }
private void testFull( QueryType type, String planText, String filename, int numberOfTimesRead /* specified in json plan */, int numberOfRowGroups, int recordsPerRowGroup, boolean testValues) throws Exception { // RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator()); HashMap<String, FieldInfo> fields = new HashMap<>(); ParquetTestProperties props = new ParquetTestProperties( numberRowGroups, recordsPerRowGroup, DEFAULT_BYTES_PER_PAGE, fields); TestFileGenerator.populateFieldInfoMap(props); ParquetResultListener resultListener = new ParquetResultListener(getAllocator(), props, numberOfTimesRead, testValues); Stopwatch watch = new Stopwatch().start(); testWithListener(type, planText, resultListener); resultListener.getResults(); // batchLoader.clear(); System.out.println( String.format("Took %d ms to run query", watch.elapsed(TimeUnit.MILLISECONDS))); }
/**
 * Generates the shared Parquet test file once per test class, skipping generation if the
 * file already exists on disk. Dataset dimensions come from the class-level
 * {@code numberRowGroups} / {@code recordsPerRowGroup} fields.
 *
 * @throws Exception if file generation fails
 */
@BeforeClass
public static void generateFile() throws Exception {
  File f = new File(fileName);
  ParquetTestProperties props =
      new ParquetTestProperties(
          numberRowGroups,
          recordsPerRowGroup,
          DEFAULT_BYTES_PER_PAGE,
          new HashMap<String, FieldInfo>());
  populateFieldInfoMap(props);
  // FIX: braced the single-statement `if` (house style / defect-prone otherwise).
  if (!f.exists()) {
    TestFileGenerator.generateParquetFile(fileName, props);
  }
}
/** * Tests the attribute in a scan node to limit the columns read by a scan. * * <p>The functionality of selecting all columns is tested in all of the other tests that leave * out the attribute. * * @throws Exception */ @Test public void testSelectColumnRead() throws Exception { HashMap<String, FieldInfo> fields = new HashMap<>(); ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields); // generate metatdata for a series of test columns, these columns are all generated in the test // file populateFieldInfoMap(props); TestFileGenerator.generateParquetFile("/tmp/test.parquet", props); fields.clear(); // create a new object to describe the dataset expected out of the scan operation // the fields added below match those requested in the plan specified in // parquet_selective_column_read.json // that is used below in the test query props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields); props.fields.put( "integer", new FieldInfo( "int32", "integer", 32, TestFileGenerator.intVals, TypeProtos.MinorType.INT, props)); props.fields.put( "bigInt", new FieldInfo( "int64", "bigInt", 64, TestFileGenerator.longVals, TypeProtos.MinorType.BIGINT, props)); props.fields.put( "bin", new FieldInfo( "binary", "bin", -1, TestFileGenerator.binVals, TypeProtos.MinorType.VARBINARY, props)); props.fields.put( "bin2", new FieldInfo( "binary", "bin2", -1, TestFileGenerator.bin2Vals, TypeProtos.MinorType.VARBINARY, props)); testParquetFullEngineEventBased( true, false, "/parquet/parquet_selective_column_read.json", null, "/tmp/test.parquet", 1, props, QueryType.PHYSICAL); }
// Regression check for DRILL-958; depends on a pre-generated /tmp/store_sales file,
// hence @Ignore. NOTE(review): the method name does not follow lowerCamelCase test
// naming, but renaming it would change the externally visible test id.
@Ignore
@Test
public void drill_958bugTest() throws Exception {
  HashMap<String, FieldInfo> fieldMap = new HashMap<>();
  ParquetTestProperties testProps =
      new ParquetTestProperties(1, 2880404, DEFAULT_BYTES_PER_PAGE, fieldMap);
  TestFileGenerator.populatePigTPCHCustomerFields(testProps);
  testParquetFullEngineEventBased(
      false,
      false,
      "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/store_sales\"",
      "unused, no file is generated",
      1,
      testProps,
      QueryType.LOGICAL);
}
// Regression check for DRILL-418; depends on a pre-generated
// /tmp/customer.plain.parquet file, hence @Ignore.
@Ignore
@Test
public void testReadBug_Drill_418() throws Exception {
  HashMap<String, FieldInfo> fieldMap = new HashMap<>();
  ParquetTestProperties testProps =
      new ParquetTestProperties(1, 150000, DEFAULT_BYTES_PER_PAGE, fieldMap);
  TestFileGenerator.populateDrill_418_fields(testProps);
  testParquetFullEngineEventBased(
      false,
      false,
      "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/customer.plain.parquet\"",
      "unused, no file is generated",
      1,
      testProps,
      QueryType.LOGICAL);
}
/**
 * Runs a physical plan from a classpath resource against a remote-style setup and
 * validates the results with value checking enabled.
 *
 * @param plan classpath resource containing the JSON physical plan
 * @param filename unused here; kept for signature compatibility with callers
 * @param numberOfTimesRead how many times the file is read, as specified in the JSON plan
 * @param numberOfRowGroups expected number of row groups in the dataset
 * @param recordsPerRowGroup expected record count per row group
 * @throws Exception on any query or validation failure
 */
public void testParquetFullEngineRemote(
    String plan,
    String filename,
    int numberOfTimesRead /* specified in json plan */,
    int numberOfRowGroups,
    int recordsPerRowGroup)
    throws Exception {
  HashMap<String, FieldInfo> fields = new HashMap<>();
  // FIX: the original passed the class-level field `numberRowGroups`, leaving the
  // `numberOfRowGroups` parameter unused — presumably a typo; now the parameter is honored.
  // NOTE(review): confirm no caller relied on the field being used regardless of the argument.
  ParquetTestProperties props =
      new ParquetTestProperties(
          numberOfRowGroups, recordsPerRowGroup, DEFAULT_BYTES_PER_PAGE, fields);
  TestFileGenerator.populateFieldInfoMap(props);
  ParquetResultListener resultListener =
      new ParquetResultListener(getAllocator(), props, numberOfTimesRead, true);
  testWithListener(
      QueryType.PHYSICAL,
      Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8),
      resultListener);
  resultListener.getResults();
}