static GlobalMetaData getGlobalMetaData(List<Footer> footers, boolean strict) {
    GlobalMetaData fileMetaData = null;
    for (Footer footer : footers) {
        ParquetMetadata currentMetadata = footer.getParquetMetadata();
        // fold each footer's file-level metadata into the running result;
        // strict mode rejects footers whose schemas can not be merged
        fileMetaData = mergeInto(currentMetadata.getFileMetaData(), fileMetaData, strict);
    }
    return fileMetaData;
}
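As a hedged usage sketch (not part of the original): footers are typically gathered with parquet-hadoop's ParquetFileReader.readAllFootersInParallel and then reduced to a single GlobalMetaData. The directory path below is a hypothetical placeholder, and the call assumes it runs inside the class that defines getGlobalMetaData and mergeInto.

// Sketch only: merge the file metadata of every footer under one directory.
// Assumes this code lives in the class that declares getGlobalMetaData.
Configuration conf = new Configuration();
Path dir = new Path("/tmp/parquet-output"); // hypothetical directory
FileSystem fs = dir.getFileSystem(conf);
List<Footer> footers =
        ParquetFileReader.readAllFootersInParallel(conf, fs.getFileStatus(dir));
// strict = true fails fast on incompatible schemas instead of widening them
GlobalMetaData merged = getGlobalMetaData(footers, true);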
/**
 * Writes the _metadata and _common_metadata summary files.
 *
 * @param configuration the configuration to use to get the FileSystem
 * @param outputPath the directory to write the _metadata file to
 * @param footers the list of footers to merge
 * @throws IOException if the metadata files can not be written
 */
public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers)
        throws IOException {
    ParquetMetadata metadataFooter = mergeFooters(outputPath, footers);
    FileSystem fs = outputPath.getFileSystem(configuration);
    outputPath = outputPath.makeQualified(fs);
    writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_METADATA_FILE);
    // _common_metadata carries only file-level metadata, so drop the row groups
    metadataFooter.getBlocks().clear();
    writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_COMMON_METADATA_FILE);
}
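A hedged example of calling the writer above; the output location and the way the footers are collected are assumptions for illustration, not part of the original.

// Sketch only: produce _metadata and _common_metadata next to the data files.
Configuration conf = new Configuration();
Path outputDir = new Path("hdfs:///warehouse/events"); // hypothetical location
FileSystem fs = outputDir.getFileSystem(conf);
List<Footer> footers =
        ParquetFileReader.readAllFootersInParallel(conf, fs.getFileStatus(outputDir));
// _metadata keeps per-row-group info; _common_metadata (written after the
// blocks are cleared above) carries only the schema and key/value metadata
writeMetadataFile(conf, outputDir, footers);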
private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(
        Configuration configuration,
        Path path,
        long start,
        long length,
        List<HiveColumnHandle> columns,
        boolean useParquetColumnNames) {
    try {
        // read the file footer to get the schema and row-group (block) layout
        ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, path);
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();

        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileMetaData.getSchema());
        ReadContext readContext = readSupport.init(
                configuration,
                fileMetaData.getKeyValueMetaData(),
                fileMetaData.getSchema());

        // keep only the row groups whose first data page falls inside this split
        List<BlockMetaData> splitGroup = new ArrayList<>();
        long splitStart = start;
        long splitLength = length;
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
                splitGroup.add(block);
            }
        }

        ParquetInputSplit split = new ParquetInputSplit(
                path,
                splitStart,
                splitLength,
                null,
                splitGroup,
                readContext.getRequestedSchema().toString(),
                fileMetaData.getSchema().toString(),
                fileMetaData.getKeyValueMetaData(),
                readContext.getReadSupportMetadata());

        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
        realReader.initialize(split, taskContext);
        return realReader;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
    catch (InterruptedException e) {
        // restore the interrupt flag before rethrowing
        Thread.currentThread().interrupt();
        throw Throwables.propagate(e);
    }
}
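For illustration, a hedged sketch of driving the reader returned above. The path, split length, and column list are placeholders, and the enclosing method is assumed to declare throws IOException, InterruptedException for the checked exceptions of nextKeyValue.

// Hypothetical call site; `columns` is a List<HiveColumnHandle> resolved
// elsewhere by the connector, and `fileLength` covers the whole file so
// every row group's first data page falls inside the split.
ParquetRecordReader<FakeParquetRecord> reader = createParquetRecordReader(
        configuration,
        new Path("hdfs:///warehouse/events/part-00000.parquet"), // assumed path
        0,            // split start
        fileLength,   // split length
        columns,
        true);        // resolve columns by name rather than by ordinal
try {
    while (reader.nextKeyValue()) {
        FakeParquetRecord record = reader.getCurrentValue();
        // consume the record...
    }
}
finally {
    reader.close();
}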
private void validateFooters(final List<Footer> metadata) {
    logger.debug(metadata.toString());
    assertEquals(3, metadata.size());
    for (Footer footer : metadata) {
        final File file = new File(footer.getFile().toUri());
        assertTrue(file.getName(), file.getName().startsWith("part"));
        assertTrue(file.getPath(), file.exists());

        final ParquetMetadata parquetMetadata = footer.getParquetMetadata();
        assertEquals(2, parquetMetadata.getBlocks().size());

        // every footer is expected to carry the shared "foo" entry plus
        // one entry keyed by its own file name
        final Map<String, String> keyValueMetaData =
                parquetMetadata.getFileMetaData().getKeyValueMetaData();
        assertEquals("bar", keyValueMetaData.get("foo"));
        assertEquals(footer.getFile().getName(), keyValueMetaData.get(footer.getFile().getName()));
    }
}
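In a test like this, the footers under validation would typically be read back from the job's output directory before being passed in; a sketch, with the output path as a hypothetical placeholder:

// Sketch only: read all footers under the test output directory and validate.
Path testOutput = new Path("/tmp/test-output"); // hypothetical test directory
FileSystem fs = testOutput.getFileSystem(conf);
List<Footer> footers =
        ParquetFileReader.readAllFootersInParallel(conf, fs.getFileStatus(testOutput));
validateFooters(footers);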