Exemplo n.º 1
0
 /**
  * Folds the file-level metadata of every footer into a single {@link GlobalMetaData}.
  *
  * <p>Each footer's file metadata is handed to {@code mergeInto} together with the result
  * accumulated so far; the first call receives {@code null} as the accumulated value.
  *
  * @param footers the footers whose file metadata is merged, in iteration order
  * @param strict passed through unchanged to {@code mergeInto}
  * @return the accumulated metadata, or {@code null} when {@code footers} is empty
  */
 static GlobalMetaData getGlobalMetaData(List<Footer> footers, boolean strict) {
   GlobalMetaData merged = null;
   for (Footer current : footers) {
     merged = mergeInto(current.getParquetMetadata().getFileMetaData(), merged, strict);
   }
   return merged;
 }
Exemplo n.º 2
0
 /**
  * Writes a _metadata and a _common_metadata file built from the merged footers.
  *
  * <p>The footers are merged against {@code outputPath} exactly as it was passed in; the path is
  * qualified against its file system only afterwards, and the qualified form is used for both
  * writes.
  *
  * @param configuration the configuration to use to get the FileSystem
  * @param outputPath the directory to write the metadata files to
  * @param footers the list of footers to merge
  * @throws IOException if obtaining the file system or writing a metadata file fails
  */
 public static void writeMetadataFile(
     Configuration configuration, Path outputPath, List<Footer> footers) throws IOException {
   final ParquetMetadata merged = mergeFooters(outputPath, footers);
   final FileSystem fileSystem = outputPath.getFileSystem(configuration);
   final Path qualifiedDir = outputPath.makeQualified(fileSystem);

   writeMetadataFile(qualifiedDir, merged, fileSystem, PARQUET_METADATA_FILE);

   // The same merged footer is reused for the second file, with its block list emptied first.
   merged.getBlocks().clear();
   writeMetadataFile(qualifiedDir, merged, fileSystem, PARQUET_COMMON_METADATA_FILE);
 }
Exemplo n.º 3
0
  /**
   * Reads the footer of {@code path} and returns an initialized record reader restricted to the
   * blocks that belong to the requested byte range.
   *
   * <p>A block is selected when the first data page offset of its first column lies in
   * {@code [start, start + length)}.
   *
   * @param configuration configuration used to read the footer, initialize the read support and
   *     create the task attempt context
   * @param path the file to read
   * @param start first byte offset of the split
   * @param length length of the split in bytes
   * @param columns columns handed to the read support
   * @param useParquetColumnNames flag handed to the read support
   * @return the reader, after {@code initialize} has been called on it
   */
  private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(
      Configuration configuration,
      Path path,
      long start,
      long length,
      List<HiveColumnHandle> columns,
      boolean useParquetColumnNames) {
    try {
      ParquetMetadata footer = ParquetFileReader.readFooter(configuration, path);
      FileMetaData fileMetaData = footer.getFileMetaData();

      PrestoReadSupport readSupport =
          new PrestoReadSupport(useParquetColumnNames, columns, fileMetaData.getSchema());
      ReadContext readContext =
          readSupport.init(
              configuration, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());

      // Keep only the blocks whose first data page starts inside the split.
      long end = start + length;
      List<BlockMetaData> selectedBlocks = new ArrayList<>();
      for (BlockMetaData candidate : footer.getBlocks()) {
        long offset = candidate.getColumns().get(0).getFirstDataPageOffset();
        if (offset < start || offset >= end) {
          continue;
        }
        selectedBlocks.add(candidate);
      }

      ParquetInputSplit split =
          new ParquetInputSplit(
              path,
              start,
              length,
              null,
              selectedBlocks,
              readContext.getRequestedSchema().toString(),
              fileMetaData.getSchema().toString(),
              fileMetaData.getKeyValueMetaData(),
              readContext.getReadSupportMetadata());

      TaskAttemptContext attemptContext =
          ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
      ParquetRecordReader<FakeParquetRecord> reader = new PrestoParquetRecordReader(readSupport);
      reader.initialize(split, attemptContext);
      return reader;
    } catch (IOException ioFailure) {
      throw Throwables.propagate(ioFailure);
    } catch (InterruptedException interrupted) {
      // Restore the interrupt flag before propagating so callers can still observe it.
      Thread.currentThread().interrupt();
      throw Throwables.propagate(interrupted);
    }
  }
 /**
  * Asserts that the given footers match the layout this test expects: three footers, each
  * pointing at an existing file whose name starts with "part", each holding two blocks, and each
  * carrying the key/value entries {@code foo -> bar} and {@code <file name> -> <file name>}.
  *
  * @param metadata the footers to check
  */
 private void validateFooters(final List<Footer> metadata) {
   logger.debug(metadata.toString());
   assertEquals(3, metadata.size());
   for (Footer footer : metadata) {
     final File partFile = new File(footer.getFile().toUri());
     final String localName = partFile.getName();
     assertTrue(localName, localName.startsWith("part"));
     assertTrue(partFile.getPath(), partFile.exists());

     final ParquetMetadata parquetMetadata = footer.getParquetMetadata();
     assertEquals(2, parquetMetadata.getBlocks().size());

     final Map<String, String> extraMetadata =
         parquetMetadata.getFileMetaData().getKeyValueMetaData();
     assertEquals("bar", extraMetadata.get("foo"));
     // Each file is expected to store its own name as both key and value.
     final String footerFileName = footer.getFile().getName();
     assertEquals(footerFileName, extraMetadata.get(footerFileName));
   }
 }