Example #1
 static GlobalMetaData mergeInto(
     FileMetaData toMerge, GlobalMetaData mergedMetadata, boolean strict) {
   MessageType schema = null;
   Map<String, Set<String>> newKeyValues = new HashMap<String, Set<String>>();
   Set<String> createdBy = new HashSet<String>();
   if (mergedMetadata != null) {
     schema = mergedMetadata.getSchema();
     newKeyValues.putAll(mergedMetadata.getKeyValueMetaData());
     createdBy.addAll(mergedMetadata.getCreatedBy());
   }
   if ((schema == null && toMerge.getSchema() != null)
       || (schema != null && !schema.equals(toMerge.getSchema()))) {
     schema = mergeInto(toMerge.getSchema(), schema, strict);
   }
   for (Entry<String, String> entry : toMerge.getKeyValueMetaData().entrySet()) {
     Set<String> values = newKeyValues.get(entry.getKey());
     if (values == null) {
       values = new HashSet<String>();
       newKeyValues.put(entry.getKey(), values);
     }
     values.add(entry.getValue());
   }
   createdBy.add(toMerge.getCreatedBy());
   return new GlobalMetaData(schema, newKeyValues, createdBy);
 }
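This accumulator is meant to be folded over the footer of every input file. A minimal sketch of that driver loop, assuming a hypothetical fileMetaDataList collected from each file's footer (strict=true demands exact primitive-type matches):

  GlobalMetaData merged = null;
  for (FileMetaData fileMetaData : fileMetaDataList) {
    merged = mergeInto(fileMetaData, merged, true);
  }
  // merged now holds the union schema, all distinct key/value entries,
  // and every created-by string seen across the files.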
Example #2
  public static parquet.schema.Type getParquetType(
      HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
    if (useParquetColumnNames) {
      return getParquetTypeByName(column.getName(), messageType);
    }

    if (column.getHiveColumnIndex() < messageType.getFieldCount()) {
      return messageType.getType(column.getHiveColumnIndex());
    }
    return null;
  }
Example #3
    private parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType) {
      if (useParquetColumnNames) {
        if (messageType.containsField(column.getName())) {
          return messageType.getType(column.getName());
        }
        return null;
      }

      if (column.getHiveColumnIndex() < messageType.getFieldCount()) {
        return messageType.getType(column.getHiveColumnIndex());
      }
      return null;
    }
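Both variants fall back to positional lookup, which is only safe while Hive and the Parquet file declare columns in the same order; name lookup survives reordering, which is the point of useParquetColumnNames. A self-contained sketch of the two strategies against a hand-written schema (the schema string is made up):

  import parquet.schema.MessageType;
  import parquet.schema.MessageTypeParser;

  public class LookupDemo {
    public static void main(String[] args) {
      MessageType schema = MessageTypeParser.parseMessageType(
          "message hive_table { required int64 id; optional binary name; }");
      System.out.println(schema.getType(1));      // by position: optional binary name
      System.out.println(schema.getType("name")); // by name: same field, order-independent
    }
  }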
Example #4
  private static parquet.schema.Type getParquetTypeByName(
      String columnName, MessageType messageType) {
    if (messageType.containsField(columnName)) {
      return messageType.getType(columnName);
    }
    // Parquet is case-sensitive, but Hive is not: Hive lowercases all column names.
    // If the direct (case-sensitive) lookup above misses, fall back to a case-insensitive scan.
    for (Type type : messageType.getFields()) {
      if (type.getName().equalsIgnoreCase(columnName)) {
        return type;
      }
    }

    return null;
  }
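This fallback exists because Hive lowercases identifiers while Parquet preserves case, so a file written with mixed-case names would miss the direct lookup. A runnable sketch of the two-step match (the schema string is made up):

  import parquet.schema.MessageType;
  import parquet.schema.MessageTypeParser;
  import parquet.schema.Type;

  public class CaseInsensitiveLookup {
    public static void main(String[] args) {
      MessageType schema = MessageTypeParser.parseMessageType(
          "message doc { required int64 UserId; }");
      System.out.println(schema.containsField("userid")); // false: direct match is case-sensitive
      for (Type field : schema.getFields()) {
        if (field.getName().equalsIgnoreCase("userid")) {
          System.out.println("matched " + field.getName()); // matched UserId
        }
      }
    }
  }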
Example #5
  private List<ParquetInputSplit> generateSplitByDeprecatedConstructor(long min, long max)
      throws IOException {
    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
    List<ClientSideMetadataSplitStrategy.SplitInfo> splitInfos =
        ClientSideMetadataSplitStrategy.generateSplitInfo(blocks, hdfsBlocks, min, max);

    for (ClientSideMetadataSplitStrategy.SplitInfo splitInfo : splitInfos) {
      BlockMetaData lastRowGroup = splitInfo.getRowGroups().get(splitInfo.getRowGroupCount() - 1);
      long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();

      ParquetInputSplit split =
          new ParquetInputSplit(
              fileStatus.getPath(),
              splitInfo.hdfsBlock.getOffset(),
              end,
              splitInfo.hdfsBlock.getHosts(),
              splitInfo.rowGroups,
              schema.toString(),
              null,
              null,
              extramd);
      splits.add(split);
    }

    return splits;
  }
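The end offset is pure arithmetic: the split must cover its last row group in full, wherever the underlying HDFS block happens to end. With illustrative numbers:

  long lastGroupStart = 4L;          // lastRowGroup.getStartingPos()
  long lastGroupBytes = 1_048_576L;  // lastRowGroup.getTotalByteSize()
  long end = lastGroupStart + lastGroupBytes; // 1_048_580: passed as the split's end position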
Example #6
  /**
   * Returns the result of merging {@code toMerge} into {@code mergedSchema}.
   *
   * @param toMerge the schema to merge into {@code mergedSchema}
   * @param mergedSchema the schema to append the fields to; may be null
   * @param strict whether primitive types must match exactly
   * @return the resulting schema
   */
  static MessageType mergeInto(MessageType toMerge, MessageType mergedSchema, boolean strict) {
    if (mergedSchema == null) {
      return toMerge;
    }

    return mergedSchema.union(toMerge, strict);
  }
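A standalone sketch of the strict merge, assuming two hand-written schemas; union comes from parquet-mr's GroupType and is exactly what the method above delegates to:

  import parquet.schema.MessageType;
  import parquet.schema.MessageTypeParser;

  MessageType a = MessageTypeParser.parseMessageType(
      "message m { required int32 id; }");
  MessageType b = MessageTypeParser.parseMessageType(
      "message m { required int32 id; optional binary name; }");
  MessageType merged = a.union(b, true); // strict: id must stay required int32 in both
  // merged contains both id and name.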
Example #7
 private void validateContains(
     MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
     throws IOException {
   PageReader pageReader = pages.getPageReader(schema.getColumnDescription(path));
   Page page = pageReader.readPage();
   assertEquals(values, page.getValueCount());
   assertArrayEquals(bytes.toByteArray(), page.getBytes().toByteArray());
 }
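A test helper like this is typically driven once per column after pulling a row group out of an open ParquetFileReader. A hedged sketch of the calling side (the reader, schema, counts, and byte arrays are all assumptions):

  PageReadStore pages = reader.readNextRowGroup(); // null once every row group is consumed
  validateContains(schema, pages, new String[] {"id"}, 100, BytesInput.from(idBytes));
  validateContains(schema, pages, new String[] {"name"}, 100, BytesInput.from(nameBytes));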
Example #8
  @Override
  public Page getNextPage() {
    try {
      batchId++;
      long start = System.nanoTime();

      int batchSize = parquetReader.nextBatch();

      readTimeNanos += System.nanoTime() - start;

      if (closed || batchSize <= 0) {
        close();
        return null;
      }

      Block[] blocks = new Block[hiveColumnIndexes.length];
      for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
        Type type = types.get(fieldId);
        if (constantBlocks[fieldId] != null) {
          blocks[fieldId] = constantBlocks[fieldId].getRegion(0, batchSize);
        } else {
          int fieldIndex = requestedSchema.getFieldIndex(columnNames.get(fieldId));
          ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(fieldIndex);
          blocks[fieldId] =
              new LazyBlock(batchSize, new ParquetBlockLoader(columnDescriptor, type));
        }
      }
      return new Page(batchSize, blocks);
    } catch (PrestoException e) {
      closeWithSuppression(e);
      throw e;
    } catch (IOException | RuntimeException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      closeWithSuppression(e);
      throw new PrestoException(HIVE_CURSOR_ERROR, e);
    }
  }
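One detail worth copying: InterruptedException must not be swallowed, so the handler restores the thread's interrupt flag before wrapping. The pattern in isolation, with hypothetical helper names:

  try {
    readNextBatch(); // hypothetical blocking read
  } catch (IOException | RuntimeException | InterruptedException e) {
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt(); // restore the flag the throw cleared
    }
    closeQuietly();                       // hypothetical cleanup
    throw new RuntimeException("batch read failed", e);
  }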
Example #9
 @Override
 @SuppressWarnings("deprecation")
 public ReadContext init(
     Configuration configuration,
     Map<String, String> keyValueMetaData,
     MessageType messageType) {
   List<parquet.schema.Type> fields =
       columns
           .stream()
           .filter(column -> !column.isPartitionKey())
           .map(column -> getParquetType(column, messageType))
           .filter(Objects::nonNull)
           .collect(toList());
   MessageType requestedProjection = new MessageType(messageType.getName(), fields);
   return new ReadContext(requestedProjection);
 }
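The result is a pruned requested projection: only non-partition columns that resolve against the file schema survive, and the reader skips everything else. The same pruning as a standalone sketch with a plain name set (toList() is java.util.stream.Collectors.toList(); the wanted set is hypothetical):

  import java.util.*;
  import parquet.schema.MessageType;
  import parquet.schema.MessageTypeParser;
  import parquet.schema.Type;
  import static java.util.stream.Collectors.toList;

  MessageType fileSchema = MessageTypeParser.parseMessageType(
      "message t { required int64 id; optional binary name; optional int32 age; }");
  Set<String> wanted = new HashSet<String>(Arrays.asList("id", "age"));
  List<Type> kept = fileSchema.getFields().stream()
      .filter(f -> wanted.contains(f.getName()))
      .collect(toList());
  MessageType projection = new MessageType(fileSchema.getName(), kept);
  // projection: message t { required int64 id; optional int32 age; }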
Example #10
  /** {@inheritDoc} */
  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    ParquetInputSplit parquetInputSplit = (ParquetInputSplit) inputSplit;
    this.requestedSchema =
        MessageTypeParser.parseMessageType(parquetInputSplit.getRequestedSchema());
    this.columnCount = this.requestedSchema.getPaths().size();
    this.recordConverter =
        readSupport.prepareForRead(
            configuration,
            parquetInputSplit.getExtraMetadata(),
            MessageTypeParser.parseMessageType(parquetInputSplit.getSchema()),
            new ReadSupport.ReadContext(requestedSchema));

    Path path = parquetInputSplit.getPath();
    List<BlockMetaData> blocks = parquetInputSplit.getBlocks();
    List<ColumnDescriptor> columns = requestedSchema.getColumns();
    reader = new ParquetFileReader(configuration, path, blocks, columns);
    for (BlockMetaData block : blocks) {
      total += block.getRowCount();
    }
    LOG.info("RecordReader initialized will read a total of " + total + " records.");
  }
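Note that both the file schema and the requested schema travel through the split as strings, which is why the reader round-trips them through MessageTypeParser. The round-trip in isolation:

  import parquet.schema.MessageType;
  import parquet.schema.MessageTypeParser;

  MessageType schema = MessageTypeParser.parseMessageType(
      "message m { required int32 id; }");
  String wire = schema.toString();                       // what the split carries
  MessageType parsed = MessageTypeParser.parseMessageType(wire);
  System.out.println(schema.equals(parsed));             // true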
Example #11
 private List<ParquetInputSplit> generateSplitByMinMaxSize(long min, long max) throws IOException {
   return ClientSideMetadataSplitStrategy.generateSplits(
       blocks, hdfsBlocks, fileStatus, schema.toString(), extramd, min, max);
 }
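The min/max bounds are normally sourced from the job configuration before this call. A hedged sketch using the standard Hadoop split-size keys (whether this code path reads exactly these keys is an assumption):

  Configuration conf = new Configuration();
  long min = conf.getLong("mapred.min.split.size", 0L);
  long max = conf.getLong("mapred.max.split.size", Long.MAX_VALUE);
  List<ParquetInputSplit> splits = generateSplitByMinMaxSize(min, max);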