Code example #1
    @Override
    public void writePageV2(
        int rowCount,
        int nullCount,
        int valueCount,
        BytesInput repetitionLevels,
        BytesInput definitionLevels,
        Encoding dataEncoding,
        BytesInput data,
        Statistics<?> statistics)
        throws IOException {
      int rlByteLength = toIntWithCheck(repetitionLevels.size());
      int dlByteLength = toIntWithCheck(definitionLevels.size());
      int uncompressedSize =
          toIntWithCheck(data.size() + repetitionLevels.size() + definitionLevels.size());
      // TODO: decide if we compress
      BytesInput compressedData = compressor.compress(data);
      int compressedSize =
          toIntWithCheck(compressedData.size() + repetitionLevels.size() + definitionLevels.size());
      tempOutputStream.reset();
      parquetMetadataConverter.writeDataPageV2Header(
          uncompressedSize,
          compressedSize,
          valueCount,
          nullCount,
          rowCount,
          statistics,
          dataEncoding,
          rlByteLength,
          dlByteLength,
          tempOutputStream);
      this.uncompressedLength += uncompressedSize;
      this.compressedLength += compressedSize;
      this.totalValueCount += valueCount;
      this.pageCount += 1;
      this.totalStatistics.mergeStatistics(statistics);

      // by concatenating before collecting instead of collecting twice,
      // we only allocate one buffer to copy into instead of multiple.
      buf.collect(
          BytesInput.concat(
              BytesInput.from(tempOutputStream),
              repetitionLevels,
              definitionLevels,
              compressedData));
      dataEncodings.add(dataEncoding);
    }
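
In the V2 data page format, only the values buffer goes through the compressor; the repetition and definition levels are written after the header uncompressed, which is why compressor.compress(data) receives data alone and the level lengths are added back into both the uncompressed and compressed size fields. The toIntWithCheck helper is not shown in these snippets; presumably it guards the long-to-int narrowing that the page header requires. A minimal sketch of what it could look like:

  // Hedged reconstruction of the helper used above; the real class defines its own.
  private int toIntWithCheck(long size) {
    if (size > Integer.MAX_VALUE) {
      // page header size fields are 32-bit, so larger pages cannot be represented
      throw new ParquetEncodingException(
          "Cannot write page larger than " + Integer.MAX_VALUE + " bytes: " + size);
    }
    return (int) size;
  }
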
Code example #2
 @Override
 public void writePage(
     BytesInput bytes,
     int valueCount,
     Statistics<?> statistics,
     Encoding rlEncoding,
     Encoding dlEncoding,
     Encoding valuesEncoding)
     throws IOException {
   long uncompressedSize = bytes.size();
   if (uncompressedSize > Integer.MAX_VALUE) {
     throw new ParquetEncodingException(
         "Cannot write page larger than Integer.MAX_VALUE bytes: " + uncompressedSize);
   }
   BytesInput compressedBytes = compressor.compress(bytes);
   long compressedSize = compressedBytes.size();
   if (compressedSize > Integer.MAX_VALUE) {
     throw new ParquetEncodingException(
         "Cannot write compressed page larger than Integer.MAX_VALUE bytes: " + compressedSize);
   }
   tempOutputStream.reset();
   parquetMetadataConverter.writeDataPageHeader(
       (int) uncompressedSize,
       (int) compressedSize,
       valueCount,
       statistics,
       rlEncoding,
       dlEncoding,
       valuesEncoding,
       tempOutputStream);
   this.uncompressedLength += uncompressedSize;
   this.compressedLength += compressedSize;
   this.totalValueCount += valueCount;
   this.pageCount += 1;
   this.totalStatistics.mergeStatistics(statistics);
   // by concatenating before collecting instead of collecting twice,
   // we only allocate one buffer to copy into instead of multiple.
   buf.collect(BytesInput.concat(BytesInput.from(tempOutputStream), compressedBytes));
   rlEncodings.add(rlEncoding);
   dlEncodings.add(dlEncoding);
   dataEncodings.add(valuesEncoding);
 }
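
This is the V1 counterpart of the method above: here bytes already contains the repetition levels, definition levels, and encoded values serialized together, and the whole buffer is compressed as one unit, so the header carries no separate level lengths. The two explicit Integer.MAX_VALUE guards do by hand what toIntWithCheck does in writePageV2; folding them into that helper would be a straightforward refactor, sketched here assuming the helper from the previous example is in scope:

  // Refactoring sketch, not the library's actual code.
  int uncompressedSize = toIntWithCheck(bytes.size());
  BytesInput compressedBytes = compressor.compress(bytes);
  int compressedSize = toIntWithCheck(compressedBytes.size());
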
Code example #3
 public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
   writer.startColumn(path, totalValueCount, compressor.getCodecName());
   if (dictionaryPage != null) {
     writer.writeDictionaryPage(dictionaryPage);
     // tracking the dictionary encoding is handled in writeDictionaryPage
   }
   writer.writeDataPages(
       buf,
       uncompressedLength,
       compressedLength,
       totalStatistics,
       rlEncodings,
       dlEncodings,
       dataEncodings);
   writer.endColumn();
   if (LOG.isDebugEnabled()) {
     LOG.debug(
         String.format(
                 "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
                 buf.size(),
                 path,
                 totalValueCount,
                 uncompressedLength,
                 compressedLength,
                 pageCount,
                 new HashSet<Encoding>(dataEncodings))
             + (dictionaryPage != null
                 ? String.format(
                     ", dic { %,d entries, %,dB raw, %,dB comp}",
                     dictionaryPage.getDictionarySize(),
                     dictionaryPage.getUncompressedSize(),
                      dictionaryPage.getCompressedSize())
                 : ""));
   }
   rlEncodings.clear();
   dlEncodings.clear();
   dataEncodings.clear();
   pageCount = 0;
 }
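
writeToFileWriter drains one buffered column chunk into the target file in order: startColumn, the optional dictionary page, the collected data pages, endColumn, and then it resets the per-chunk counters and encoding lists. A hedged sketch of where this sits in a row-group flush, assuming the enclosing store exposes a flushToFileWriter entry point that calls writeToFileWriter once per column:

  // Sketch of a row-group flush; flushToFileWriter(...) is assumed to iterate
  // the store's per-column page writers and invoke writeToFileWriter(...) above.
  static void flushRowGroup(
      ParquetFileWriter writer, ColumnChunkPageWriteStore store, long rowCount)
      throws IOException {
    writer.startBlock(rowCount);
    store.flushToFileWriter(writer);
    writer.endBlock();
  }
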
Code example #4
 @Override
 public String memUsageString(String prefix) {
   return buf.memUsageString(prefix + " ColumnChunkPageWriter");
 }
Code example #5
 @Override
 public long allocatedSize() {
   return buf.size();
 }
Code example #6
 @Override
 public long getMemSize() {
   return buf.size();
 }
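
The last three overrides all delegate to the collected buffer: memUsageString labels the buffer's memory report with this writer's name, while allocatedSize and getMemSize both report the bytes accumulated so far (the used size stands in for allocated capacity here). A caller would typically poll getMemSize to decide when a row group is large enough to flush; an illustrative check, where the threshold and the flushRowGroup helper from the previous sketch are assumptions, not library API:

  // Illustrative only: the 128 MB threshold and flushRowGroup(...) are assumed.
  if (pageWriter.getMemSize() > 128L * 1024 * 1024) {
    flushRowGroup(writer, store, rowCount);
  }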