コード例 #1
0
 /**
  * Appends one value. While the dictionary is still within its limits the value is handed to
  * {@code writeBytesUsingDict}; in every case it is also forwarded to the plain writer so that a
  * fallback to plain encoding remains possible.
  *
  * @param v the value to append
  */
 @Override
 public void writeBytes(Binary v) {
   if (!dictionaryTooBig) {
     writeBytesUsingDict(v);
     final boolean withinLimits =
         dictionaryByteSize <= maxDictionaryByteSize && dict.size() <= MAX_DICTIONARY_ENTRIES;
     if (!withinLimits) {
       // The dictionary exceeded its byte budget or its entry cap: give up on dictionary
       // encoding from here on.
       if (DEBUG) {
         final String message =
             "dictionary is now too big, falling back to plain: "
                 + dictionaryByteSize
                 + "B and "
                 + dict.size()
                 + " entries";
         LOG.debug(message);
       }
       dictionaryTooBig = true;
       final boolean dictionaryNeverUsed = (lastUsedDictionarySize == 0);
       if (dictionaryNeverUsed) {
         // The dictionary was never used, so the dictionary-encoded data can be released.
         out = null;
         dict = null;
         dictionaryByteSize = 0;
       }
     }
   }
   // Every value also goes to the plain writer in case we need to fall back.
   plainValuesWriter.writeBytes(v);
 }
コード例 #2
0
 /**
  * Clears the buffered values. The id buffer is replaced with a fresh {@code IntList} only when
  * it currently exists; a null buffer stays null. The plain writer is always reset.
  */
 @Override
 public void reset() {
   final boolean idBufferReleased = (out == null);
   if (!idBufferReleased) {
     out = new IntList();
   }
   plainValuesWriter.reset();
 }
コード例 #3
0
 /**
  * Reports the encoding of the values currently buffered.
  *
  * @return {@code PLAIN_DICTIONARY} while the dictionary has not been flagged as too big and
  *     holds at least one entry; otherwise whatever the plain writer reports
  */
 @Override
 public Encoding getEncoding() {
   if (dictionaryTooBig || dict.size() <= 0) {
     // Dictionary encoding is not in effect: defer to the plain writer.
     return plainValuesWriter.getEncoding();
   }
   return PLAIN_DICTIONARY;
 }
コード例 #4
0
  /**
   * Returns the bytes of the values buffered so far.
   *
   * <p>While the dictionary has not been flagged as too big and holds at least one entry, the
   * result is one header byte carrying the bit width followed by the buffered ids run through a
   * {@code RunLengthBitPackingHybridEncoder}. The current dictionary size and byte size are
   * recorded as "last used" at that point. Otherwise the plain writer's bytes are returned.
   *
   * @return the encoded bytes
   * @throws ParquetEncodingException if the encoder raises an {@code IOException}
   */
  @Override
  public BytesInput getBytes() {
    if (dictionaryTooBig || dict.size() <= 0) {
      // dictionary encoding is not in effect: the plain writer holds the content
      return plainValuesWriter.getBytes();
    }
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = dict.size();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    int maxDicId = dict.size() - 1;
    if (DEBUG) {
      LOG.debug("max dic id " + maxDicId);
    }
    // computed once so the header byte and the encoder are guaranteed to use the same width
    final int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);

    // TODO: what is a good initialCapacity?
    final RunLengthBitPackingHybridEncoder encoder =
        new RunLengthBitPackingHybridEncoder(bitWidth, 64 * 1024);
    IntIterator iterator = out.iterator();
    try {
      while (iterator.hasNext()) {
        encoder.writeInt(iterator.next());
      }
      // encodes the bit width
      byte[] bytesHeader = new byte[] {(byte) bitWidth};
      BytesInput rleEncodedBytes = encoder.toBytes();
      if (DEBUG) {
        LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
      }
      return concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    } catch (IOException e) {
      throw new ParquetEncodingException("could not encode the values", e);
    }
  }
コード例 #5
0
 /**
  * Reports the size used in memory: 4 bytes per buffered id (0 when the id buffer is null), plus
  * the dictionary byte size, plus the plain writer's allocated size.
  *
  * @return the allocated size in bytes
  */
 @Override
 public long getAllocatedSize() {
   // 4L forces long arithmetic; an int multiplication would silently overflow once the buffer
   // holds more than Integer.MAX_VALUE / 4 ids.
   final long idBufferBytes = (out == null) ? 0L : out.size() * 4L;
   return idBufferBytes + dictionaryByteSize + plainValuesWriter.getAllocatedSize();
 }
コード例 #6
0
 /**
  * Builds a multi-line description of this writer's memory usage: the plain writer's own usage
  * string, the dictionary byte size, and the id buffer size counted at 4 bytes per entry.
  *
  * @param prefix text placed at the start of each reported line
  * @return the formatted memory usage description
  */
 @Override
 public String memUsageString(String prefix) {
   // The id buffer is set to null when the dictionary is dropped before ever being used
   // (see writeBytes); report 0 in that case instead of throwing a NullPointerException.
   // 4L keeps the multiplication in long arithmetic so it cannot overflow.
   final long valuesBytes = (out == null) ? 0L : out.size() * 4L;
   return String.format(
       "%s DictionaryValuesWriter{\n%s\n%s\n%s\n%s}\n",
       prefix,
       plainValuesWriter.memUsageString(prefix + " plain:"),
       prefix + " dict:" + dictionaryByteSize,
       prefix + " values:" + valuesBytes,
       prefix);
 }
コード例 #7
0
 /**
  * Produces the dictionary page. When a dictionary size was recorded as used, the first
  * {@code lastUsedDictionarySize} keys of the dictionary are written, each as its length followed
  * by its content. Otherwise the call is delegated to the plain writer.
  *
  * @return the dictionary page, or the plain writer's result when no dictionary was used
  * @throws ParquetEncodingException if writing the entries raises an {@code IOException}
  */
 @Override
 public DictionaryPage createDictionaryPage() {
   if (lastUsedDictionarySize <= 0) {
     // The dictionary was never actually used: let the plain writer answer.
     return plainValuesWriter.createDictionaryPage();
   }
   try {
     final CapacityByteArrayOutputStream buffer =
         new CapacityByteArrayOutputStream(lastUsedDictionaryByteSize);
     final LittleEndianDataOutputStream sink = new LittleEndianDataOutputStream(buffer);
     final Iterator<Binary> entries = dict.keySet().iterator();
     // Only the part of the dictionary that was used is written.
     for (int remaining = lastUsedDictionarySize; remaining > 0; remaining--) {
       final Binary current = entries.next();
       sink.writeInt(current.length());
       current.writeTo(sink);
     }
     return new DictionaryPage(BytesInput.from(buffer), lastUsedDictionarySize, PLAIN_DICTIONARY);
   } catch (IOException e) {
     throw new ParquetEncodingException("Could not generate dictionary Page", e);
   }
 }
コード例 #8
0
 /**
  * Reports the size that will be written to a page, not including the dictionary size.
  *
  * @return the plain writer's buffered size once the dictionary has been flagged as too big;
  *     otherwise 4 bytes per buffered id
  */
 @Override
 public long getBufferedSize() {
   if (dictionaryTooBig) {
     return plainValuesWriter.getBufferedSize();
   }
   // 4L forces long arithmetic; an int multiplication would silently overflow once the buffer
   // holds more than Integer.MAX_VALUE / 4 ids.
   return out.size() * 4L;
 }