@Override
  public BytesInput getBytes() {
    if (!dictionaryTooBig && dict.size() > 0) {
      // remember size of dictionary when we last wrote a page
      lastUsedDictionarySize = dict.size();
      lastUsedDictionaryByteSize = dictionaryByteSize;
      int maxDicId = dict.size() - 1;
      if (DEBUG) LOG.debug("max dic id " + maxDicId);
      int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);

      // TODO: what is a good initialCapacity?
      final RunLengthBitPackingHybridEncoder encoder =
          new RunLengthBitPackingHybridEncoder(BytesUtils.getWidthFromMaxInt(maxDicId), 64 * 1024);
      IntIterator iterator = out.iterator();
      try {
        while (iterator.hasNext()) {
          encoder.writeInt(iterator.next());
        }
        // encodes the bit width
        byte[] bytesHeader = new byte[] {(byte) bitWidth};
        BytesInput rleEncodedBytes = encoder.toBytes();
        if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
        return concat(BytesInput.from(bytesHeader), rleEncodedBytes);
      } catch (IOException e) {
        throw new ParquetEncodingException("could not encode the values", e);
      }
    }
    return plainValuesWriter.getBytes();
  }
 @Override
 public BytesInput getBytes() {
   try {
     bitPackingWriter.finish();
   } catch (IOException e) {
     throw new ParquetEncodingException("could not write page", e);
   }
   if (Log.DEBUG) LOG.debug("writing a buffer of size " + out.size());
   return BytesInput.from(out);
 }
Esempio n. 3
0
 @Test
 public void test() throws IOException {
   MemPageStore memPageStore = new MemPageStore(10);
   ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
   LongStatistics stats = new LongStatistics();
   PageWriter pageWriter = memPageStore.getPageWriter(col);
   pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
   pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
   pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
   pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
   PageReader pageReader = memPageStore.getPageReader(col);
   long totalValueCount = pageReader.getTotalValueCount();
   System.out.println(totalValueCount);
   int total = 0;
   do {
     Page readPage = pageReader.readPage();
     total += readPage.getValueCount();
     System.out.println(readPage);
     // TODO: assert
   } while (total < totalValueCount);
 }
 @Override
 public DictionaryPage createDictionaryPage() {
   if (lastUsedDictionarySize > 0) {
     // return a dictionary only if we actually used it
     try {
       CapacityByteArrayOutputStream dictBuf =
           new CapacityByteArrayOutputStream(lastUsedDictionaryByteSize);
       LittleEndianDataOutputStream dictOut = new LittleEndianDataOutputStream(dictBuf);
       Iterator<Binary> entryIterator = dict.keySet().iterator();
       // write only the part of the dict that we used
       for (int i = 0; i < lastUsedDictionarySize; i++) {
         Binary entry = entryIterator.next();
         dictOut.writeInt(entry.length());
         entry.writeTo(dictOut);
       }
       return new DictionaryPage(
           BytesInput.from(dictBuf), lastUsedDictionarySize, PLAIN_DICTIONARY);
     } catch (IOException e) {
       throw new ParquetEncodingException("Could not generate dictionary Page", e);
     }
   }
   return plainValuesWriter.createDictionaryPage();
 }