@Override public BytesInput getBytes() { if (!dictionaryTooBig && dict.size() > 0) { // remember size of dictionary when we last wrote a page lastUsedDictionarySize = dict.size(); lastUsedDictionaryByteSize = dictionaryByteSize; int maxDicId = dict.size() - 1; if (DEBUG) LOG.debug("max dic id " + maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); // TODO: what is a good initialCapacity? final RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(BytesUtils.getWidthFromMaxInt(maxDicId), 64 * 1024); IntIterator iterator = out.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] {(byte) bitWidth}; BytesInput rleEncodedBytes = encoder.toBytes(); if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size()); return concat(BytesInput.from(bytesHeader), rleEncodedBytes); } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } } return plainValuesWriter.getBytes(); }
/**
 * Finishes the bit-packing writer and exposes the backing buffer {@code out} as a
 * {@link BytesInput}.
 *
 * @return a {@link BytesInput} built from {@code out}
 * @throws ParquetEncodingException if finishing the bit-packing writer fails with an
 *     {@link IOException}
 */
@Override
public BytesInput getBytes() {
  finishBitPackingWriter();
  if (Log.DEBUG) {
    LOG.debug("writing a buffer of size " + out.size());
  }
  final BytesInput page = BytesInput.from(out);
  return page;
}

/** Calls {@code finish()} on the bit-packing writer, translating I/O failures. */
private void finishBitPackingWriter() {
  try {
    bitPackingWriter.finish();
  } catch (IOException cause) {
    throw new ParquetEncodingException("could not write page", cause);
  }
}
@Test public void test() throws IOException { MemPageStore memPageStore = new MemPageStore(10); ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2); LongStatistics stats = new LongStatistics(); PageWriter pageWriter = memPageStore.getPageWriter(col); pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); PageReader pageReader = memPageStore.getPageReader(col); long totalValueCount = pageReader.getTotalValueCount(); System.out.println(totalValueCount); int total = 0; do { Page readPage = pageReader.readPage(); total += readPage.getValueCount(); System.out.println(readPage); // TODO: assert } while (total < totalValueCount); }
@Override public DictionaryPage createDictionaryPage() { if (lastUsedDictionarySize > 0) { // return a dictionary only if we actually used it try { CapacityByteArrayOutputStream dictBuf = new CapacityByteArrayOutputStream(lastUsedDictionaryByteSize); LittleEndianDataOutputStream dictOut = new LittleEndianDataOutputStream(dictBuf); Iterator<Binary> entryIterator = dict.keySet().iterator(); // write only the part of the dict that we used for (int i = 0; i < lastUsedDictionarySize; i++) { Binary entry = entryIterator.next(); dictOut.writeInt(entry.length()); entry.writeTo(dictOut); } return new DictionaryPage( BytesInput.from(dictBuf), lastUsedDictionarySize, PLAIN_DICTIONARY); } catch (IOException e) { throw new ParquetEncodingException("Could not generate dictionary Page", e); } } return plainValuesWriter.createDictionaryPage(); }