/**
 * Reports the total buffered size of this column writer.
 *
 * @return the sum of the buffered sizes of the repetition level, definition level and data
 *     columns, plus the memory size reported by the page writer
 */
@Override
public long getBufferedSizeInMemory() {
  long total = repetitionLevelColumn.getBufferedSize();
  total += definitionLevelColumn.getBufferedSize();
  total += dataColumn.getBufferedSize();
  // Unlike the per-page size check, this figure also includes the page writer.
  total += pageWriter.getMemSize();
  return total;
}
/** * Counts how many values have been written and checks the memory usage to flush the page when we * reach the page threshold. * * <p>We measure the memory used when we reach the mid point toward our estimated count. We then * update the estimate and flush the page if we reached the threshold. * * <p>That way we check the memory size log2(n) times. */ private void accountForValueWritten() { ++valueCount; if (valueCount > valueCountForNextSizeCheck) { // not checking the memory used for every value long memSize = repetitionLevelColumn.getBufferedSize() + definitionLevelColumn.getBufferedSize() + dataColumn.getBufferedSize(); if (memSize > props.getPageSizeThreshold()) { // we will write the current page and check again the size at the predicted middle of next // page if (props.estimateNextSizeCheck()) { valueCountForNextSizeCheck = valueCount / 2; } else { valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck(); } writePage(); } else if (props.estimateNextSizeCheck()) { // not reached the threshold, will check again midway valueCountForNextSizeCheck = (int) (valueCount + ((float) valueCount * props.getPageSizeThreshold() / memSize)) / 2 + 1; } else { valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck(); } } }
@Override public void close() { flush(); // Close the Values writers. repetitionLevelColumn.close(); definitionLevelColumn.close(); dataColumn.close(); }
/**
 * Writes a null entry: only the repetition and definition levels are recorded, nothing is
 * written to the data column.
 *
 * @param repetitionLevel the repetition level to record for this entry
 * @param definitionLevel the definition level to record for this entry
 */
@Override
public void writeNull(int repetitionLevel, int definitionLevel) {
  if (DEBUG) {
    log(null, repetitionLevel, definitionLevel);
  }
  // Levels first, then bookkeeping: null count statistics and page size accounting.
  repetitionLevelColumn.writeInteger(repetitionLevel);
  definitionLevelColumn.writeInteger(definitionLevel);
  updateStatisticsNumNulls();
  accountForValueWritten();
}
/**
 * Writes a {@code long} value together with its repetition and definition levels.
 *
 * @param value the value to append to the data column
 * @param repetitionLevel the repetition level to record for this value
 * @param definitionLevel the definition level to record for this value
 */
@Override
public void write(long value, int repetitionLevel, int definitionLevel) {
  if (DEBUG) {
    log(value, repetitionLevel, definitionLevel);
  }
  // Levels are recorded before the value itself.
  repetitionLevelColumn.writeInteger(repetitionLevel);
  definitionLevelColumn.writeInteger(definitionLevel);
  dataColumn.writeLong(value);
  // Bookkeeping: statistics, then page size accounting (which may write the page).
  updateStatistics(value);
  accountForValueWritten();
}
/**
 * Builds a multi-line, human-readable report of this writer's memory usage.
 *
 * <p>The report starts with the column path, followed by one entry each for the repetition
 * levels, definition levels, data and pages, and a final line with the total buffered size over
 * the total allocated size.
 *
 * @param indent prefix placed in front of the report lines
 * @return the formatted report, ending with a newline
 */
public String memUsageString(String indent) {
  final StringBuilder report = new StringBuilder(indent);
  report.append(path).append(" {\n");

  // One line per underlying component, each delegating to that component's own report.
  report.append(repetitionLevelColumn.memUsageString(indent + " r:"));
  report.append("\n");
  report.append(definitionLevelColumn.memUsageString(indent + " d:"));
  report.append("\n");
  report.append(dataColumn.memUsageString(indent + " data:"));
  report.append("\n");
  report.append(pageWriter.memUsageString(indent + " pages:"));
  report.append("\n");

  // Summary line: buffered / allocated.
  final String totals =
      String.format(" total: %,d/%,d", getBufferedSizeInMemory(), allocatedSize());
  report.append(indent).append(totals).append("\n");

  report.append(indent).append("}\n");
  return report.toString();
}
/**
 * Writes out everything that is still pending for this column.
 *
 * <p>If any values have been counted since the last page, they are written as a page. The data
 * column is then asked for a dictionary page; when it returns one, that page is handed to the
 * page writer and the data column's dictionary is reset.
 *
 * @throws ParquetEncodingException if the page writer fails with an {@link IOException} while
 *     writing the dictionary page
 */
public void flush() {
  if (valueCount > 0) {
    writePage();
  }

  final DictionaryPage dictPage = dataColumn.toDictPageAndClose();
  if (dictPage == null) {
    // No dictionary page to write.
    return;
  }

  if (DEBUG) {
    LOG.debug("write dictionary");
  }
  try {
    pageWriter.writeDictionaryPage(dictPage);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write dictionary page for " + path, e);
  }
  dataColumn.resetDictionary();
}
/**
 * Hands the currently buffered values to the page writer as one page and starts a fresh page.
 *
 * <p>The page content is the concatenation of the repetition level bytes, the definition level
 * bytes and the data bytes, in that order. It is passed along with the value count, the
 * statistics and the encoding of each of the three columns. Afterwards the three columns, the
 * value count and the statistics are reset.
 *
 * @throws ParquetEncodingException if the page writer fails with an {@link IOException}; in
 *     that case nothing is reset
 */
private void writePage() {
  if (DEBUG) {
    LOG.debug("write page");
  }

  try {
    pageWriter.writePage(
        concat(
            repetitionLevelColumn.getBytes(),
            definitionLevelColumn.getBytes(),
            dataColumn.getBytes()),
        valueCount,
        statistics,
        repetitionLevelColumn.getEncoding(),
        definitionLevelColumn.getEncoding(),
        dataColumn.getEncoding());
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page for " + path, e);
  }

  // The page has been handed off: clear all per-page state.
  repetitionLevelColumn.reset();
  definitionLevelColumn.reset();
  dataColumn.reset();
  valueCount = 0;
  resetStatistics();
}
/**
 * Reports the total allocated size of this column writer.
 *
 * @return the sum of the allocated sizes of the repetition level, definition level and data
 *     columns, plus the allocated size reported by the page writer
 */
public long allocatedSize() {
  long total = repetitionLevelColumn.getAllocatedSize();
  total += definitionLevelColumn.getAllocatedSize();
  total += dataColumn.getAllocatedSize();
  total += pageWriter.allocatedSize();
  return total;
}