/**
 * Builds a column writer and prepares its level and value writers.
 *
 * @param path descriptor of the column; also handed to {@code props} to create the writers
 * @param pageWriter page writer retained by this column writer
 * @param props configuration that supplies the size-check settings and the repetition-level,
 *     definition-level and value writers
 */
public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter, ParquetProperties props) {
  this.path = path;
  this.pageWriter = pageWriter;
  this.props = props;

  // Defer the first memory-usage check until the configured minimum number of values has been
  // written, so there is enough data to base the initial size prediction on.
  this.valueCountForNextSizeCheck = this.props.getMinRowCountForPageSizeCheck();

  resetStatistics();

  // One writer per stream that makes up a page: repetition levels, definition levels, values.
  this.repetitionLevelColumn = this.props.newRepetitionLevelWriter(this.path);
  this.definitionLevelColumn = this.props.newDefinitionLevelWriter(this.path);
  this.dataColumn = this.props.newValuesWriter(this.path);
}
/** * Counts how many values have been written and checks the memory usage to flush the page when we * reach the page threshold. * * <p>We measure the memory used when we reach the mid point toward our estimated count. We then * update the estimate and flush the page if we reached the threshold. * * <p>That way we check the memory size log2(n) times. */ private void accountForValueWritten() { ++valueCount; if (valueCount > valueCountForNextSizeCheck) { // not checking the memory used for every value long memSize = repetitionLevelColumn.getBufferedSize() + definitionLevelColumn.getBufferedSize() + dataColumn.getBufferedSize(); if (memSize > props.getPageSizeThreshold()) { // we will write the current page and check again the size at the predicted middle of next // page if (props.estimateNextSizeCheck()) { valueCountForNextSizeCheck = valueCount / 2; } else { valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck(); } writePage(); } else if (props.estimateNextSizeCheck()) { // not reached the threshold, will check again midway valueCountForNextSizeCheck = (int) (valueCount + ((float) valueCount * props.getPageSizeThreshold() / memSize)) / 2 + 1; } else { valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck(); } } }