예제 #1
0
  /**
   * Builds a column writer bound to a single column.
   *
   * <p>Stores the three collaborators, schedules the first buffered-size check, resets the
   * statistics, and then asks {@code props} for the repetition-level, definition-level and data
   * writers of the column.
   *
   * @param path descriptor of the column being written
   * @param pageWriter page writer kept by this column writer
   * @param props properties supplying the size-check settings and the writer factories
   */
  public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter, ParquetProperties props) {
    // These three fields are shadowed by the parameters, so they need the explicit qualifier.
    this.path = path;
    this.pageWriter = pageWriter;
    this.props = props;

    // The first size check is deferred until the configured minimum number of values has been
    // written, so that there is some data to base a prediction on.
    valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();

    resetStatistics();

    // One writer per stream of the column: repetition levels, definition levels, values.
    repetitionLevelColumn = props.newRepetitionLevelWriter(path);
    definitionLevelColumn = props.newDefinitionLevelWriter(path);
    dataColumn = props.newValuesWriter(path);
  }
예제 #2
0
 /**
  * Records that one more value was written and, when a size check is due, decides whether the
  * current page has to be written out.
  *
  * <p>The buffered size is not inspected on every value. It is only measured once the value count
  * passes {@code valueCountForNextSizeCheck}; at that point the buffered sizes of the repetition
  * level, definition level and data writers are summed and compared with the page size threshold.
  *
  * <ul>
  *   <li>Over the threshold: the next check point is set (half of the current value count when
  *       estimation is enabled, otherwise the configured minimum row count) and the page is
  *       written.
  *   <li>Under the threshold with estimation enabled: the next check point is placed roughly
  *       midway between the current count and the count at which the threshold is projected to
  *       be reached.
  *   <li>Under the threshold without estimation: the next check point is pushed forward by the
  *       configured minimum row count.
  * </ul>
  */
 private void accountForValueWritten() {
   valueCount++;
   if (valueCount <= valueCountForNextSizeCheck) {
     // No check is due yet; skip measuring the buffers.
     return;
   }

   long bufferedBytes =
       repetitionLevelColumn.getBufferedSize()
           + definitionLevelColumn.getBufferedSize()
           + dataColumn.getBufferedSize();

   // Query the threshold first and the estimation flag second, once each.
   boolean overThreshold = bufferedBytes > props.getPageSizeThreshold();
   boolean estimating = props.estimateNextSizeCheck();

   if (overThreshold) {
     // Choose the next check point before the page is written out.
     if (estimating) {
       valueCountForNextSizeCheck = valueCount / 2;
     } else {
       valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
     }
     writePage();
     return;
   }

   if (estimating) {
     // Project the value count at which the threshold would be hit from the current
     // bytes-per-value ratio, then check again about halfway there.
     valueCountForNextSizeCheck =
         (int) (valueCount + ((float) valueCount * props.getPageSizeThreshold() / bufferedBytes))
                 / 2
             + 1;
   } else {
     // Fixed stride: check again after another minimum-row-count worth of values.
     valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck();
   }
 }