Example #1
0
 /**
  * Reports how many bytes this column writer is currently holding.
  *
  * @return the buffered sizes of the repetition level, definition level and data writers, plus
  *     the memory size reported by the page writer
  */
 @Override
 public long getBufferedSizeInMemory() {
   // bytes held by the three values writers
   final long valuesWritersBytes =
       repetitionLevelColumn.getBufferedSize()
           + definitionLevelColumn.getBufferedSize()
           + dataColumn.getBufferedSize();
   return valuesWritersBytes + pageWriter.getMemSize();
 }
Example #2
0
 /**
  * Registers one more written value and, at selected value counts, measures the buffered size to
  * decide whether the current page has to be written out.
  *
  * <p>The buffered size is only measured once {@code valueCount} exceeds {@code
  * valueCountForNextSizeCheck}. When the measured size is above the page size threshold the page
  * is written; otherwise the next check point is pushed further out. Depending on {@code
  * props.estimateNextSizeCheck()} the next check point is either estimated from the values seen
  * so far (roughly the mid point toward the predicted page end) or advanced by the configured
  * minimum row count.
  *
  * <p>With the estimating strategy the memory size is checked about log2(n) times.
  */
 private void accountForValueWritten() {
   ++valueCount;
   if (valueCount <= valueCountForNextSizeCheck) {
     // not at a check point yet: the memory used is not measured for every value
     return;
   }

   final long bufferedBytes =
       repetitionLevelColumn.getBufferedSize()
           + definitionLevelColumn.getBufferedSize()
           + dataColumn.getBufferedSize();

   if (bufferedBytes > props.getPageSizeThreshold()) {
     // Threshold reached. The next check point must be computed before writePage() is called,
     // because it is derived from the value count of the page about to be written.
     valueCountForNextSizeCheck =
         props.estimateNextSizeCheck()
             ? valueCount / 2
             : props.getMinRowCountForPageSizeCheck();
     writePage();
     return;
   }

   if (!props.estimateNextSizeCheck()) {
     // fixed stride: look again after the configured minimum number of rows
     valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck();
     return;
   }

   // Threshold not reached: extrapolate how many values fill a page and check again midway.
   // The (int) cast applies to the parenthesized float sum only; the division is integer.
   valueCountForNextSizeCheck =
       (int) (valueCount + ((float) valueCount * props.getPageSizeThreshold() / bufferedBytes)) / 2
           + 1;
 }
Example #3
0
 /**
  * Flushes any pending data and then closes the repetition level, definition level and data
  * values writers.
  *
  * <p>The writers are closed in a {@code finally} block so that they are still closed when
  * {@code flush()} throws; the exception from {@code flush()} then propagates to the caller
  * unless one of the {@code close()} calls itself throws.
  */
 @Override
 public void close() {
   try {
     flush();
   } finally {
     // Close the values writers even if flushing failed.
     repetitionLevelColumn.close();
     definitionLevelColumn.close();
     dataColumn.close();
   }
 }
Example #4
0
 /**
  * Records a null: only the two levels are written, no data value.
  *
  * @param repetitionLevel the repetition level to write for this entry
  * @param definitionLevel the definition level to write for this entry
  */
 @Override
 public void writeNull(int repetitionLevel, int definitionLevel) {
   if (DEBUG) {
     log(null, repetitionLevel, definitionLevel);
   }
   // levels only; the data writer is left untouched for a null
   repetitionLevelColumn.writeInteger(repetitionLevel);
   definitionLevelColumn.writeInteger(definitionLevel);

   updateStatisticsNumNulls();
   // count the entry and possibly trigger a page size check
   accountForValueWritten();
 }
Example #5
0
 /**
  * Records a {@code long} value together with its repetition and definition levels.
  *
  * @param value the value handed to the data writer and to the statistics
  * @param repetitionLevel the repetition level to write for this value
  * @param definitionLevel the definition level to write for this value
  */
 @Override
 public void write(long value, int repetitionLevel, int definitionLevel) {
   if (DEBUG) {
     log(value, repetitionLevel, definitionLevel);
   }
   // levels first, then the value itself
   repetitionLevelColumn.writeInteger(repetitionLevel);
   definitionLevelColumn.writeInteger(definitionLevel);
   dataColumn.writeLong(value);

   updateStatistics(value);
   // count the value and possibly trigger a page size check
   accountForValueWritten();
 }
Example #6
0
 /**
  * Builds a multi-line, human-readable report of the memory used by this column writer.
  *
  * <p>The report starts with {@code path} followed by an opening brace, then one entry each for
  * the repetition level writer ({@code r:}), the definition level writer ({@code d:}), the data
  * writer ({@code data:}) and the page writer ({@code pages:}), then a {@code total:} line
  * showing buffered/allocated sizes, and finally a closing brace.
  *
  * @param indent prefix put in front of the header, total and closing lines; nested entries
  *     receive it extended by two spaces and their label
  * @return the formatted report, ending with a newline
  */
 public String memUsageString(String indent) {
   final StringBuilder report = new StringBuilder(indent);
   report.append(path).append(" {\n");

   // every nested entry shares the same two-space deeper prefix
   final String nested = indent + "  ";
   report.append(repetitionLevelColumn.memUsageString(nested + "r:")).append('\n');
   report.append(definitionLevelColumn.memUsageString(nested + "d:")).append('\n');
   report.append(dataColumn.memUsageString(nested + "data:")).append('\n');
   report.append(pageWriter.memUsageString(nested + "pages:")).append('\n');

   final String totalLine =
       String.format("  total: %,d/%,d", getBufferedSizeInMemory(), allocatedSize());
   report.append(indent).append(totalLine).append('\n');

   report.append(indent).append("}\n");
   return report.toString();
 }
Example #7
0
 /**
  * Writes out whatever is still pending: the current page if it holds any value, followed by the
  * dictionary page if the data writer produced one.
  *
  * @throws ParquetEncodingException if the page writer fails with an {@link IOException} while
  *     writing the dictionary page
  */
 public void flush() {
   final boolean hasPendingValues = valueCount > 0;
   if (hasPendingValues) {
     writePage();
   }

   final DictionaryPage dictPage = dataColumn.toDictPageAndClose();
   if (dictPage == null) {
     // the data writer produced no dictionary page: nothing more to write
     return;
   }

   if (DEBUG) {
     LOG.debug("write dictionary");
   }
   try {
     pageWriter.writeDictionaryPage(dictPage);
   } catch (IOException e) {
     throw new ParquetEncodingException("could not write dictionary page for " + path, e);
   }
   // only reached when the dictionary page was written successfully
   dataColumn.resetDictionary();
 }
Example #8
0
 /**
  * Hands the currently buffered values to the page writer as one page and then resets the
  * per-page state.
  *
  * <p>The page body is the concatenation of the repetition level bytes, the definition level
  * bytes and the data bytes, in that order. After a successful write the three values writers
  * are reset, {@code valueCount} goes back to zero and the statistics are reset.
  *
  * @throws ParquetEncodingException if the page writer fails with an {@link IOException}; in
  *     that case none of the per-page state is reset
  */
 private void writePage() {
   if (DEBUG) {
     LOG.debug("write page");
   }

   try {
     pageWriter.writePage(
         // page body: repetition levels, then definition levels, then data
         concat(
             repetitionLevelColumn.getBytes(),
             definitionLevelColumn.getBytes(),
             dataColumn.getBytes()),
         valueCount,
         statistics,
         // encodings, in the same order as the concatenated sections
         repetitionLevelColumn.getEncoding(),
         definitionLevelColumn.getEncoding(),
         dataColumn.getEncoding());
   } catch (IOException e) {
     throw new ParquetEncodingException("could not write page for " + path, e);
   }

   // start the next page from a clean state
   repetitionLevelColumn.reset();
   definitionLevelColumn.reset();
   dataColumn.reset();
   valueCount = 0;
   resetStatistics();
 }
Example #9
0
 /**
  * Reports how many bytes are allocated on behalf of this column writer.
  *
  * @return the allocated sizes of the repetition level, definition level and data writers, plus
  *     the allocated size reported by the page writer
  */
 public long allocatedSize() {
   // bytes allocated by the three values writers
   final long valuesWritersAllocated =
       repetitionLevelColumn.getAllocatedSize()
           + definitionLevelColumn.getAllocatedSize()
           + dataColumn.getAllocatedSize();
   return valuesWritersAllocated + pageWriter.allocatedSize();
 }