/**
 * Sums the data-model size of every byte array tracked in {@code byteLengths}.
 *
 * <p>Each entry of {@code byteLengths} is passed to
 * {@link JavaDataModel#lengthForByteArrayOfSize} and the results are added up.
 * The total is accumulated in an {@code int}.
 *
 * @return the accumulated size over all entries; 0 when {@code byteLengths} is empty
 */
public int getVariableSize() {
  // The data model does not depend on the loop variable, so look it up once
  // instead of once per entry.
  JavaDataModel model = JavaDataModel.get();
  int variableSize = 0;
  for (int i = 0; i < byteLengths.length; ++i) {
    variableSize += model.lengthForByteArrayOfSize(byteLengths[i]);
  }
  return variableSize;
}
Beispiel #2
0
  /**
   * Estimates the raw (deserialized) data size of a single column from the
   * column statistics and type information stored in the footer.
   *
   * <p>For binary columns the sum recorded in the binary statistics is returned
   * as is. For string-like columns the average string length is derived from the
   * string statistics and converted to a size via {@link JavaDataModel}. For the
   * remaining handled kinds the size is the number of values times the data
   * model's per-value size for that kind.
   *
   * @param colIdx index used to look up the column's statistics and type in the footer
   * @return the estimated size, or 0 when the column has no values or its kind
   *         is not handled here
   */
  private long getRawDataSizeOfColumn(int colIdx) {
    OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
    long numVals = colStat.getNumberOfValues();
    Type type = footer.getTypes(colIdx);

    switch (type.getKind()) {
      case BINARY:
        // old orc format doesn't support binary statistics. checking for binary
        // statistics is not required as protocol buffers takes care of it.
        return colStat.getBinaryStatistics().getSum();
      case STRING:
      case CHAR:
      case VARCHAR:
        // old orc format doesn't support sum for string statistics. checking for
        // existence is not required as protocol buffers takes care of it.

        // A column without values occupies no space. Returning early also avoids
        // the division by zero below, and keeps this branch consistent with the
        // other kinds, which all yield 0 when there are no values.
        if (numVals == 0) {
          return 0;
        }

        // ORC strings are deserialized to java strings. so use java data model's
        // string size
        int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals);
        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
      case TIMESTAMP:
        return numVals * JavaDataModel.get().lengthOfTimestamp();
      case DATE:
        return numVals * JavaDataModel.get().lengthOfDate();
      case DECIMAL:
        return numVals * JavaDataModel.get().lengthOfDecimal();
      case DOUBLE:
      case LONG:
        return numVals * JavaDataModel.get().primitive2();
      case FLOAT:
      case INT:
      case SHORT:
      case BOOLEAN:
      case BYTE:
        return numVals * JavaDataModel.get().primitive1();
      default:
        // Kinds not listed above contribute nothing to the estimate.
        LOG.debug("Unknown primitive category.");
        break;
    }

    return 0;
  }