/**
 * Computes the combined size of all byte arrays described by {@code byteLengths}.
 *
 * <p>Each entry of {@code byteLengths} is passed to
 * {@code JavaDataModel.lengthForByteArrayOfSize} and the results are summed.
 *
 * @return the summed size; {@code 0} when {@code byteLengths} has no entries
 */
public int getVariableSize() {
  // The data model does not depend on the loop index, so look it up once
  // instead of on every iteration.
  final JavaDataModel model = JavaDataModel.get();
  int variableSize = 0;
  for (int i = 0; i < byteLengths.length; ++i) {
    variableSize += model.lengthForByteArrayOfSize(byteLengths[i]);
  }
  return variableSize;
}
private long getRawDataSizeOfColumn(int colIdx) { OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx); long numVals = colStat.getNumberOfValues(); Type type = footer.getTypes(colIdx); switch (type.getKind()) { case BINARY: // old orc format doesn't support binary statistics. checking for binary // statistics is not required as protocol buffers takes care of it. return colStat.getBinaryStatistics().getSum(); case STRING: case CHAR: case VARCHAR: // old orc format doesn't support sum for string statistics. checking for // existence is not required as protocol buffers takes care of it. // ORC strings are deserialized to java strings. so use java data model's // string size numVals = numVals == 0 ? 1 : numVals; int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals); return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen); case TIMESTAMP: return numVals * JavaDataModel.get().lengthOfTimestamp(); case DATE: return numVals * JavaDataModel.get().lengthOfDate(); case DECIMAL: return numVals * JavaDataModel.get().lengthOfDecimal(); case DOUBLE: case LONG: return numVals * JavaDataModel.get().primitive2(); case FLOAT: case INT: case SHORT: case BOOLEAN: case BYTE: return numVals * JavaDataModel.get().primitive1(); default: LOG.debug("Unknown primitive category."); break; } return 0; }