Example #1
0
 private int getLastIdx() {
   Set<Integer> indices = Sets.newHashSet();
   for (Type type : footer.getTypesList()) {
     indices.addAll(type.getSubtypesList());
   }
   return Collections.max(indices);
 }
Example #2
0
  private long getRawDataSizeOfColumn(int colIdx) {
    OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
    long numVals = colStat.getNumberOfValues();
    Type type = footer.getTypes(colIdx);

    switch (type.getKind()) {
      case BINARY:
        // old orc format doesn't support binary statistics. checking for binary
        // statistics is not required as protocol buffers takes care of it.
        return colStat.getBinaryStatistics().getSum();
      case STRING:
      case CHAR:
      case VARCHAR:
        // old orc format doesn't support sum for string statistics. checking for
        // existence is not required as protocol buffers takes care of it.

        // ORC strings are deserialized to java strings. so use java data model's
        // string size
        numVals = numVals == 0 ? 1 : numVals;
        int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals);
        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
      case TIMESTAMP:
        return numVals * JavaDataModel.get().lengthOfTimestamp();
      case DATE:
        return numVals * JavaDataModel.get().lengthOfDate();
      case DECIMAL:
        return numVals * JavaDataModel.get().lengthOfDecimal();
      case DOUBLE:
      case LONG:
        return numVals * JavaDataModel.get().primitive2();
      case FLOAT:
      case INT:
      case SHORT:
      case BOOLEAN:
      case BYTE:
        return numVals * JavaDataModel.get().primitive1();
      default:
        LOG.debug("Unknown primitive category.");
        break;
    }

    return 0;
  }
Example #3
0
  private List<Integer> getColumnIndicesFromNames(List<String> colNames) {
    // top level struct
    Type type = footer.getTypesList().get(0);
    List<Integer> colIndices = Lists.newArrayList();
    List<String> fieldNames = type.getFieldNamesList();
    int fieldIdx = 0;
    for (String colName : colNames) {
      if (fieldNames.contains(colName)) {
        fieldIdx = fieldNames.indexOf(colName);
      }

      // a single field may span multiple columns. find start and end column
      // index for the requested field
      int idxStart = type.getSubtypes(fieldIdx);

      int idxEnd;

      // if the specified is the last field and then end index will be last
      // column index
      if (fieldIdx + 1 > fieldNames.size() - 1) {
        idxEnd = getLastIdx() + 1;
      } else {
        idxEnd = type.getSubtypes(fieldIdx + 1);
      }

      // if start index and end index are same then the field is a primitive
      // field else complex field (like map, list, struct, union)
      if (idxStart == idxEnd) {
        // simple field
        colIndices.add(idxStart);
      } else {
        // complex fields spans multiple columns
        for (int i = idxStart; i < idxEnd; i++) {
          colIndices.add(i);
        }
      }
    }
    return colIndices;
  }