Exemplo n.º 1
0
  /**
   * Builds the lazy-binary struct object inspector used to read back values; the values
   * themselves are serialized with the lazy-binary SerDe.
   *
   * <p>The returned inspector has one field per field of the value SerDe's struct inspector, in
   * the same order and with the same names.
   *
   * @param valCtx value context whose SerDe provides the struct object inspector to mirror
   * @return the lazy-binary struct object inspector built from the mirrored fields
   * @throws SerDeException declared by the SerDe calls made here and propagated unchanged
   */
  private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx)
      throws SerDeException {
    StructObjectInspector valueOi =
        (StructObjectInspector) valCtx.getSerDe().getObjectInspector();
    List<? extends StructField> structFields = valueOi.getAllStructFieldRefs();
    int fieldCount = structFields.size();

    List<String> names = new ArrayList<String>(fieldCount);
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fieldCount);
    for (StructField structField : structFields) {
      names.add(structField.getFieldName());
      // An object inspector does not hand out its TypeInfo, so go through the type name.
      String typeName = structField.getFieldObjectInspector().getTypeName();
      inspectors.add(
          LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(
              TypeInfoUtils.getTypeInfoFromTypeString(typeName)));
    }

    return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(names, inspectors);
  }
Exemplo n.º 2
0
  /**
   * Creates a standard struct object inspector with one field per field of the given schema.
   *
   * <p>Each field's type string is parsed into a {@code TypeInfo}, which is then turned into an
   * object inspector via {@code getObjectInspector}.
   *
   * @param outputSchema the schema to mirror; must not be {@code null}
   * @return a standard struct object inspector with the schema's field names, in schema order
   * @throws IOException if {@code outputSchema} is {@code null}
   */
  static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema)
      throws IOException {
    if (outputSchema == null) {
      throw new IOException("Invalid output schema specified");
    }

    List<String> names = new ArrayList<String>();
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
    for (HCatFieldSchema column : outputSchema.getFields()) {
      TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(column.getTypeString());
      names.add(column.getName());
      inspectors.add(getObjectInspector(columnType));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
  }
Exemplo n.º 3
0
  /**
   * Checks the runtime type of the object that {@code NiFiOrcUtils.convertToORCObject} returns
   * for boxed primitives, an int array, a list, and a string-to-float map.
   */
  @Test
  public void test_getWritable() throws Exception {
    // Boxed primitives, converted without a type hint.
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);

    // Arrays and lists are both expected to come back as a List.
    assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[] {1, 2, 3}) instanceof List);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);

    // A map converted with an explicit map<string,float> type.
    Map<String, Float> floatsByName = new HashMap<>();
    floatsByName.put("Hello", 1.0f);
    floatsByName.put("World", 2.0f);
    Object converted =
        NiFiOrcUtils.convertToORCObject(
            TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), floatsByName);
    assertTrue(converted instanceof MapWritable);

    // Every key must be a Text and every value a FloatWritable.
    for (Map.Entry<?, ?> entry : ((MapWritable) converted).entrySet()) {
      assertTrue(entry.getKey() instanceof Text);
      assertTrue(entry.getValue() instanceof FloatWritable);
    }
  }
Exemplo n.º 4
0
 /**
  * Parses a fixed struct type string into a {@code TypeInfo}.
  *
  * <p>The struct has five fields: an int, a map from string to double, a string, a union type,
  * and an array of int.
  *
  * @return the {@code TypeInfo} produced by {@code TypeInfoUtils} for the struct type string
  */
 public static TypeInfo buildComplexOrcSchema() {
   // NOTE(review): myLongOrFloat is declared as uniontype<int> despite its name - confirm this
   // is intended.
   final String complexStructType =
       "struct<myInt:int,myMap:map<string,double>,myEnum:string,myLongOrFloat:uniontype<int>,myIntList:array<int>>";
   return TypeInfoUtils.getTypeInfoFromTypeString(complexStructType);
 }
Exemplo n.º 5
0
  /**
   * Validates a partition schema against the existing table schema and returns the columns that
   * are present in the partition but not in the table.
   *
   * <p>Partition columns are matched to table columns by position:
   *
   * <ul>
   *   <li>For a position that exists in the table, the column names must match ignoring case
   *       ({@code ERROR_SCHEMA_COLUMN_MISMATCH} otherwise) and the parsed column types must be
   *       equal ({@code ERROR_SCHEMA_TYPE_MISMATCH} otherwise).
   *   <li>For a position beyond the table's columns, the column must not share its name (ignoring
   *       case) with a table partition key ({@code ERROR_SCHEMA_PARTITION_KEY} otherwise); such
   *       a column is reported as newly added.
   * </ul>
   *
   * <p>Names are case-folded with {@link java.util.Locale#ROOT} so the partition-key lookup does
   * not depend on the JVM's default locale.
   *
   * @param table the table
   * @param partitionSchema the partition schema
   * @return the list of newly added fields, in partition-schema order
   * @throws IOException Signals that an I/O exception has occurred; schema violations are
   *     reported as {@code HCatException}.
   */
  public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema)
      throws IOException {
    Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();

    for (FieldSchema field : table.getPartitionKeys()) {
      // Locale.ROOT: default-locale folding (e.g. Turkish) maps "I" to a dotless i, which would
      // let a partition key such as "ID" go undetected against a column named "id".
      partitionKeyMap.put(field.getName().toLowerCase(java.util.Locale.ROOT), field);
    }

    List<FieldSchema> tableCols = table.getCols();
    List<FieldSchema> newFields = new ArrayList<FieldSchema>();

    for (int i = 0; i < partitionSchema.getFields().size(); i++) {

      FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i));

      FieldSchema tableField;
      if (i < tableCols.size()) {
        // Position exists in the table: the names must agree (case-insensitively).
        tableField = tableCols.get(i);

        if (!tableField.getName().equalsIgnoreCase(field.getName())) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
              "Expected column <"
                  + tableField.getName()
                  + "> at position "
                  + (i + 1)
                  + ", found column <"
                  + field.getName()
                  + ">");
        }
      } else {
        // Position is past the table's columns: the column must not reuse a partition key name.
        tableField = partitionKeyMap.get(field.getName().toLowerCase(java.util.Locale.ROOT));

        if (tableField != null) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">");
        }
      }

      if (tableField == null) {
        // field present in partition but not in table
        newFields.add(field);
      } else {
        // field present in both. validate type has not changed
        TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType());

        if (!partitionType.equals(tableType)) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_TYPE_MISMATCH,
              "Column <"
                  + field.getName()
                  + ">, expected <"
                  + tableType.getTypeName()
                  + ">, got <"
                  + partitionType.getTypeName()
                  + ">");
        }
      }
    }

    return newFields;
  }
Exemplo n.º 6
0
  /**
   * Evaluates this expression over one batch: for each active row, applies {@code func} to the
   * decimal input column and the long output column, and propagates the input's null and
   * repeating flags to the output.
   *
   * <p>NOTE(review): {@code func} is defined outside this block; presumably it converts the
   * decimal at the given index into the long output vector - confirm against its implementations.
   *
   * @param batch the row batch holding the input column at {@code inputColumn} and the output
   *     column at {@code outputColumn}
   */
  @Override
  public void evaluate(VectorizedRowBatch batch) {

    // Child expressions, if any, are evaluated before this expression reads the batch.
    if (childExpressions != null) {
      super.evaluateChildren(batch);
    }

    // Resolve the output's primitive category once, from the output type name, and cache it.
    if (!integerPrimitiveCategoryKnown) {
      String typeName = getOutputType().toLowerCase();
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
      integerPrimitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
      integerPrimitiveCategoryKnown = true;
    }

    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];

    if (n == 0) {

      // Nothing to do
      return;
    }

    if (inV.noNulls) {
      // Input has no nulls: call func for every active row without consulting isNull.
      outV.noNulls = true;
      if (inV.isRepeating) {
        // Repeating input: only entry 0 is processed and the output is marked repeating too.
        outV.isRepeating = true;
        func(outV, inV, 0);
      } else if (batch.selectedInUse) {
        // Only the row indices listed in sel[0..n) are processed.
        for (int j = 0; j != n; j++) {
          int i = sel[j];
          func(outV, inV, i);
        }
        outV.isRepeating = false;
      } else {
        // No selection vector: rows 0..n-1 are processed.
        for (int i = 0; i != n; i++) {
          func(outV, inV, i);
        }
        outV.isRepeating = false;
      }
    } else {

      // Handle case with nulls. Don't do function if the value is null,
      // because the data may be undefined for a null value.
      outV.noNulls = false;
      if (inV.isRepeating) {
        // Repeating input: copy the null flag of entry 0 and process it only when non-null.
        outV.isRepeating = true;
        outV.isNull[0] = inV.isNull[0];
        if (!inV.isNull[0]) {
          func(outV, inV, 0);
        }
      } else if (batch.selectedInUse) {
        // Copy the null flag for each selected row, then process the non-null ones.
        for (int j = 0; j != n; j++) {
          int i = sel[j];
          outV.isNull[i] = inV.isNull[i];
          if (!inV.isNull[i]) {
            func(outV, inV, i);
          }
        }
        outV.isRepeating = false;
      } else {
        // Bulk-copy the first n null flags before func runs on the non-null rows.
        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
        for (int i = 0; i != n; i++) {
          if (!inV.isNull[i]) {
            func(outV, inV, i);
          }
        }
        outV.isRepeating = false;
      }
    }
  }