Example #1
 @Test
 public void test_getOrcField_complex_array() throws Exception {
   final SchemaBuilder.FieldAssembler<Schema> builder =
       SchemaBuilder.record("testRecord").namespace("any.data").fields();
   builder.name("array").type().array().items().map().values().floatType().noDefault();
   Schema testSchema = builder.endRecord();
   TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
   assertEquals(
       TypeInfoFactory.getListTypeInfo(
           TypeInfoFactory.getMapTypeInfo(
               TypeInfoCreator.createString(), TypeInfoCreator.createFloat())),
       orcType);
 }
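For reference, the nested expected type above is the Hive type array<map<string,float>>. A minimal sketch, assuming TypeInfoUtils is available, builds the same TypeInfo from its Hive type string:

 // Sketch: parse the Hive type string directly; the result should equal the
 // expected TypeInfo constructed in the assert above.
 TypeInfo fromString = TypeInfoUtils.getTypeInfoFromTypeString("array<map<string,float>>");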
Example #2
  @Test
  public void testDecimalPlusDecimalSameParams() throws HiveException {
    GenericUDFOPPlus udf = new GenericUDFOPPlus();

    ObjectInspector[] inputOIs = {
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2)),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2))
    };

    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(6, 2), oi.getTypeInfo());
  }
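The asserted result type follows Hive's decimal addition rule: the result scale is max(s1, s2) and the result precision is max(p1 - s1, p2 - s2) + 1 + scale, capped at 38. For decimal(5,2) + decimal(5,2) that gives max(3,3) + 1 + 2 = 6, hence decimal(6,2).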
Example #3
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {

    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector =
        (StructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    row = new ArrayList<Object>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
      row.add(null);
    }

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }
  }
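A minimal sketch of table properties that would exercise this initialize(); the column names, types, and the serde variable are hypothetical, while the property keys are the standard serde constants used above:

  Properties tbl = new Properties();
  tbl.setProperty(Constants.LIST_COLUMNS, "id,name");         // comma-separated column names
  tbl.setProperty(Constants.LIST_COLUMN_TYPES, "int:string"); // Hive type string
  tbl.setProperty(Constants.SERIALIZATION_SORT_ORDER, "+-");  // 'id' ascending, 'name' descending
  serde.initialize(new Configuration(), tbl);                 // 'serde' is an instance of this class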
Example #4
 @Test
 public void test_getOrcField_record() throws Exception {
   final SchemaBuilder.FieldAssembler<Schema> builder =
       SchemaBuilder.record("testRecord").namespace("any.data").fields();
   builder.name("int").type().intType().noDefault();
   builder.name("long").type().longType().longDefault(1L);
   builder.name("array").type().array().items().stringType().noDefault();
   Schema testSchema = builder.endRecord();
   TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
   assertEquals(
       TypeInfoFactory.getStructTypeInfo(
           Arrays.asList("int", "long", "array"),
           Arrays.asList(
               TypeInfoCreator.createInt(),
               TypeInfoCreator.createLong(),
               TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
       orcType);
 }
Example #5
  private ArrayWritableObjectInspector getObjectInspector(
      final String columnNames, final String columnTypes) {
    List<TypeInfo> columnTypeList = createHiveTypeInfoFrom(columnTypes);
    List<String> columnNameList = createHiveColumnsFrom(columnNames);
    StructTypeInfo rowTypeInfo =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);

    return new ArrayWritableObjectInspector(rowTypeInfo);
  }
Example #6
  @Test
  public void testLongPlusDecimal() throws HiveException {
    GenericUDFOPPlus udf = new GenericUDFOPPlus();

    // Long
    LongWritable left = new LongWritable(104);
    HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97"));
    ObjectInspector[] inputOIs = {
      PrimitiveObjectInspectorFactory.writableLongObjectInspector,
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(9, 4))
    };
    DeferredObject[] args = {
      new DeferredJavaObject(left), new DeferredJavaObject(right),
    };

    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(24, 4), oi.getTypeInfo());
    HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args);
    Assert.assertEquals(HiveDecimal.create("338.97"), res.getHiveDecimal());
  }
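For type derivation the long operand is treated as decimal(19,0), so the same addition rule gives precision max(19 - 0, 9 - 4) + 1 + max(0, 4) = 24 and scale 4, matching the asserted decimal(24,4).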
Example #7
 public static TypeInfo buildPrimitiveOrcSchema() {
   return TypeInfoFactory.getStructTypeInfo(
       Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
       Arrays.asList(
           TypeInfoCreator.createInt(),
           TypeInfoCreator.createLong(),
           TypeInfoCreator.createBoolean(),
           TypeInfoCreator.createFloat(),
           TypeInfoCreator.createDouble(),
           TypeInfoCreator.createBinary(),
           TypeInfoCreator.createString()));
 }
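A minimal sketch, using the TypeInfoUtils call already seen in the SerDe examples, of turning this struct TypeInfo into a writable ObjectInspector:

 StructObjectInspector oi =
     (StructObjectInspector)
         TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(buildPrimitiveOrcSchema());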
Example #8
 @Test
 public void test_getOrcField_union() throws Exception {
   final SchemaBuilder.FieldAssembler<Schema> builder =
       SchemaBuilder.record("testRecord").namespace("any.data").fields();
   builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
   Schema testSchema = builder.endRecord();
   TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
   assertEquals(
       TypeInfoFactory.getUnionTypeInfo(
           Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean())),
       orcType);
 }
Example #9
  @Test
  public void testDecimalPlusDecimal() throws HiveException {
    GenericUDFOPPlus udf = new GenericUDFOPPlus();

    // Decimal
    HiveDecimalWritable left = new HiveDecimalWritable(HiveDecimal.create("14.5"));
    HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97"));
    ObjectInspector[] inputOIs = {
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(3, 1)),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2))
    };
    DeferredObject[] args = {
      new DeferredJavaObject(left), new DeferredJavaObject(right),
    };

    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(6, 2), oi.getTypeInfo());
    HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args);
    Assert.assertEquals(HiveDecimal.create("249.47"), res.getHiveDecimal());
  }
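Worked out with the same rule: for decimal(3,1) + decimal(5,2) the scale is max(1,2) = 2 and the precision is max(3-1, 5-2) + 1 + 2 = 6, so the result type is decimal(6,2) and 14.5 + 234.97 evaluates to 249.47.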
Example #10
  /**
   * Initializes the SerDe. Gets the list of columns and their types from the table properties;
   * these are used when reading and writing JSON data.
   *
   * @param conf Hadoop configuration object
   * @param tbl Table Properties
   * @throws SerDeException
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LOG.debug("Initializing SerDe");
    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

    LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty);

    // all table column names
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    // all column types
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    stats = new SerDeStats();

    // Create row related objects
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);

    // build options
    options = new JsonStructOIOptions(getMappings(tbl));

    rowObjectInspector =
        (StructObjectInspector)
            JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options);

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }

    // other configuration
    ignoreMalformedJson =
        Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
  }
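A hypothetical property set for this SerDe; PROP_IGNORE_MALFORMED_JSON is the constant referenced on the last line of initialize(), and the column names and types are made up for illustration:

  Properties tbl = new Properties();
  tbl.setProperty(Constants.LIST_COLUMNS, "id,payload");
  tbl.setProperty(Constants.LIST_COLUMN_TYPES, "int:string");
  tbl.setProperty(PROP_IGNORE_MALFORMED_JSON, "true"); // tolerate rows with malformed JSON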
Example #11
  @Test
  public void testMapColumnPairs() throws TooManyAccumuloColumnsException {
    ColumnMapper columnMapper =
        new ColumnMapper(
            ":rowID,cf:*",
            conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
            Arrays.asList("row", "col"),
            Arrays.<TypeInfo>asList(
                TypeInfoFactory.stringTypeInfo,
                TypeInfoFactory.getMapTypeInfo(
                    TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo)));
    Set<Pair<Text, Text>> pairs = inputformat.getPairCollection(columnMapper.getColumnMappings());

    Assert.assertEquals(1, pairs.size());

    Pair<Text, Text> cfCq = pairs.iterator().next();
    Assert.assertEquals("cf", cfCq.getFirst().toString());
    Assert.assertNull(cfCq.getSecond());
  }
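The mapping string ":rowID,cf:*" binds the first Hive column to the Accumulo row ID and the second to the entire cf column family as a map<string,string>; a whole-family mapping has no fixed qualifier, which is why the single returned pair is ("cf", null).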
Example #12
  @Override
  public final void initialize(final Configuration conf, final Properties tbl)
      throws SerDeException {

    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Get compression properties
    compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION);

    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException(
          "ParquetHiveSerde initialization failed. Number of column "
              + "name and column type differs. columnNames = "
              + columnNames
              + ", columnTypes = "
              + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
  }
Example #13
  @Test
  public void testDoublePlusDecimal() throws HiveException {
    GenericUDFOPPlus udf = new GenericUDFOPPlus();

    // Double
    DoubleWritable left = new DoubleWritable(74.52);
    HiveDecimalWritable right = new HiveDecimalWritable(HiveDecimal.create("234.97"));
    ObjectInspector[] inputOIs = {
      PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(5, 2))
    };
    DeferredObject[] args = {
      new DeferredJavaObject(left), new DeferredJavaObject(right),
    };

    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo());
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(Double.valueOf(309.49), Double.valueOf(res.get()));
  }
Example #14
  @Test
  public void test_getOrcField_primitive() throws Exception {
    // Expected ORC types
    TypeInfo[] expectedTypes = {
      TypeInfoFactory.getPrimitiveTypeInfo("int"),
      TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
      TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
      TypeInfoFactory.getPrimitiveTypeInfo("float"),
      TypeInfoFactory.getPrimitiveTypeInfo("double"),
      TypeInfoFactory.getPrimitiveTypeInfo("binary"),
      TypeInfoFactory.getPrimitiveTypeInfo("string")
    };

    // Build a fake Avro record with all types
    Schema testSchema = buildPrimitiveAvroSchema();
    List<Schema.Field> fields = testSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
      assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema()));
    }
  }
Example #15
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);
      result = new HiveDecimalWritable(HiveDecimal.ZERO);
      inputOI = (PrimitiveObjectInspector) parameters[0];
      // The output precision is 10 greater than the input which should cover at least
      // 10b rows. The scale is the same as the input.
      DecimalTypeInfo outputTypeInfo = null;
      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
        int precision = Math.min(HiveDecimal.MAX_PRECISION, inputOI.precision() + 10);
        outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputOI.scale());
      } else {
        outputTypeInfo = (DecimalTypeInfo) inputOI.getTypeInfo();
      }
      ObjectInspector oi =
          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(outputTypeInfo);
      outputOI =
          (PrimitiveObjectInspector)
              ObjectInspectorUtils.getStandardObjectInspector(oi, ObjectInspectorCopyOption.JAVA);

      return oi;
    }
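The PARTIAL1/COMPLETE branch widens the input precision by 10 digits (capped at HiveDecimal.MAX_PRECISION) so the running sum has headroom for roughly 10 billion rows; for example, a decimal(10,2) input yields a decimal(20,2) intermediate type, while the PARTIAL2/FINAL branch reuses the already-widened partial type as-is.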
Example #16
 static TypeInfo createDouble() {
   return TypeInfoFactory.getPrimitiveTypeInfo("double");
 }
 static {
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME),
       LAZY_BOOLEAN_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME),
       LAZY_BYTE_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME),
       LAZY_SHORT_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME),
       LAZY_INT_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME),
       LAZY_FLOAT_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME),
       LAZY_DOUBLE_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME),
       LAZY_LONG_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME),
       LAZY_VOID_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME),
       LAZY_DATE_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME),
       LAZY_TIMESTAMP_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME),
       LAZY_INTERVAL_YEAR_MONTH_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME),
       LAZY_INTERVAL_DAY_TIME_OBJECT_INSPECTOR);
   cachedPrimitiveLazyObjectInspectors.put(
       TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME),
       LAZY_BINARY_OBJECT_INSPECTOR);
 }
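A minimal lookup sketch against the cache populated above; since TypeInfoFactory hands back cached TypeInfo instances, the same key used in the static block retrieves the inspector:

 ObjectInspector lazyBooleanOI =
     cachedPrimitiveLazyObjectInspectors.get(
         TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));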
Example #18
 static TypeInfo createInt() {
   return TypeInfoFactory.getPrimitiveTypeInfo("int");
 }
Example #19
 static TypeInfo createLong() {
   return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
 }
Example #20
 static TypeInfo createBoolean() {
   return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
 }
Example #21
 static TypeInfo createFloat() {
   return TypeInfoFactory.getPrimitiveTypeInfo("float");
 }
Example #22
 static TypeInfo createString() {
   return TypeInfoFactory.getPrimitiveTypeInfo("string");
 }
Example #23
 static TypeInfo createBinary() {
   return TypeInfoFactory.getPrimitiveTypeInfo("binary");
 }
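These createXxx() helpers all delegate to TypeInfoFactory.getPrimitiveTypeInfo(), which caches one TypeInfo instance per Hive type name, so repeated calls are cheap and the results compare equal in the assertions above. A minimal sketch, assuming the usual caching behavior of TypeInfoFactory:

 // Repeated lookups for the same type name return the same cached instance.
 assert TypeInfoFactory.getPrimitiveTypeInfo("int") == TypeInfoFactory.getPrimitiveTypeInfo("int");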
Example #24
  private MapOpCtx initObjectInspector(
      Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    PartitionDesc pd = opCtx.partDesc;
    TableDesc td = pd.getTableDesc();

    // Use table properties in case of unpartitioned tables,
    // and the union of table properties and partition properties, with partition
    // taking precedence, in the case of partitioned tables
    Properties overlayedProps =
        SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());

    Map<String, String> partSpec = pd.getPartSpec();

    opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = pd.getDeserializer(hconf);

    StructObjectInspector partRawRowObjectInspector;
    if (Utilities.isInputFileFormatSelfDescribing(pd)) {
      partRawRowObjectInspector = tableRowOI;
    } else {
      partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    }

    opCtx.partTblObjectInspectorConverter =
        ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);

    // Next check if this table has partitions and if so
    // get the list of partition names as well as allocate
    // the serdes for the partition columns
    String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);

    if (pcols != null && pcols.length() > 0) {
      String[] partKeys = pcols.trim().split("/");
      String pcolTypes =
          overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
      String[] partKeyTypes = pcolTypes.trim().split(":");

      if (partKeys.length > partKeyTypes.length) {
        throw new HiveException(
            "Internal error : partKeys length, "
                + partKeys.length
                + " greater than partKeyTypes length, "
                + partKeyTypes.length);
      }

      List<String> partNames = new ArrayList<String>(partKeys.length);
      Object[] partValues = new Object[partKeys.length];
      List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);

      for (int i = 0; i < partKeys.length; i++) {
        String key = partKeys[i];
        partNames.add(key);
        ObjectInspector oi =
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));

        // Partitions do not exist for this table
        if (partSpec == null) {
          // for partitionless table, initialize partValue to null
          partValues[i] = null;
        } else {
          partValues[i] =
              ObjectInspectorConverters.getConverter(
                      PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
                  .convert(partSpec.get(key));
        }
        partObjectInspectors.add(oi);
      }
      opCtx.rowWithPart = new Object[] {null, partValues};
      opCtx.partObjectInspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
    }

    // The op may not be a TableScan for mapjoins
    // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
    // In that case, it will be a Select, but the rowOI need not be amended
    if (opCtx.op instanceof TableScanOperator) {
      TableScanOperator tsOp = (TableScanOperator) opCtx.op;
      TableScanDesc tsDesc = tsOp.getConf();
      if (tsDesc != null && tsDesc.hasVirtualCols()) {
        opCtx.vcs = tsDesc.getVirtualCols();
        opCtx.vcValues = new Object[opCtx.vcs.size()];
        opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
        if (opCtx.isPartitioned()) {
          opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
        } else {
          opCtx.rowWithPartAndVC = new Object[2];
        }
      }
    }
    if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
      opCtx.rowObjectInspector = tableRowOI;
      return opCtx;
    }
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
    inspectors.add(tableRowOI);
    if (opCtx.isPartitioned()) {
      inspectors.add(opCtx.partObjectInspector);
    }
    if (opCtx.hasVC()) {
      inspectors.add(opCtx.vcsObjectInspector);
    }
    opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
    return opCtx;
  }
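When the row needs partition and/or virtual columns, the final row inspector is a union-struct over up to three struct inspectors, mirroring the rowWithPart / rowWithPartAndVC object arrays built above. A minimal standalone sketch, with tableOI and partitionOI as hypothetical struct inspectors:

  List<StructObjectInspector> inspectorList = Arrays.asList(tableOI, partitionOI);
  StructObjectInspector combinedOI =
      ObjectInspectorFactory.getUnionStructObjectInspector(inspectorList);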