@Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {

    // Get column names and types
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector =
        (StructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    row = new ArrayList<Object>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
      row.add(null);
    }

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }
  }
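For context, here is a minimal sketch (not part of the original source) of the table properties that drive an initialize method like the one above. The column names, types, and sort order are hypothetical; the property keys are the standard Hive serde constants.

Properties tbl = new Properties();
tbl.setProperty("columns", "id,name,score");           // Constants.LIST_COLUMNS
tbl.setProperty("columns.types", "int:string:double"); // Constants.LIST_COLUMN_TYPES
tbl.setProperty("serialization.sort.order", "++-");    // '-' marks a descending column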
  // Initialize this SerDe with the system properties and table properties
  @Override
  public void initialize(Configuration sysProps, Properties tblProps) throws SerDeException {
    LOG.debug("Initializing QueryStringSerDe");

    // Get the names of the columns for the table this SerDe is being used with
    String columnNameProperty = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(columnNameProperty.split(","));

    // Convert column types from text to TypeInfo objects
    String columnTypeProperty = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();

    // Create ObjectInspectors from the type information for each column
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    ObjectInspector oi;
    for (int c = 0; c < numColumns; c++) {
      oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
      columnOIs.add(oi);
    }
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

    // Create an empty row object to be reused during deserialization
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
      row.add(null);
    }

    LOG.debug("QueryStringSerDe initialization complete");
  }
Example #3
 /**
  * Converts the skewedValue available as a string in the metadata to the appropriate object by
  * using the type of the column from the join key.
  *
  * @param skewedValue the skewed value, as a string, taken from the metadata
  * @param keyCol the join key column whose type determines the constant's type
  * @return an expression node descriptor of the appropriate constant
  */
 private ExprNodeConstantDesc createConstDesc(String skewedValue, ExprNodeColumnDesc keyCol) {
   ObjectInspector inputOI =
       TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo);
   ObjectInspector outputOI =
       TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(keyCol.getTypeInfo());
   Converter converter = ObjectInspectorConverters.getConverter(inputOI, outputOI);
   Object skewedValueObject = converter.convert(skewedValue);
   return new ExprNodeConstantDesc(keyCol.getTypeInfo(), skewedValueObject);
 }
  @Override
  public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    List<String> columnNames = Arrays.asList(tbl.getProperty(Constants.LIST_COLUMNS).split(","));
    List<TypeInfo> columnTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(Constants.LIST_COLUMN_TYPES));

    numCols = columnNames.size();

    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);

    for (int i = 0; i < numCols; i++) {
      columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    this.inspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    this.outputFields = new String[numCols];
    row = new ArrayList<String>(numCols);

    for (int i = 0; i < numCols; i++) {
      row.add(null);
    }

    separatorChar = getProperty(tbl, "separatorChar", CSVWriter.DEFAULT_SEPARATOR);
    quoteChar = getProperty(tbl, "quoteChar", CSVWriter.DEFAULT_QUOTE_CHARACTER);
    escapeChar = getProperty(tbl, "escapeChar", CSVWriter.DEFAULT_ESCAPE_CHARACTER);
  }
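The getProperty helper called above is not shown in this snippet. A plausible sketch, assuming it reads a single-character SERDEPROPERTY and falls back to the opencsv default when the property is absent:

 private char getProperty(final Properties tbl, final String property, final char def) {
   final String val = tbl.getProperty(property);
   if (val != null) {
     // properties such as separatorChar hold a single character
     return val.charAt(0);
   }
   return def;
 }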
Example #5
 public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
     throws UDFArgumentException {
   final String typeName = numberOI.getTypeName();
   if (DOUBLE_TYPE_NAME.equals(typeName)) {
     DoubleWritable v = getConstValue(numberOI);
     return v.get();
   } else if (FLOAT_TYPE_NAME.equals(typeName)) {
     FloatWritable v = getConstValue(numberOI);
     return v.get();
   } else if (INT_TYPE_NAME.equals(typeName)) {
     IntWritable v = getConstValue(numberOI);
     return v.get();
   } else if (BIGINT_TYPE_NAME.equals(typeName)) {
     LongWritable v = getConstValue(numberOI);
     return v.get();
   } else if (SMALLINT_TYPE_NAME.equals(typeName)) {
     ShortWritable v = getConstValue(numberOI);
     return v.get();
   } else if (TINYINT_TYPE_NAME.equals(typeName)) {
     ByteWritable v = getConstValue(numberOI);
     return v.get();
   }
   throw new UDFArgumentException(
       "Unexpected argument type to cast as double: "
           + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
 }
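A usage sketch (assumed, not from the original source): Hive's PrimitiveObjectInspectorFactory can produce the constant inspector this method expects, e.g. for a literal UDF argument. The value 3.14 is hypothetical, and DoubleWritable here is org.apache.hadoop.hive.serde2.io.DoubleWritable.

 ObjectInspector constDoubleOI =
     PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
         TypeInfoFactory.doubleTypeInfo, new DoubleWritable(3.14));
 double d = getAsConstDouble(constDoubleOI); // 3.14
 // A non-constant or non-numeric inspector raises UDFArgumentException instead.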
Example #6
 public static PrimitiveObjectInspector asPrimitiveObjectInspector(
     @Nonnull final ObjectInspector oi) throws UDFArgumentException {
   if (oi.getCategory() != Category.PRIMITIVE) {
     throw new UDFArgumentException(
         "Is not PrimitiveObjectInspector: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   return (PrimitiveObjectInspector) oi;
 }
Example #7
 public static long getConstLong(@Nonnull final ObjectInspector oi) throws UDFArgumentException {
   if (!isBigIntOI(oi)) {
     throw new UDFArgumentException(
         "argument must be a BigInt value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   LongWritable v = getConstValue(oi);
   return v.get();
 }
Example #8
 public static boolean getConstBoolean(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (!isBooleanOI(oi)) {
     throw new UDFArgumentException(
         "argument must be a Boolean value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   BooleanWritable v = getConstValue(oi);
   return v.get();
 }
Example #9
 public static String getConstString(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (!isStringOI(oi)) {
     throw new UDFArgumentException(
         "argument must be a Text value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   Text v = getConstValue(oi);
   return v == null ? null : v.toString();
 }
Example #10
 @Nonnull
 public static ConstantObjectInspector asConstantObjectInspector(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
     throw new UDFArgumentException(
         "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   return (ConstantObjectInspector) oi;
 }
 @Override
 public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
   // Verify that the first parameter supports comparisons.
   ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
   if (!ObjectInspectorUtils.compareSupported(oi)) {
     throw new UDFArgumentTypeException(
         0, "Cannot support comparison of map<> type or complex type containing map<>.");
   }
   return new GenericUDAFFirstRowEvaluator();
 }
  private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    List<TypeInfo> columnTypes;

    if (columnsTypeStr.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
    }

    return columnTypes;
  }
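For illustration, a hypothetical type string and the TypeInfo list it parses into; nested types are handled as well:

  List<TypeInfo> columnTypes =
      TypeInfoUtils.getTypeInfosFromTypeString("int:string:array<double>:map<string,int>");
  // columnTypes.size() == 4; columnTypes.get(2).getTypeName() -> "array<double>"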
Example #13
  /*
   * (non-Javadoc)
   *
   * @see org.apache.hadoop.hive.serde2.AbstractSerDe#initialize(org.apache.hadoop.conf.Configuration,
   * java.util.Properties)
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(LIST_COLUMN_TYPES);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

    List<ObjectInspector> columnObjectInspectors =
        new ArrayList<ObjectInspector>(columnNames.size());
    ObjectInspector colObjectInspector;
    for (int col = 0; col < columnNames.size(); col++) {
      colObjectInspector =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(col));
      columnObjectInspectors.add(colObjectInspector);
    }

    cachedObjectInspector =
        ObjectInspectorFactory.getColumnarStructObjectInspector(
            columnNames, columnObjectInspectors);
  }
Example #14
 @SuppressWarnings("unchecked")
 @Nullable
 public static <T extends Writable> T getConstValue(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
     throw new UDFArgumentException(
         "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   ConstantObjectInspector constOI = (ConstantObjectInspector) oi;
   Object v = constOI.getWritableConstantValue();
   return (T) v;
 }
  static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants, as they were renamed in 0.9,
    // breaking compatibility
    // the column names are saved because the inspector given to #serialize doesn't
    // preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames =
        StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(
            tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
      inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
 /** Get the list of field types, colon-separated, from a StructObjectInspector. */
 public static String getFieldTypes(StructObjectInspector soi) {
   List<? extends StructField> fields = soi.getAllStructFieldRefs();
   StringBuilder sb = new StringBuilder();
   for (int i = 0; i < fields.size(); i++) {
     if (i > 0) {
       sb.append(":");
     }
     sb.append(
         TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector())
             .getTypeName());
   }
   return sb.toString();
 }
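A usage sketch with a hypothetical two-field struct; the standard Java inspector for a struct TypeInfo implements StructObjectInspector:

 StructObjectInspector soi =
     (StructObjectInspector)
         TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
             TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>"));
 String fieldTypes = getFieldTypes(soi); // "int:string"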
Example #17
  @Override
  public void initialize(Configuration cfg, Properties props) throws SerDeException {
    String columnNameProperty = props.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(columnNameProperty.split(","));
    numColumns = columnNames.size();

    String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

    // Ensure we have the same number of column names and types
    assert numColumns == columnTypes.size();

    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns);
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
      ObjectInspector oi =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
      inspectors.add(oi);
      row.add(null);
    }
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
Example #18
 @Override
 public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
   if (parameters.length != 1) {
     throw new UDFArgumentTypeException(
         parameters.length - 1, "Exactly one argument is expected.");
   }
   ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
   if (!ObjectInspectorUtils.compareSupported(oi)) {
     throw new UDFArgumentTypeException(
         parameters.length - 1,
         "Cannot support comparison of map<> type or complex type containing map<>.");
   }
   return new GenericUDAFMaxEvaluator();
 }
Example #19
  /*
   * add array<struct> to the list of columns
   */
  protected static RowResolver createSelectListRR(MatchPath evaluator, PTFInputDef inpDef)
      throws SemanticException {
    RowResolver rr = new RowResolver();
    RowResolver inputRR = inpDef.getOutputShape().getRr();

    evaluator.inputColumnNamesMap = new HashMap<String, String>();
    ArrayList<String> inputColumnNames = new ArrayList<String>();

    ArrayList<ObjectInspector> inpColOIs = new ArrayList<ObjectInspector>();

    for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) {
      ColumnInfo cInfo = new ColumnInfo(inpCInfo);
      String colAlias = cInfo.getAlias();

      String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName());
      if (tabColAlias != null) {
        colAlias = tabColAlias[1];
      }
      ASTNode inExpr = null;
      inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR);
      if (inExpr != null) {
        rr.putExpression(inExpr, cInfo);
        colAlias = inExpr.toStringTree().toLowerCase();
      } else {
        colAlias = colAlias == null ? cInfo.getInternalName() : colAlias;
        rr.put(cInfo.getTabAlias(), colAlias, cInfo);
      }

      evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias);
      inputColumnNames.add(colAlias);
      inpColOIs.add(cInfo.getObjectInspector());
    }

    StandardListObjectInspector pathAttrOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, inpColOIs));

    ColumnInfo pathColumn =
        new ColumnInfo(
            PATHATTR_NAME,
            TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI),
            null,
            false,
            false);
    rr.put(null, PATHATTR_NAME, pathColumn);

    return rr;
  }
  /**
   * Initializes the SerDe. Gets the list of columns and their types from the table properties,
   * which are then used to read and create the JSON data.
   *
   * @param conf Hadoop configuration object
   * @param tbl Table Properties
   * @throws SerDeException
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LOG.debug("Initializing SerDe");
    // Get column names and types
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

    LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty);

    // all table column names
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    // all column types
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    stats = new SerDeStats();

    // Create row related objects
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);

    // build options
    options = new JsonStructOIOptions(getMappings(tbl));

    rowObjectInspector =
        (StructObjectInspector)
            JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options);

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }

    // other configuration
    ignoreMalformedJson =
        Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
  }
  /**
   * Reads the following SERDEPROPERTIES
   *
   * <p>
   *
   * <ul>
   *   <li>{@code voltdb.servers} (required) comma separated list of VoltDB servers that comprise a
   *       VoltDB cluster
   *   <li>{@code voltdb.table} (required) destination VoltDB table
   *   <li>{@code voltdb.user} (optional) VoltDB user name
   *   <li>{@code voltdb.password} (optional) VoltDB user password
   * </ul>
   *
   * <p>and makes sure that the Hive table column types match the destination VoltDB column types
   */
  @Override
  public void initialize(Configuration conf, Properties props) throws SerDeException {

    String columnNamesPropVal = props.getProperty(serdeConstants.LIST_COLUMNS, "");
    String columnTypesPropVal = props.getProperty(serdeConstants.LIST_COLUMN_TYPES, "");
    String serversPropVal = props.getProperty(SERVERS_PROP, "");

    String table = props.getProperty(TABLE_PROP, "");
    String user = props.getProperty(USER_PROP);
    String password = props.getProperty(PASSWORD_PROP);

    if (serversPropVal.trim().isEmpty() || table.trim().isEmpty()) {
      throw new VoltSerdeException(
          "properties \""
              + SERVERS_PROP
              + "\", and \""
              + TABLE_PROP
              + "\" must be minimally defined");
    }

    List<String> columnNames = m_splitter.splitToList(columnNamesPropVal);
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesPropVal);

    String[] servers = m_splitter.splitToList(serversPropVal).toArray(new String[0]);
    if (servers.length == 0) {
      throw new VoltSerdeException(
          "properties \""
              + SERVERS_PROP
              + "\", and \""
              + TABLE_PROP
              + "\" must be minimally defined");
    }

    if (conf != null) {
      VoltConfiguration.configureVoltDB(conf, servers, user, password, table);
    }

    VoltType[] voltTypes = null;
    m_voltConf = new VoltConfiguration(table, servers, user, password);
    try {
      m_voltConf.isMinimallyConfigured();
      voltTypes = m_voltConf.getTableColumnTypes();
    } catch (IOException e) {
      throw new VoltSerdeException("uanble to setup a VoltDB context", e);
    }
    m_oig = new VoltObjectInspectorGenerator(columnNames, columnTypes, voltTypes);
  }
  private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx)
      throws SerDeException {
    // We are going to use LBSerDe to serialize values; create OI for retrieval.
    List<? extends StructField> fields =
        ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs();
    List<String> colNames = new ArrayList<String>(fields.size());
    List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size());
    for (int i = 0; i < fields.size(); ++i) {
      StructField field = fields.get(i);
      colNames.add(field.getFieldName());
      // It would be nice if OI could return typeInfo...
      TypeInfo typeInfo =
          TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName());
      colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo));
    }

    return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(colNames, colOis);
  }
  @Override
  public final void initialize(final Configuration conf, final Properties tbl)
      throws SerDeException {

    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and types
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Get compression properties
    compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION);

    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException(
          "ParquetHiveSerde initialization failed. Number of column "
              + "name and column type differs. columnNames = "
              + columnNames
              + ", columnTypes = "
              + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
  }
Example #24
 @Nonnull
 public static String[] getConstStringArray(@Nonnull final ObjectInspector oi)
     throws UDFArgumentException {
   if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
     throw new UDFArgumentException(
         "argument must be a constant value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
   }
   ConstantObjectInspector constOI = (ConstantObjectInspector) oi;
   final List<?> lst = (List<?>) constOI.getWritableConstantValue();
   final int size = lst.size();
   final String[] ary = new String[size];
   for (int i = 0; i < size; i++) {
     Object o = lst.get(i);
     if (o != null) {
       ary[i] = o.toString();
     }
   }
   return ary;
 }
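A usage sketch (assumed API usage, values hypothetical): a constant list inspector of the kind this method accepts can be built with ObjectInspectorFactory:

 ObjectInspector listOI =
     ObjectInspectorFactory.getStandardConstantListObjectInspector(
         PrimitiveObjectInspectorFactory.javaStringObjectInspector,
         Arrays.asList("alpha", "beta", null));
 String[] values = getConstStringArray(listOI); // {"alpha", "beta", null}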
  @Override
  public ObjectInspector getObjectInspector() {
    // Read the configuration parameters
    String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);

    // Parse the configuration parameters
    ArrayList<String> columnNames = new ArrayList<String>();
    Deque<Integer> virtualColumns = new ArrayDeque<Integer>();
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
      String[] colNames = columnNameProperty.split(",");
      for (int i = 0; i < colNames.length; i++) {
        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) {
          virtualColumns.addLast(i);
        } else {
          columnNames.add(colNames[i]);
        }
      }
    }
    if (columnTypeProperty == null) {
      // Default type: all string
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < columnNames.size(); i++) {
        if (i > 0) {
          sb.append(":");
        }
        sb.append("string");
      }
      columnTypeProperty = sb.toString();
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    while (virtualColumns.size() > 0) {
      fieldTypes.remove(virtualColumns.removeLast());
    }
    StructTypeInfo rowType = new StructTypeInfo();
    rowType.setAllStructFieldNames(columnNames);
    rowType.setAllStructFieldTypeInfos(fieldTypes);
    return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType));
  }
  static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema)
      throws IOException {

    if (outputSchema == null) {
      throw new IOException("Invalid output schema specified");
    }

    List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
    List<String> fieldNames = new ArrayList<String>();

    for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) {
      TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString());

      fieldNames.add(hcatFieldSchema.getName());
      fieldInspectors.add(getObjectInspector(type));
    }

    StructObjectInspector structInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
    return structInspector;
  }
Example #27
  @Test
  public void test_getWritable() throws Exception {
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[] {1, 2, 3}) instanceof List);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);
    Map<String, Float> map = new HashMap<>();
    map.put("Hello", 1.0f);
    map.put("World", 2.0f);

    Object writable =
        NiFiOrcUtils.convertToORCObject(
            TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map);
    assertTrue(writable instanceof MapWritable);
    MapWritable mapWritable = (MapWritable) writable;
    mapWritable.forEach(
        (key, value) -> {
          assertTrue(key instanceof Text);
          assertTrue(value instanceof FloatWritable);
        });
  }
Example #28
  private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
    OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
    if (props != null) {
      final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
      final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
      if (columnNameProperty != null
          && !columnNameProperty.isEmpty()
          && columnTypeProperty != null
          && !columnTypeProperty.isEmpty()) {
        List<String> columnNames;
        List<TypeInfo> columnTypes;

        if (columnNameProperty.length() == 0) {
          columnNames = new ArrayList<String>();
        } else {
          columnNames = Arrays.asList(columnNameProperty.split(","));
        }

        if (columnTypeProperty.length() == 0) {
          columnTypes = new ArrayList<TypeInfo>();
        } else {
          columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        }

        TypeDescription schema = TypeDescription.createStruct();
        for (int i = 0; i < columnNames.size(); ++i) {
          schema.addField(columnNames.get(i), convertTypeInfo(columnTypes.get(i)));
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("ORC schema = " + schema);
        }
        result.setSchema(schema);
      }
    }
    return result;
  }
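To illustrate the schema the loop above builds, a sketch with hypothetical columns "id,name" and types "int:string"; the resulting ORC TypeDescription prints as struct<id:int,name:string>:

  TypeDescription schema =
      TypeDescription.createStruct()
          .addField("id", TypeDescription.createInt())
          .addField("name", TypeDescription.createString());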
  @Override
  public RecordWriter getHiveRecordWriter(
      JobConf jc,
      Path finalOutPath,
      Class<? extends Writable> valueClass,
      boolean isCompressed,
      Properties tbl,
      Progressable progress)
      throws IOException {

    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    IFieldMap map = new IFieldMap();
    ArrayList<TypeInfo> types;
    if (columnTypeProperty == null) {
      types = new ArrayList<TypeInfo>();
      map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
    } else {
      types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String compress = tbl.getProperty(ConstVar.Compress);
    if (compress != null && compress.equalsIgnoreCase("true")) {
      head.setCompress((byte) 1);
    }
    int i = 0;
    for (TypeInfo type : types) {
      byte fdftype = 0;
      String name = type.getTypeName();
      if (name.equals(Constants.TINYINT_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Byte;
      } else if (name.equals(Constants.SMALLINT_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Short;
      } else if (name.equals(Constants.INT_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Int;
      } else if (name.equals(Constants.BIGINT_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Long;
      } else if (name.equals(Constants.FLOAT_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Float;
      } else if (name.equals(Constants.DOUBLE_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_Double;
      } else if (name.equals(Constants.STRING_TYPE_NAME)) {
        fdftype = ConstVar.FieldType_String;
      }

      map.addFieldType(new IRecord.IFType(fdftype, i++));
    }
    head.setFieldMap(map);

    ArrayList<ArrayList<Integer>> columnprojects = null;
    String projectionString = jc.get(ConstVar.Projection);
    if (projectionString != null) {
      columnprojects = new ArrayList<ArrayList<Integer>>();
      String[] projectionList = projectionString.split(ConstVar.RecordSplit);
      for (String str : projectionList) {
        ArrayList<Integer> cp = new ArrayList<Integer>();
        String[] item = str.split(ConstVar.FieldSplit);
        for (String s : item) {
          cp.add(Integer.valueOf(s));
        }
        columnprojects.add(cp);
      }
    }

    if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
      final Configuration conf = new Configuration(jc);
      final IFormatDataFile ifdf = new IFormatDataFile(conf);
      ifdf.create(finalOutPath.toString(), head);
      return new RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {}

        @Override
        public void close(boolean abort) throws IOException {
          ifdf.close();
        }
      };
    }

    final IColumnDataFile icdf = new IColumnDataFile(jc);
    icdf.create(finalOutPath.toString(), head, columnprojects);

    LOG.info(finalOutPath.toString());
    LOG.info("output file compress?\t" + compress);
    LOG.info("head:\t" + head.toStr());

    return new RecordWriter() {

      @Override
      public void write(Writable w) throws IOException {
        icdf.addRecord((IRecord) w);
      }

      @Override
      public void close(boolean abort) throws IOException {
        icdf.close();
      }
    };
  }
Example #30
  private static void splitJoinCondition(
      List<RelDataTypeField> sysFieldList,
      List<RelNode> inputs,
      RexNode condition,
      List<List<RexNode>> joinKeys,
      List<Integer> filterNulls,
      List<SqlOperator> rangeOp,
      List<RexNode> nonEquiList)
      throws CalciteSemanticException {
    final int sysFieldCount = sysFieldList.size();
    final RelOptCluster cluster = inputs.get(0).getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();

    if (condition instanceof RexCall) {
      RexCall call = (RexCall) condition;
      if (call.getOperator() == SqlStdOperatorTable.AND) {
        for (RexNode operand : call.getOperands()) {
          splitJoinCondition(
              sysFieldList, inputs, operand, joinKeys, filterNulls, rangeOp, nonEquiList);
        }
        return;
      }

      RexNode leftKey = null;
      RexNode rightKey = null;
      int leftInput = 0;
      int rightInput = 0;
      List<RelDataTypeField> leftFields = null;
      List<RelDataTypeField> rightFields = null;
      boolean reverse = false;

      SqlKind kind = call.getKind();

      // Only consider range operators if we haven't already seen one
      if ((kind == SqlKind.EQUALS)
          || (filterNulls != null && kind == SqlKind.IS_NOT_DISTINCT_FROM)
          || (rangeOp != null
              && rangeOp.isEmpty()
              && (kind == SqlKind.GREATER_THAN
                  || kind == SqlKind.GREATER_THAN_OR_EQUAL
                  || kind == SqlKind.LESS_THAN
                  || kind == SqlKind.LESS_THAN_OR_EQUAL))) {
        final List<RexNode> operands = call.getOperands();
        RexNode op0 = operands.get(0);
        RexNode op1 = operands.get(1);

        final ImmutableBitSet projRefs0 = InputFinder.bits(op0);
        final ImmutableBitSet projRefs1 = InputFinder.bits(op1);

        final ImmutableBitSet[] inputsRange = new ImmutableBitSet[inputs.size()];
        int totalFieldCount = 0;
        for (int i = 0; i < inputs.size(); i++) {
          final int firstField = totalFieldCount + sysFieldCount;
          totalFieldCount = firstField + inputs.get(i).getRowType().getFieldCount();
          inputsRange[i] = ImmutableBitSet.range(firstField, totalFieldCount);
        }

        boolean foundBothInputs = false;
        for (int i = 0; i < inputs.size() && !foundBothInputs; i++) {
          if (projRefs0.intersects(inputsRange[i])
              && projRefs0.union(inputsRange[i]).equals(inputsRange[i])) {
            if (leftKey == null) {
              leftKey = op0;
              leftInput = i;
              leftFields = inputs.get(leftInput).getRowType().getFieldList();
            } else {
              rightKey = op0;
              rightInput = i;
              rightFields = inputs.get(rightInput).getRowType().getFieldList();
              reverse = true;
              foundBothInputs = true;
            }
          } else if (projRefs1.intersects(inputsRange[i])
              && projRefs1.union(inputsRange[i]).equals(inputsRange[i])) {
            if (leftKey == null) {
              leftKey = op1;
              leftInput = i;
              leftFields = inputs.get(leftInput).getRowType().getFieldList();
            } else {
              rightKey = op1;
              rightInput = i;
              rightFields = inputs.get(rightInput).getRowType().getFieldList();
              foundBothInputs = true;
            }
          }
        }

        if ((leftKey != null) && (rightKey != null)) {
          // adjustment array
          int[] adjustments = new int[totalFieldCount];
          for (int i = 0; i < inputs.size(); i++) {
            final int adjustment = inputsRange[i].nextSetBit(0);
            for (int j = adjustment; j < inputsRange[i].length(); j++) {
              adjustments[j] = -adjustment;
            }
          }

          // replace right Key input ref
          rightKey =
              rightKey.accept(
                  new RelOptUtil.RexInputConverter(
                      rexBuilder, rightFields, rightFields, adjustments));

          // left key only needs to be adjusted if there are system
          // fields, but do it for uniformity
          leftKey =
              leftKey.accept(
                  new RelOptUtil.RexInputConverter(
                      rexBuilder, leftFields, leftFields, adjustments));

          RelDataType leftKeyType = leftKey.getType();
          RelDataType rightKeyType = rightKey.getType();

          if (leftKeyType != rightKeyType) {
            // perform casting using Hive rules
            TypeInfo rType = TypeConverter.convert(rightKeyType);
            TypeInfo lType = TypeConverter.convert(leftKeyType);
            TypeInfo tgtType = FunctionRegistry.getCommonClassForComparison(lType, rType);

            if (tgtType == null) {
              throw new CalciteSemanticException(
                  "Cannot find common type for join keys "
                      + leftKey
                      + " (type "
                      + leftKeyType
                      + ") and "
                      + rightKey
                      + " (type "
                      + rightKeyType
                      + ")");
            }
            RelDataType targetKeyType = TypeConverter.convert(tgtType, rexBuilder.getTypeFactory());

            if (leftKeyType != targetKeyType
                && TypeInfoUtils.isConversionRequiredForComparison(tgtType, lType)) {
              leftKey = rexBuilder.makeCast(targetKeyType, leftKey);
            }

            if (rightKeyType != targetKeyType
                && TypeInfoUtils.isConversionRequiredForComparison(tgtType, rType)) {
              rightKey = rexBuilder.makeCast(targetKeyType, rightKey);
            }
          }
        }
      }

      if ((leftKey != null) && (rightKey != null)) {
        // found suitable join keys
        // add them to key list, ensuring that if there is a
        // non-equi join predicate, it appears at the end of the
        // key list; also mark the null filtering property
        addJoinKey(joinKeys.get(leftInput), leftKey, (rangeOp != null) && !rangeOp.isEmpty());
        addJoinKey(joinKeys.get(rightInput), rightKey, (rangeOp != null) && !rangeOp.isEmpty());
        if (filterNulls != null && kind == SqlKind.EQUALS) {
          // nulls are considered not matching for equality comparison
          // add the position of the most recently inserted key
          filterNulls.add(joinKeys.get(leftInput).size() - 1);
        }
        if (rangeOp != null && kind != SqlKind.EQUALS && kind != SqlKind.IS_DISTINCT_FROM) {
          if (reverse) {
            kind = reverse(kind);
          }
          rangeOp.add(op(kind, call.getOperator()));
        }
        return;
      } // else fall through and add this condition as nonEqui condition
    }

    // Either the condition is not a RexCall, or no suitable equi-join keys were
    // found above; add it to the list of non-equi-join conditions.
    nonEquiList.add(condition);
  }