@Override
  public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    List<String> columnNames = Arrays.asList(tbl.getProperty(Constants.LIST_COLUMNS).split(","));
    List<TypeInfo> columnTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(Constants.LIST_COLUMN_TYPES));

    numCols = columnNames.size();

    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);

    // OpenCSV-style behavior: every column is exposed as a string, regardless of the
    // declared column types parsed above
    for (int i = 0; i < numCols; i++) {
      columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    this.inspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    this.outputFields = new String[numCols];
    row = new ArrayList<String>(numCols);

    for (int i = 0; i < numCols; i++) {
      row.add(null);
    }

    separatorChar = getProperty(tbl, "separatorChar", CSVWriter.DEFAULT_SEPARATOR);
    quoteChar = getProperty(tbl, "quoteChar", CSVWriter.DEFAULT_QUOTE_CHARACTER);
    escapeChar = getProperty(tbl, "escapeChar", CSVWriter.DEFAULT_ESCAPE_CHARACTER);
  }
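  // The CSV example above delegates to a private getProperty(Properties, String, char) helper
  // that is not part of this excerpt. A minimal sketch of what it plausibly looks like,
  // assuming it falls back to the supplied CSVWriter default when the SERDEPROPERTY is unset:
  private char getProperty(final Properties tbl, final String property, final char def) {
    final String val = tbl.getProperty(property);
    if (val != null) {
      // only the first character is meaningful for separator/quote/escape settings
      return val.charAt(0);
    }
    return def;
  }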
  // Initialize this SerDe with the system properties and table properties
  @Override
  public void initialize(Configuration sysProps, Properties tblProps) throws SerDeException {
    LOG.debug("Initializing QueryStringSerDe");

    // Get the names of the columns for the table this SerDe is being used
    // with
    String columnNameProperty = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(columnNameProperty.split(","));

    // Convert column types from text to TypeInfo objects
    String columnTypeProperty = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();

    // Create ObjectInspectors from the type information for each column
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    ObjectInspector oi;
    for (int c = 0; c < numColumns; c++) {
      oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
      columnOIs.add(oi);
    }
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

    // Create an empty row object to be reused during deserialization
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
      row.add(null);
    }

    LOG.debug("QueryStringSerDe initialization complete");
  }
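  // A hedged, self-contained illustration of the two table properties every example here
  // reads: column names arrive comma-separated under "columns" and column types arrive
  // colon-separated under "columns.types". The column set below is hypothetical.
  static void columnPropertiesDemo() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,score"); // "columns"
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:double"); // "columns.types"

    List<String> names = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    List<TypeInfo> types =
        TypeInfoUtils.getTypeInfosFromTypeString(
            tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

    // Hive supplies exactly one type per column name, which the examples assert
    assert names.size() == types.size();
  }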
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {

    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector =
        (StructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    row = new ArrayList<Object>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
      row.add(null);
    }

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }
  }
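  // The SERIALIZATION_SORT_ORDER property read above encodes one character per column:
  // '-' marks a descending column, anything else (conventionally '+') ascending. A small
  // sketch with a hypothetical three-column order string:
  static void sortOrderDemo() {
    String columnSortOrder = "+-+"; // column 0 ascending, column 1 descending, column 2 ascending
    boolean[] columnSortOrderIsDesc = new boolean[3];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }
    // columnSortOrderIsDesc is now [false, true, false]
  }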
  private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    List<TypeInfo> columnTypes;

    if (columnsTypeStr.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
    }

    return columnTypes;
  }
  /**
   * Reads the following SERDEPROPERTIES
   *
   * <p>
   *
   * <ul>
   *   <li>{@code voltdb.servers} (required) comma-separated list of VoltDB servers that
   *       comprise a VoltDB cluster
   *   <li>{@code voltdb.table} (required) destination VoltDB table
   *   <li>{@code voltdb.user} (optional) VoltDB user name
   *   <li>{@code voltdb.password} (optional) VoltDB user password
   * </ul>
   *
   * <p>and makes sure that the Hive table column types match the destination VoltDB column types
   */
  @Override
  public void initialize(Configuration conf, Properties props) throws SerDeException {

    String columnNamesPropVal = props.getProperty(serdeConstants.LIST_COLUMNS, "");
    String columnTypesPropVal = props.getProperty(serdeConstants.LIST_COLUMN_TYPES, "");
    String serversPropVal = props.getProperty(SERVERS_PROP, "");

    String table = props.getProperty(TABLE_PROP, "");
    String user = props.getProperty(USER_PROP);
    String password = props.getProperty(PASSWORD_PROP);

    if (serversPropVal.trim().isEmpty() || table.trim().isEmpty()) {
      throw new VoltSerdeException(
          "properties \""
              + SERVERS_PROP
              + "\" and \""
              + TABLE_PROP
              + "\" must both be defined");
    }

    List<String> columnNames = m_splitter.splitToList(columnNamesPropVal);
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesPropVal);

    String[] servers = m_splitter.splitToList(serversPropVal).toArray(new String[0]);
    if (servers.length == 0) {
      throw new VoltSerdeException(
          "property \"" + SERVERS_PROP + "\" must name at least one server");
    }

    if (conf != null) {
      VoltConfiguration.configureVoltDB(conf, servers, user, password, table);
    }

    VoltType[] voltTypes = null;
    m_voltConf = new VoltConfiguration(table, servers, user, password);
    try {
      m_voltConf.isMinimallyConfigured();
      voltTypes = m_voltConf.getTableColumnTypes();
    } catch (IOException e) {
      throw new VoltSerdeException("uanble to setup a VoltDB context", e);
    }
    m_oig = new VoltObjectInspectorGenerator(columnNames, columnTypes, voltTypes);
  }
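  // A hedged sketch of the SERDEPROPERTIES listed in the javadoc above, expressed as the
  // Properties object Hive would pass to initialize(); the server names, table, and
  // credentials are hypothetical:
  static Properties voltSerdePropertiesDemo() {
    Properties props = new Properties();
    props.setProperty("voltdb.servers", "volt1:21212,volt2:21212"); // required
    props.setProperty("voltdb.table", "ALERTS"); // required
    props.setProperty("voltdb.user", "hive"); // optional
    props.setProperty("voltdb.password", "secret"); // optional
    return props;
  }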
  /**
   * Initializes the SerDe. Gets the list of columns and their types from the table properties;
   * these are used when reading and creating JSON data.
   *
   * @param conf Hadoop configuration object
   * @param tbl table properties
   * @throws SerDeException
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LOG.debug("Initializing SerDe");
    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

    LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty);

    // all table column names
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    // all column types
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    stats = new SerDeStats();

    // Create row related objects
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);

    // build options
    options = new JsonStructOIOptions(getMappings(tbl));

    rowObjectInspector =
        (StructObjectInspector)
            JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options);

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }

    // other configuration
    ignoreMalformedJson =
        Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
  }
  @Override
  public final void initialize(final Configuration conf, final Properties tbl)
      throws SerDeException {

    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Get compression properties
    compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION);

    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException(
          "ParquetHiveSerde initialization failed. Number of column "
              + "name and column type differs. columnNames = "
              + columnNames
              + ", columnTypes = "
              + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
  }
  /*
   * (non-Javadoc)
   *
   * @see org.apache.hadoop.hive.serde2.AbstractSerDe#initialize(org.apache.hadoop.conf.Configuration,
   * java.util.Properties)
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(LIST_COLUMN_TYPES);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

    List<ObjectInspector> columnObjectInspectors =
        new ArrayList<ObjectInspector>(columnNames.size());
    ObjectInspector colObjectInspector;
    for (int col = 0; col < columnNames.size(); col++) {
      colObjectInspector =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(col));
      columnObjectInspectors.add(colObjectInspector);
    }

    cachedObjectInspector =
        ObjectInspectorFactory.getColumnarStructObjectInspector(
            columnNames, columnObjectInspectors);
  }
  @Override
  public ObjectInspector getObjectInspector() {
    // Read the configuration parameters
    String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);

    // Parse the configuration parameters
    ArrayList<String> columnNames = new ArrayList<String>();
    Deque<Integer> virtualColumns = new ArrayDeque<Integer>();
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
      String[] colNames = columnNameProperty.split(",");
      for (int i = 0; i < colNames.length; i++) {
        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) {
          virtualColumns.addLast(i);
        } else {
          columnNames.add(colNames[i]);
        }
      }
    }
    if (columnTypeProperty == null) {
      // Default type: all string
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < columnNames.size(); i++) {
        if (i > 0) {
          sb.append(":");
        }
        sb.append("string");
      }
      columnTypeProperty = sb.toString();
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    while (virtualColumns.size() > 0) {
      fieldTypes.remove(virtualColumns.removeLast());
    }
    StructTypeInfo rowType = new StructTypeInfo();
    rowType.setAllStructFieldNames(columnNames);
    rowType.setAllStructFieldTypeInfos(fieldTypes);
    return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType));
  }
  static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use the Hive constants, since they were renamed in 0.9,
    // breaking compatibility
    // the column names are saved because the inspector handed to #serialize doesn't preserve
    // them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames =
        StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(
            tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
      inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
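  // A hedged usage sketch for the method above: build the inspector from hypothetical table
  // properties, then create and populate a struct through it. Field values are Writable
  // wrappers (LongWritable, Text) because the inspectors are the standard writable ones.
  static void structObjectInspectorDemo() {
    Properties props = new Properties();
    props.setProperty(HiveConstants.COLUMNS, "id,name");
    props.setProperty(HiveConstants.COLUMNS_TYPES, "bigint:string");

    StandardStructObjectInspector oi = structObjectInspector(props);
    Object struct = oi.create();
    oi.setStructFieldData(struct, oi.getStructFieldRef("id"), new LongWritable(42L));
    oi.setStructFieldData(struct, oi.getStructFieldRef("name"), new Text("alice"));
  }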
  @Override
  public void initialize(Configuration cfg, Properties props) throws SerDeException {
    String columnNameProperty = props.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(columnNameProperty.split(","));
    numColumns = columnNames.size();

    String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);

    // Ensure we have the same number of column names and types
    assert numColumns == columnTypes.size();

    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns);
    row = new ArrayList<Object>(numColumns);
    for (int c = 0; c < numColumns; c++) {
      ObjectInspector oi =
          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
      inspectors.add(oi);
      row.add(null);
    }
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }
  private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
    OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
    if (props != null) {
      final String columnNameProperty = props.getProperty(IOConstants.COLUMNS);
      final String columnTypeProperty = props.getProperty(IOConstants.COLUMNS_TYPES);
      if (columnNameProperty != null
          && !columnNameProperty.isEmpty()
          && columnTypeProperty != null
          && !columnTypeProperty.isEmpty()) {
        List<String> columnNames;
        List<TypeInfo> columnTypes;

        if (columnNameProperty.length() == 0) {
          columnNames = new ArrayList<String>();
        } else {
          columnNames = Arrays.asList(columnNameProperty.split(","));
        }

        if (columnTypeProperty.length() == 0) {
          columnTypes = new ArrayList<TypeInfo>();
        } else {
          columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        }

        TypeDescription schema = TypeDescription.createStruct();
        for (int i = 0; i < columnNames.size(); ++i) {
          schema.addField(columnNames.get(i), convertTypeInfo(columnTypes.get(i)));
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("ORC schema = " + schema);
        }
        result.setSchema(schema);
      }
    }
    return result;
  }
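  // A hedged sketch of what the schema-building loop above produces for a hypothetical
  // two-column table; TypeDescription's addField API chains:
  static TypeDescription orcSchemaDemo() {
    TypeDescription schema =
        TypeDescription.createStruct()
            .addField("id", TypeDescription.createInt())
            .addField("name", TypeDescription.createString());
    // schema.toString() prints "struct<id:int,name:string>"
    return schema;
  }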
  @Override
  public RecordWriter getHiveRecordWriter(
      JobConf jc,
      Path finalOutPath,
      Class<? extends Writable> valueClass,
      boolean isCompressed,
      Properties tbl,
      Progressable progress)
      throws IOException {

    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    IFieldMap map = new IFieldMap();
    ArrayList<TypeInfo> types;
    if (columnTypeProperty == null) {
      types = new ArrayList<TypeInfo>();
      map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
    } else {
      types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String compress = tbl.getProperty(ConstVar.Compress);
    if (compress != null && compress.equalsIgnoreCase("true")) {
      head.setCompress((byte) 1);
    }
    int i = 0;
    for (TypeInfo type : types) {
      byte fdftype = 0;
      String name = type.getTypeName();
      if (name.equals(Constants.TINYINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Byte;
      else if (name.equals(Constants.SMALLINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Short;
      else if (name.equals(Constants.INT_TYPE_NAME)) fdftype = ConstVar.FieldType_Int;
      else if (name.equals(Constants.BIGINT_TYPE_NAME)) fdftype = ConstVar.FieldType_Long;
      else if (name.equals(Constants.FLOAT_TYPE_NAME)) fdftype = ConstVar.FieldType_Float;
      else if (name.equals(Constants.DOUBLE_TYPE_NAME)) fdftype = ConstVar.FieldType_Double;
      else if (name.equals(Constants.STRING_TYPE_NAME)) fdftype = ConstVar.FieldType_String;

      map.addFieldType(new IRecord.IFType(fdftype, i++));
    }
    head.setFieldMap(map);

    ArrayList<ArrayList<Integer>> columnprojects = null;
    String projectionString = jc.get(ConstVar.Projection);
    if (projectionString != null) {
      columnprojects = new ArrayList<ArrayList<Integer>>();
      String[] projectionList = projectionString.split(ConstVar.RecordSplit);
      for (String str : projectionList) {
        ArrayList<Integer> cp = new ArrayList<Integer>();
        String[] item = str.split(ConstVar.FieldSplit);
        for (String s : item) {
          cp.add(Integer.valueOf(s));
        }
        columnprojects.add(cp);
      }
    }

    if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
      final Configuration conf = new Configuration(jc);
      final IFormatDataFile ifdf = new IFormatDataFile(conf);
      ifdf.create(finalOutPath.toString(), head);
      return new RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {}

        @Override
        public void close(boolean abort) throws IOException {
          ifdf.close();
        }
      };
    }

    final IColumnDataFile icdf = new IColumnDataFile(jc);
    icdf.create(finalOutPath.toString(), head, columnprojects);

    LOG.info(finalOutPath.toString());
    LOG.info("output file compress?\t" + compress);
    LOG.info("head:\t" + head.toStr());

    return new RecordWriter() {

      @Override
      public void write(Writable w) throws IOException {
        icdf.addRecord((IRecord) w);
      }

      @Override
      public void close(boolean abort) throws IOException {
        icdf.close();
      }
    };
  }
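  // A hedged refactoring sketch: the if/else type-mapping chain above rewritten as a switch
  // on the Hive type name. The string literals are the values of the Constants.*_TYPE_NAME
  // fields, the ConstVar codes are assumed as in the surrounding example, and unknown types
  // fall back to 0 exactly as before:
  static byte toFdfType(String hiveTypeName) {
    switch (hiveTypeName) {
      case "tinyint":
        return ConstVar.FieldType_Byte;
      case "smallint":
        return ConstVar.FieldType_Short;
      case "int":
        return ConstVar.FieldType_Int;
      case "bigint":
        return ConstVar.FieldType_Long;
      case "float":
        return ConstVar.FieldType_Float;
      case "double":
        return ConstVar.FieldType_Double;
      case "string":
        return ConstVar.FieldType_String;
      default:
        return 0;
    }
  }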