@Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {

    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector =
        (StructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    row = new ArrayList<Object>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
      row.add(null);
    }

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }
  }
  private ArrayWritableObjectInspector getObjectInspector(
      final String columnNames, final String columnTypes) {
    List<TypeInfo> columnTypeList = createHiveTypeInfoFrom(columnTypes);
    List<String> columnNameList = createHiveColumnsFrom(columnNames);
    StructTypeInfo rowTypeInfo =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);

    return new ArrayWritableObjectInspector(rowTypeInfo);
  }
Пример #3
0
 public static TypeInfo buildPrimitiveOrcSchema() {
   return TypeInfoFactory.getStructTypeInfo(
       Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
       Arrays.asList(
           TypeInfoCreator.createInt(),
           TypeInfoCreator.createLong(),
           TypeInfoCreator.createBoolean(),
           TypeInfoCreator.createFloat(),
           TypeInfoCreator.createDouble(),
           TypeInfoCreator.createBinary(),
           TypeInfoCreator.createString()));
 }
Пример #4
0
  /**
   * Initializes the SerDe. Gets the list of columns and their types from the table properties. Will
   * use them to look into/create JSON data.
   *
   * @param conf Hadoop configuration object
   * @param tbl Table Properties
   * @throws SerDeException
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    LOG.debug("Initializing SerDe");
    // Get column names and sort order
    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);

    LOG.debug("columns " + columnNameProperty + " types " + columnTypeProperty);

    // all table column names
    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    // all column types
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());

    stats = new SerDeStats();

    // Create row related objects
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);

    // build options
    options = new JsonStructOIOptions(getMappings(tbl));

    rowObjectInspector =
        (StructObjectInspector)
            JsonObjectInspectorFactory.getJsonObjectInspectorFromTypeInfo(rowTypeInfo, options);

    // Get the sort order
    String columnSortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
    columnSortOrderIsDesc = new boolean[columnNames.size()];
    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
      columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder.charAt(i) == '-');
    }

    // other configuration
    ignoreMalformedJson =
        Boolean.parseBoolean(tbl.getProperty(PROP_IGNORE_MALFORMED_JSON, "false"));
  }
Пример #5
0
 @Test
 public void test_getOrcField_record() throws Exception {
   final SchemaBuilder.FieldAssembler<Schema> builder =
       SchemaBuilder.record("testRecord").namespace("any.data").fields();
   builder.name("int").type().intType().noDefault();
   builder.name("long").type().longType().longDefault(1L);
   builder.name("array").type().array().items().stringType().noDefault();
   Schema testSchema = builder.endRecord();
   TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
   assertEquals(
       TypeInfoFactory.getStructTypeInfo(
           Arrays.asList("int", "long", "array"),
           Arrays.asList(
               TypeInfoCreator.createInt(),
               TypeInfoCreator.createLong(),
               TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
       orcType);
 }
  @Override
  public final void initialize(final Configuration conf, final Properties tbl)
      throws SerDeException {

    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Get compression properties
    compressionType = tbl.getProperty(ParquetOutputFormat.COMPRESSION, DEFAULTCOMPRESSION);

    if (columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    if (columnNames.size() != columnTypes.size()) {
      throw new IllegalArgumentException(
          "ParquetHiveSerde initialization failed. Number of column "
              + "name and column type differs. columnNames = "
              + columnNames
              + ", columnTypes = "
              + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
  }