Example #1
  public void testRW() throws Exception {

    Configuration conf = new Configuration();

    for (Pair<Properties, HCatRecord> e : getData()) {
      Properties tblProps = e.first;
      HCatRecord r = e.second;

      HCatRecordSerDe hrsd = new HCatRecordSerDe();
      SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);

      JsonSerDe jsde = new JsonSerDe();
      SerDeUtils.initializeSerDe(jsde, conf, tblProps, null);

      LOG.info("ORIG:{}", r);

      Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
      LOG.info("ONE:{}", s);

      Object o1 = hrsd.deserialize(s);
      StringBuilder msg = new StringBuilder();
      boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1, msg);
      assertTrue(msg.toString(), isEqual);

      Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector());
      LOG.info("TWO:{}", s2);
      Object o2 = jsde.deserialize(s2);
      LOG.info("deserialized TWO : {} ", o2);
      msg.setLength(0);
      isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
      assertTrue(msg.toString(), isEqual);
    }
  }
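
This and most of the following examples share the same initialize/serialize/deserialize flow. Below is a minimal, self-contained sketch of that flow, assuming a Hive 1.x/2.x classpath where the deprecated SerDeUtils.initializeSerDe helper still exists; the column names, the sample row, and the class name SerDeUtilsInitSketch are invented for illustration.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class SerDeUtilsInitSketch {

  public static void main(String[] args) throws Exception {
    // Describe a two-column table ("id int, name string") via table properties.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

    // Same helper the surrounding examples use; the last argument is the
    // (optional) partition properties.
    LazySimpleSerDe serde = new LazySimpleSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);

    // LazySimpleSerDe's default field delimiter is Ctrl-A (ASCII 1).
    Object row = serde.deserialize(new Text("1\u0001alice"));
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();

    // getJSONString renders any (object, inspector) pair as JSON text,
    // which is handy for logging rows, as several examples below do.
    System.out.println(SerDeUtils.getJSONString(row, oi));
  }
}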
Example #2
  public void generateMapMetaData() throws HiveException {
    // generate the meta data for key
    // index for key is -1

    try {
      TableDesc keyTableDesc = conf.getKeyTblDesc();
      SerDe keySerializer =
          (SerDe) ReflectionUtil.newInstance(keyTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
      MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
      for (int pos = 0; pos < order.length; pos++) {
        if (pos == posBigTable) {
          continue;
        }
        TableDesc valueTableDesc;
        if (conf.getNoOuterJoin()) {
          valueTableDesc = conf.getValueTblDescs().get(pos);
        } else {
          valueTableDesc = conf.getValueFilteredTblDescs().get(pos);
        }
        SerDe valueSerDe =
            (SerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext valueContext =
            new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos));
        mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext);
      }
    } catch (SerDeException e) {
      throw new HiveException(e);
    }
  }
Example #3
 static {
   StackTraceElement[] sTrace = new Exception().getStackTrace();
   String className = sTrace[0].getClassName();
   try {
     SerDeUtils.registerSerDe(shortName(), Class.forName(className));
     // For backward compatibility: this class replaces the following class.
     SerDeUtils.registerSerDe("org.apache.hadoop.hive.serde.TestSerDe", Class.forName(className));
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
 }
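
Example #3 registers the SerDe under both its short name and a legacy class name so that either can be resolved later. The sketch below shows the other side of that contract, assuming the older Hive API where SerDeUtils.registerSerDe and SerDeUtils.lookupDeserializer (used in Example #13 below) are still available and the registered-alias map is consulted before Class.forName; the alias "lazy_simple_alias" is made up for illustration.

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class RegisterSerDeSketch {

  public static void main(String[] args) throws Exception {
    // Register an existing SerDe class under an invented short alias.
    SerDeUtils.registerSerDe("lazy_simple_alias", LazySimpleSerDe.class);

    // lookupDeserializer should resolve either a registered alias or a fully
    // qualified class name (Example #13 passes a class name).
    Deserializer deserializer = SerDeUtils.lookupDeserializer("lazy_simple_alias");
    System.out.println(deserializer.getClass().getName());
  }
}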
Example #4
  public void testRW() throws Exception {

    Configuration conf = new Configuration();

    for (Entry<Properties, HCatRecord> e : getData().entrySet()) {
      Properties tblProps = e.getKey();
      HCatRecord r = e.getValue();

      HCatRecordSerDe hrsd = new HCatRecordSerDe();
      SerDeUtils.initializeSerDe(hrsd, conf, tblProps, null);

      LOG.info("ORIG: {}", r);

      Writable s = hrsd.serialize(r, hrsd.getObjectInspector());
      LOG.info("ONE: {}", s);

      HCatRecord r2 = (HCatRecord) hrsd.deserialize(s);
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2));

      // If serialization went through correctly, then s is also an HCatRecord,
      // equal to the original and a deep copy of it, and this holds across
      // further levels of serialization as well.

      Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector());
      LOG.info("TWO: {}", s2);
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s));
      Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2));

      // serialize using another serde, and read out that object repr.
      LazySimpleSerDe testSD = new LazySimpleSerDe();
      SerDeUtils.initializeSerDe(testSD, conf, tblProps, null);

      Writable s3 = testSD.serialize(s, hrsd.getObjectInspector());
      LOG.info("THREE: {}", s3);
      Object o3 = testSD.deserialize(s3);
      Assert.assertFalse(r.getClass().equals(o3.getClass()));

      // then serialize again using hrsd, and compare results
      HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector());
      LOG.info("FOUR: {}", s4);

      // Test LazyHCatRecord init and read
      LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector());
      LOG.info("FIVE: {}", s5);

      LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector());
      LOG.info("SIX: {}", s6);
    }
  }
Example #5
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {

    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fields.size(); i++) {
      if (i > 0) sb.append(separator);
      Object column = soi.getStructFieldData(obj, fields.get(i));
      if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
        // For primitive object, serialize to plain string
        sb.append(column == null ? nullString : column.toString());
      } else {
        // For complex object, serialize to JSON format
        sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
      }
    }
    serializeCache.set(sb.toString());
    return serializeCache;
  }
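
Example #5 falls back to SerDeUtils.getJSONString for non-primitive columns. Here is a quick, hedged sketch of what that helper produces for plain Java collections paired with standard object inspectors; the exact output formatting shown in the comments is approximate.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class GetJsonStringSketch {

  public static void main(String[] args) {
    // array<string> backed by a plain Java list
    ObjectInspector listOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    // expected output along the lines of ["a","b"]
    System.out.println(SerDeUtils.getJSONString(Arrays.asList("a", "b"), listOI));

    // map<string,int> backed by a plain Java map
    ObjectInspector mapOI =
        ObjectInspectorFactory.getStandardMapObjectInspector(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    Map<String, Integer> m = new LinkedHashMap<String, Integer>();
    m.put("x", 1);
    // expected output along the lines of {"x":1}
    System.out.println(SerDeUtils.getJSONString(m, mapOI));
  }
}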
Example #6
  public void testMapValues() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "a,b");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    HCatRecord expected1 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("aaa"), createHashMapStringInteger("bbb", 1)));
    HCatRecord expected2 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("yyy"), createHashMapStringInteger("zzz", 123)));
    HCatRecord expected3 =
        new DefaultHCatRecord(
            Arrays.<Object>asList(
                Arrays.<String>asList("a"),
                createHashMapStringInteger("x", 11, "y", 22, "z", null)));

    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text1), expected1));
    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text2), expected2));
    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text3), expected3));
  }
Example #7
  /**
   * Verifies that our JSON deserialization is not too strict, as per HIVE-6166.
   *
   * <p>That is, if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks
   * like: { "x" : "abc" , "t" : { "a" : "1", "b" : "2", "c" : [ { "x" : 2 , "y" : 3 } , { "x" : 3 ,
   * "y" : 2 } ] } , "s" : { "a" : 2 , "b" : "blah", "c": "woo" } }
   *
   * <p>Then it should still work, ignore the "x" and "t" fields and the "c" subfield of "s", and
   * read k as null.
   */
  public void testLooseJsonReadability() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "s,k");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "struct<a:int,b:string>,int");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text jsonText =
        new Text(
            "{ \"x\" : \"abc\" , "
                + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
                + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");
    List<Object> expected = new ArrayList<Object>();
    List<Object> inner = new ArrayList<Object>();
    inner.add(2);
    inner.add("blah");
    expected.add(inner);
    expected.add(null);
    HCatRecord expectedRecord = new DefaultHCatRecord(expected);

    HCatRecord r = (HCatRecord) rjsd.deserialize(jsonText);
    System.err.println("record : " + r.toString());

    assertTrue(HCatDataCheckUtil.recordsEqual(r, expectedRecord));
  }
Example #8
  private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
      throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object[] deserializedFields = new Object[fieldRefs.size()];
    Object rowObj;
    ObjectInspector fieldOI;

    int protocol = getProtocolVersion().getValue();
    for (Object rowString : rows) {
      try {
        rowObj = serde.deserialize(new BytesWritable(((String) rowString).getBytes("UTF-8")));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException(e);
      }
      for (int i = 0; i < fieldRefs.size(); i++) {
        StructField fieldRef = fieldRefs.get(i);
        fieldOI = fieldRef.getFieldObjectInspector();
        Object fieldData = soi.getStructFieldData(rowObj, fieldRef);
        deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol);
      }
      rowSet.addRow(deserializedFields);
    }
    return rowSet;
  }
Example #9
  public void testRobustRead() throws Exception {
    /**
     * This test has been added to account for HCATALOG-436. We write out columns with "internal
     * column names" such as "_col0", but try to read them back with regular column names.
     */
    Configuration conf = new Configuration();

    for (Pair<Properties, HCatRecord> e : getData()) {
      Properties tblProps = e.first;
      HCatRecord r = e.second;

      Properties internalTblProps = new Properties();
      for (Map.Entry pe : tblProps.entrySet()) {
        if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) {
          internalTblProps.put(pe.getKey(), pe.getValue());
        } else {
          internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue()));
        }
      }

      LOG.info("orig tbl props:{}", tblProps);
      LOG.info("modif tbl props:{}", internalTblProps);

      JsonSerDe wjsd = new JsonSerDe();
      SerDeUtils.initializeSerDe(wjsd, conf, internalTblProps, null);

      JsonSerDe rjsd = new JsonSerDe();
      SerDeUtils.initializeSerDe(rjsd, conf, tblProps, null);

      LOG.info("ORIG:{}", r);

      Writable s = wjsd.serialize(r, wjsd.getObjectInspector());
      LOG.info("ONE:{}", s);

      Object o1 = wjsd.deserialize(s);
      LOG.info("deserialized ONE : {} ", o1);

      Object o2 = rjsd.deserialize(s);
      LOG.info("deserialized TWO : {} ", o2);
      StringBuilder msg = new StringBuilder();
      boolean isEqual = HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2, msg);
      assertTrue(msg.toString(), isEqual);
    }
  }
Example #10
 private String toErrorMessage(Writable value, Object row, ObjectInspector inspector) {
   try {
     if (row != null) {
       return SerDeUtils.getJSONString(row, inspector);
     }
     return String.valueOf(value);
   } catch (Exception e) {
     return "[Error getting row data with exception " + StringUtils.stringifyException(e) + " ]";
   }
 }
Example #11
  private ParquetHiveRecord getParquetWritable(
      String columnNames, String columnTypes, ArrayWritable record) throws SerDeException {
    Properties recordProperties = new Properties();
    recordProperties.setProperty("columns", columnNames);
    recordProperties.setProperty("columns.types", columnTypes);

    ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), recordProperties, null);

    return new ParquetHiveRecord(
        serDe.deserialize(record), getObjectInspector(columnNames, columnTypes));
  }
Example #12
  /**
   * Convert a LazyObject to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0
   * Specification, Table B-3: Mapping from JDBC Types to Java Object Types).
   *
   * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}.
   */
  private static Object convertLazyToJava(Object o, ObjectInspector oi) {
    Object obj = ObjectInspectorUtils.copyToStandardObject(o, oi, ObjectInspectorCopyOption.JAVA);

    if (obj == null) {
      return null;
    }
    if (oi.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) {
      return new String((byte[]) obj);
    }
    // for now, expose non-primitive as a string
    // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
    if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) {
      return SerDeUtils.getJSONString(o, oi);
    }
    return obj;
  }
Example #13
 protected void initialize(ShapeDetails shp, StructObjectInspector OI) throws HiveException {
   String serdeClassName = shp.getSerdeClassName();
   Properties serDeProps = new Properties();
   Map<String, String> serdePropsMap = new LinkedHashMap<String, String>();
   addOIPropertiestoSerDePropsMap(OI, serdePropsMap);
   for (String serdeName : serdePropsMap.keySet()) {
     serDeProps.setProperty(serdeName, serdePropsMap.get(serdeName));
   }
   try {
     SerDe serDe = (SerDe) SerDeUtils.lookupDeserializer(serdeClassName);
     serDe.initialize(hConf, serDeProps);
     shp.setSerde(serDe);
     shp.setOI((StructObjectInspector) serDe.getObjectInspector());
   } catch (SerDeException se) {
     throw new HiveException(se);
   }
 }
Example #14
  public void testUpperCaseKey() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    props.put(serdeConstants.LIST_COLUMNS, "empid,name");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    Text text1 = new Text("{ \"empId\" : 123, \"name\" : \"John\" } ");
    Text text2 = new Text("{ \"empId\" : 456, \"name\" : \"Jane\" } ");

    HCatRecord expected1 = new DefaultHCatRecord(Arrays.<Object>asList(123, "John"));
    HCatRecord expected2 = new DefaultHCatRecord(Arrays.<Object>asList(456, "Jane"));

    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text1), expected1));
    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text2), expected2));
  }
Example #15
 /**
  * Convert an Object to a standard Java object in compliance with JDBC 3.0 (see JDBC 3.0
  * Specification, Table B-3: Mapping from JDBC Types to Java Object Types).
  *
  * <p>This method is kept consistent with {@link HiveResultSetMetaData#hiveTypeToSqlType}.
  */
 public static Object toThriftPayload(Object val, ObjectInspector valOI, int version) {
   if (valOI.getCategory() == ObjectInspector.Category.PRIMITIVE) {
     if (val == null) {
       return null;
     }
     Object obj =
         ObjectInspectorUtils.copyToStandardObject(
             val, valOI, ObjectInspectorUtils.ObjectInspectorCopyOption.JAVA);
     // uses string type for binary before HIVE_CLI_SERVICE_PROTOCOL_V6
     if (version < 5
         && ((PrimitiveObjectInspector) valOI).getPrimitiveCategory()
             == PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
       // todo HIVE-5269
       return new String((byte[]) obj);
     }
     return obj;
   }
   // for now, expose non-primitive as a string
   // TODO: expose non-primitive as a structured object while maintaining JDBC compliance
   return SerDeUtils.getJSONString(val, valOI);
 }
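
Example #15 is the implementation of SerDeUtils.toThriftPayload itself; Example #8 above calls it when decoding rows. The tiny usage sketch below follows the same signature; the protocol version 6 passed here is only illustrative, standing in for getProtocolVersion().getValue().

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ToThriftPayloadSketch {

  public static void main(String[] args) {
    // Primitive values are copied to their standard Java representation.
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    System.out.println(SerDeUtils.toThriftPayload(42, intOI, 6)); // 42

    // Non-primitive values are exposed as a JSON string, per the method above.
    ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(intOI);
    System.out.println(SerDeUtils.toThriftPayload(Arrays.asList(1, 2), listOI, 6));
  }
}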
Example #16
  private SerDe getSerDe() throws SQLException {
    if (serde != null) {
      return serde;
    }
    try {
      List<FieldSchema> fieldSchemas = mResultSchema.getFieldSchemas();
      StringBuilder namesSb = new StringBuilder();
      StringBuilder typesSb = new StringBuilder();

      if (fieldSchemas != null && !fieldSchemas.isEmpty()) {
        for (int pos = 0; pos < fieldSchemas.size(); pos++) {
          if (pos != 0) {
            namesSb.append(",");
            typesSb.append(",");
          }
          namesSb.append(fieldSchemas.get(pos).getName());
          typesSb.append(fieldSchemas.get(pos).getType());
        }
      }
      String names = namesSb.toString();
      String types = typesSb.toString();

      serde = new LazySimpleSerDe();
      Properties props = new Properties();
      if (names.length() > 0) {
        LOG.debug("Column names: " + names);
        props.setProperty(serdeConstants.LIST_COLUMNS, names);
      }
      if (types.length() > 0) {
        LOG.debug("Column types: " + types);
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types);
      }
      SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null);

    } catch (Exception ex) {
      ex.printStackTrace();
      throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex);
    }
    return serde;
  }
Example #17
 /**
  * @param values iterator over the serialized values for the current reduce key
  * @return true if it is not done and can take more inputs
  */
 private <E> boolean processKeyValues(Iterator<E> values, byte tag) throws HiveException {
   while (values.hasNext()) {
     BytesWritable valueWritable = (BytesWritable) values.next();
     try {
       valueObject[tag] = inputValueDeserializer[tag].deserialize(valueWritable);
     } catch (SerDeException e) {
       throw new HiveException(
           "Hive Runtime Error: Unable to deserialize reduce input value (tag="
               + tag
               + ") from "
               + Utilities.formatBinaryString(valueWritable.get(), 0, valueWritable.getSize())
               + " with properties "
               + valueTableDesc[tag].getProperties(),
           e);
     }
     row.clear();
     row.add(keyObject);
     row.add(valueObject[tag]);
     if (isLogInfoEnabled) {
       logMemoryInfo();
     }
     try {
       reducer.process(row, tag);
     } catch (Exception e) {
       String rowString = null;
       try {
         rowString = SerDeUtils.getJSONString(row, rowObjectInspector[tag]);
       } catch (Exception e2) {
         rowString =
             "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]";
       }
       throw new HiveException("Error while processing row (tag=" + tag + ") " + rowString, e);
     }
   }
   return true; // give me more
 }
Example #18
  private MapOpCtx initObjectInspector(
      Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    PartitionDesc pd = opCtx.partDesc;
    TableDesc td = pd.getTableDesc();

    // Use table properties in case of unpartitioned tables,
    // and the union of table properties and partition properties, with partition
    // taking precedence, in the case of partitioned tables
    Properties overlayedProps =
        SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());

    Map<String, String> partSpec = pd.getPartSpec();

    opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = pd.getDeserializer(hconf);

    StructObjectInspector partRawRowObjectInspector;
    if (Utilities.isInputFileFormatSelfDescribing(pd)) {
      partRawRowObjectInspector = tableRowOI;
    } else {
      partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    }

    opCtx.partTblObjectInspectorConverter =
        ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);

    // Next check if this table has partitions and if so
    // get the list of partition names as well as allocate
    // the serdes for the partition columns
    String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);

    if (pcols != null && pcols.length() > 0) {
      String[] partKeys = pcols.trim().split("/");
      String pcolTypes =
          overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
      String[] partKeyTypes = pcolTypes.trim().split(":");

      if (partKeys.length > partKeyTypes.length) {
        throw new HiveException(
            "Internal error : partKeys length, "
                + partKeys.length
                + " greater than partKeyTypes length, "
                + partKeyTypes.length);
      }

      List<String> partNames = new ArrayList<String>(partKeys.length);
      Object[] partValues = new Object[partKeys.length];
      List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);

      for (int i = 0; i < partKeys.length; i++) {
        String key = partKeys[i];
        partNames.add(key);
        ObjectInspector oi =
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));

        // Partitions do not exist for this table
        if (partSpec == null) {
          // for partitionless table, initialize partValue to null
          partValues[i] = null;
        } else {
          partValues[i] =
              ObjectInspectorConverters.getConverter(
                      PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
                  .convert(partSpec.get(key));
        }
        partObjectInspectors.add(oi);
      }
      opCtx.rowWithPart = new Object[] {null, partValues};
      opCtx.partObjectInspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
    }

    // The op may not be a TableScan for mapjoins
    // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
    // In that case, it will be a Select, but the rowOI need not be amended
    if (opCtx.op instanceof TableScanOperator) {
      TableScanOperator tsOp = (TableScanOperator) opCtx.op;
      TableScanDesc tsDesc = tsOp.getConf();
      if (tsDesc != null && tsDesc.hasVirtualCols()) {
        opCtx.vcs = tsDesc.getVirtualCols();
        opCtx.vcValues = new Object[opCtx.vcs.size()];
        opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
        if (opCtx.isPartitioned()) {
          opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
        } else {
          opCtx.rowWithPartAndVC = new Object[2];
        }
      }
    }
    if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
      opCtx.rowObjectInspector = tableRowOI;
      return opCtx;
    }
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
    inspectors.add(tableRowOI);
    if (opCtx.isPartitioned()) {
      inspectors.add(opCtx.partObjectInspector);
    }
    if (opCtx.hasVC()) {
      inspectors.add(opCtx.vcsObjectInspector);
    }
    opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
    return opCtx;
  }
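
Example #18 builds its effective table properties with SerDeUtils.createOverlayedProperties, with partition properties taking precedence over table properties. A minimal sketch of that precedence follows; the property values are invented, and only the "name" key mirrors the example, which reads it for opCtx.tableName.

import java.util.Properties;

import org.apache.hadoop.hive.serde2.SerDeUtils;

public class OverlayedPropertiesSketch {

  public static void main(String[] args) {
    Properties tableProps = new Properties();
    tableProps.setProperty("serialization.format", "1");
    tableProps.setProperty("name", "default.example_table");

    Properties partitionProps = new Properties();
    partitionProps.setProperty("serialization.format", "9");

    // Partition properties are layered on top of the table properties,
    // so shared keys resolve to the partition value.
    Properties overlayed =
        SerDeUtils.createOverlayedProperties(tableProps, partitionProps);
    System.out.println(overlayed.getProperty("serialization.format")); // 9
    System.out.println(overlayed.getProperty("name")); // default.example_table
  }
}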
Example #19
  public void validate() throws SemanticException {

    if ((this.getCols() == null) || (this.getCols().size() == 0)) {
      // for now make sure that serde exists
      if (StringUtils.isEmpty(this.getSerName())
          || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) {
        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
      }
      return;
    }

    if (this.getStorageHandler() == null) {
      try {
        Class<?> origin = Class.forName(this.getOutputFormat(), true, JavaUtils.getClassLoader());
        Class<? extends HiveOutputFormat> replaced =
            HiveFileFormatUtils.getOutputFormatSubstitute(origin);
        if (replaced == null) {
          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
        }
      } catch (ClassNotFoundException e) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
    }

    List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());

    if (this.getBucketCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<String> bucketCols = this.getBucketCols().iterator();
      while (bucketCols.hasNext()) {
        String bucketCol = bucketCols.next();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (bucketCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getSortCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<Order> sortCols = this.getSortCols().iterator();
      while (sortCols.hasNext()) {
        String sortCol = sortCols.next().getCol();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (sortCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getPartCols() != null) {
      // there is no overlap between columns and partitioning columns
      Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
      while (partColsIter.hasNext()) {
        FieldSchema fs = partColsIter.next();
        String partCol = fs.getName();
        PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte =
            PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(fs.getType());
        if (null == pte) {
          throw new SemanticException(
              ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg()
                  + " Found "
                  + partCol
                  + " of type: "
                  + fs.getType());
        }
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
          if (partCol.equalsIgnoreCase(colName)) {
            throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
          }
        }
      }
    }

    /* Validate skewed information. */
    ValidationUtility.validateSkewedInformation(
        colNames, this.getSkewedColNames(), this.getSkewedColValues());
  }
Example #20
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    try {
      reportProgress();

      // get alias
      alias = (byte) tag;

      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      ArrayList<Object> nr =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      if (handleSkewJoin) {
        skewJoinKeyContext.handleSkew(tag);
      }

      // number of rows for the key in the given table
      int sz = storage.get(alias).size();
      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
      Object keyObject = soi.getStructFieldData(row, sf);

      // Are we consuming too much memory
      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
        if (sz == joinEmitInterval) {
          // The input is sorted by alias, so if we are already in the last
          // join operand, we can emit some results now.
          // Note this has to be done before adding the current row to the
          // storage, to preserve the correctness for outer joins.
          checkAndGenObject();
          storage.get(alias).clear();
        }
      } else {
        if (sz == nextSz) {
          // Output a warning if we reached at least 1000 rows for a join operand.
          // We won't output a warning for the last join operand since its size
          // will never reach joinEmitInterval.
          LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
          nextSz = getNextSize(nextSz);
        }
      }

      // Add the value to the vector
      storage.get(alias).add(nr);
      // if join-key is null, process each row in different group.
      if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
        endGroup();
        startGroup();
      }
    } catch (Exception e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }
Example #21
  @Override
  @SuppressWarnings("unchecked")
  public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);

    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;

    ReduceWork gWork = Utilities.getReduceWork(job);

    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    reducer.setParentOperators(null); // clear out any parents as reducer is the root
    isTagged = gWork.getNeedsTagging();
    try {
      keyTableDesc = gWork.getKeyDesc();
      inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
      keyObjectInspector = inputKeyDeserializer.getObjectInspector();
      valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];

      if (vectorized) {
        final int maxTags = gWork.getTagToValueDesc().size();
        keyStructInspector = (StructObjectInspector) keyObjectInspector;
        batches = new VectorizedRowBatch[maxTags];
        valueStructInspectors = new StructObjectInspector[maxTags];
        valueStringWriters = new List[maxTags];
        keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        buffer = new DataOutputBuffer();
      }

      for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
        // We should initialize the SerDe with the TypeInfo when available.
        valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
        inputValueDeserializer[tag] =
            ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(
            inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
        valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();

        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();

        if (vectorized) {
          /* vectorization only works with struct object inspectors */
          valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];

          ObjectPair<VectorizedRowBatch, StandardStructObjectInspector> pair =
              VectorizedBatchUtil.constructVectorizedRowBatch(
                  keyStructInspector,
                  valueStructInspectors[tag],
                  gWork.getVectorScratchColumnTypeMap());
          batches[tag] = pair.getFirst();
          final int totalColumns =
              keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size();
          valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
          valueStringWriters[tag].addAll(
              Arrays.asList(
                  VectorExpressionWriterFactory.genVectorStructExpressionWritables(
                      keyStructInspector)));
          valueStringWriters[tag].addAll(
              Arrays.asList(
                  VectorExpressionWriterFactory.genVectorStructExpressionWritables(
                      valueStructInspectors[tag])));

          rowObjectInspector[tag] = pair.getSecond();
        } else {
          ois.add(keyObjectInspector);
          ois.add(valueObjectInspector[tag]);
          // reducer.setGroupKeyObjectInspector(keyObjectInspector);
          rowObjectInspector[tag] =
              ObjectInspectorFactory.getStandardStructObjectInspector(
                  Utilities.reduceFieldNameList, ois);
        }
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);

    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(
        Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);

    // initialize reduce operator tree
    try {
      LOG.info(reducer.dump(0));
      reducer.initialize(jc, rowObjectInspector);

      if (localWork != null) {
        for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
          dummyOp.setExecContext(execContext);
          dummyOp.initialize(jc, null);
        }
      }

    } catch (Throwable e) {
      abort = true;
      if (e instanceof OutOfMemoryError) {
        // Don't create a new object if we are already out of memory
        throw (OutOfMemoryError) e;
      } else {
        throw new RuntimeException("Reduce operator initialization failed", e);
      }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  }