Example #1
0
  /**
   * Builds the struct inspector describing the select list of a MatchPath evaluation.
   *
   * <p>Each input field that has an entry in {@code evaluator.inputColumnNamesMap} is kept under
   * its own field name. One extra trailing field named {@code MatchPath.PATHATTR_NAME} is
   * appended; its type is a list of structs whose members carry the mapped column names.
   *
   * @param evaluator evaluator whose {@code inputColumnNamesMap} decides which fields are kept
   * @param inpDef input definition whose output shape supplies the candidate fields
   * @return a standard struct inspector over the kept fields followed by the path attribute
   */
  protected static StructObjectInspector createSelectListOI(
      MatchPath evaluator, PTFInputDef inpDef) {
    ArrayList<String> mappedNames = new ArrayList<String>();
    ArrayList<String> outputNames = new ArrayList<String>();
    ArrayList<ObjectInspector> keptInspectors = new ArrayList<ObjectInspector>();

    StructObjectInspector inputShape = inpDef.getOutputShape().getOI();
    for (StructField candidate : inputShape.getAllStructFieldRefs()) {
      String fieldName = candidate.getFieldName();
      String mappedName = evaluator.inputColumnNamesMap.get(fieldName);
      if (mappedName == null) {
        // Fields without a mapping are not part of the select list.
        continue;
      }
      mappedNames.add(mappedName);
      outputNames.add(fieldName);
      keptInspectors.add(candidate.getFieldObjectInspector());
    }

    // The path attribute is a list whose elements are structs over the mapped columns.
    StandardListObjectInspector pathAttrOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardStructObjectInspector(mappedNames, keptInspectors));

    ArrayList<ObjectInspector> outputInspectors = new ArrayList<ObjectInspector>(keptInspectors);
    outputInspectors.add(pathAttrOI);
    outputNames.add(MatchPath.PATHATTR_NAME);
    return ObjectInspectorFactory.getStandardStructObjectInspector(outputNames, outputInspectors);
  }
  /**
   * Recursively maps an inspector onto an equivalent one built from the factories used here.
   *
   * <p>Primitives become primitive Java inspectors of the same primitive category, lists and maps
   * become standard list/map inspectors over their converted element/key/value inspectors, and
   * structs become {@code JsonStructObjectInspector}s over their converted fields. Any other
   * category is returned unchanged.
   *
   * @param arg the inspector to convert
   * @return the converted inspector, or {@code arg} itself for unhandled categories
   */
  private ObjectInspector solveOi(ObjectInspector arg) {
    switch (arg.getCategory()) {
      case PRIMITIVE: {
        // Covers every primitive category; only the category is carried over.
        PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) arg;
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            primitive.getPrimitiveCategory());
      }
      case LIST: {
        ListObjectInspector list = (ListObjectInspector) arg;
        ObjectInspector element = solveOi(list.getListElementObjectInspector());
        return ObjectInspectorFactory.getStandardListObjectInspector(element);
      }
      case MAP: {
        MapObjectInspector map = (MapObjectInspector) arg;
        ObjectInspector key = solveOi(map.getMapKeyObjectInspector());
        ObjectInspector value = solveOi(map.getMapValueObjectInspector());
        return ObjectInspectorFactory.getStandardMapObjectInspector(key, value);
      }
      case STRUCT: {
        StructObjectInspector struct = (StructObjectInspector) arg;
        int memberCount = struct.getAllStructFieldRefs().size();
        ArrayList<String> memberNames = new ArrayList<String>(memberCount);
        ArrayList<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>(memberCount);
        for (StructField member : struct.getAllStructFieldRefs()) {
          memberNames.add(member.getFieldName());
          memberInspectors.add(solveOi(member.getFieldObjectInspector()));
        }
        return JsonStructObjectInspector.getJsonStructObjectInspector(
            memberNames, memberInspectors);
      }
      default:
        return arg;
    }
  }
  /**
   * Extracts the configured {@code fieldName} from a Hive struct as a string.
   *
   * @param target candidate object; only {@code HiveType} instances backed by a struct inspector
   *     are handled
   * @return the field's {@code toString()} value, the empty string when the field data is
   *     {@code null} or a {@code NullWritable}, or {@code null} when {@code target} is not a
   *     struct-backed {@code HiveType}
   */
  @Override
  protected String extractField(Object target) {
    if (!(target instanceof HiveType)) {
      return null;
    }
    HiveType hiveValue = (HiveType) target;
    ObjectInspector inspector = hiveValue.getObjectInspector();
    if (!(inspector instanceof StructObjectInspector)) {
      return null;
    }

    StructObjectInspector structInspector = (StructObjectInspector) inspector;
    StructField fieldRef = structInspector.getStructFieldRef(fieldName);
    ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
    Assert.isTrue(
        fieldInspector.getCategory() == ObjectInspector.Category.PRIMITIVE,
        String.format(
            "Field [%s] needs to be a primitive; found [%s]",
            fieldName,
            fieldInspector.getTypeName()));

    // The field data is expected to be a writable, so toString() is sufficient.
    Object data = structInspector.getStructFieldData(hiveValue.getObject(), fieldRef);
    boolean absent = data == null || data instanceof NullWritable;
    return absent ? StringUtils.EMPTY : data.toString();
  }
 /**
  * Reports whether comparison is supported for the type described by the given inspector.
  *
  * <p>Primitives are always comparable. Lists, structs and unions are comparable only when every
  * nested inspector is. Maps, and any category not handled here, are not comparable, so any type
  * that references a map is not comparable.
  *
  * @param oi the inspector describing the type to check
  * @return {@code true} if comparison is supported for the type
  */
 public static boolean compareSupported(ObjectInspector oi) {
   switch (oi.getCategory()) {
     case PRIMITIVE:
       return true;
     case LIST: {
       ObjectInspector elementInspector =
           ((ListObjectInspector) oi).getListElementObjectInspector();
       return compareSupported(elementInspector);
     }
     case STRUCT: {
       StructObjectInspector structInspector = (StructObjectInspector) oi;
       for (StructField member : structInspector.getAllStructFieldRefs()) {
         if (!compareSupported(member.getFieldObjectInspector())) {
           return false;
         }
       }
       return true;
     }
     case UNION: {
       UnionObjectInspector unionInspector = (UnionObjectInspector) oi;
       for (ObjectInspector alternative : unionInspector.getObjectInspectors()) {
         if (!compareSupported(alternative)) {
           return false;
         }
       }
       return true;
     }
     case MAP:
     default:
       return false;
   }
 }
  /**
   * Prints a summary of the file behind {@code m_reader} to standard output: the reader, row
   * count, type kinds, compression settings, the root inspector's category and type name, and the
   * name, category and type name of every column in {@code m_fields}.
   *
   * <p>As a side effect {@code m_oi} is re-read from the reader; {@code m_fields} is not refreshed
   * here.
   *
   * @throws Exception if any of the underlying reader calls fails
   */
  public void printFileInfo() throws Exception {
    System.out.println("Reader: " + m_reader);
    System.out.println("# Rows: " + m_reader.getNumberOfRows());

    final int typeCount = m_types.size();
    System.out.println("# Types in the file: " + typeCount);
    for (int t = 0; t < typeCount; t++) {
      System.out.println("Type " + t + ": " + m_types.get(t).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    // The compression size is only reported for compressed files.
    if (CompressionKind.NONE != m_reader.getCompression()) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    m_oi = (StructObjectInspector) m_reader.getObjectInspector();
    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    final int columnCount = m_fields.size();
    System.out.println("Number of columns in the table: " + columnCount);

    // Per-column details: name first, then the column inspector's category and type name.
    for (int c = 0; c < columnCount; c++) {
      final String label = "Column " + c;
      System.out.println(label + " name: " + m_fields.get(c).getFieldName());
      ObjectInspector columnInspector = m_fields.get(c).getFieldObjectInspector();
      System.out.println(label + " type category: " + columnInspector.getCategory());
      System.out.println(label + " type name: " + columnInspector.getTypeName());
    }
  }
  /**
   * Converts a struct into a {@code JSONObject} by serializing each non-null field and storing it
   * under its serialized field name. Turning the JSON object into text is left to the caller.
   *
   * @param obj the struct data; may be {@code null}
   * @param soi the inspector used to enumerate and read the struct's fields
   * @param columnNames column names handed to {@code getSerializedFieldName} together with the
   *     field position
   * @return the populated JSON object, or {@code null} when {@code obj} is {@code null}
   * @throws RuntimeException wrapping any {@code JSONException} raised while adding a field
   */
  private JSONObject serializeStruct(
      Object obj, StructObjectInspector soi, List<String> columnNames) {
    if (obj == null) {
      // A null struct has no JSON representation.
      return null;
    }

    final JSONObject json = new JSONObject();
    int position = -1;
    for (StructField member : soi.getAllStructFieldRefs()) {
      position++;
      final Object data = soi.getStructFieldData(obj, member);
      if (data == null) {
        // Null fields are simply omitted from the output.
        continue;
      }
      try {
        // Use the proper Hive column name rather than the generated _colN style name.
        json.put(
            getSerializedFieldName(columnNames, position, member),
            serializeField(data, member.getFieldObjectInspector()));
      } catch (JSONException ex) {
        LOG.warn("Problem serializing", ex);
        throw new RuntimeException(ex);
      }
    }
    return json;
  }
Example #7
0
  /**
   * Serializes a struct row into one delimited line held by {@code serializeCache}.
   *
   * <p>Primitive columns are written via {@code toString()} (or {@code nullString} when the value
   * is {@code null}); every other column is written as JSON. Columns are joined with
   * {@code separator}.
   *
   * @param obj the row to serialize
   * @param objInspector inspector for the row; must be of category STRUCT
   * @return the shared {@code serializeCache} instance holding the serialized line
   * @throws SerDeException if {@code objInspector} is not a struct inspector
   */
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + objInspector.getTypeName());
    }

    StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
    List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();

    StringBuilder line = new StringBuilder();
    boolean needsSeparator = false;
    for (StructField columnRef : columns) {
      if (needsSeparator) {
        line.append(separator);
      }
      needsSeparator = true;

      Object column = rowInspector.getStructFieldData(obj, columnRef);
      ObjectInspector columnInspector = columnRef.getFieldObjectInspector();
      if (columnInspector.getCategory() == Category.PRIMITIVE) {
        // Primitive values are written as plain strings.
        line.append(column == null ? nullString : column.toString());
      } else {
        // Complex values are written in JSON form.
        line.append(SerDeUtils.getJSONString(column, columnInspector));
      }
    }

    serializeCache.set(line.toString());
    return serializeCache;
  }
Example #8
0
 /**
  * Appends a human-readable rendering of {@code obj} to {@code buffer}.
  *
  * <p>Structs are rendered as <code>{ name: value, ... }</code> by recursing into each field,
  * primitives are rendered through their Java object's string form, and any other inspector kind
  * is rendered as {@code *unknown*}. A {@code null} primitive value is rendered as {@code null}
  * instead of failing with a {@code NullPointerException}.
  *
  * @param buffer destination for the rendered text
  * @param obj the value to render; may be {@code null}
  * @param inspector the inspector describing {@code obj}
  * @throws IOException declared for callers; not thrown directly by this method
  */
 private void stringifyObject(StringBuilder buffer, Object obj, ObjectInspector inspector)
     throws IOException {
   if (inspector instanceof StructObjectInspector) {
     buffer.append("{ ");
     StructObjectInspector soi = (StructObjectInspector) inspector;
     boolean isFirst = true;
     for (StructField field : soi.getAllStructFieldRefs()) {
       if (isFirst) {
         isFirst = false;
       } else {
         buffer.append(", ");
       }
       buffer.append(field.getFieldName());
       buffer.append(": ");
       stringifyObject(
           buffer, soi.getStructFieldData(obj, field), field.getFieldObjectInspector());
     }
     buffer.append(" }");
   } else if (inspector instanceof PrimitiveObjectInspector) {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
     // Skip the inspector for null data, and let String.valueOf print "null" for a null result,
     // so a missing value never triggers a NullPointerException.
     Object javaValue = (obj == null) ? null : poi.getPrimitiveJavaObject(obj);
     buffer.append(String.valueOf(javaValue));
   } else {
     buffer.append("*unknown*");
   }
 }
Example #9
0
  /**
   * Deserializes string-encoded rows with the SerDe and appends them to {@code rowSet}.
   *
   * <p>Each row string is encoded as UTF-8 bytes, deserialized, and every field is converted with
   * {@code SerDeUtils.toThriftPayload} for the current protocol version. The same field array is
   * reused for every row passed to {@code rowSet.addRow}.
   *
   * @param rows the rows to decode; each element is cast to {@code String}
   * @param rowSet the row set receiving the decoded rows
   * @return {@code rowSet}, after all rows have been added
   * @throws SQLException declared for callers; presumably raised by the helpers used here
   * @throws SerDeException if deserialization fails or UTF-8 is reported as unsupported
   */
  private RowSet decodeFromString(List<Object> rows, RowSet rowSet)
      throws SQLException, SerDeException {
    getSerDe();
    StructObjectInspector rowInspector = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();
    final int columnCount = columns.size();

    Object[] payload = new Object[columnCount];
    int protocol = getProtocolVersion().getValue();

    for (Object encodedRow : rows) {
      Object decodedRow;
      try {
        byte[] utf8 = ((String) encodedRow).getBytes("UTF-8");
        decodedRow = serde.deserialize(new BytesWritable(utf8));
      } catch (UnsupportedEncodingException e) {
        throw new SerDeException(e);
      }

      for (int c = 0; c < columnCount; c++) {
        StructField column = columns.get(c);
        ObjectInspector columnInspector = column.getFieldObjectInspector();
        Object columnData = rowInspector.getStructFieldData(decodedRow, column);
        payload[c] = SerDeUtils.toThriftPayload(columnData, columnInspector, protocol);
      }
      rowSet.addRow(payload);
    }
    return rowSet;
  }
  /**
   * Serializes a struct row into a {@code VoltRecord} for the configured table.
   *
   * <p>The first {@code m_oig.getColumnTypes().size()} fields are read through their primitive
   * inspectors; non-null timestamp values are converted to {@code Date}.
   *
   * @param obj the row to serialize
   * @param oi inspector for the row; must be of category STRUCT
   * @return the populated record
   * @throws SerDeException (as {@code VoltSerdeException}) if {@code oi} is not a struct inspector
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    if (oi.getCategory() != Category.STRUCT) {
      throw new VoltSerdeException(
          getClass().toString()
              + " can only serialize struct types, but we got: "
              + oi.getTypeName());
    }

    VoltRecord record = new VoltRecord(m_voltConf.getTableName());
    StructObjectInspector rowInspector = (StructObjectInspector) oi;
    List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();
    List<Object> rawValues = rowInspector.getStructFieldsDataAsList(obj);

    final int columnCount = m_oig.getColumnTypes().size();
    for (int c = 0; c < columnCount; ++c) {
      PrimitiveObjectInspector columnInspector =
          (PrimitiveObjectInspector) columns.get(c).getFieldObjectInspector();
      Object javaValue = columnInspector.getPrimitiveJavaObject(rawValues.get(c));

      boolean isTimestamp =
          columnInspector.getTypeInfo().equals(TypeInfoFactory.timestampTypeInfo);
      if (isTimestamp && javaValue != null) {
        // Timestamps are handed to the record as Date instances.
        javaValue = new Date(((Timestamp) javaValue).getTime());
      }
      record.add(javaValue);
    }

    return record;
  }
Example #11
0
 /**
  * Appends the name and inspector of every field in the shape's struct inspector to the given
  * lists, in field order.
  *
  * @param shape the shape whose fields are enumerated
  * @param fieldNames list receiving the field names
  * @param fieldOIs list receiving the matching field inspectors
  */
 private static void addInputColumnsToList(
     ShapeDetails shape, ArrayList<String> fieldNames, ArrayList<ObjectInspector> fieldOIs) {
   StructObjectInspector shapeInspector = shape.getOI();
   for (StructField column : shapeInspector.getAllStructFieldRefs()) {
     String columnName = column.getFieldName();
     ObjectInspector columnInspector = column.getFieldObjectInspector();
     fieldNames.add(columnName);
     fieldOIs.add(columnInspector);
   }
 }
  /**
   * Checks whether an inspector and all inspectors nested inside it are settable.
   *
   * @param oi input object inspector
   * @param oiSettableProperties lookup map used to cache results (pass {@code null} for no
   *     caching)
   * @return {@code true} only if {@code oi} is an instance of a settable inspector and every
   *     embedded inspector (struct fields, list elements, map keys and values, union
   *     alternatives) is settable as well
   * @throws RuntimeException if {@code oi} is settable but has a category not handled here
   */
  public static boolean hasAllFieldsSettable(
      ObjectInspector oi, Map<ObjectInspector, Boolean> oiSettableProperties) {
    // Serve the answer from the cache when one is supplied and already knows this inspector.
    if (oiSettableProperties != null && oiSettableProperties.containsKey(oi)) {
      return oiSettableProperties.get(oi);
    }
    // A non-settable top-level inspector settles the question immediately.
    if (!isInstanceOfSettableOI(oi)) {
      return setOISettablePropertiesMap(oi, oiSettableProperties, false);
    }

    boolean allSettable = true;
    switch (oi.getCategory()) {
      case PRIMITIVE:
        break;
      case STRUCT: {
        StructObjectInspector structInspector = (StructObjectInspector) oi;
        for (StructField member : structInspector.getAllStructFieldRefs()) {
          if (!hasAllFieldsSettable(member.getFieldObjectInspector(), oiSettableProperties)) {
            allSettable = false;
            break;
          }
        }
        break;
      }
      case LIST: {
        ListObjectInspector listInspector = (ListObjectInspector) oi;
        allSettable =
            hasAllFieldsSettable(
                listInspector.getListElementObjectInspector(), oiSettableProperties);
        break;
      }
      case MAP: {
        MapObjectInspector mapInspector = (MapObjectInspector) oi;
        // The value inspector is only examined when the key inspector is settable.
        allSettable =
            hasAllFieldsSettable(mapInspector.getMapKeyObjectInspector(), oiSettableProperties)
                && hasAllFieldsSettable(
                    mapInspector.getMapValueObjectInspector(), oiSettableProperties);
        break;
      }
      case UNION: {
        UnionObjectInspector unionInspector = (UnionObjectInspector) oi;
        for (ObjectInspector alternative : unionInspector.getObjectInspectors()) {
          if (!hasAllFieldsSettable(alternative, oiSettableProperties)) {
            allSettable = false;
            break;
          }
        }
        break;
      }
      default:
        throw new RuntimeException(
            "Hive internal error inside hasAllFieldsSettable : "
                + oi.getTypeName()
                + " not supported yet.");
    }
    return setOISettablePropertiesMap(oi, oiSettableProperties, allSettable);
  }
Example #13
0
  /**
   * Serializes a struct row into the reusable {@code cachedWritable}, keyed by column name.
   *
   * <p>For each of the first {@code fieldCount} fields the primitive writable value is looked up;
   * {@code null} values are skipped, supported writable types are stored under a {@code Text} key
   * holding the column name, and unsupported types are logged and skipped.
   *
   * @param obj the row to serialize
   * @param inspector inspector for the row; cast to {@code StructObjectInspector}
   * @return the shared {@code cachedWritable} instance, cleared and repopulated by this call
   * @throws SerDeException if the number of struct fields differs from the number of column names
   */
  @Override
  public Writable serialize(final Object obj, final ObjectInspector inspector)
      throws SerDeException {

    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
      throw new SerDeException(
          String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
      final StructField structField = fields.get(c);

      // Concatenation tolerates null; calling toString() here would throw before the null check
      // below could ever take effect.
      LOG.debug("fieldId=" + c + ",structField=" + structField);

      if (structField == null) {
        continue;
      }

      final Object field = structInspector.getStructFieldData(obj, structField);
      final AbstractPrimitiveObjectInspector fieldOI =
          (AbstractPrimitiveObjectInspector) structField.getFieldObjectInspector();
      final Writable value = (Writable) fieldOI.getPrimitiveWritableObject(field);
      if (value == null) {
        continue;
      }

      LOG.debug("fieldCount=" + fieldCount + ",value=" + value.toString());

      // Every supported writable type is stored the same way, so one membership test suffices.
      final boolean supported =
          value instanceof IntWritable
              || value instanceof Text
              || value instanceof LongWritable
              || value instanceof DoubleWritable
              || value instanceof FloatWritable
              || value instanceof BooleanWritable
              || value instanceof ByteWritable
              || value instanceof BytesWritable;
      if (supported) {
        cachedWritable.put(new Text(columnNames.get(c)), value);
      } else {
        LOG.warn("fieldCount=" + fieldCount + ",type=" + value.getClass().getName());
      }
    }

    return cachedWritable;
  }
 /**
  * Copies every field of the input row, as a standard object, onto the end of the output list.
  *
  * @param result output list that the copied standard objects are appended to
  * @param row input row
  * @param soi object inspector for the to-be-copied columns
  * @param objectInspectorOption copy option forwarded to the per-field copy
  */
 public static void copyToStandardObject(
     List<Object> result,
     Object row,
     StructObjectInspector soi,
     ObjectInspectorCopyOption objectInspectorOption) {
   for (StructField column : soi.getAllStructFieldRefs()) {
     Object columnData = soi.getStructFieldData(row, column);
     ObjectInspector columnInspector = column.getFieldObjectInspector();
     result.add(copyToStandardObject(columnData, columnInspector, objectInspectorOption));
   }
 }
 /**
  * Deserializes the key and value and stores their individual field data in {@code keyObjs} and
  * {@code valObjs}, in field order.
  *
  * @param key the serialized key
  * @param val the serialized value
  * @throws SerDeException if either deserialization fails
  */
 @Override
 public void setKeyValue(Writable key, Writable val) throws SerDeException {
   Object keyRow = keySerDe.deserialize(key);
   Object valRow = valSerDe.deserialize(val);

   int keySlot = 0;
   for (StructField keyField : keySoi.getAllStructFieldRefs()) {
     keyObjs[keySlot++] = keySoi.getStructFieldData(keyRow, keyField);
   }

   int valSlot = 0;
   for (StructField valField : valSoi.getAllStructFieldRefs()) {
     valObjs[valSlot++] = valSoi.getStructFieldData(valRow, valField);
   }
 }
 /**
  * Converts a struct into an {@code ArrayWritable} holding one writable per field, each produced
  * by {@code createObject} from the field's data and inspector.
  *
  * @param obj the struct data
  * @param inspector inspector used to enumerate and read the struct's fields
  * @return an {@code ArrayWritable} of {@code Writable} elements in field order
  * @throws SerDeException if converting a field fails
  */
 private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector)
     throws SerDeException {
   final List<? extends StructField> members = inspector.getAllStructFieldRefs();
   final Writable[] converted = new Writable[members.size()];
   int slot = 0;
   for (final StructField member : members) {
     final Object memberData = inspector.getStructFieldData(obj, member);
     converted[slot++] = createObject(memberData, member.getFieldObjectInspector());
   }
   return new ArrayWritable(Writable.class, converted);
 }
    /*
     * Keeps only the columns of the table function's input shape that also appear in prunedCols.
     * Names in prunedCols that are not input columns (for example those referring to WindowFns
     * or WindowExprs) are thereby filtered out. The returned list is set as the pruned list
     * needed by the PTFOp and follows the input shape's field order.
     */
    private ArrayList<String> prunedInputList(
        List<String> prunedCols, WindowTableFunctionDef tDef) {
      StructObjectInspector inputShape = tDef.getInput().getOutputShape().getOI();

      ArrayList<String> retained = new ArrayList<String>();
      for (StructField inputColumn : inputShape.getAllStructFieldRefs()) {
        String columnName = inputColumn.getFieldName();
        if (!prunedCols.contains(columnName)) {
          continue;
        }
        retained.add(columnName);
      }
      return retained;
    }
Example #18
0
  /**
   * Traverse all the partitions for a table, and get the OI for the table. Note that a conversion
   * is required if any of the partition OI is different from the table OI. For eg. if the query
   * references table T (partitions P1, P2), and P1's schema is same as T, whereas P2's schema is
   * different from T, conversion might be needed for both P1 and P2, since SettableOI might be
   * needed for T
   *
   * @param hconf configuration used to obtain the partition and table deserializers
   * @return a map from each table descriptor to the converted struct inspector chosen for it
   * @throws HiveException wrapping any exception raised while building the map
   */
  private Map<TableDesc, StructObjectInspector> getConvertedOI(Configuration hconf)
      throws HiveException {
    // Result: one converted inspector per table descriptor.
    Map<TableDesc, StructObjectInspector> tableDescOI =
        new HashMap<TableDesc, StructObjectInspector>();
    // Tables whose converted inspector has, so far, been equal to the partition's raw inspector.
    Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
    try {
      // Cache shared by every getConvertedOI call made below.
      Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();

      for (String onefile : conf.getPathToAliases().keySet()) {
        PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
        TableDesc tableDesc = pd.getTableDesc();
        Deserializer partDeserializer = pd.getDeserializer(hconf);

        // For self-describing input formats the table deserializer's inspector stands in for the
        // partition's raw inspector; otherwise the partition deserializer's own inspector is used.
        StructObjectInspector partRawRowObjectInspector;
        if (Utilities.isInputFileFormatSelfDescribing(pd)) {
          Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
          partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
        } else {
          partRawRowObjectInspector = (StructObjectInspector) partDeserializer.getObjectInspector();
        }

        // (Re)compute the table inspector when none has been recorded yet, or when the recorded
        // one was an identity conversion and may need revisiting for this partition.
        StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
        if ((tblRawRowObjectInspector == null)
            || (identityConverterTableDesc.contains(tableDesc))) {
          Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
          tblRawRowObjectInspector =
              (StructObjectInspector)
                  ObjectInspectorConverters.getConvertedOI(
                      partRawRowObjectInspector,
                      tblDeserializer.getObjectInspector(),
                      oiSettableProperties);

          // Keep the identity set in sync: drop the table once a partition's inspector differs
          // from the converted one, add it the first time they are equal.
          if (identityConverterTableDesc.contains(tableDesc)) {
            if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
              identityConverterTableDesc.remove(tableDesc);
            }
          } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
            identityConverterTableDesc.add(tableDesc);
          }

          tableDescOI.put(tableDesc, tblRawRowObjectInspector);
        }
      }
    } catch (Exception e) {
      // Any failure, checked or unchecked, is surfaced as a HiveException with the cause kept.
      throw new HiveException(e);
    }
    return tableDescOI;
  }
  /**
   * Loads the string representation of the given column of the current row into
   * {@code slices[column]} and records in {@code nulls[column]} whether the field was null.
   *
   * <p>MAP, LIST and STRUCT columns are stored as their JSON bytes; other columns must yield a
   * {@code String} or {@code byte[]} Java object.
   *
   * @param column index of the column to parse; must not be a partition column
   * @throws IllegalStateException if the primitive Java value is null or of an unsupported type
   */
  private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (fieldData == null) {
      nulls[column] = true;
      return;
    }

    boolean isStructural =
        hiveTypes[column] == HiveType.MAP
            || hiveTypes[column] == HiveType.LIST
            || hiveTypes[column] == HiveType.STRUCT;
    if (isStructural) {
      // temporarily special case MAP, LIST, and STRUCT types as strings
      byte[] json = SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column]);
      slices[column] = Slices.wrappedBuffer(json);
    } else {
      PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
      Object fieldValue = inspector.getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      if (fieldValue instanceof String) {
        slices[column] = Slices.utf8Slice((String) fieldValue);
      } else if (fieldValue instanceof byte[]) {
        slices[column] = Slices.wrappedBuffer((byte[]) fieldValue);
      } else {
        throw new IllegalStateException(
            "unsupported string field type: " + fieldValue.getClass().getName());
      }
    }
    // Reached only when a slice was stored successfully.
    nulls[column] = false;
  }
Example #20
0
  /**
   * Loads the decimal value of the given column of the current row.
   *
   * <p>The value is rescaled to the column type's scale; short decimals are stored in
   * {@code longs[column]}, all others are encoded into {@code slices[column]}.
   * {@code nulls[column]} records whether the field was null.
   *
   * @param column index of the column to parse; must not be a partition column
   */
  private void parseDecimalColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (fieldData == null) {
      nulls[column] = true;
      return;
    }

    PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
    Object fieldValue = inspector.getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");

    HiveDecimal hiveDecimal = (HiveDecimal) fieldValue;
    DecimalType decimalType = (DecimalType) types[column];
    BigInteger rescaled =
        rescale(hiveDecimal.unscaledValue(), hiveDecimal.scale(), decimalType.getScale());

    if (decimalType.isShort()) {
      longs[column] = rescaled.longValue();
    } else {
      slices[column] = Decimals.encodeUnscaledValue(rescaled);
    }
    nulls[column] = false;
  }
Example #21
0
  /**
   * Loads the string value of the given column of the current row into {@code slices[column]}
   * and records in {@code nulls[column]} whether the field was null.
   *
   * <p>The primitive Java value may be a {@code String}, a {@code byte[]} or a
   * {@code HiveVarchar}; values of varchar-typed columns are truncated to the type's length.
   *
   * @param column index of the column to parse; must not be a partition column
   * @throws IllegalStateException if the primitive Java value is null or of an unsupported type
   */
  private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (fieldData == null) {
      nulls[column] = true;
      return;
    }

    PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
    Object fieldValue = inspector.getPrimitiveJavaObject(fieldData);
    checkState(fieldValue != null, "fieldValue should not be null");

    Slice parsed;
    if (fieldValue instanceof String) {
      parsed = Slices.utf8Slice((String) fieldValue);
    } else if (fieldValue instanceof byte[]) {
      parsed = Slices.wrappedBuffer((byte[]) fieldValue);
    } else if (fieldValue instanceof HiveVarchar) {
      parsed = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
    } else {
      throw new IllegalStateException(
          "unsupported string field type: " + fieldValue.getClass().getName());
    }

    Type columnType = types[column];
    slices[column] = isVarcharType(columnType) ? truncateToLength(parsed, columnType) : parsed;
    nulls[column] = false;
  }
Example #22
0
 /**
  * Registers the row inspector of every operator context whose operator is one of the given
  * children in {@code childrenOpToOI}.
  *
  * @param children the operators whose contexts should be registered
  * @throws HiveException if an operator was already registered with a different row inspector
  */
 private void initOperatorContext(List<Operator<? extends OperatorDesc>> children)
     throws HiveException {
   for (Map<Operator<?>, MapOpCtx> contextsByOp : opCtxMap.values()) {
     for (MapOpCtx ctx : contextsByOp.values()) {
       if (children.contains(ctx.op)) {
         // put() hands back the previous mapping, which must agree with the new one.
         StructObjectInspector previous = childrenOpToOI.put(ctx.op, ctx.rowObjectInspector);
         boolean conflicting = previous != null && !previous.equals(ctx.rowObjectInspector);
         if (conflicting) {
           throw new HiveException("Conflict on row inspector for " + ctx.alias);
         }
         if (isLogDebugEnabled) {
           LOG.debug("dump " + ctx.op + " " + ctx.rowObjectInspector.getTypeName());
         }
       }
     }
   }
 }
 /**
  * Builds the assigners from an object inspector and from a list of columns.
  *
  * @param outputBatch The batch to which the assigners are bound
  * @param outputOI The row object inspector; cast to a struct inspector
  * @param columnMap Vector column map, from column name to vector column index
  * @param outputColumnNames Column names, used both to find the vector columns in
  *     {@code columnMap} and to find the struct fields in {@code outputOI}
  * @return one assigner per entry of {@code outputColumnNames}, in the same order
  * @throws HiveException if building an assigner fails
  */
 public static VectorColumnAssign[] buildAssigners(
     VectorizedRowBatch outputBatch,
     ObjectInspector outputOI,
     Map<String, Integer> columnMap,
     List<String> outputColumnNames)
     throws HiveException {
   StructObjectInspector rowInspector = (StructObjectInspector) outputOI;
   VectorColumnAssign[] assigners = new VectorColumnAssign[outputColumnNames.size()];
   int slot = 0;
   for (String columnName : outputColumnNames) {
     Integer vectorIndex = columnMap.get(columnName);
     ObjectInspector valueInspector =
         rowInspector.getStructFieldRef(columnName).getFieldObjectInspector();
     assigners[slot++] = buildObjectAssign(outputBatch, vectorIndex, valueInspector);
   }
   return assigners;
 }
  /**
   * Serializes the first {@code columnNames.size()} fields of a struct row into the reusable
   * output buffer, passing each column's entry of {@code columnSortOrderIsDesc} along, and
   * returns the bytes wrapped in {@code serializeBytesWritable}.
   *
   * @param obj the row to serialize
   * @param objInspector inspector for the row; cast to a struct inspector
   * @return the shared {@code serializeBytesWritable} instance holding the serialized bytes
   * @throws SerDeException if serializing a field fails
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    outputByteBuffer.reset();

    StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
    List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();

    for (int c = 0; c < columnNames.size(); c++) {
      StructField column = columns.get(c);
      Object columnData = rowInspector.getStructFieldData(obj, column);
      ObjectInspector columnInspector = column.getFieldObjectInspector();
      serialize(outputByteBuffer, columnData, columnInspector, columnSortOrderIsDesc[c]);
    }

    serializeBytesWritable.set(outputByteBuffer.getData(), 0, outputByteBuffer.getLength());
    return serializeBytesWritable;
  }
Example #25
0
 /**
  * Reports whether any element of the list contains a null object. Elements whose null-safe flag
  * is set are skipped, so a null in such a position does not make the result {@code true}.
  *
  * @param o the field values to inspect
  * @param loi struct inspector supplying the inspector of each position
  * @param nullSafes per-position null-safe flags, or {@code null} to check every position
  * @return {@code true} if a checked element has a null object, {@code false} otherwise
  */
 public static boolean hasAnyNullObject(List o, StructObjectInspector loi, boolean[] nullSafes) {
   List<? extends StructField> members = loi.getAllStructFieldRefs();
   for (int pos = 0; pos < o.size(); pos++) {
     boolean skipPosition = nullSafes != null && nullSafes[pos];
     if (skipPosition) {
       continue;
     }
     if (hasAnyNullObject(o.get(pos), members.get(pos).getFieldObjectInspector())) {
       return true;
     }
   }
   return false;
 }
Example #26
0
  /**
   * Reads the file with only columns 2 and 3 projected and checks, for every row, the field
   * count, the projected field values, the serialized class and the re-serialized bytes.
   *
   * <p>The reader is closed in a {@code finally} block so that a failing assertion or a
   * deserialization error does not leak it.
   *
   * @param fs file system holding the file
   * @param count number of records, used only in the log message
   * @param file path of the RCFile to read
   * @throws IOException if reading the file fails
   * @throws SerDeException if deserializing or serializing a row fails
   */
  private void partialReadTest(FileSystem fs, int count, Path file)
      throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
    readCols.add(Integer.valueOf(2));
    readCols.add(Integer.valueOf(3));
    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    try {
      LongWritable rowID = new LongWritable();
      BytesRefArrayWritable cols = new BytesRefArrayWritable();

      while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);

        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());

        // Only the projected columns are compared against the expected data.
        for (int i : readCols) {
          Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
          Object standardWritableData =
              ObjectInspectorUtils.copyToStandardObject(
                  fieldData,
                  fieldRefs.get(i).getFieldObjectInspector(),
                  ObjectInspectorCopyOption.WRITABLE);
          assertEquals("Field " + i, standardWritableData, expectedPartitalFieldsData[i]);
        }

        assertEquals(
            "Class of the serialized object should be BytesRefArrayWritable",
            BytesRefArrayWritable.class,
            serDe.getSerializedClass());
        BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
        assertEquals("Serialized data", patialS, serializedBytes);
      }
    } finally {
      // Release the reader even when an assertion above fails.
      reader.close();
    }
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
  }
Example #27
0
  /**
   * Forwards a row from the parent identified by {@code tag}.
   *
   * <p>When {@code needsTransform[tag]} is set, every field is passed through the matching
   * column type resolver into {@code outputRow}, which is then forwarded with
   * {@code outputObjInspector}; otherwise the row is forwarded unchanged with the parent's input
   * inspector.
   *
   * @param row the incoming row
   * @param tag index of the parent the row came from
   * @throws HiveException if forwarding fails
   */
  @Override
  public synchronized void process(Object row, int tag) throws HiveException {
    StructObjectInspector parentInspector = parentObjInspectors[tag];
    List<? extends StructField> parentColumns = parentFields[tag];

    if (!needsTransform[tag]) {
      forward(row, inputObjInspectors[tag]);
      return;
    }

    for (int c = 0; c < parentColumns.size(); c++) {
      StructField column = parentColumns.get(c);
      outputRow.set(
          c,
          columnTypeResolvers[c].convertIfNecessary(
              parentInspector.getStructFieldData(row, column),
              column.getFieldObjectInspector()));
    }
    forward(outputRow, outputObjInspector);
  }
 /**
  * Creates a writer over the given key and value SerDes, capturing their struct inspectors, the
  * inspector of every key and value field, and object arrays sized to the field counts.
  *
  * @param keySerDe SerDe for keys; its inspector is cast to a struct inspector
  * @param valSerDe SerDe for values; its inspector is cast to a struct inspector
  * @param hasFilterTag stored as-is in {@code this.hasFilterTag}
  * @throws SerDeException if an inspector cannot be obtained from a SerDe
  */
 public KeyValueWriter(SerDe keySerDe, SerDe valSerDe, boolean hasFilterTag)
     throws SerDeException {
   this.keySerDe = keySerDe;
   this.valSerDe = valSerDe;
   keySoi = (StructObjectInspector) keySerDe.getObjectInspector();
   valSoi = (StructObjectInspector) valSerDe.getObjectInspector();

   List<? extends StructField> keyFields = keySoi.getAllStructFieldRefs();
   List<? extends StructField> valFields = valSoi.getAllStructFieldRefs();

   keyOis = new ArrayList<ObjectInspector>(keyFields.size());
   valOis = new ArrayList<ObjectInspector>(valFields.size());
   for (StructField keyField : keyFields) {
     keyOis.add(keyField.getFieldObjectInspector());
   }
   for (StructField valField : valFields) {
     valOis.add(valField.getFieldObjectInspector());
   }

   keyObjs = new Object[keyOis.size()];
   valObjs = new Object[valOis.size()];
   this.hasFilterTag = hasFilterTag;
 }
  /**
   * Opens the ORC file at {@code m_file_path} and caches its types, root struct inspector, field
   * references and row reader in the corresponding members.
   *
   * @return always {@code 0}
   * @throws Exception if creating the reader or reading its metadata fails
   */
  public int openFile() throws Exception {
    m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf));
    m_types = m_reader.getTypes();

    // The root inspector is cast to a struct inspector; its fields are the file's columns.
    final StructObjectInspector rootInspector =
        (StructObjectInspector) m_reader.getObjectInspector();
    m_oi = rootInspector;
    m_fields = rootInspector.getAllStructFieldRefs();

    m_rr = m_reader.rows();
    return 0;
  }
 /**
  * Joins the field names of a struct inspector into one comma-separated string.
  *
  * @param soi the struct inspector whose field names are listed
  * @return the field names in field order separated by {@code ","}; empty if there are no fields
  */
 public static String getFieldNames(StructObjectInspector soi) {
   StringBuilder csv = new StringBuilder();
   String delimiter = "";
   for (StructField field : soi.getAllStructFieldRefs()) {
     // The delimiter is empty for the first name and a comma for every later one.
     csv.append(delimiter).append(field.getFieldName());
     delimiter = ",";
   }
   return csv.toString();
 }