예제 #1
0
  public void printFileInfo() throws Exception {

    System.out.println("Reader: " + m_reader);

    System.out.println("# Rows: " + m_reader.getNumberOfRows());
    System.out.println("# Types in the file: " + m_types.size());
    for (int i = 0; i < m_types.size(); i++) {
      System.out.println("Type " + i + ": " + m_types.get(i).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    m_oi = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    System.out.println("Number of columns in the table: " + m_fields.size());

    // Print the type info:
    for (int i = 0; i < m_fields.size(); i++) {
      System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName());
      ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector();
      System.out.println("Column " + i + " type category: " + lv_foi.getCategory());
      System.out.println("Column " + i + " type name: " + lv_foi.getTypeName());
    }
  }
  /**
   * @param oi - Input object inspector
   * @param oiSettableProperties - Lookup map to cache the result.(If no caching, pass null)
   * @return - true if : (1) oi is an instance of settable<DataType>OI. (2) All the embedded object
   *     inspectors are instances of settable<DataType>OI. If (1) or (2) is false, return false.
   */
  public static boolean hasAllFieldsSettable(
      ObjectInspector oi, Map<ObjectInspector, Boolean> oiSettableProperties) {
    // If the result is already present in the cache, return it.
    if (!(oiSettableProperties == null) && oiSettableProperties.containsKey(oi)) {
      return oiSettableProperties.get(oi).booleanValue();
    }
    // If the top-level object inspector is non-settable return false
    if (!(isInstanceOfSettableOI(oi))) {
      return setOISettablePropertiesMap(oi, oiSettableProperties, false);
    }

    Boolean returnValue = true;

    switch (oi.getCategory()) {
      case PRIMITIVE:
        break;
      case STRUCT:
        StructObjectInspector structOutputOI = (StructObjectInspector) oi;
        List<? extends StructField> listFields = structOutputOI.getAllStructFieldRefs();
        for (StructField listField : listFields) {
          if (!hasAllFieldsSettable(listField.getFieldObjectInspector(), oiSettableProperties)) {
            returnValue = false;
            break;
          }
        }
        break;
      case LIST:
        ListObjectInspector listOutputOI = (ListObjectInspector) oi;
        returnValue =
            hasAllFieldsSettable(
                listOutputOI.getListElementObjectInspector(), oiSettableProperties);
        break;
      case MAP:
        MapObjectInspector mapOutputOI = (MapObjectInspector) oi;
        returnValue =
            hasAllFieldsSettable(mapOutputOI.getMapKeyObjectInspector(), oiSettableProperties)
                && hasAllFieldsSettable(
                    mapOutputOI.getMapValueObjectInspector(), oiSettableProperties);
        break;
      case UNION:
        UnionObjectInspector unionOutputOI = (UnionObjectInspector) oi;
        List<ObjectInspector> unionListFields = unionOutputOI.getObjectInspectors();
        for (ObjectInspector listField : unionListFields) {
          if (!hasAllFieldsSettable(listField, oiSettableProperties)) {
            returnValue = false;
            break;
          }
        }
        break;
      default:
        throw new RuntimeException(
            "Hive internal error inside hasAllFieldsSettable : "
                + oi.getTypeName()
                + " not supported yet.");
    }
    return setOISettablePropertiesMap(oi, oiSettableProperties, returnValue);
  }
 /** Get the type name of the Java class. */
 public static String getTypeNameFromJavaClass(Type t) {
   try {
     ObjectInspector oi =
         ObjectInspectorFactory.getReflectionObjectInspector(t, ObjectInspectorOptions.JAVA);
     return oi.getTypeName();
   } catch (Throwable e) {
     LOG.info(StringUtils.stringifyException(e));
     return "unknown";
   }
 }
 private static boolean isInstanceOfSettableOI(ObjectInspector oi) {
   switch (oi.getCategory()) {
     case PRIMITIVE:
       return isInstanceOfSettablePrimitiveOI((PrimitiveObjectInspector) oi);
     case STRUCT:
       return oi instanceof SettableStructObjectInspector;
     case LIST:
       return oi instanceof SettableListObjectInspector;
     case MAP:
       return oi instanceof SettableMapObjectInspector;
     case UNION:
       return oi instanceof SettableUnionObjectInspector;
     default:
       throw new RuntimeException(
           "Hive internal error inside isAssignableFromSettableOI : "
               + oi.getTypeName()
               + " not supported yet.");
   }
 }
예제 #5
0
  // This method is just for experimentation.
  public void testRead() throws Exception {

    m_reader = OrcFile.createReader(m_file_path, OrcFile.readerOptions(m_conf));

    System.out.println("Reader: " + m_reader);

    System.out.println("# Rows: " + m_reader.getNumberOfRows());
    m_types = m_reader.getTypes();
    System.out.println("# Types in the file: " + m_types.size());

    for (int i = 0; i < m_types.size(); i++) {
      System.out.println("Type " + i + ": " + m_types.get(i).getKind());
    }

    System.out.println("Compression: " + m_reader.getCompression());
    if (m_reader.getCompression() != CompressionKind.NONE) {
      System.out.println("Compression size: " + m_reader.getCompressionSize());
    }

    StructObjectInspector m_oi = (StructObjectInspector) m_reader.getObjectInspector();

    System.out.println("object inspector type category: " + m_oi.getCategory());
    System.out.println("object inspector type name    : " + m_oi.getTypeName());

    m_fields = m_oi.getAllStructFieldRefs();
    System.out.println("Number of columns in the table: " + m_fields.size());

    RecordReader m_rr = m_reader.rows();

    // Print the type info:
    for (int i = 0; i < m_fields.size(); i++) {
      System.out.println("Column " + i + " name: " + m_fields.get(i).getFieldName());
      ObjectInspector lv_foi = m_fields.get(i).getFieldObjectInspector();
      System.out.println("Column " + i + " type category: " + lv_foi.getCategory());
      System.out.println("Column " + i + " type name: " + lv_foi.getTypeName());
      //		Object lv_column_val = m_oi.getStructFieldData(lv_row, m_fields.get(i));
      // System.out.print("Column " + i + " value: " + lv_row.getFieldValue(i));
    }

    OrcStruct lv_row = null;
    Object lv_field_val = null;
    StringBuilder lv_row_string = new StringBuilder(1024);
    while (m_rr.hasNext()) {
      lv_row = (OrcStruct) m_rr.next(lv_row);
      lv_row_string.setLength(0);
      for (int i = 0; i < m_fields.size(); i++) {
        lv_field_val = lv_row.getFieldValue(i);
        if (lv_field_val != null) {
          lv_row_string.append(lv_field_val);
        }
        lv_row_string.append('|');
      }
      System.out.println(lv_row_string);
    }

    /**
     * Typecasting to appropriate type based on the 'kind' if (OrcProto.Type.Kind.INT ==
     * m_types.get(1).getKind()) { IntWritable lvf_1_val = (IntWritable) lv_row.getFieldValue(0);
     * System.out.println("Column 1 value: " + lvf_1_val); }
     */
  }
  /**
   * Compares two types identified by the given object inspectors. This method compares the types as
   * follows:
   *
   * <ol>
   *   <li>If the given inspectors do not belong to same category, the result is negative.
   *   <li>If the given inspectors are for <code>PRIMITIVE</code> type, the result is the comparison
   *       of their type names.
   *   <li>If the given inspectors are for <code>LIST</code> type, then the result is recursive call
   *       to compare the type of list elements.
   *   <li>If the given inspectors are <code>MAP</code> type, then the result is a recursive call to
   *       compare the map key and value types.
   *   <li>If the given inspectors are <code>STRUCT</code> type, then the result is negative if they
   *       do not have the same number of fields. If they do have the same number of fields, the
   *       result is a recursive call to compare each of the field types.
   *   <li>If none of the above, the result is negative.
   * </ol>
   *
   * @param o1
   * @param o2
   * @return true if the given object inspectors represent the same types.
   */
  public static boolean compareTypes(ObjectInspector o1, ObjectInspector o2) {
    Category c1 = o1.getCategory();
    Category c2 = o2.getCategory();

    // Return false if categories are not equal
    if (!c1.equals(c2)) {
      return false;
    }

    // If both categories are primitive return the comparison of type names.
    if (c1.equals(Category.PRIMITIVE)) {
      return o1.getTypeName().equals(o2.getTypeName());
    }

    // If lists, recursively compare the list element types
    if (c1.equals(Category.LIST)) {
      ObjectInspector child1 = ((ListObjectInspector) o1).getListElementObjectInspector();
      ObjectInspector child2 = ((ListObjectInspector) o2).getListElementObjectInspector();
      return compareTypes(child1, child2);
    }

    // If maps, recursively compare the key and value types
    if (c1.equals(Category.MAP)) {
      MapObjectInspector mapOI1 = (MapObjectInspector) o1;
      MapObjectInspector mapOI2 = (MapObjectInspector) o2;

      ObjectInspector childKey1 = mapOI1.getMapKeyObjectInspector();
      ObjectInspector childKey2 = mapOI2.getMapKeyObjectInspector();

      if (compareTypes(childKey1, childKey2)) {
        ObjectInspector childVal1 = mapOI1.getMapValueObjectInspector();
        ObjectInspector childVal2 = mapOI2.getMapValueObjectInspector();

        if (compareTypes(childVal1, childVal2)) {
          return true;
        }
      }

      return false;
    }

    // If structs, recursively compare the fields
    if (c1.equals(Category.STRUCT)) {
      StructObjectInspector structOI1 = (StructObjectInspector) o1;
      StructObjectInspector structOI2 = (StructObjectInspector) o2;

      List<? extends StructField> childFieldsList1 = structOI1.getAllStructFieldRefs();
      List<? extends StructField> childFieldsList2 = structOI2.getAllStructFieldRefs();

      if (childFieldsList1 == null && childFieldsList2 == null) {
        return true;
      } else if (childFieldsList1 == null || childFieldsList2 == null) {
        return false;
      } else if (childFieldsList1.size() != childFieldsList2.size()) {
        return false;
      }

      Iterator<? extends StructField> it1 = childFieldsList1.iterator();
      Iterator<? extends StructField> it2 = childFieldsList2.iterator();
      while (it1.hasNext()) {
        StructField field1 = it1.next();
        StructField field2 = it2.next();

        if (!compareTypes(field1.getFieldObjectInspector(), field2.getFieldObjectInspector())) {
          return false;
        }
      }

      return true;
    }

    if (c1.equals(Category.UNION)) {
      UnionObjectInspector uoi1 = (UnionObjectInspector) o1;
      UnionObjectInspector uoi2 = (UnionObjectInspector) o2;
      List<ObjectInspector> ois1 = uoi1.getObjectInspectors();
      List<ObjectInspector> ois2 = uoi2.getObjectInspectors();

      if (ois1 == null && ois2 == null) {
        return true;
      } else if (ois1 == null || ois2 == null) {
        return false;
      } else if (ois1.size() != ois2.size()) {
        return false;
      }
      Iterator<? extends ObjectInspector> it1 = ois1.iterator();
      Iterator<? extends ObjectInspector> it2 = ois2.iterator();
      while (it1.hasNext()) {
        if (!compareTypes(it1.next(), it2.next())) {
          return false;
        }
      }
      return true;
    }

    // Unknown category
    throw new RuntimeException("Unknown category encountered: " + c1);
  }
  public static int hashCode(Object o, ObjectInspector objIns) {
    if (o == null) {
      return 0;
    }
    switch (objIns.getCategory()) {
      case PRIMITIVE:
        {
          PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) objIns);
          switch (poi.getPrimitiveCategory()) {
            case VOID:
              return 0;
            case BOOLEAN:
              return ((BooleanObjectInspector) poi).get(o) ? 1 : 0;
            case BYTE:
              return ((ByteObjectInspector) poi).get(o);
            case SHORT:
              return ((ShortObjectInspector) poi).get(o);
            case INT:
              return ((IntObjectInspector) poi).get(o);
            case LONG:
              {
                long a = ((LongObjectInspector) poi).get(o);
                return (int) ((a >>> 32) ^ a);
              }
            case FLOAT:
              return Float.floatToIntBits(((FloatObjectInspector) poi).get(o));
            case DOUBLE:
              {
                // This hash function returns the same result as Double.hashCode()
                // while DoubleWritable.hashCode returns a different result.
                long a = Double.doubleToLongBits(((DoubleObjectInspector) poi).get(o));
                return (int) ((a >>> 32) ^ a);
              }
            case STRING:
              {
                // This hash function returns the same result as String.hashCode() when
                // all characters are ASCII, while Text.hashCode() always returns a
                // different result.
                Text t = ((StringObjectInspector) poi).getPrimitiveWritableObject(o);
                int r = 0;
                for (int i = 0; i < t.getLength(); i++) {
                  r = r * 31 + t.getBytes()[i];
                }
                return r;
              }
            case CHAR:
              return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case VARCHAR:
              return ((HiveVarcharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case BINARY:
              return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();

            case DATE:
              return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
            case TIMESTAMP:
              TimestampWritable t = ((TimestampObjectInspector) poi).getPrimitiveWritableObject(o);
              return t.hashCode();
            case INTERVAL_YEAR_MONTH:
              HiveIntervalYearMonthWritable intervalYearMonth =
                  ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(o);
              return intervalYearMonth.hashCode();
            case INTERVAL_DAY_TIME:
              HiveIntervalDayTimeWritable intervalDayTime =
                  ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(o);
              return intervalDayTime.hashCode();
            case DECIMAL:
              return ((HiveDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();

            default:
              {
                throw new RuntimeException("Unknown type: " + poi.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          int r = 0;
          ListObjectInspector listOI = (ListObjectInspector) objIns;
          ObjectInspector elemOI = listOI.getListElementObjectInspector();
          for (int ii = 0; ii < listOI.getListLength(o); ++ii) {
            r = 31 * r + hashCode(listOI.getListElement(o, ii), elemOI);
          }
          return r;
        }
      case MAP:
        {
          int r = 0;
          MapObjectInspector mapOI = (MapObjectInspector) objIns;
          ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
          ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
          Map<?, ?> map = mapOI.getMap(o);
          for (Map.Entry<?, ?> entry : map.entrySet()) {
            r += hashCode(entry.getKey(), keyOI) ^ hashCode(entry.getValue(), valueOI);
          }
          return r;
        }
      case STRUCT:
        int r = 0;
        StructObjectInspector structOI = (StructObjectInspector) objIns;
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        for (StructField field : fields) {
          r =
              31 * r
                  + hashCode(
                      structOI.getStructFieldData(o, field), field.getFieldObjectInspector());
        }
        return r;

      case UNION:
        UnionObjectInspector uOI = (UnionObjectInspector) objIns;
        byte tag = uOI.getTag(o);
        return hashCode(uOI.getField(o), uOI.getObjectInspectors().get(tag));

      default:
        throw new RuntimeException("Unknown type: " + objIns.getTypeName());
    }
  }