public abstract class CascadingUtils {

  private static final String MAPPING_NAMES = "es.mapping.names";
  private static final boolean CASCADING_22_AVAILABLE =
      ObjectUtils.isClassPresent("cascading.tuple.type.CoercibleType", Tap.class.getClassLoader());

  static Settings addDefaultsToSettings(
      Properties flowProperties, Properties tapProperties, Log log) {
    Settings settings =
        HadoopSettingsManager.loadFrom(CascadingUtils.extractOriginalProperties(flowProperties))
            .merge(tapProperties);

    InitializationUtils.discoverNodesIfNeeded(settings, log);
    InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
    InitializationUtils.discoverEsVersion(settings, log);

    InitializationUtils.setValueWriterIfNotSet(settings, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(
        settings, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(settings, CascadingFieldExtractor.class, log);

    return settings;
  }

  static void addSerializationToken(Object config) {
    Configuration cfg = (Configuration) config;
    String tokens = cfg.get(TupleSerializationProps.SERIALIZATION_TOKENS);

    String lmw = LinkedMapWritable.class.getName();

    // no serialization tokens defined yet - register the writable under id 140
    if (tokens == null) {
      cfg.set(TupleSerializationProps.SERIALIZATION_TOKENS, "140=" + lmw);
      LogFactory.getLog(EsTap.class)
          .trace(String.format("Registered Cascading serialization token %s for %s", 140, lmw));
    } else {
      // token already registered
      if (tokens.contains(lmw)) {
        return;
      }

      // find the first unused token id, starting at 140
      Map<Integer, String> mapping = new LinkedHashMap<Integer, String>();
      tokens = tokens.replaceAll("\\s", ""); // allow for whitespace in token set

      for (String pair : tokens.split(",")) {
        String[] elements = pair.split("=");
        mapping.put(Integer.parseInt(elements[0]), elements[1]);
      }

      for (int id = 140; id < 255; id++) {
        if (!mapping.containsKey(Integer.valueOf(id))) {
          cfg.set(
              TupleSerializationProps.SERIALIZATION_TOKENS,
              Util.join(",", Util.removeNulls(tokens, id + "=" + lmw)));
          LogFactory.getLog(EsTap.class)
              .trace(String.format("Registered Cascading serialization token %s for %s", id, lmw));
          return;
        }
      }
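      // ids 140-254 are all taken - token not registered, config left as-is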
    }
  }

  static FieldAlias alias(Settings settings) {
    return new FieldAlias(SettingsUtils.aliases(settings.getProperty(MAPPING_NAMES), false), false);
  }

  static List<String> asStrings(Fields fields) {
    if (fields == null || !fields.isDefined()) {
      // use auto-generated name
      return Collections.emptyList();
    }

    int size = fields.size();
    List<String> names = new ArrayList<String>(size);
    for (int fieldIndex = 0; fieldIndex < size; fieldIndex++) {
      names.add(fields.get(fieldIndex).toString());
    }

    return names;
  }

  static Collection<String> fieldToAlias(Settings settings, Fields fields) {
    FieldAlias fa = alias(settings);
    List<String> names = asStrings(fields);
    for (int i = 0; i < names.size(); i++) {
      String original = names.get(i);
      String alias = fa.toES(original);
      if (alias != null) {
        names.set(i, alias);
      }
    }
    return names;
  }

  static Properties extractOriginalProperties(Properties copy) {
    // the copy wraps the original Properties as its private 'defaults' field - read it via reflection
    Field field = ReflectionUtils.findField(Properties.class, "defaults", Properties.class);
    ReflectionUtils.makeAccessible(field);
    return ReflectionUtils.getField(field, copy);
  }

  static Settings init(
      Settings settings, String nodes, int port, String resource, String query, boolean read) {
    if (StringUtils.hasText(nodes)) {
      settings.setHosts(nodes);
    }

    if (port > 0) {
      settings.setPort(port);
    }

    if (StringUtils.hasText(query)) {
      settings.setQuery(query);
    }

    if (StringUtils.hasText(resource)) {
      if (read) {
        settings.setResourceRead(resource);
      } else {
        settings.setResourceWrite(resource);
      }
    }

    return settings;
  }

  private abstract static class CoercibleOps {
    static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
      if (object != null && entry.getFields().getType(field) instanceof CoercibleType) {
        entry.setObject(field, object.toString());
      } else {
        entry.setObject(field, object);
      }
    }

    static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
      TupleEntry entry = sinkCall.getOutgoingEntry();
      Fields fields = entry.getFields();
      Tuple tuple = entry.getTuple();

      if (fields.hasTypes()) {
        Type[] types = new Type[fields.size()];
        for (int index = 0; index < fields.size(); index++) {
          Type type = fields.getType(index);
          if (type instanceof CoercibleType<?>) {
            types[index] = String.class;
          } else {
            types[index] = type;
          }
        }

        tuple = entry.getCoercedTuple(types);
      }
      return tuple;
    }
  }

  private abstract static class LegacyOps {
    static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
      entry.setObject(field, object);
    }

    static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
      return sinkCall.getOutgoingEntry().getTuple();
    }
  }

  static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
    if (CASCADING_22_AVAILABLE) {
      CoercibleOps.setObject(entry, field, object);
    } else {
      LegacyOps.setObject(entry, field, object);
    }
  }

  static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
    return (CASCADING_22_AVAILABLE
        ? CoercibleOps.coerceToString(sinkCall)
        : LegacyOps.coerceToString(sinkCall));
  }

  @SuppressWarnings("rawtypes")
  public static Tap hadoopTap(
      String host, int port, String path, String query, Fields fields, Properties props) {
    return new EsHadoopTap(host, port, path, query, fields, props);
  }
}
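
A minimal, self-contained sketch of the token-allocation logic in addSerializationToken above (hypothetical demo code, not part of the original source; the class and method names are invented for illustration). It parses the existing "id=class" pairs and registers the new class under the first free id in [140, 255):

import java.util.LinkedHashMap;
import java.util.Map;

public class TokenDemo {
  // mirrors the scan in CascadingUtils.addSerializationToken
  static String register(String tokens, String className) {
    if (tokens == null) {
      return "140=" + className; // nothing registered yet - take the first id
    }
    if (tokens.contains(className)) {
      return tokens; // already registered
    }
    Map<Integer, String> mapping = new LinkedHashMap<Integer, String>();
    for (String pair : tokens.replaceAll("\\s", "").split(",")) {
      String[] elements = pair.split("=");
      mapping.put(Integer.parseInt(elements[0]), elements[1]);
    }
    for (int id = 140; id < 255; id++) {
      if (!mapping.containsKey(id)) {
        return tokens + "," + id + "=" + className;
      }
    }
    return tokens; // ids exhausted - left unchanged, as in the original
  }

  public static void main(String[] args) {
    // 140 is taken, so the writable lands on 141:
    // 140=com.example.Foo,141=org.elasticsearch.hadoop.mr.LinkedMapWritable
    System.out.println(register("140=com.example.Foo",
        "org.elasticsearch.hadoop.mr.LinkedMapWritable"));
  }
}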
abstract class HiveUtils {

  // Date type available since Hive 0.12
  static final boolean DATE_WRITABLE_AVAILABLE =
      ObjectUtils.isClassPresent(
          HiveConstants.DATE_WRITABLE, TimestampWritable.class.getClassLoader());

  static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9, breaking compatibility
    // save the column names, since the inspector handed to #serialize doesn't preserve them
    // (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames =
        StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(
            tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
      inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
  }

  static StructTypeInfo typeInfo(StructObjectInspector inspector) {
    return (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);
  }

  static Collection<String> columnToAlias(Settings settings) {
    FieldAlias fa = alias(settings);
    List<String> columnNames =
        StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ",");
    // eliminate virtual columns
    // can't use VirtualColumn.VIRTUAL_COLUMNS since some distros don't have this field:
    //        for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
    //            columnNames.remove(vc.getName());
    //        }

    for (String vc : HiveConstants.VIRTUAL_COLUMNS) {
      columnNames.remove(vc);
    }

    for (int i = 0; i < columnNames.size(); i++) {
      String original = columnNames.get(i);
      String alias = fa.toES(original);
      if (alias != null) {
        columnNames.set(i, alias);
      }
    }
    return columnNames;
  }

  static FieldAlias alias(Settings settings) {
    Map<String, String> aliasMap =
        SettingsUtils.aliases(settings.getProperty(HiveConstants.MAPPING_NAMES));

    // add default aliases for serialization (_colX -> mapping name)
    Map<String, String> columnMap = columnMap(settings);

    for (Entry<String, String> entry : columnMap.entrySet()) {
      String columnName = entry.getKey();
      String columnIndex = entry.getValue();

      String alias = aliasMap.get(columnName);
      if (alias != null) {
        columnName = alias;
      }

      aliasMap.put(columnIndex, columnName);
    }

    return new FieldAlias(aliasMap);
  }

  static Map<String, String> columnMap(Settings settings) {
    return columnMap(settings.getProperty(HiveConstants.COLUMNS));
  }

  // returns a map of {<column-name>:_colX}
  private static Map<String, String> columnMap(String columnString) {
    // add default aliases for serialization (mapping name -> _colX)
    List<String> columnNames = StringUtils.tokenize(columnString, ",");
    if (columnNames.isEmpty()) {
      return Collections.emptyMap();
    }

    Map<String, String> columns = new LinkedHashMap<String, String>();
    for (int i = 0; i < columnNames.size(); i++) {
      columns.put(columnNames.get(i), HiveConstants.UNNAMED_COLUMN_PREFIX + i);
    }
    return columns;
  }

  static void init(Settings settings, Log log) {
    InitializationUtils.checkIdForOperation(settings);
    InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
    try {
      InitializationUtils.discoverEsVersion(settings, log);
    } catch (IOException ex) {
      throw new EsHadoopIllegalStateException("Cannot discover Elasticsearch version", ex);
    }
  }

  static void fixHive13InvalidComments(Settings settings, Properties tbl) {
    // honor the opt-out flag
    if (Booleans.parseBoolean(settings.getProperty("es.hive.disable.columns.comments.fix"))) {
      return;
    }

    // Hive 0.13 generates invalid column comments - blank them out
    settings.setProperty(HiveConstants.COLUMN_COMMENTS, "");
    tbl.remove(HiveConstants.COLUMN_COMMENTS);
  }
}
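
To make the Hive column aliasing above concrete, a hypothetical JDK-only sketch (all names invented; it assumes "_col" as the unnamed-column prefix, i.e. Hive's positional column naming) of how alias() layers the default _colX entries on top of the user-supplied es.mapping.names aliases:

import java.util.LinkedHashMap;
import java.util.Map;

public class HiveAliasDemo {
  public static void main(String[] args) {
    // column names as Hive passes them in the table properties ("columns")
    String[] columns = "id,name,ts".split(",");

    // user aliases from es.mapping.names, e.g. name -> user_name
    Map<String, String> aliasMap = new LinkedHashMap<String, String>();
    aliasMap.put("name", "user_name");

    // mirror HiveUtils.alias(): each positional _colX maps to the
    // aliased (or original) column name
    for (int i = 0; i < columns.length; i++) {
      String columnName = columns[i];
      String alias = aliasMap.get(columnName);
      aliasMap.put("_col" + i, alias != null ? alias : columnName);
    }

    // prints: {name=user_name, _col0=id, _col1=user_name, _col2=ts}
    System.out.println(aliasMap);
  }
}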