private FieldExtractor wrapWithFormatter(String format, final FieldExtractor createFieldExtractor) {
    // instantiate the configured index formatter and wrap the given extractor
    // so every extracted value is passed through it
    final IndexFormatter iformatter =
            ObjectUtils.instantiate(settings.getMappingIndexFormatterClassName(), settings);
    iformatter.configure(format);

    return new FieldExtractor() {
        @Override
        public String field(Object target) {
            return iformatter.format(createFieldExtractor.field(target));
        }
    };
}
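The method above is a classic decorator: the formatter is instantiated once, and the given extractor is wrapped so each extracted value passes through it. Below is a minimal, self-contained sketch of that shape; SimpleExtractor and SimpleFormatter are simplified stand-ins, not the actual es-hadoop FieldExtractor/IndexFormatter interfaces.

interface SimpleExtractor { Object field(Object target); }
interface SimpleFormatter { String format(Object value); }

final class FormattingExtractor implements SimpleExtractor {
    private final SimpleExtractor delegate;
    private final SimpleFormatter formatter;

    FormattingExtractor(SimpleExtractor delegate, SimpleFormatter formatter) {
        this.delegate = delegate;
        this.formatter = formatter;
    }

    @Override
    public Object field(Object target) {
        // extract first, then format - same order as wrapWithFormatter above
        return formatter.format(delegate.field(target));
    }
}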
@Override
public void compile(String pattern) {
    this.pattern = pattern;
    // break it down into index/type
    String[] split = pattern.split("/");
    Assert.isTrue(!ObjectUtils.isEmpty(split), "invalid pattern given " + pattern);
    Assert.isTrue(split.length == 2, "invalid pattern given " + pattern);

    // check pattern
    hasPattern = pattern.contains("{") && pattern.contains("}");
    index = parse(split[0].trim());
    type = parse(split[1].trim());
}
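For illustration, a tiny stand-alone sketch of the contract compile enforces: the resource pattern must split into exactly two parts around "/", and the presence of both "{" and "}" flags a dynamic pattern. The class and method names here are hypothetical, not the actual parser.

public final class PatternDemo {
    static void demoCompile(String pattern) {
        String[] split = pattern.split("/");
        if (split.length != 2) {
            throw new IllegalArgumentException("invalid pattern given " + pattern);
        }
        boolean hasPattern = pattern.contains("{") && pattern.contains("}");
        System.out.printf("index=%s type=%s dynamic=%b%n",
                split[0].trim(), split[1].trim(), hasPattern);
    }

    public static void main(String[] args) {
        demoCompile("logs-{date}/event"); // index=logs-{date} type=event dynamic=true
    }
}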
public RestClient(Settings settings) {
    network = new NetworkClient(settings, SettingsUtils.nodes(settings));

    scrollKeepAlive = TimeValue.timeValueMillis(settings.getScrollKeepAlive());
    indexReadMissingAsEmpty = settings.getIndexReadMissingAsEmpty();

    // resolve the configured retry policy alias to a concrete class name
    String retryPolicyName = settings.getBatchWriteRetryPolicy();
    if (ConfigurationOptions.ES_BATCH_WRITE_RETRY_POLICY_SIMPLE.equals(retryPolicyName)) {
        retryPolicyName = SimpleHttpRetryPolicy.class.getName();
    }
    else if (ConfigurationOptions.ES_BATCH_WRITE_RETRY_POLICY_NONE.equals(retryPolicyName)) {
        retryPolicyName = NoHttpRetryPolicy.class.getName();
    }
    retryPolicy = ObjectUtils.instantiate(retryPolicyName, settings);
}
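The constructor resolves the retry policy in two steps: well-known configuration aliases are mapped to concrete class names, and the result is instantiated reflectively, so a user can also supply their own fully qualified class. A hedged sketch of that resolution, with hypothetical alias values and class names:

public final class RetryPolicyResolutionDemo {
    // hypothetical alias values and class names, for illustration only
    static String resolve(String configured) {
        if ("simple".equals(configured)) {
            return "org.example.SimpleHttpRetryPolicy";
        }
        if ("none".equals(configured)) {
            return "org.example.NoHttpRetryPolicy";
        }
        // anything else is assumed to be a user-supplied fully qualified class name
        return configured;
    }

    static Object instantiate(String className) throws Exception {
        // plain reflection; es-hadoop's ObjectUtils.instantiate additionally
        // passes the Settings object along to the new instance
        return Class.forName(className).getDeclaredConstructor().newInstance();
    }
}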
public void process(BytesArray storage) {
    // no extractors, no lookups
    if (ObjectUtils.isEmpty(paths)) {
        return;
    }

    results.clear();

    if (log.isTraceEnabled()) {
        log.trace(String.format("About to look for paths [%s] in doc [%s]", Arrays.toString(paths), storage));
    }

    results.addAll(ParsingUtils.values(new JacksonJsonParser(storage.bytes(), 0, storage.length()), paths));
}
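The method resolves all configured paths against the raw document in one pass via es-hadoop's ParsingUtils and JacksonJsonParser. The sketch below shows the same lookup idea with plain Jackson tree traversal and JSON Pointer, which is simpler but buffers the whole document; it is an illustration, not the actual implementation.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.ArrayList;
import java.util.List;

public final class PathLookupDemo {
    static List<String> lookup(String json, String... pointers) throws Exception {
        JsonNode root = new ObjectMapper().readTree(json);
        List<String> results = new ArrayList<String>();
        for (String pointer : pointers) {
            // JSON Pointer syntax, e.g. "/user/id"; null when the path is missing
            results.add(root.at(pointer).asText(null));
        }
        return results;
    }
    // lookup("{\"user\":{\"id\":\"kimchy\"}}", "/user/id") -> ["kimchy"]
}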
public abstract class CascadingUtils {

    private static final String MAPPING_NAMES = "es.mapping.names";
    private static final boolean CASCADING_22_AVAILABLE =
            ObjectUtils.isClassPresent("cascading.tuple.type.CoercibleType", Tap.class.getClassLoader());

    static Settings addDefaultsToSettings(Properties flowProperties, Properties tapProperties, Log log) {
        Settings settings = HadoopSettingsManager
                .loadFrom(CascadingUtils.extractOriginalProperties(flowProperties))
                .merge(tapProperties);

        InitializationUtils.discoverNodesIfNeeded(settings, log);
        InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
        InitializationUtils.discoverEsVersion(settings, log);

        InitializationUtils.setValueWriterIfNotSet(settings, CascadingValueWriter.class, log);
        InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
        InitializationUtils.setBytesConverterIfNeeded(settings, CascadingLocalBytesConverter.class, log);
        InitializationUtils.setFieldExtractorIfNotSet(settings, CascadingFieldExtractor.class, log);

        return settings;
    }

    static void addSerializationToken(Object config) {
        Configuration cfg = (Configuration) config;
        String tokens = cfg.get(TupleSerializationProps.SERIALIZATION_TOKENS);
        String lmw = LinkedMapWritable.class.getName();

        // if no tokens are defined, add one starting with 140
        if (tokens == null) {
            cfg.set(TupleSerializationProps.SERIALIZATION_TOKENS, "140=" + lmw);
            LogFactory.getLog(EsTap.class)
                    .trace(String.format("Registered Cascading serialization token %s for %s", 140, lmw));
        }
        else {
            // token already registered
            if (tokens.contains(lmw)) {
                return;
            }

            // find token id
            Map<Integer, String> mapping = new LinkedHashMap<Integer, String>();
            tokens = tokens.replaceAll("\\s", ""); // allow for whitespace in token set
            for (String pair : tokens.split(",")) {
                String[] elements = pair.split("=");
                mapping.put(Integer.parseInt(elements[0]), elements[1]);
            }

            for (int id = 140; id < 255; id++) {
                if (!mapping.containsKey(Integer.valueOf(id))) {
                    cfg.set(TupleSerializationProps.SERIALIZATION_TOKENS,
                            Util.join(",", Util.removeNulls(tokens, id + "=" + lmw)));
                    LogFactory.getLog(EsTap.class)
                            .trace(String.format("Registered Cascading serialization token %s for %s", id, lmw));
                    return;
                }
            }
        }
    }

    static FieldAlias alias(Settings settings) {
        return new FieldAlias(SettingsUtils.aliases(settings.getProperty(MAPPING_NAMES), false), false);
    }

    static List<String> asStrings(Fields fields) {
        if (fields == null || !fields.isDefined()) {
            // use auto-generated name
            return Collections.emptyList();
        }

        int size = fields.size();
        List<String> names = new ArrayList<String>(size);
        for (int fieldIndex = 0; fieldIndex < size; fieldIndex++) {
            names.add(fields.get(fieldIndex).toString());
        }
        return names;
    }

    static Collection<String> fieldToAlias(Settings settings, Fields fields) {
        FieldAlias fa = alias(settings);
        List<String> names = asStrings(fields);
        for (int i = 0; i < names.size(); i++) {
            String original = names.get(i);
            String alias = fa.toES(original);
            if (alias != null) {
                names.set(i, alias);
            }
        }
        return names;
    }

    static Properties extractOriginalProperties(Properties copy) {
        Field field = ReflectionUtils.findField(Properties.class, "defaults", Properties.class);
        ReflectionUtils.makeAccessible(field);
        return ReflectionUtils.getField(field, copy);
    }

    static Settings init(Settings settings, String nodes, int port, String resource, String query, boolean read) {
        if (StringUtils.hasText(nodes)) {
            settings.setHosts(nodes);
        }
        if (port > 0) {
            settings.setPort(port);
        }
        if (StringUtils.hasText(query)) {
            settings.setQuery(query);
        }
        if (StringUtils.hasText(resource)) {
            if (read) {
                settings.setResourceRead(resource);
            }
            else {
                settings.setResourceWrite(resource);
            }
        }
        return settings;
    }

    private abstract static class CoercibleOps {
        static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
            if (object != null && entry.getFields().getType(field) instanceof CoercibleType) {
                entry.setObject(field, object.toString());
            }
            else {
                entry.setObject(field, object);
            }
        }

        static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
            TupleEntry entry = sinkCall.getOutgoingEntry();
            Fields fields = entry.getFields();
            Tuple tuple = entry.getTuple();

            if (fields.hasTypes()) {
                Type[] types = new Type[fields.size()];
                for (int index = 0; index < fields.size(); index++) {
                    Type type = fields.getType(index);
                    if (type instanceof CoercibleType<?>) {
                        types[index] = String.class;
                    }
                    else {
                        types[index] = type;
                    }
                }
                tuple = entry.getCoercedTuple(types);
            }
            return tuple;
        }
    }

    private abstract static class LegacyOps {
        static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
            entry.setObject(field, object);
        }

        static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
            return sinkCall.getOutgoingEntry().getTuple();
        }
    }

    static void setObject(TupleEntry entry, Comparable<?> field, Object object) {
        if (CASCADING_22_AVAILABLE) {
            CoercibleOps.setObject(entry, field, object);
        }
        else {
            LegacyOps.setObject(entry, field, object);
        }
    }

    static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
        return (CASCADING_22_AVAILABLE ? CoercibleOps.coerceToString(sinkCall) : LegacyOps.coerceToString(sinkCall));
    }

    @SuppressWarnings("rawtypes")
    public static Tap hadoopTap(String host, int port, String path, String query, Fields fields, Properties props) {
        return new EsHadoopTap(host, port, path, query, fields, props);
    }
}
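The trickiest part of addSerializationToken is the free-slot scan: existing "id=className" pairs are parsed and the first unused id in [140, 255) is claimed. A stand-alone sketch of just that allocation step, with the Hadoop/Cascading wiring omitted:

import java.util.LinkedHashMap;
import java.util.Map;

public final class TokenDemo {
    static int firstFreeToken(String tokens) {
        // parse "id=className" pairs, tolerating whitespace as the original does
        Map<Integer, String> mapping = new LinkedHashMap<Integer, String>();
        for (String pair : tokens.replaceAll("\\s", "").split(",")) {
            String[] elements = pair.split("=");
            mapping.put(Integer.parseInt(elements[0]), elements[1]);
        }
        // claim the first unused id in [140, 255)
        for (int id = 140; id < 255; id++) {
            if (!mapping.containsKey(Integer.valueOf(id))) {
                return id;
            }
        }
        return -1; // no free slot
    }
    // firstFreeToken("140=a.B, 141=c.D") -> 142
}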
abstract class HiveUtils {

    // Date type available since Hive 0.12
    static final boolean DATE_WRITABLE_AVAILABLE =
            ObjectUtils.isClassPresent(HiveConstants.DATE_WRITABLE, TimestampWritable.class.getClassLoader());

    static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
        // extract column info - don't use Hive constants as they were renamed in 0.9, breaking compatibility
        // the column names are saved here since the inspector given to #serialize doesn't preserve them
        // (maybe because it's an external table)
        // use the class since StructType requires it ...
        List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
        List<TypeInfo> colTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

        // create a standard writable Object Inspector - used later on by serialization/deserialization
        List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
        for (TypeInfo typeInfo : colTypes) {
            inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
        }

        return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    }

    static StructTypeInfo typeInfo(StructObjectInspector inspector) {
        return (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);
    }

    static Collection<String> columnToAlias(Settings settings) {
        FieldAlias fa = alias(settings);
        List<String> columnNames = StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ",");
        // eliminate virtual columns
        // we can't use VirtualColumn.VIRTUAL_COLUMNS since some distros don't have this field
        // for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
        //     columnNames.remove(vc.getName());
        // }
        for (String vc : HiveConstants.VIRTUAL_COLUMNS) {
            columnNames.remove(vc);
        }
        for (int i = 0; i < columnNames.size(); i++) {
            String original = columnNames.get(i);
            String alias = fa.toES(original);
            if (alias != null) {
                columnNames.set(i, alias);
            }
        }
        return columnNames;
    }

    static FieldAlias alias(Settings settings) {
        Map<String, String> aliasMap = SettingsUtils.aliases(settings.getProperty(HiveConstants.MAPPING_NAMES));

        // add default aliases for serialization (_colX -> mapping name)
        Map<String, String> columnMap = columnMap(settings);

        for (Entry<String, String> entry : columnMap.entrySet()) {
            String columnName = entry.getKey();
            String columnIndex = entry.getValue();

            if (!aliasMap.isEmpty()) {
                String alias = aliasMap.get(columnName);
                if (alias != null) {
                    columnName = alias;
                }
            }
            aliasMap.put(columnIndex, columnName);
        }

        return new FieldAlias(aliasMap);
    }

    static Map<String, String> columnMap(Settings settings) {
        return columnMap(settings.getProperty(HiveConstants.COLUMNS));
    }

    // returns a map of {<column-name>: _colX}
    private static Map<String, String> columnMap(String columnString) {
        // add default aliases for serialization (mapping name -> _colX)
        List<String> columnNames = StringUtils.tokenize(columnString, ",");
        if (columnNames.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, String> columns = new LinkedHashMap<String, String>();
        for (int i = 0; i < columnNames.size(); i++) {
            columns.put(columnNames.get(i), HiveConstants.UNNAMED_COLUMN_PREFIX + i);
        }
        return columns;
    }

    static void init(Settings settings, Log log) {
        InitializationUtils.checkIdForOperation(settings);
        InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
        try {
            InitializationUtils.discoverEsVersion(settings, log);
        } catch (IOException ex) {
            throw new EsHadoopIllegalStateException("Cannot discover Elasticsearch version", ex);
        }
    }

    static void fixHive13InvalidComments(Settings settings, Properties tbl) {
        if (Booleans.parseBoolean(settings.getProperty("es.hive.disable.columns.comments.fix"))) {
            return;
        }

        settings.setProperty(HiveConstants.COLUMN_COMMENTS, "");
        tbl.remove(HiveConstants.COLUMN_COMMENTS);
    }
}
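The positional mapping built by columnMap is what lets the serde refer to columns by Hive's internal _colX names. A hedged, self-contained sketch of that mapping, with "_col" standing in for HiveConstants.UNNAMED_COLUMN_PREFIX and a plain split instead of StringUtils.tokenize:

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

public final class ColumnMapDemo {
    // returns a map of {<column-name>: _colX}, mirroring columnMap above
    static Map<String, String> columnMap(String columnString) {
        if (columnString == null || columnString.trim().isEmpty()) {
            return Collections.emptyMap();
        }
        Map<String, String> columns = new LinkedHashMap<String, String>();
        String[] names = columnString.split(",");
        for (int i = 0; i < names.length; i++) {
            columns.put(names[i].trim(), "_col" + i);
        }
        return columns;
    }
    // columnMap("name,age") -> {name=_col0, age=_col1}
}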