static { // pick up system properties TESTING_PROPS.putAll(System.getProperties()); // override with test settings try { TESTING_PROPS.load(TestUtils.class.getResourceAsStream("/test.properties")); } catch (IOException e) { throw new RuntimeException("Cannot load default Hadoop test properties"); } // manually select the hadoop properties String fs = System.getProperty("hd.fs"); String jt = System.getProperty("hd.jt"); // override if (StringUtils.hasText(fs)) { System.out.println("Setting FS to " + fs); TESTING_PROPS.put("fs.default.name", fs.trim()); } if (StringUtils.hasText(jt)) { System.out.println("Setting JT to " + jt); TESTING_PROPS.put("mapred.job.tracker", jt.trim()); } }
public static List<String> discoveredOrDeclaredNodes(Settings settings) { // returned the discovered nodes or, if not defined, the set nodes String discoveredNodes = settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_DISCOVERED_NODES); return (StringUtils.hasText(discoveredNodes) ? StringUtils.tokenize(discoveredNodes) : declaredNodes(settings)); }
/**
 * Stores the settings and registers one field extractor per update-script
 * parameter. Each tokenized definition is split on ":" and must yield exactly
 * two parts: the parameter name and the value handed to
 * {@code createFieldExtractor}.
 */
@Override
public void setSettings(Settings settings) {
    this.settings = settings;

    for (String definition : StringUtils.tokenize(settings.getUpdateScriptParams())) {
        List<String> pair = StringUtils.tokenize(definition, ":");
        Assert.isTrue(pair.size() == 2, "Invalid param definition " + definition);

        String paramName = pair.get(0);
        params.put(paramName, createFieldExtractor(pair.get(1)));
    }
}
protected List<Object> parse(String string) { // break it down into fields List<Object> template = new ArrayList<Object>(); while (string.contains("{")) { int startPattern = string.indexOf("{"); template.add(string.substring(0, startPattern)); int endPattern = string.indexOf("}"); Assert.isTrue(endPattern > startPattern + 1, "Invalid pattern given " + string); String nestedString = string.substring(startPattern + 1, endPattern); int separator = nestedString.indexOf(":"); if (separator > 0) { Assert.isTrue( nestedString.length() > separator + 1, "Invalid format given " + nestedString); String format = nestedString.substring(separator + 1); nestedString = nestedString.substring(0, separator); template.add(wrapWithFormatter(format, createFieldExtractor(nestedString))); } else { template.add(createFieldExtractor(nestedString)); } string = string.substring(endPattern + 1).trim(); } if (StringUtils.hasText(string)) { template.add(string); } return template; }
@Override public Object field(Object target) { List<Object> list = new ArrayList<Object>(params.size()); int entries = 0; // construct the param list but keep the value exposed to be properly transformed (if it's // extracted from the runtime) list.add(new RawJson("{")); for (Entry<String, FieldExtractor> entry : params.entrySet()) { String val = StringUtils.toJsonString(entry.getKey()) + ":"; if (entries > 0) { val = "," + val; } list.add(new RawJson(val)); Object field = entry.getValue().field(target); if (field == FieldExtractor.NOT_FOUND) { lastFailingFieldExtractor = entry.getValue(); return FieldExtractor.NOT_FOUND; } // add the param as is - it will be translated to JSON as part of the list later list.add(field); entries++; } list.add(new RawJson("}")); return list; }
/**
 * Looks up a property, falling back to the given default when the stored
 * value has no text.
 *
 * @param name         property name
 * @param defaultValue value returned when the property has no text
 * @return the property value or {@code defaultValue}
 */
protected String getProperty(String name, String defaultValue) {
    String found = getProperty(name);
    return StringUtils.hasText(found) ? found : defaultValue;
}
/**
 * Tokenizes the node string and replaces every entry, in place, with the
 * result of {@code qualifyNode(entry, defaultPort)}.
 *
 * @param nodes       node list as a single string
 * @param defaultPort port passed to {@code qualifyNode} for each entry
 * @return the list of qualified nodes
 */
private static List<String> qualifyNodes(String nodes, int defaultPort) {
    List<String> qualified = StringUtils.tokenize(nodes);
    int position = 0;
    while (position < qualified.size()) {
        String node = qualified.get(position);
        qualified.set(position, qualifyNode(node, defaultPort));
        position++;
    }
    return qualified;
}
/** * Whether the settings indicate a ES 2.x (which introduces breaking changes) or 1.x. * * @param settings * @return */ public static boolean isEs20(Settings settings) { String version = settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_VERSION); // assume ES 1.0 by default if (!StringUtils.hasText(version)) { return true; } return version.startsWith("2."); }
/**
 * Tokenizes the string with the given delimiters (both boolean flags of
 * {@code tokenize} enabled) and runs every token through
 * {@code StringUtils.decodeQuery}.
 *
 * @param string     text to split
 * @param delimiters delimiter characters passed to {@code tokenize}
 * @return the decoded tokens, in their original order
 */
public static List<String> tokenizeAndUriDecode(String string, String delimiters) {
    List<String> tokens = tokenize(string, delimiters, true, true);
    int count = tokens.size();
    List<String> result = new ArrayList<String>(count);
    for (int i = 0; i < count; i++) {
        result.add(StringUtils.decodeQuery(tokens.get(i)));
    }
    return result;
}
// return the value in a JSON friendly way public static String toJsonString(Object value) { if (value == null) { return "null"; } else if (value.getClass().equals(String.class)) { return "\"" + StringUtils.jsonEncoding(value.toString()) + "\""; } // else it's a Boolean or Number so no escaping or quotes else { return value.toString(); } }
/**
 * Returns the internal target URI property when it has text; otherwise builds
 * {@code http://<host>:<port>/} from {@code getHost()} and {@code getPort()}.
 */
public String getTargetUri() {
    String address = getProperty(INTERNAL_ES_TARGET_URI);
    if (StringUtils.hasText(address)) {
        return address;
    }
    return "http://" + getHost() + ":" + getPort() + "/";
}
// returns a map of {<column-name>:_colX} private static Map<String, String> columnMap(String columnString) { // add default aliases for serialization (mapping name -> _colX) List<String> columnNames = StringUtils.tokenize(columnString, ","); if (columnNames.isEmpty()) { return Collections.emptyMap(); } Map<String, String> columns = new LinkedHashMap<String, String>(); for (int i = 0; i < columnNames.size(); i++) { columns.put(columnNames.get(i), HiveConstants.UNNAMED_COLUMN_PREFIX + i); } return columns; }
/**
 * Copies the supplied values onto the settings, skipping text values that are
 * empty and ports that are not positive.
 *
 * @param settings settings to update
 * @param nodes    hosts to set, ignored when it has no text
 * @param port     port to set, ignored unless greater than zero
 * @param resource resource to set, ignored when it has no text
 * @param query    query to set, ignored when it has no text
 * @param read     selects the read resource ({@code true}) or the write resource ({@code false})
 * @return the same settings instance
 */
static Settings init(
        Settings settings, String nodes, int port, String resource, String query, boolean read) {
    if (StringUtils.hasText(nodes)) {
        settings.setHosts(nodes);
    }

    if (port > 0) {
        settings.setPort(port);
    }

    if (StringUtils.hasText(query)) {
        settings.setQuery(query);
    }

    if (StringUtils.hasText(resource)) {
        if (!read) {
            settings.setResourceWrite(resource);
        } else {
            settings.setResourceRead(resource);
        }
    }

    return settings;
}
/**
 * Returns the value of the {@code ES_HOST} property when it has text, logging
 * a deprecation warning while the {@code ES_HOST_WARNING} flag is set (the
 * flag is cleared afterwards). Otherwise returns {@code ES_NODES}, defaulting
 * to {@code ES_NODES_DEFAULT}.
 */
public String getNodes() {
    String host = getProperty(ES_HOST);
    if (!StringUtils.hasText(host)) {
        return getProperty(ES_NODES, ES_NODES_DEFAULT);
    }

    if (ES_HOST_WARNING) {
        LogFactory.getLog(Settings.class)
                .warn(String.format(
                        "`%s` property has been deprecated - use `%s` instead", ES_HOST, ES_NODES));
        ES_HOST_WARNING = false;
    }
    return host;
}
@Test public void testNestedField() throws Exception { String data = "{ \"data\" : { \"map\" : { \"key\" : [ 10, 20 ] } } }"; RestUtils.putData(indexPrefix + "cascading-local/nestedmap", StringUtils.toUTF(data)); RestUtils.refresh(indexPrefix + "cascading-local"); Properties cfg = cfg(); cfg.setProperty("es.mapping.names", "nested:data.map.key"); Tap in = new EsTap(indexPrefix + "cascading-local/nestedmap", new Fields("nested")); Pipe pipe = new Pipe("copy"); pipe = new Each(pipe, new FilterNotNull()); pipe = new Each(pipe, AssertionLevel.STRICT, new AssertSizeLessThan(2)); // print out Tap out = new OutputStreamTap(new TextLine(), OUT); build(cfg, in, out, pipe); }
/**
 * Queries the {@code _nodes/transport} endpoint and collects the HTTP address
 * of every node that publishes one.
 *
 * <p>The address is taken from the {@code http_address} entry. Whatever
 * precedes the first "/" (if any) is dropped, and the value is cut at the
 * following "]" when there is one. An address without a closing bracket is
 * used up to its end instead of failing in {@code substring}.
 *
 * @return the extracted addresses, one per node exposing {@code http_address}
 * @throws IOException if the underlying request fails
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public List<String> discoverNodes() throws IOException {
    String endpoint = "_nodes/transport";
    Map<String, Map> nodes = (Map<String, Map>) get(endpoint, "nodes");

    List<String> hosts = new ArrayList<String>(nodes.size());

    for (Map value : nodes.values()) {
        String inet = (String) value.get("http_address");
        if (StringUtils.hasText(inet)) {
            // indexOf returns -1 when there is no slash, making the start 0
            int startIp = inet.indexOf("/") + 1;
            int endIp = inet.indexOf("]", startIp);
            if (endIp < 0) {
                // no bracket wrapping the address - keep everything after the slash
                endIp = inet.length();
            }
            hosts.add(inet.substring(startIp, endIp));
        }
    }
    return hosts;
}
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) { // extract column info - don't use Hive constants as they were renamed in 0.9 breaking // compatibility // the column names are saved as the given inspector to #serialize doesn't preserves them (maybe // because it's an external table) // use the class since StructType requires it ... List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ","); List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString( tableProperties.getProperty(HiveConstants.COLUMNS_TYPES)); // create a standard writable Object Inspector - used later on by serialization/deserialization List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(); for (TypeInfo typeInfo : colTypes) { inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo)); } return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors); }
public static Map<String, String> aliases(String definition, boolean caseInsensitive) { List<String> aliases = StringUtils.tokenize(definition); Map<String, String> aliasMap = new LinkedHashMap<String, String>(); if (aliases != null) { for (String string : aliases) { // split alias string = string.trim(); int index = string.indexOf(":"); if (index > 0) { String key = string.substring(0, index); aliasMap.put(key, string.substring(index + 1)); aliasMap.put( caseInsensitive ? key.toLowerCase(Locale.ROOT) : key, string.substring(index + 1)); } } } return aliasMap; }
Response execute(Request request, boolean checkStatus) throws IOException { Response response = network.execute(request); if (checkStatus && response.hasFailed()) { // check error first String msg = parseContent(response.body(), "error"); if (!StringUtils.hasText(msg)) { msg = String.format( "[%s] on [%s] failed; server[%s] returned [%s:%s]", request.method().name(), request.path(), response.uri(), response.status(), response.statusDescription()); } throw new IllegalStateException(msg); } return response; }
static Collection<String> columnToAlias(Settings settings) { FieldAlias fa = alias(settings); List<String> columnNames = StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ","); // eliminate virtual columns // we can't use virtual columns since some distro don't have this field... // for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) { // columnNames.remove(vc.getName()); // } for (String vc : HiveConstants.VIRTUAL_COLUMNS) { columnNames.remove(vc); } for (int i = 0; i < columnNames.size(); i++) { String original = columnNames.get(i); String alias = fa.toES(original); if (alias != null) { columnNames.set(i, alias); } } return columnNames; }
/**
 * Resolves the target resource, in order of preference: the
 * {@code targetResource} field, the internal target resource property, then
 * the {@code ES_RESOURCE} property.
 */
public String getTargetResource() {
    String internalResource = getProperty(INTERNAL_ES_TARGET_RESOURCE);
    if (StringUtils.hasText(targetResource)) {
        return targetResource;
    }
    if (StringUtils.hasText(internalResource)) {
        return internalResource;
    }
    return getProperty(ES_RESOURCE);
}
/**
 * Writes a Pig value to the generator according to its Pig data type,
 * recursing into maps, tuples and bags.
 *
 * @param object         the value to write; {@code null} is written as a JSON null
 * @param field          schema of the value; when {@code null} the type is
 *                       looked up through {@code DataType.findType(object)}
 * @param generator      destination for the generated content
 * @param writeFieldName whether to emit the (aliased) field name before the value
 * @return {@code true} if the value was written; for unknown types either the
 *         result of {@code handleUnknown} or {@code false}
 */
public boolean write(
        Object object, ResourceFieldSchema field, Generator generator, boolean writeFieldName) {
    byte type = (field != null ? field.getType() : DataType.findType(object));
    // NOTE(review): field is dereferenced here without a null check even though the line above
    // tolerates a null field - confirm callers never pass writeFieldName=true with a null field
    if (writeFieldName) {
        generator.writeFieldName(alias.toES(field.getName()));
    }
    if (object == null) {
        generator.writeNull();
        return true;
    }
    switch (type) {
    case DataType.ERROR:
    case DataType.UNKNOWN:
        return handleUnknown(object, field, generator);
    case DataType.NULL:
        generator.writeNull();
        break;
    case DataType.BOOLEAN:
        generator.writeBoolean((Boolean) object);
        break;
    case DataType.INTEGER:
        generator.writeNumber(((Number) object).intValue());
        break;
    case DataType.LONG:
        generator.writeNumber(((Number) object).longValue());
        break;
    case DataType.FLOAT:
        generator.writeNumber(((Number) object).floatValue());
        break;
    case DataType.DOUBLE:
        generator.writeNumber(((Number) object).doubleValue());
        break;
    case DataType.BYTE:
        generator.writeNumber((Byte) object);
        break;
    case DataType.CHARARRAY:
        generator.writeString(object.toString());
        break;
    case DataType.BYTEARRAY:
        generator.writeBinary(((DataByteArray) object).get());
        break;
    // DateTime introduced in Pig 11; matched by raw value - presumably so the code
    // does not depend on the newer constant
    case 30: // DataType.DATETIME
        generator.writeString(PigUtils.convertDateToES(object));
        break;
    // BigInteger (the original note dates it to Pig 12), matched by raw value
    case 65: // DataType.BIGINTEGER
        throw new SerializationException(
                "Big integers are not supported by Elasticsearch - consider using a different type (such as string)");
    // BigDecimal (the original note dates it to Pig 12), matched by raw value
    case 70: // DataType.BIGDECIMAL
        throw new SerializationException(
                "Big decimals are not supported by Elasticsearch - consider using a different type (such as string)");
    case DataType.MAP:
        // NOTE(review): field is dereferenced unconditionally in the MAP, TUPLE and BAG cases
        ResourceSchema nestedSchema = field.getSchema();
        // no nested schema: write an empty object
        if (nestedSchema == null) {
            generator.writeBeginObject();
            generator.writeEndObject();
            break;
        }
        ResourceFieldSchema[] nestedFields = nestedSchema.getFields();
        generator.writeBeginObject();
        // Pig maps are actually String -> Object association so we can save the key right away;
        // every value is written using the first nested field as its schema
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) {
            generator.writeFieldName(alias.toES(entry.getKey().toString()));
            write(entry.getValue(), nestedFields[0], generator, false);
        }
        generator.writeEndObject();
        break;
    case DataType.TUPLE:
        nestedSchema = field.getSchema();
        // empty tuple shortcut
        if (nestedSchema == null) {
            generator.writeBeginObject();
            generator.writeEndObject();
            break;
        }
        nestedFields = nestedSchema.getFields();
        // use getAll instead of get(int) to avoid having to handle Exception...
        List<Object> tuples = ((Tuple) object).getAll();
        generator.writeBeginObject();
        for (int i = 0; i < nestedFields.length; i++) {
            String name = nestedFields[i].getName();
            // handle schemas without names - the field position is used as the name
            name = (StringUtils.hasText(name) ? alias.toES(name) : Integer.toString(i));
            generator.writeFieldName(name);
            write(tuples.get(i), nestedFields[i], generator, false);
        }
        generator.writeEndObject();
        break;
    case DataType.BAG:
        nestedSchema = field.getSchema();
        // no nested schema: write an empty array
        if (nestedSchema == null) {
            generator.writeBeginArray();
            generator.writeEndArray();
            break;
        }
        // every tuple of the bag is written using the first nested field as its schema
        ResourceFieldSchema bagType = nestedSchema.getFields()[0];
        generator.writeBeginArray();
        for (Tuple tuple : (DataBag) object) {
            write(tuple, bagType, generator, false);
        }
        generator.writeEndArray();
        break;
    default:
        // any other type is delegated to handleUnknown only when writeUnknownTypes is set
        if (writeUnknownTypes) {
            return handleUnknown(object, field, generator);
        }
        return false;
    }
    return true;
}
/**
 * Resolves the target hosts, in order of preference: the {@code targetHosts}
 * field, the internal hosts property, then {@code getNodes()}.
 */
public String getTargetHosts() {
    String internalHosts = getProperty(INTERNAL_ES_HOSTS);
    if (StringUtils.hasText(targetHosts)) {
        return targetHosts;
    }
    if (StringUtils.hasText(internalHosts)) {
        return internalHosts;
    }
    return getNodes();
}
/**
 * Stores the qualified node under the internal "pinned node" property.
 * Nothing is stored when the node has no text or the port is not positive.
 *
 * @param settings settings to update
 * @param node     node to pin
 * @param port     port used to qualify the node
 */
public static void pinNode(Settings settings, String node, int port) {
    if (!StringUtils.hasText(node) || port <= 0) {
        return;
    }
    String pinned = qualifyNode(node, port);
    settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_PINNED_NODE, pinned);
}
/**
 * Tells whether the internal "pinned node" property of the settings has text.
 *
 * @param settings settings to inspect
 * @return {@code true} if a pinned node is defined
 */
public static boolean hasPinnedNode(Settings settings) {
    String pinned = settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_PINNED_NODE);
    return StringUtils.hasText(pinned);
}
/**
 * Converts a text value: {@code null} maps to {@code nullValue()}, and so does
 * text-less input while {@code emptyAsNull} is set; everything else goes
 * through {@code parseString}.
 */
protected Object textValue(String value) {
    if (value == null) {
        return nullValue();
    }
    if (!StringUtils.hasText(value) && emptyAsNull) {
        return nullValue();
    }
    return parseString(value);
}
/**
 * Returns the {@code host} field when it has text, otherwise the
 * {@code ES_HOST} property defaulting to {@code ES_HOST_DEFAULT}.
 */
public String getHost() {
    if (StringUtils.hasText(host)) {
        return host;
    }
    return getProperty(ES_HOST, ES_HOST_DEFAULT);
}
/**
 * Saves the nodes, concatenated with {@code StringUtils.DEFAULT_DELIMITER},
 * under the internal "discovered nodes" property.
 *
 * @param settings settings to update
 * @param nodes    nodes to store
 */
public static void setDiscoveredNodes(Settings settings, Collection<String> nodes) {
    String joined = StringUtils.concatenate(nodes, StringUtils.DEFAULT_DELIMITER);
    settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_DISCOVERED_NODES, joined);
}
/**
 * Returns the internal target fields property when it has text, otherwise the
 * {@code ES_SCROLL_FIELDS} property.
 */
public String getScrollFields() {
    String internalFields = getProperty(INTERNAL_ES_TARGET_FIELDS);
    if (StringUtils.hasText(internalFields)) {
        return internalFields;
    }
    return getProperty(ES_SCROLL_FIELDS);
}
/**
 * Returns the result of {@code StringUtils.asUTFString} applied to
 * {@code bytes}, {@code offset} and {@code size}.
 */
@Override
public String toString() {
    String text = StringUtils.asUTFString(bytes, offset, size);
    return text;
}