static {
    // Start from the JVM system properties ...
    TESTING_PROPS.putAll(System.getProperties());

    // ... then layer the bundled test settings on top of them.
    // The stream type is fully qualified so no extra import is required.
    java.io.InputStream testSettings = TestUtils.class.getResourceAsStream("/test.properties");
    if (testSettings == null) {
      // Properties#load(null) would fail with a bare NullPointerException; report the real problem
      throw new RuntimeException(
          "Cannot load default Hadoop test properties - /test.properties not found on the classpath");
    }
    try {
      TESTING_PROPS.load(testSettings);
    } catch (IOException e) {
      // keep the cause so the underlying I/O failure is visible in the stack trace
      throw new RuntimeException("Cannot load default Hadoop test properties", e);
    } finally {
      try {
        testSettings.close();
      } catch (IOException ignored) {
        // nothing useful can be done if closing a classpath resource fails
      }
    }

    // Explicit overrides passed on the command line (-Dhd.fs=... / -Dhd.jt=...)
    String fs = System.getProperty("hd.fs");
    String jt = System.getProperty("hd.jt");

    // these take precedence over anything loaded so far
    if (StringUtils.hasText(fs)) {
      System.out.println("Setting FS to " + fs);
      TESTING_PROPS.put("fs.default.name", fs.trim());
    }

    if (StringUtils.hasText(jt)) {
      System.out.println("Setting JT to " + jt);
      TESTING_PROPS.put("mapred.job.tracker", jt.trim());
    }
  }
 /**
  * Returns the nodes stored under the internal "discovered nodes" property or, when that
  * property holds no text, whatever {@code declaredNodes(settings)} returns.
  *
  * @param settings settings to read the discovered nodes from
  * @return the tokenized discovered nodes, or the declared nodes as a fallback
  */
 public static List<String> discoveredOrDeclaredNodes(Settings settings) {
   String discovered =
       settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_DISCOVERED_NODES);
   if (StringUtils.hasText(discovered)) {
     return StringUtils.tokenize(discovered);
   }
   // nothing discovered - use the nodes declared in the settings
   return declaredNodes(settings);
 }
  /**
   * Stores the settings and registers one field extractor per update-script parameter.
   *
   * <p>Each token of {@code settings.getUpdateScriptParams()} must split on {@code ":"} into
   * exactly two parts: the parameter name and the value handed to {@code createFieldExtractor}.
   *
   * @param settings settings holding the update script parameter definitions
   */
  @Override
  public void setSettings(Settings settings) {
    this.settings = settings;

    for (String definition : StringUtils.tokenize(settings.getUpdateScriptParams())) {
      List<String> nameAndField = StringUtils.tokenize(definition, ":");
      Assert.isTrue(nameAndField.size() == 2, "Invalid param definition " + definition);

      String paramName = nameAndField.get(0);
      params.put(paramName, createFieldExtractor(nameAndField.get(1)));
    }
  }
 /**
  * Splits a pattern such as {@code prefix{field}suffix} or {@code prefix{field:format}} into
  * its parts.
  *
  * <p>The returned list alternates between the literal text preceding each <code>{...}</code>
  * section (added even when empty) and the extractor created for that section. Text left after
  * the last section is appended only when it contains text.
  *
  * @param string the pattern to break down
  * @return the literal strings and extractors, in order of appearance
  */
 protected List<Object> parse(String string) {
   List<Object> parts = new ArrayList<Object>();
   String remaining = string;

   for (int open = remaining.indexOf("{"); open >= 0; open = remaining.indexOf("{")) {
     // literal text in front of the pattern
     parts.add(remaining.substring(0, open));

     int close = remaining.indexOf("}");
     Assert.isTrue(close > open + 1, "Invalid pattern given " + remaining);

     String section = remaining.substring(open + 1, close);
     int colon = section.indexOf(":");
     if (colon > 0) {
       // <field>:<format> - the format must not be empty
       Assert.isTrue(section.length() > colon + 1, "Invalid format given " + section);
       String format = section.substring(colon + 1);
       String fieldName = section.substring(0, colon);
       parts.add(wrapWithFormatter(format, createFieldExtractor(fieldName)));
     } else {
       parts.add(createFieldExtractor(section));
     }

     // continue with whatever follows the closing brace (surrounding whitespace dropped)
     remaining = remaining.substring(close + 1).trim();
   }

   if (StringUtils.hasText(remaining)) {
     parts.add(remaining);
   }
   return parts;
 }
 /**
  * Builds the parameter object as a list of raw JSON fragments interleaved with the extracted
  * values.
  *
  * <p>The values are added as-is (not turned into strings here) so they can be converted later
  * on. If any extractor yields {@code FieldExtractor.NOT_FOUND}, that extractor is remembered in
  * {@code lastFailingFieldExtractor} and {@code NOT_FOUND} is returned.
  *
  * @param target object the parameter values are extracted from
  * @return the fragment list, or {@code FieldExtractor.NOT_FOUND}
  */
 @Override
 public Object field(Object target) {
   List<Object> json = new ArrayList<Object>(params.size());
   json.add(new RawJson("{"));

   boolean first = true;
   for (Entry<String, FieldExtractor> param : params.entrySet()) {
     String key = StringUtils.toJsonString(param.getKey()) + ":";
     // every key but the first one is preceded by a comma
     json.add(new RawJson(first ? key : "," + key));

     FieldExtractor extractor = param.getValue();
     Object value = extractor.field(target);
     if (value == FieldExtractor.NOT_FOUND) {
       lastFailingFieldExtractor = extractor;
       return FieldExtractor.NOT_FOUND;
     }

     // keep the raw value - it gets translated to JSON as part of the list later
     json.add(value);
     first = false;
   }

   json.add(new RawJson("}"));
   return json;
 }
 /**
  * Looks up a property, falling back to a default when the stored value holds no text.
  *
  * @param name property name
  * @param defaultValue value returned when the property has no text
  * @return the property value or {@code defaultValue}
  */
 protected String getProperty(String name, String defaultValue) {
   String configured = getProperty(name);
   return StringUtils.hasText(configured) ? configured : defaultValue;
 }
 /**
  * Tokenizes the given node list and replaces every entry, in place, with the result of
  * {@code qualifyNode(entry, defaultPort)}.
  *
  * @param nodes node definitions as a single string
  * @param defaultPort port handed to {@code qualifyNode} for each entry
  * @return the list of qualified nodes
  */
 private static List<String> qualifyNodes(String nodes, int defaultPort) {
   List<String> qualified = StringUtils.tokenize(nodes);
   for (int index = 0; index < qualified.size(); index++) {
     qualified.set(index, qualifyNode(qualified.get(index), defaultPort));
   }
   return qualified;
 }
  /**
   * Whether the settings indicate an ES 2.x version (which introduces breaking changes) as
   * opposed to 1.x.
   *
   * @param settings settings carrying the internal ES version property
   * @return {@code true} if the stored version starts with {@code "2."} or if no version is
   *     stored at all; {@code false} otherwise
   */
  public static boolean isEs20(Settings settings) {
    String esVersion = settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_VERSION);
    // NOTE(review): an earlier comment here said "assume ES 1.0 by default", yet a missing
    // version has always been reported as 2.x (true) - confirm which default is intended.
    return !StringUtils.hasText(esVersion) || esVersion.startsWith("2.");
  }
  /**
   * Tokenizes the string using the given delimiters and runs every token through
   * {@code StringUtils.decodeQuery}.
   *
   * @param string text to split
   * @param delimiters delimiters handed to {@code tokenize}
   * @return the decoded tokens, in their original order
   */
  public static List<String> tokenizeAndUriDecode(String string, String delimiters) {
    // both boolean flags are passed as true, same as before - see tokenize for their meaning
    List<String> rawTokens = tokenize(string, delimiters, true, true);

    List<String> result = new ArrayList<String>(rawTokens.size());
    for (String raw : rawTokens) {
      String plain = StringUtils.decodeQuery(raw);
      result.add(plain);
    }
    return result;
  }
 /**
  * Renders the value in a JSON friendly way.
  *
  * @param value value to render; may be {@code null}
  * @return {@code "null"} for a null value, a quoted and JSON-encoded text for a {@code String},
  *     and plain {@code toString()} output for anything else
  */
 public static String toJsonString(Object value) {
   if (value == null) {
     return "null";
   }
   if (String.class.equals(value.getClass())) {
     String encoded = StringUtils.jsonEncoding(value.toString());
     return "\"" + encoded + "\"";
   }
   // anything else is expected to be a Boolean or Number - no escaping, no quotes
   return value.toString();
 }
 /**
  * Returns the internal target URI property when it holds text; otherwise builds
  * {@code http://<host>:<port>/} from {@link #getHost()} and {@code getPort()}.
  *
  * @return the target URI
  */
 public String getTargetUri() {
   String configured = getProperty(INTERNAL_ES_TARGET_URI);
   if (StringUtils.hasText(configured)) {
     return configured;
   }
   return "http://" + getHost() + ":" + getPort() + "/";
 }
  /**
   * Maps every column name to a default alias made of
   * {@code HiveConstants.UNNAMED_COLUMN_PREFIX} followed by the column position, i.e. a map of
   * {@code <column-name> -> _colX}.
   *
   * @param columnString comma separated column names
   * @return the name-to-alias map in column order; an empty map when there are no columns
   */
  private static Map<String, String> columnMap(String columnString) {
    List<String> names = StringUtils.tokenize(columnString, ",");
    if (names.isEmpty()) {
      return Collections.emptyMap();
    }

    Map<String, String> aliasByName = new LinkedHashMap<String, String>();
    int position = 0;
    for (String name : names) {
      aliasByName.put(name, HiveConstants.UNNAMED_COLUMN_PREFIX + position);
      position++;
    }
    return aliasByName;
  }
  /**
   * Copies the given values onto the settings, skipping any that are not specified.
   *
   * @param settings settings to update
   * @param nodes hosts to set; ignored when it holds no text
   * @param port port to set; ignored unless positive
   * @param resource resource to set; ignored when it holds no text
   * @param query query to set; ignored when it holds no text
   * @param read whether {@code resource} is set as the read ({@code true}) or write resource
   * @return the same {@code settings} instance
   */
  static Settings init(
      Settings settings, String nodes, int port, String resource, String query, boolean read) {
    if (StringUtils.hasText(nodes)) {
      settings.setHosts(nodes);
    }
    if (port > 0) {
      settings.setPort(port);
    }
    if (StringUtils.hasText(query)) {
      settings.setQuery(query);
    }

    boolean hasResource = StringUtils.hasText(resource);
    if (hasResource && read) {
      settings.setResourceRead(resource);
    } else if (hasResource) {
      settings.setResourceWrite(resource);
    }

    return settings;
  }
  /**
   * Returns the configured nodes.
   *
   * <p>The deprecated {@code ES_HOST} property wins when it holds text; a deprecation warning is
   * logged while the {@code ES_HOST_WARNING} flag is set, after which the flag is cleared.
   * Otherwise {@code ES_NODES} is returned, defaulting to {@code ES_NODES_DEFAULT}.
   *
   * @return the nodes definition
   */
  public String getNodes() {
    String legacyHost = getProperty(ES_HOST);
    if (!StringUtils.hasText(legacyHost)) {
      return getProperty(ES_NODES, ES_NODES_DEFAULT);
    }

    if (ES_HOST_WARNING) {
      LogFactory.getLog(Settings.class)
          .warn(
              String.format(
                  "`%s` property has been deprecated - use `%s` instead", ES_HOST, ES_NODES));
      ES_HOST_WARNING = false;
    }
    return legacyHost;
  }
  /**
   * Indexes a document holding a nested array, maps it to the {@code nested} field through
   * {@code es.mapping.names} and runs it through a flow that applies a not-null filter and a
   * strict size assertion.
   */
  @Test
  public void testNestedField() throws Exception {
    String index = indexPrefix + "cascading-local";
    String resource = index + "/nestedmap";

    String json = "{ \"data\" : { \"map\" : { \"key\" : [ 10, 20 ] } } }";
    RestUtils.putData(resource, StringUtils.toUTF(json));
    RestUtils.refresh(index);

    Properties cfg = cfg();
    cfg.setProperty("es.mapping.names", "nested:data.map.key");

    Tap source = new EsTap(resource, new Fields("nested"));

    Pipe copy = new Pipe("copy");
    copy = new Each(copy, new FilterNotNull());
    copy = new Each(copy, AssertionLevel.STRICT, new AssertSizeLessThan(2));

    // print out
    Tap sink = new OutputStreamTap(new TextLine(), OUT);
    build(cfg, source, sink, copy);
  }
  /**
   * Queries the {@code _nodes/transport} endpoint and collects the HTTP address of every node
   * that reports one.
   *
   * <p>The address is taken from the {@code http_address} entry: everything after the first
   * {@code "/"} (or from the start when there is none) up to the closing {@code "]"}. Addresses
   * without a closing bracket (plain {@code ip:port}) are read up to the end of the string
   * instead of failing.
   *
   * @return the discovered addresses
   * @throws IOException declared for failures raised by {@code get}
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  public List<String> discoverNodes() throws IOException {
    String endpoint = "_nodes/transport";
    Map<String, Map> nodes = (Map<String, Map>) get(endpoint, "nodes");

    List<String> hosts = new ArrayList<String>(nodes.size());

    for (Map value : nodes.values()) {
      String inet = (String) value.get("http_address");
      if (StringUtils.hasText(inet)) {
        int startIp = inet.indexOf("/") + 1;
        int endIp = inet.indexOf("]");
        // no (usable) closing bracket, e.g. "127.0.0.1:9200" - take the rest of the string
        // rather than letting substring throw StringIndexOutOfBoundsException
        if (endIp < startIp) {
          endIp = inet.length();
        }
        hosts.add(inet.substring(startIp, endIp));
      }
    }

    return hosts;
  }
  /**
   * Creates a standard struct inspector from the column names and column types stored in the
   * table properties.
   *
   * <p>The project's own {@code HiveConstants} keys are used instead of the Hive constants, as
   * those were renamed in 0.9, breaking compatibility.
   *
   * @param tableProperties table properties holding the column names and types
   * @return a struct inspector with one standard writable inspector per column type
   */
  static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    String namesDefinition = tableProperties.getProperty(HiveConstants.COLUMNS);
    List<String> names = StringUtils.tokenize(namesDefinition, ",");

    String typesDefinition = tableProperties.getProperty(HiveConstants.COLUMNS_TYPES);
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(typesDefinition);

    // one standard writable inspector per column - used later on by (de)serialization
    List<ObjectInspector> columnInspectors = new ArrayList<ObjectInspector>();
    for (TypeInfo type : types) {
      ObjectInspector inspector =
          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type);
      columnInspectors.add(inspector);
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, columnInspectors);
  }
  /**
   * Parses alias definitions of the form {@code name:alias} into a map.
   *
   * <p>Entries without a {@code ":"}, or starting with one, are skipped. In case-insensitive
   * mode each alias is registered under the name as given and under its lower-cased
   * ({@code Locale.ROOT}) form.
   *
   * @param definition the alias definitions as a single string
   * @param caseInsensitive whether to also register the lower-cased name
   * @return the aliases in insertion order; empty when nothing could be parsed
   */
  public static Map<String, String> aliases(String definition, boolean caseInsensitive) {
    Map<String, String> result = new LinkedHashMap<String, String>();

    List<String> entries = StringUtils.tokenize(definition);
    if (entries == null) {
      return result;
    }

    for (String entry : entries) {
      String trimmed = entry.trim();
      int colon = trimmed.indexOf(":");
      if (colon <= 0) {
        continue;
      }

      String name = trimmed.substring(0, colon);
      String target = trimmed.substring(colon + 1);
      result.put(name, target);
      if (caseInsensitive) {
        result.put(name.toLowerCase(Locale.ROOT), target);
      }
    }

    return result;
  }
  /**
   * Executes the request and, when asked to, turns a failed response into an exception.
   *
   * @param request request to execute
   * @param checkStatus whether a failed response should raise an exception
   * @return the response
   * @throws IOException declared for failures raised while executing the request
   * @throws IllegalStateException if {@code checkStatus} is set and the response has failed; the
   *     message is the {@code error} entry of the response body or, when that holds no text, a
   *     summary of the request and the response status
   */
  Response execute(Request request, boolean checkStatus) throws IOException {
    Response response = network.execute(request);

    if (!checkStatus || !response.hasFailed()) {
      return response;
    }

    // prefer the error reported in the response body
    String error = parseContent(response.body(), "error");
    if (StringUtils.hasText(error)) {
      throw new IllegalStateException(error);
    }

    throw new IllegalStateException(
        String.format(
            "[%s] on [%s] failed; server[%s] returned [%s:%s]",
            request.method().name(),
            request.path(),
            response.uri(),
            response.status(),
            response.statusDescription()));
  }
  /**
   * Returns the column names from the settings with the virtual columns removed and every name
   * that has an alias replaced by it.
   *
   * @param settings settings holding the column names and the alias configuration
   * @return the aliased column names, in column order
   */
  static Collection<String> columnToAlias(Settings settings) {
    FieldAlias fieldAlias = alias(settings);
    List<String> columns = StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ",");

    // eliminate virtual columns - Hive's VirtualColumn.VIRTUAL_COLUMNS cannot be used since
    // some distros don't have that field, hence the project's own list
    for (String virtualColumn : HiveConstants.VIRTUAL_COLUMNS) {
      columns.remove(virtualColumn);
    }

    for (int position = 0; position < columns.size(); position++) {
      String esName = fieldAlias.toES(columns.get(position));
      if (esName != null) {
        columns.set(position, esName);
      }
    }
    return columns;
  }
 /**
  * Resolves the target resource, in order of precedence: the {@code targetResource} field, the
  * internal target resource property, then the {@code ES_RESOURCE} property.
  *
  * @return the first candidate holding text, or the {@code ES_RESOURCE} value as a last resort
  */
 public String getTargetResource() {
   String internal = getProperty(INTERNAL_ES_TARGET_RESOURCE);
   if (StringUtils.hasText(targetResource)) {
     return targetResource;
   }
   if (StringUtils.hasText(internal)) {
     return internal;
   }
   return getProperty(ES_RESOURCE);
 }
  /**
   * Writes a Pig value to the generator according to its Pig data type.
   *
   * <p>The type is taken from the field schema when one is given, otherwise it is derived from
   * the object itself. A {@code null} object is written as a JSON null. Maps and tuples are
   * written as objects, bags as arrays; their content is written recursively.
   *
   * @param object the value to write; may be {@code null}
   * @param field schema describing the value
   * @param generator target of the output
   * @param writeFieldName whether the (aliased) field name is written before the value
   * @return {@code true} if the value was written; for error/unknown types the result of
   *     {@code handleUnknown}; {@code false} for an unrecognized type when
   *     {@code writeUnknownTypes} is off
   * @throws SerializationException for big integers and big decimals
   */
  public boolean write(
      Object object, ResourceFieldSchema field, Generator generator, boolean writeFieldName) {
    // no schema available - derive the type from the object itself
    byte type = (field != null ? field.getType() : DataType.findType(object));

    // NOTE(review): field is dereferenced here although it may be null (see the check above);
    // confirm callers never combine writeFieldName == true with a null field
    if (writeFieldName) {
      generator.writeFieldName(alias.toES(field.getName()));
    }

    // null values are written as such, regardless of the declared type
    if (object == null) {
      generator.writeNull();
      return true;
    }

    switch (type) {
      case DataType.ERROR:
      case DataType.UNKNOWN:
        return handleUnknown(object, field, generator);
      case DataType.NULL:
        generator.writeNull();
        break;
      case DataType.BOOLEAN:
        generator.writeBoolean((Boolean) object);
        break;
      case DataType.INTEGER:
        generator.writeNumber(((Number) object).intValue());
        break;
      case DataType.LONG:
        generator.writeNumber(((Number) object).longValue());
        break;
      case DataType.FLOAT:
        generator.writeNumber(((Number) object).floatValue());
        break;
      case DataType.DOUBLE:
        generator.writeNumber(((Number) object).doubleValue());
        break;
      case DataType.BYTE:
        generator.writeNumber((Byte) object);
        break;
      case DataType.CHARARRAY:
        generator.writeString(object.toString());
        break;
      case DataType.BYTEARRAY:
        generator.writeBinary(((DataByteArray) object).get());
        break;
        // DateTime introduced in Pig 11 - the numeric value is used instead of the constant
      case 30: // DataType.DATETIME
        generator.writeString(PigUtils.convertDateToES(object));
        break;
        // BigInteger introduced in Pig 12 - the numeric value is used instead of the constant
      case 65: // DataType.BIGINTEGER
        throw new SerializationException(
            "Big integers are not supported by Elasticsearch - consider using a different type (such as string)");
        // BigDecimal introduced in Pig 12 - the numeric value is used instead of the constant
      case 70: // DataType.BIGDECIMAL
        throw new SerializationException(
            "Big decimals are not supported by Elasticsearch - consider using a different type (such as string)");
      case DataType.MAP:
        ResourceSchema nestedSchema = field.getSchema();

        // no nested schema - write an empty object
        if (nestedSchema == null) {
          generator.writeBeginObject();
          generator.writeEndObject();
          break;
        }

        ResourceFieldSchema[] nestedFields = nestedSchema.getFields();

        generator.writeBeginObject();
        // Pig maps are actually String -> Object association so we can save the key right away
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) {
          generator.writeFieldName(alias.toES(entry.getKey().toString()));
          // every map value is written using the first nested field schema
          write(entry.getValue(), nestedFields[0], generator, false);
        }
        generator.writeEndObject();
        break;

      case DataType.TUPLE:
        nestedSchema = field.getSchema();

        // no nested schema - write an empty object
        if (nestedSchema == null) {
          generator.writeBeginObject();
          generator.writeEndObject();
          break;
        }

        nestedFields = nestedSchema.getFields();

        // use getAll instead of get(int) to avoid having to handle Exception...
        List<Object> tuples = ((Tuple) object).getAll();

        generator.writeBeginObject();
        for (int i = 0; i < nestedFields.length; i++) {
          String name = nestedFields[i].getName();
          // handle schemas without names - fall back to the field position
          name = (StringUtils.hasText(name) ? alias.toES(name) : Integer.toString(i));
          generator.writeFieldName(name);
          write(tuples.get(i), nestedFields[i], generator, false);
        }
        generator.writeEndObject();
        break;

      case DataType.BAG:
        nestedSchema = field.getSchema();

        // no nested schema - write an empty array
        if (nestedSchema == null) {
          generator.writeBeginArray();
          generator.writeEndArray();
          break;
        }

        // every tuple in the bag is written using the first nested field schema
        ResourceFieldSchema bagType = nestedSchema.getFields()[0];

        generator.writeBeginArray();
        for (Tuple tuple : (DataBag) object) {
          write(tuple, bagType, generator, false);
        }
        generator.writeEndArray();
        break;
      default:
        // unrecognized type - delegate only when configured to, otherwise report failure
        if (writeUnknownTypes) {
          return handleUnknown(object, field, generator);
        }
        return false;
    }
    return true;
  }
 /**
  * Resolves the target hosts, in order of precedence: the {@code targetHosts} field, the
  * internal hosts property, then {@link #getNodes()}.
  *
  * @return the first candidate holding text, or the {@code getNodes()} value as a last resort
  */
 public String getTargetHosts() {
   String internal = getProperty(INTERNAL_ES_HOSTS);
   if (StringUtils.hasText(targetHosts)) {
     return targetHosts;
   }
   if (StringUtils.hasText(internal)) {
     return internal;
   }
   return getNodes();
 }
 /**
  * Stores {@code qualifyNode(node, port)} under the internal "pinned node" property. Does
  * nothing when the node holds no text or the port is not positive.
  *
  * @param settings settings to update
  * @param node node to pin
  * @param port port of the node
  */
 public static void pinNode(Settings settings, String node, int port) {
   if (!StringUtils.hasText(node) || port <= 0) {
     return;
   }
   settings.setProperty(
       InternalConfigurationOptions.INTERNAL_ES_PINNED_NODE, qualifyNode(node, port));
 }
 /**
  * Tells whether the internal "pinned node" property holds text.
  *
  * @param settings settings to inspect
  * @return {@code true} if a pinned node is set
  */
 public static boolean hasPinnedNode(Settings settings) {
   String pinned = settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_PINNED_NODE);
   return StringUtils.hasText(pinned);
 }
 /**
  * Converts a text value.
  *
  * @param value text to convert; may be {@code null}
  * @return {@code nullValue()} for a {@code null} value, or for a value without text when
  *     {@code emptyAsNull} is set; otherwise the result of {@code parseString(value)}
  */
 protected Object textValue(String value) {
   if (value == null) {
     return nullValue();
   }
   if (!StringUtils.hasText(value) && emptyAsNull) {
     return nullValue();
   }
   return parseString(value);
 }
 /**
  * Returns the {@code host} field when it holds text, otherwise the {@code ES_HOST} property
  * (defaulting to {@code ES_HOST_DEFAULT}).
  *
  * @return the host
  */
 public String getHost() {
   if (StringUtils.hasText(host)) {
     return host;
   }
   return getProperty(ES_HOST, ES_HOST_DEFAULT);
 }
 /**
  * Stores the given nodes, concatenated with {@code StringUtils.DEFAULT_DELIMITER}, under the
  * internal "discovered nodes" property.
  *
  * @param settings settings to update
  * @param nodes nodes to store
  */
 public static void setDiscoveredNodes(Settings settings, Collection<String> nodes) {
   settings.setProperty(
       InternalConfigurationOptions.INTERNAL_ES_DISCOVERED_NODES,
       StringUtils.concatenate(nodes, StringUtils.DEFAULT_DELIMITER));
 }
 /**
  * Returns the internal target fields property when it holds text, otherwise the
  * {@code ES_SCROLL_FIELDS} property.
  *
  * @return the scroll fields
  */
 public String getScrollFields() {
   String internal = getProperty(INTERNAL_ES_TARGET_FIELDS);
   if (StringUtils.hasText(internal)) {
     return internal;
   }
   return getProperty(ES_SCROLL_FIELDS);
 }
 /**
  * Renders the {@code size} bytes starting at {@code offset} through
  * {@code StringUtils.asUTFString}.
  */
 @Override
 public String toString() {
   String text = StringUtils.asUTFString(bytes, offset, size);
   return text;
 }