Exemple #1
0
  /**
   * Collects the names of the metrics that the given metric is built from, followed by the given
   * metric itself. Every name appears exactly once, at the position of its first occurrence, so
   * {@code parentName} is always the last element.
   *
   * <p>Ensemble children and the base metric of a "sparsevector.mostsimilarconcepts" metric are
   * expanded recursively; the "inlink"/"outlink" entries of a "milnewitten" metric and any
   * "reliesOn" entries are added as named, without further expansion.
   *
   * @param parentName name of the metric whose submetrics are requested
   * @return the submetric names followed by {@code parentName}, without duplicates
   * @throws ConfigurationException propagated from the metric type and configuration lookups
   */
  public List<String> getSubmetrics(String parentName) throws ConfigurationException {
    String metricType = getMetricType(parentName);
    Config metricConfig = getMetricConfig(parentName);

    // Raw dependency list; may contain repeats, which are removed below.
    List<String> pending = new ArrayList<String>();
    boolean isEnsemble = metricType.equals("ensemble") || metricType.equals("simple-ensemble");
    if (isEnsemble) {
      for (String member : metricConfig.getStringList("metrics")) {
        pending.addAll(getSubmetrics(member));
        pending.add(member);
      }
    } else if (metricType.equals("sparsevector.mostsimilarconcepts")) {
      String baseMetric = metricConfig.getString("generator.basemetric");
      pending.addAll(getSubmetrics(baseMetric));
    } else if (metricType.equals("milnewitten")) {
      pending.add(metricConfig.getString("inlink"));
      pending.add(metricConfig.getString("outlink"));
    } else if (metricConfig.hasPath("reliesOn")) {
      pending.addAll(metricConfig.getStringList("reliesOn"));
    }
    pending.add(parentName);

    // Keep only the first occurrence of each name so that dependencies stay ahead of the
    // metrics that need them.
    List<String> ordered = new ArrayList<String>();
    for (String candidate : pending) {
      if (ordered.contains(candidate)) {
        continue;
      }
      ordered.add(candidate);
    }
    return ordered;
  }
 /**
  * Loads this component's settings from the entries nested under {@code key}.
  *
  * <p>{@code field1}, {@code field2}, {@code key2}, {@code constant} and {@code output} are read
  * unconditionally; {@code keys} is only read when that path is present, otherwise the current
  * value of {@code keys} is left untouched.
  *
  * @param config configuration to read from
  * @param key path of the section holding this component's settings
  */
 @Override
 public void configure(Config config, String key) {
   final String prefix = key + ".";

   fieldName1 = config.getString(prefix + "field1");
   fieldName2 = config.getString(prefix + "field2");

   final String keysPath = prefix + "keys";
   if (config.hasPath(keysPath)) {
     keys = config.getStringList(keysPath);
   }

   key2 = config.getString(prefix + "key2");
   constant = config.getDouble(prefix + "constant");
   outputName = config.getString(prefix + "output");
 }
 /**
  * Rebuilds the list of transforms from the keys listed under {@code key + ".transforms"}.
  *
  * <p>Each listed key is handed to {@code TransformFactory.createTransform}; keys for which the
  * factory returns {@code null} are skipped.
  *
  * @param config configuration to read from
  * @param key path of the section holding the {@code transforms} list
  */
 @Override
 public void configure(Config config, String key) {
   transforms = new ArrayList<>();
   for (String transformKey : config.getStringList(key + ".transforms")) {
     Transform created = TransformFactory.createTransform(config, transformKey);
     if (created == null) {
       continue;
     }
     transforms.add(created);
   }
 }
 /**
  * Returns the configured executor names, converted to upper case.
  *
  * <p>The conversion uses {@code Locale.ROOT} so the result does not depend on the JVM's default
  * locale (for example, "i" must not become a dotted capital I under a Turkish locale).
  *
  * @return the entries of the {@code executors} list, upper-cased
  * @throws IllegalArgumentException if the {@code executors} path is not configured
  */
 public List<String> getExecutors() {
   if (!vampires.hasPath("executors")) {
     LOG.error("missing executors config value");
     throw new IllegalArgumentException("missing executors config value");
   }
   return vampires
       .getStringList("executors")
       .stream()
       .map(executor -> executor.toUpperCase(java.util.Locale.ROOT))
       .collect(Collectors.toList());
 }
  /**
   * 初始化配置
   *
   * @throws TaskContainerConfigException
   */
  public TaskContainerConf() {
    Config config = ConfigFactory.load("taskContainer.conf");
    config.getString("taskContainer.version");
    if (StringUtils.isNotEmpty(config.getString("taskContainer.router"))) {
      router = config.getInt("taskContainer.router");
    }
    if (StringUtils.isNotEmpty(config.getString("taskContainer.worker"))) {
      worker = config.getInt("taskContainer.worker");
    }
    if (StringUtils.isNotEmpty(config.getString("taskContainer.alertPhone"))) {
      alertPhone = config.getString("taskContainer.alertPhone");
    }

    if (StringUtils.isNotEmpty(config.getString("taskContainer.projectName"))) {
      projectName = config.getString("taskContainer.projectName");
    }
    if (StringUtils.isNotEmpty(config.getString("taskContainer.processTaskClass"))) {
      processTaskClass = config.getString("taskContainer.processTaskClass");
    } else {
      LOAD_STATE = FAIL_LOAD;
      throw new TaskContainerConfigException("processTaskClass is empty");
    }
    try {
      aClass = Class.forName(processTaskClass);
      LOAD_STATE = FAIL_LOAD;
      processQueueInstance = aClass.newInstance();
    } catch (ClassNotFoundException e) {
      LOAD_STATE = FAIL_LOAD;
      throw new TaskContainerConfigException(e);
    } catch (InstantiationException e) {
      LOAD_STATE = FAIL_LOAD;
      throw new TaskContainerConfigException(e);
    } catch (IllegalAccessException e) {
      LOAD_STATE = FAIL_LOAD;
      throw new TaskContainerConfigException(e);
    }
    maxParallel = router * worker;
    queues = config.getStringList("taskContainer.taskList");
    logger.info("------------task container suc load conf---------------");
    logger.info("project.name:{}", projectName);
    logger.info("router:{}", router);
    logger.info("worker:{}", worker);
    logger.info("max.parallel:{}", maxParallel);
    logger.info("task.list:{}", queues);
    logger.info("process.task.class:{}", processTaskClass);
    logger.info("-------------------------------------------------------");
    LOAD_STATE = SUC_LOAD;
  }
  /**
   * Creates the result writers named in the {@code enabled-writers} configuration list.
   *
   * <p>"json" yields a {@code JsonResultsWriter} and "mongo" a {@code MongoWriter}, in that order.
   * If neither is enabled, a single {@code JsonResultsWriter} is used as the default.
   *
   * @return a non-empty list of writers
   */
  public List<ResultsWriter> getWriters() {
    List<String> enabled = vampires.getStringList("enabled-writers");
    List<ResultsWriter> selected = new LinkedList<>();

    if (enabled.contains("json")) {
      selected.add(new JsonResultsWriter(vampires));
    }
    if (enabled.contains("mongo")) {
      selected.add(new MongoWriter());
    }

    if (!selected.isEmpty()) {
      return selected;
    }

    LOG.info("no writers configured. using default writer: json");
    selected.add(new JsonResultsWriter(vampires));
    return selected;
  }
 /**
  * Starts a {@code HostRouteHttpServer} bound to the address configured under
  * {@code http.route.bind} and registers one route per entry of {@code http.route.map}.
  *
  * @param args command-line arguments (not used)
  * @throws Exception if building, routing or starting the server fails
  */
 public static void main(String[] args) throws Exception {
   Queue queue = new Queue();

   // The server-side configurator is created before the bind address is read, as before.
   HttpServerConfigurator serverSide = new HttpServerConfigurator(queue);
   String bindHost = CONFIG.getString("http.route.bind.host");
   int bindPort = CONFIG.getInt("http.route.bind.port");

   HostRouteHttpServer server =
       new HostRouteHttpServer(
           serverSide.withAddress(new Address(bindHost, bindPort)),
           new HttpClientConfigurator(queue).withTrust(new Trust()));

   // Each map entry forwards its "hosts" to the "to.host"/"to.port" address and "to.path".
   for (Config route : CONFIG.getConfigList("http.route.map")) {
     server.route(
         route.getStringList("hosts"),
         new Address(route.getString("to.host"), route.getInt("to.port")),
         route.getString("to.path"));
   }

   server.start();
 }
Exemple #8
0
  /**
   * Makes sure the word2vec model file for the named metric exists, either by installing a
   * prebuilt model or by training one from the configured corpus.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>If {@code skipBuiltMetrics} is set and the model file exists, nothing is done.
   *   <li>If the generator is marked {@code prebuilt}, the file named by {@code binfile} is copied
   *       (or gunzipped, when its name ends in "gz") to the model location.
   *   <li>Otherwise the link probability DAO and the corpus are built if necessary, and a model is
   *       trained unless an existing model file is newer than the corpus file.
   * </ol>
   *
   * @param name name of the word2vec metric
   * @throws ConfigurationException if the prebuilt file is missing or does not support the current
   *     language, or if there is neither a model nor a corpus to build one from
   * @throws IOException if copying or decompressing the model fails
   * @throws DaoException propagated from the DAO and corpus operations
   */
  private void initWord2Vec(String name) throws ConfigurationException, IOException, DaoException {
    Config config = getMetricConfig(name).getConfig("generator");
    File model = Word2VecGenerator.getModelFile(config.getString("modelDir"), language);
    if (skipBuiltMetrics && model.isFile()) {
      return;
    }

    if (config.hasPath("prebuilt") && config.getBoolean("prebuilt")) {
      if (model.isFile()) {
        return;
      }
      File downloadPath = new File(config.getString("binfile"));
      if (!downloadPath.isFile()) {
        throw new ConfigurationException(
            "word2vec model "
                + downloadPath.getAbsolutePath()
                + " cannot be found."
                + " You must download it from "
                + config.getString("url")
                + " into the wikibrain download directory.");
      }
      if (!config.getStringList("languages").contains(language.getLangCode())) {
        throw new ConfigurationException(
            "word2vec model " + downloadPath + " does not support language " + language);
      }
      if (downloadPath.toString().toLowerCase().endsWith("gz")) {
        LOG.info("decompressing " + downloadPath + " to " + model);
        File tmp = File.createTempFile("word2vec", "bin");
        try {
          FileUtils.deleteQuietly(tmp);
          // Both streams are closed even when opening the gzip wrapper or copying fails.
          try (FileInputStream raw = new FileInputStream(downloadPath);
              GZIPInputStream gz = new GZIPInputStream(raw)) {
            FileUtils.copyInputStreamToFile(gz, tmp);
          }
          model.getParentFile().mkdirs();
          FileUtils.moveFile(tmp, model);
        } finally {
          // No-op after a successful move; removes the partial file after a failure.
          FileUtils.deleteQuietly(tmp);
        }
      } else {
        FileUtils.copyFile(downloadPath, model);
      }
      return;
    }

    LinkProbabilityDao lpd = env.getConfigurator().get(LinkProbabilityDao.class);
    lpd.useCache(true);
    if (!lpd.isBuilt()) {
      lpd.build();
    }

    String corpusName = config.getString("corpus");
    Corpus corpus = null;
    if (!corpusName.equals("NONE")) {
      corpus =
          env.getConfigurator().get(Corpus.class, corpusName, "language", language.getLangCode());
      if (!corpus.exists()) {
        corpus.create();
      }
    }

    // An existing model is reused when there is no corpus or the model is newer than the corpus.
    if (model.isFile()
        && (corpus == null || model.lastModified() > corpus.getCorpusFile().lastModified())) {
      return;
    }
    if (corpus == null) {
      throw new ConfigurationException(
          "word2vec metric "
              + name
              + " cannot build or find model! "
              + "configuration has no corpus, but model not found at "
              + model
              + ".");
    }
    Word2VecTrainer trainer =
        new Word2VecTrainer(env.getConfigurator().get(LocalPageDao.class), language);
    if (config.hasPath("dimensions")) {
      int dimensions = config.getInt("dimensions");
      LOG.info("set number of dimensions to " + dimensions);
      trainer.setLayer1Size(dimensions);
    }
    if (config.hasPath("maxWords")) {
      int maxWords = config.getInt("maxWords");
      LOG.info("set maxWords to " + maxWords);
      trainer.setMaxWords(maxWords);
    }
    if (config.hasPath("window")) {
      // Log the window value itself; reading "maxWords" here logged the wrong number and threw
      // when only "window" was configured.
      int window = config.getInt("window");
      LOG.info("set window to " + window);
      trainer.setWindow(window);
    }
    trainer.setKeepAllArticles(true);
    trainer.train(corpus.getDirectory());
    trainer.save(model);
  }
Exemple #9
0
  /**
   * Builds an {@code InboundSettings} from the {@code inbound} section of the given configuration.
   *
   * <p>Column names come from {@code column-names} or, when that is absent, are the decimal
   * strings "0" up to {@code num-columns - 1}. Exactly one of {@code categorical-columns} and
   * {@code numeric-columns} must be given; the other group is every remaining column. Columns
   * listed in {@code id-columns} or {@code ignored-columns} are removed from both groups.
   *
   * @param config configuration containing an {@code inbound} section
   * @return the settings described by that section
   * @throws IllegalArgumentException if neither categorical nor numeric columns are configured
   * @throws IllegalStateException if both are configured, or if a configured target column is in
   *     neither group
   */
  public static InboundSettings create(Config config) {
    Config inbound = config.getConfig("inbound");

    List<String> columnNames;
    if (!inbound.hasPath("column-names")) {
      int numColumns = inbound.getInt("num-columns");
      columnNames = new ArrayList<>(numColumns);
      for (int index = 0; index < numColumns; index++) {
        columnNames.add(Integer.toString(index));
      }
    } else {
      columnNames = inbound.getStringList("column-names");
    }

    // NOTE(review): LookupFunction presumably maps a configured column reference to its index
    // in columnNames - confirm against its definition.
    Function<Object, Integer> lookup = new LookupFunction(columnNames);
    Collection<Integer> allColumns = Collections2.transform(columnNames, lookup);

    Collection<Integer> idColumns = ImmutableSet.of();
    if (inbound.hasPath("id-columns")) {
      idColumns =
          ImmutableSet.copyOf(Collections2.transform(inbound.getAnyRefList("id-columns"), lookup));
    }

    Collection<Integer> ignoredColumns = ImmutableSet.of();
    if (inbound.hasPath("ignored-columns")) {
      ignoredColumns =
          ImmutableSet.copyOf(
              Collections2.transform(inbound.getAnyRefList("ignored-columns"), lookup));
    }

    boolean hasCategorical = inbound.hasPath("categorical-columns");
    boolean hasNumeric = inbound.hasPath("numeric-columns");

    Collection<Integer> categoricalColumns;
    Collection<Integer> numericColumns;
    if (hasCategorical) {
      Preconditions.checkState(!hasNumeric);
      categoricalColumns =
          new HashSet<>(
              Collections2.transform(inbound.getAnyRefList("categorical-columns"), lookup));
      // Everything not declared categorical is treated as numeric.
      numericColumns = new HashSet<>(allColumns);
      numericColumns.removeAll(categoricalColumns);
    } else if (hasNumeric) {
      Preconditions.checkState(!hasCategorical);
      numericColumns =
          new HashSet<>(Collections2.transform(inbound.getAnyRefList("numeric-columns"), lookup));
      // Everything not declared numeric is treated as categorical.
      categoricalColumns = new HashSet<>(allColumns);
      categoricalColumns.removeAll(numericColumns);
    } else {
      throw new IllegalArgumentException("No categorical-columns or numeric-columns set");
    }

    // ID and ignored columns belong to neither group.
    numericColumns.removeAll(idColumns);
    categoricalColumns.removeAll(idColumns);
    numericColumns.removeAll(ignoredColumns);
    categoricalColumns.removeAll(ignoredColumns);

    Integer targetColumn = null;
    if (inbound.hasPath("target-column")) {
      targetColumn = lookup.apply(inbound.getAnyRef("target-column"));
      boolean targetHasType =
          categoricalColumns.contains(targetColumn) || numericColumns.contains(targetColumn);
      Preconditions.checkState(
          targetHasType, "Target column not specified as numeric or categorical");
    }

    return new InboundSettings(
        columnNames, idColumns, categoricalColumns, numericColumns, ignoredColumns, targetColumn);
  }
Exemple #10
0
  //    @Ignore
  /**
   * Prepares the Kafka sink under test: reads the Zookeeper and Kafka host lists from the default
   * configuration, opens a {@code SimpleConsumer} against the first Kafka broker, and starts a
   * memory channel wired to a configured {@code KafkaSink}.
   *
   * @throws IllegalStateException if {@code zookeeper.hosts} or {@code kafka.hosts} is empty
   */
  @Before
  public void setUp() {

    conf = ConfigFactory.load();

    List<String> zkHosts = conf.getStringList("zookeeper.hosts");
    List<String> kafkaHosts = conf.getStringList("kafka.hosts");
    if (zkHosts.isEmpty() || kafkaHosts.isEmpty()) {
      throw new IllegalStateException(
          "zookeeper.hosts and kafka.hosts must each contain at least one host");
    }

    // Assign rather than append, so the host strings do not grow if the fields outlive a
    // single test.
    ZOOKEEPER_HOSTS = joinWithCommas(zkHosts);
    KAFKA_HOSTS = joinWithCommas(kafkaHosts);

    LOGGER.debug("Using Zookeeper hosts: " + ZOOKEEPER_HOSTS);
    LOGGER.debug("Using Kafka hosts: " + KAFKA_HOSTS);
    //        try {
    //            zookeeperServer = new ZookeeperServer();
    //            zookeeperServer.start();
    //        } catch (Exception e) {
    //            e.printStackTrace();
    //        }
    //        try {
    //            kafkaServer = new KafkaServer();
    //            kafkaServer.start();
    //        } catch (Exception e) {
    //            e.printStackTrace();
    //        }

    // The consumer connects to the first broker only. Splitting the comma-joined list on ":"
    // would fold the following hosts into the port token.
    String[] connection = kafkaHosts.get(0).split(":");

    //        simpleConsumer = new SimpleConsumer("localhost", 9092, 60000, 1024, CLIENT_ID);
    simpleConsumer =
        new SimpleConsumer(connection[0], Integer.parseInt(connection[1]), 60000, 1024, CLIENT_ID);

    kafkaSink = new KafkaSink();

    Context kafkaContext = new Context();
    kafkaContext.put("topic", "test");
    kafkaContext.put("writeBody", "false");
    kafkaContext.put("kafka.metadata.broker.list", KAFKA_HOSTS);
    kafkaContext.put("kafka.serializer.class", "kafka.serializer.StringEncoder");

    Configurables.configure(kafkaSink, kafkaContext);

    Context channelContext = new Context();
    channelContext.put("capacity", "10000");
    channelContext.put("transactionCapacity", "200");

    channel = new MemoryChannel();
    channel.setName("junitChannel");
    Configurables.configure(channel, channelContext);

    kafkaSink.setChannel(channel);

    channel.start();
    kafkaSink.start();
  }

  /** Joins the given host entries into one comma-separated string. */
  private static String joinWithCommas(List<String> hosts) {
    StringBuilder joined = new StringBuilder();
    for (String host : hosts) {
      if (joined.length() > 0) {
        joined.append(',');
      }
      joined.append(host);
    }
    return joined.toString();
  }