/** * Returns a list of metric names (including the passed in name) that are a submetric of the * specified metric. The metrics are topologically sorted by dependency, so the parent metric will * appear last. * * @param parentName * @return * @throws ConfigurationException */ public List<String> getSubmetrics(String parentName) throws ConfigurationException { String type = getMetricType(parentName); Config config = getMetricConfig(parentName); List<String> toAdd = new ArrayList<String>(); if (type.equals("ensemble") || type.equals("simple-ensemble")) { for (String child : config.getStringList("metrics")) { toAdd.addAll(getSubmetrics(child)); toAdd.add(child); } } else if (type.equals("sparsevector.mostsimilarconcepts")) { toAdd.addAll(getSubmetrics(config.getString("generator.basemetric"))); } else if (type.equals("milnewitten")) { toAdd.add(config.getString("inlink")); toAdd.add(config.getString("outlink")); } else if (config.hasPath("reliesOn")) { toAdd.addAll(config.getStringList("reliesOn")); } toAdd.add(parentName); List<String> results = new ArrayList<String>(); // Make sure things only appear once. We save the FIRST time they appear to preserve // dependencies. for (String name : toAdd) { if (!results.contains(name)) { results.add(name); } } return results; }
/**
 * Reads this object's settings from {@code config}, using {@code key} as the path prefix.
 *
 * <p>{@code field1}, {@code field2}, {@code key2}, {@code constant} and {@code output} are
 * looked up unconditionally; {@code keys} is only read when that path is present.
 *
 * @param config configuration to read from
 * @param key path prefix under which the settings live
 */
@Override
public void configure(Config config, String key) {
  final String keysPath = key + ".keys";

  fieldName1 = config.getString(key + ".field1");
  fieldName2 = config.getString(key + ".field2");

  // The key list is optional; the current value is left untouched when it is not configured.
  if (config.hasPath(keysPath)) {
    keys = config.getStringList(keysPath);
  }

  key2 = config.getString(key + ".key2");
  constant = config.getDouble(key + ".constant");
  outputName = config.getString(key + ".output");
}
/**
 * Rebuilds the transform list from the keys listed under {@code key + ".transforms"}.
 *
 * <p>Each listed key is handed to {@link TransformFactory#createTransform}; keys for which the
 * factory returns {@code null} are skipped.
 *
 * @param config configuration to read from
 * @param key path prefix under which the {@code transforms} list lives
 */
@Override
public void configure(Config config, String key) {
  transforms = new ArrayList<>();
  for (String name : config.getStringList(key + ".transforms")) {
    final Transform candidate = TransformFactory.createTransform(config, name);
    if (candidate == null) {
      continue;
    }
    transforms.add(candidate);
  }
}
/**
 * Returns the configured executor names, converted to upper case.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the JVM's
 * default locale (for example, a Turkish default locale would otherwise map {@code i} to a
 * dotted capital I).
 *
 * @return the values of the {@code executors} setting, upper-cased
 * @throws IllegalArgumentException if the {@code executors} setting is missing
 */
public List<String> getExecutors() {
  if (!vampires.hasPath("executors")) {
    LOG.error("missing executors config value");
    throw new IllegalArgumentException("missing executors config value");
  }
  return vampires.getStringList("executors").stream()
      .map(name -> name.toUpperCase(java.util.Locale.ROOT))
      .collect(Collectors.toList());
}
/** * 初始化配置 * * @throws TaskContainerConfigException */ public TaskContainerConf() { Config config = ConfigFactory.load("taskContainer.conf"); config.getString("taskContainer.version"); if (StringUtils.isNotEmpty(config.getString("taskContainer.router"))) { router = config.getInt("taskContainer.router"); } if (StringUtils.isNotEmpty(config.getString("taskContainer.worker"))) { worker = config.getInt("taskContainer.worker"); } if (StringUtils.isNotEmpty(config.getString("taskContainer.alertPhone"))) { alertPhone = config.getString("taskContainer.alertPhone"); } if (StringUtils.isNotEmpty(config.getString("taskContainer.projectName"))) { projectName = config.getString("taskContainer.projectName"); } if (StringUtils.isNotEmpty(config.getString("taskContainer.processTaskClass"))) { processTaskClass = config.getString("taskContainer.processTaskClass"); } else { LOAD_STATE = FAIL_LOAD; throw new TaskContainerConfigException("processTaskClass is empty"); } try { aClass = Class.forName(processTaskClass); LOAD_STATE = FAIL_LOAD; processQueueInstance = aClass.newInstance(); } catch (ClassNotFoundException e) { LOAD_STATE = FAIL_LOAD; throw new TaskContainerConfigException(e); } catch (InstantiationException e) { LOAD_STATE = FAIL_LOAD; throw new TaskContainerConfigException(e); } catch (IllegalAccessException e) { LOAD_STATE = FAIL_LOAD; throw new TaskContainerConfigException(e); } maxParallel = router * worker; queues = config.getStringList("taskContainer.taskList"); logger.info("------------task container suc load conf---------------"); logger.info("project.name:{}", projectName); logger.info("router:{}", router); logger.info("worker:{}", worker); logger.info("max.parallel:{}", maxParallel); logger.info("task.list:{}", queues); logger.info("process.task.class:{}", processTaskClass); logger.info("-------------------------------------------------------"); LOAD_STATE = SUC_LOAD; }
/**
 * Builds the result writers selected by the {@code enabled-writers} setting.
 *
 * <p>Recognised entries are {@code json} and {@code mongo}. When none of them is enabled, a
 * single JSON writer is used as the default.
 *
 * @return a non-empty list of writers
 */
public List<ResultsWriter> getWriters() {
  final List<ResultsWriter> selected = new LinkedList<>();
  final List<String> enabled = vampires.getStringList("enabled-writers");

  final boolean wantsJson = enabled.contains("json");
  if (wantsJson) {
    selected.add(new JsonResultsWriter(vampires));
  }

  final boolean wantsMongo = enabled.contains("mongo");
  if (wantsMongo) {
    selected.add(new MongoWriter());
  }

  if (!selected.isEmpty()) {
    return selected;
  }

  LOG.info("no writers configured. using default writer: json");
  selected.add(new JsonResultsWriter(vampires));
  return selected;
}
/**
 * Starts the host-routing HTTP server.
 *
 * <p>The bind address comes from {@code http.route.bind.host} / {@code http.route.bind.port};
 * one route is registered for every entry of {@code http.route.map} before the server starts.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if configuration, routing or server start-up fails
 */
public static void main(String[] args) throws Exception {
  final Queue sharedQueue = new Queue();

  final HttpServerConfigurator serverSetup = new HttpServerConfigurator(sharedQueue);
  final Address bindAddress =
      new Address(
          CONFIG.getString("http.route.bind.host"), CONFIG.getInt("http.route.bind.port"));

  final HostRouteHttpServer server =
      new HostRouteHttpServer(
          serverSetup.withAddress(bindAddress),
          new HttpClientConfigurator(sharedQueue).withTrust(new Trust()));

  // Each mapping entry routes a set of host names to one target address and path.
  for (Config route : CONFIG.getConfigList("http.route.map")) {
    server.route(
        route.getStringList("hosts"),
        new Address(route.getString("to.host"), route.getInt("to.port")),
        route.getString("to.path"));
  }

  server.start();
}
private void initWord2Vec(String name) throws ConfigurationException, IOException, DaoException { Config config = getMetricConfig(name).getConfig("generator"); File model = Word2VecGenerator.getModelFile(config.getString("modelDir"), language); if (skipBuiltMetrics && model.isFile()) { return; } if (config.hasPath("prebuilt") && config.getBoolean("prebuilt")) { if (model.isFile()) { return; } File downloadPath = new File(config.getString("binfile")); if (!downloadPath.isFile()) { throw new ConfigurationException( "word2vec model " + downloadPath.getAbsolutePath() + " cannot be found." + " You must download it from " + config.getString("url") + " into to the wikibrain download directory."); } if (!config.getStringList("languages").contains(language.getLangCode())) { throw new ConfigurationException( "word2vec model " + downloadPath + " does not support language" + language); } if (downloadPath.toString().toLowerCase().endsWith("gz")) { LOG.info("decompressing " + downloadPath + " to " + model); File tmp = File.createTempFile("word2vec", "bin"); try { FileUtils.deleteQuietly(tmp); GZIPInputStream gz = new GZIPInputStream(new FileInputStream(downloadPath)); FileUtils.copyInputStreamToFile(gz, tmp); gz.close(); model.getParentFile().mkdirs(); FileUtils.moveFile(tmp, model); } finally { FileUtils.deleteQuietly(tmp); } } else { FileUtils.copyFile(downloadPath, model); } return; } LinkProbabilityDao lpd = env.getConfigurator().get(LinkProbabilityDao.class); lpd.useCache(true); if (!lpd.isBuilt()) { lpd.build(); } String corpusName = config.getString("corpus"); Corpus corpus = null; if (!corpusName.equals("NONE")) { corpus = env.getConfigurator() .get(Corpus.class, config.getString("corpus"), "language", language.getLangCode()); if (!corpus.exists()) { corpus.create(); } } if (model.isFile() && (corpus == null || model.lastModified() > corpus.getCorpusFile().lastModified())) { return; } if (corpus == null) { throw new ConfigurationException( "word2vec metric " + name + " 
cannot build or find model!" + "configuration has no corpus, but model not found at " + model + "."); } Word2VecTrainer trainer = new Word2VecTrainer(env.getConfigurator().get(LocalPageDao.class), language); if (config.hasPath("dimensions")) { LOG.info("set number of dimensions to " + config.getInt("dimensions")); trainer.setLayer1Size(config.getInt("dimensions")); } if (config.hasPath("maxWords")) { LOG.info("set maxWords to " + config.getInt("maxWords")); trainer.setMaxWords(config.getInt("maxWords")); } if (config.hasPath("window")) { LOG.info("set window to " + config.getInt("maxWords")); trainer.setWindow(config.getInt("window")); } trainer.setKeepAllArticles(true); trainer.train(corpus.getDirectory()); trainer.save(model); }
/**
 * Builds an {@link InboundSettings} from the {@code inbound} section of the given configuration.
 *
 * <p>Column names come from {@code column-names}, or are generated as {@code "0"}, {@code "1"},
 * ... from {@code num-columns}. Exactly one of {@code categorical-columns} and
 * {@code numeric-columns} must be set; the other group is every remaining column. ID and ignored
 * columns are removed from both groups.
 *
 * <p>NOTE(review): column references are resolved through {@code LookupFunction}; presumably it
 * accepts either a column name or an index - confirm.
 *
 * @param config configuration containing an {@code inbound} section
 * @return the settings described by that section
 * @throws IllegalArgumentException if neither {@code categorical-columns} nor
 *     {@code numeric-columns} is set
 * @throws IllegalStateException if both are set, or if the target column is in neither group
 */
public static InboundSettings create(Config config) {
  final Config section = config.getConfig("inbound");

  final List<String> columnNames;
  if (section.hasPath("column-names")) {
    columnNames = section.getStringList("column-names");
  } else {
    // No explicit names: use the column positions as names.
    final int count = section.getInt("num-columns");
    columnNames = new ArrayList<>(count);
    for (int position = 0; position < count; position++) {
      columnNames.add(String.valueOf(position));
    }
  }

  final Function<Object, Integer> toIndex = new LookupFunction(columnNames);
  final Collection<Integer> everyColumn = Collections2.transform(columnNames, toIndex);

  final Collection<Integer> idColumns =
      section.hasPath("id-columns")
          ? ImmutableSet.copyOf(
              Collections2.transform(section.getAnyRefList("id-columns"), toIndex))
          : ImmutableSet.<Integer>of();

  final Collection<Integer> ignoredColumns =
      section.hasPath("ignored-columns")
          ? ImmutableSet.copyOf(
              Collections2.transform(section.getAnyRefList("ignored-columns"), toIndex))
          : ImmutableSet.<Integer>of();

  final boolean hasCategorical = section.hasPath("categorical-columns");
  final boolean hasNumeric = section.hasPath("numeric-columns");
  if (!hasCategorical && !hasNumeric) {
    throw new IllegalArgumentException("No categorical-columns or numeric-columns set");
  }

  final Collection<Integer> categoricalColumns;
  final Collection<Integer> numericColumns;
  if (hasCategorical) {
    // Only one of the two groups may be listed explicitly; the other is derived from it.
    Preconditions.checkState(!hasNumeric);
    categoricalColumns =
        new HashSet<>(
            Collections2.transform(section.getAnyRefList("categorical-columns"), toIndex));
    numericColumns = new HashSet<>(everyColumn);
    numericColumns.removeAll(categoricalColumns);
  } else {
    numericColumns =
        new HashSet<>(Collections2.transform(section.getAnyRefList("numeric-columns"), toIndex));
    categoricalColumns = new HashSet<>(everyColumn);
    categoricalColumns.removeAll(numericColumns);
  }

  // ID and ignored columns belong to neither group.
  numericColumns.removeAll(idColumns);
  numericColumns.removeAll(ignoredColumns);
  categoricalColumns.removeAll(idColumns);
  categoricalColumns.removeAll(ignoredColumns);

  Integer targetColumn = null;
  if (section.hasPath("target-column")) {
    targetColumn = toIndex.apply(section.getAnyRef("target-column"));
    final boolean classified =
        categoricalColumns.contains(targetColumn) || numericColumns.contains(targetColumn);
    Preconditions.checkState(
        classified, "Target column not specified as numeric or categorical");
  }

  return new InboundSettings(
      columnNames, idColumns, categoricalColumns, numericColumns, ignoredColumns, targetColumn);
}
// @Ignore @Before public void setUp() { conf = ConfigFactory.load(); List<String> zkHosts = conf.getStringList("zookeeper.hosts"); for (String host : zkHosts) { ZOOKEEPER_HOSTS += host + ","; } ZOOKEEPER_HOSTS = ZOOKEEPER_HOSTS.substring(0, ZOOKEEPER_HOSTS.length() - 1); List<String> kafkaHosts = conf.getStringList("kafka.hosts"); for (String host : kafkaHosts) { KAFKA_HOSTS += host + ","; } KAFKA_HOSTS = KAFKA_HOSTS.substring(0, KAFKA_HOSTS.length() - 1); LOGGER.debug("Using Zookeeper hosts: " + ZOOKEEPER_HOSTS); LOGGER.debug("Using Zookeeper hosts: " + KAFKA_HOSTS); // try { // zookeeperServer = new ZookeeperServer(); // zookeeperServer.start(); // } catch (Exception e) { // e.printStackTrace(); // } // try { // kafkaServer = new KafkaServer(); // kafkaServer.start(); // } catch (Exception e) { // e.printStackTrace(); // } String[] connection = KAFKA_HOSTS.split(":"); // simpleConsumer = new SimpleConsumer("localhost", 9092, 60000, 1024, CLIENT_ID); simpleConsumer = new SimpleConsumer(connection[0], Integer.parseInt(connection[1]), 60000, 1024, CLIENT_ID); kafkaSink = new KafkaSink(); Context kafkaContext = new Context(); kafkaContext.put("topic", "test"); kafkaContext.put("writeBody", "false"); kafkaContext.put("kafka.metadata.broker.list", KAFKA_HOSTS); kafkaContext.put("kafka.serializer.class", "kafka.serializer.StringEncoder"); Configurables.configure(kafkaSink, kafkaContext); Context channelContext = new Context(); channelContext.put("capacity", "10000"); channelContext.put("transactionCapacity", "200"); channel = new MemoryChannel(); channel.setName("junitChannel"); Configurables.configure(channel, channelContext); kafkaSink.setChannel(channel); channel.start(); kafkaSink.start(); }