/**
 * Loads a similarity metric if it isn't already loaded.
 *
 * <p>If the environment already holds a metric called {@code name}, that
 * instance is returned. Otherwise the metric is created according to the
 * {@code "type"} entry of its configuration ({@code category}, {@code text},
 * {@code esa}, {@code links}, {@code pairwise} or {@code ensemble}), named,
 * given a normalizer when one is configured, and added to the environment.
 *
 * @param name key of the metric in the configuration
 * @return the previously loaded or newly created metric
 * @throws ConfigurationException if the metric type is not recognized, or if
 *         raised by one of the configuration helpers called here
 * @throws IOException if raised while creating the metric
 */
public SimilarityMetric loadMetric(String name) throws ConfigurationException, IOException {
    if (env.hasMetric(name)) {
        return env.getMetric(name);
    }
    info("loading metric " + name);

    // Fetch the metric's configuration once; it supplies both the type and
    // the optional normalizer below.
    JSONObject params = configuration.getMetric(name);
    String type = requireString(params, "type");

    SimilarityMetric metric;
    if (type.equals("category")) {
        metric = createCategorySimilarity(name);
    } else if (type.equals("text")) {
        metric = createTextSimilarity(name);
    } else if (type.equals("esa")) {
        metric = createEsaSimilarity(name);
    } else if (type.equals("links")) {
        metric = createLinkSimilarity(name);
    } else if (type.equals("pairwise")) {
        metric = createPairwiseSimilarity(name);
    } else if (type.equals("ensemble")) {
        metric = loadEnsembleMetric(name);
    } else {
        throw new ConfigurationException("Unknown metric type: " + type);
    }
    metric.setName(name);

    if (params.containsKey("normalizer")) {
        Normalizer norm = parseNormalizer(requireString(params, "normalizer"));
        // Reference comparison on purpose: norm.equals(null) would throw a
        // NullPointerException for a null normalizer and, per the equals
        // contract, is false for any non-null one, so it never guarded anything.
        if (norm != null) {
            metric.setNormalizer(norm);
        }
    }

    env.addMetric(name, metric);
    return metric;
}
/**
 * Loads an index if it is not already loaded.
 *
 * <p>A new {@link IndexHelper} is created for the directory called
 * {@code name} inside the {@code "outputDir"} directory of the indexes
 * configuration. The optional {@code "similarity"} and {@code "analyzer"}
 * settings of the index are applied (only {@code "ESA"} is accepted for
 * either), and the helper is added to the environment.
 *
 * @param name the name of the index in the configuration
 * @return the previously loaded or newly created index helper
 * @throws IOException if raised while creating the index helper
 * @throws ConfigurationException if the similarity or analyzer setting is
 *         not recognized, or if raised by a configuration helper
 */
private IndexHelper loadIndex(String name) throws IOException, ConfigurationException {
    if (env.hasIndex(name)) {
        return env.getIndex(name);
    }
    info("loading index " + name);

    JSONObject settings = configuration.getIndex(name);
    File outputDir = requireDirectory(configuration.getIndexes(), "outputDir");
    File indexDir = new File(outputDir, name);
    IndexHelper index = new IndexHelper(indexDir, true);

    if (settings.containsKey("similarity")) {
        String similarityType = requireString(settings, "similarity");
        if (!similarityType.equals("ESA")) {
            throw new ConfigurationException("unknown similarity type: " + similarityType);
        }
        index.getSearcher().setSimilarity(new ESASimilarity.LuceneSimilarity());
    }

    if (settings.containsKey("analyzer")) {
        String analyzerType = requireString(settings, "analyzer");
        if (!analyzerType.equals("ESA")) {
            throw new ConfigurationException("unknown analyzer type: " + analyzerType);
        }
        index.setAnalyzer(new ESAAnalyzer());
    }

    env.addIndex(name, index);
    return index;
}
/**
 * Reads the gold-standard similarities from the file given by the
 * {@code "path"} entry of the gold configuration and stores them in the
 * environment.
 *
 * @return the similarities that were read
 * @throws ConfigurationException if raised while reading the configuration
 * @throws IOException if raised while reading the file
 */
private List<KnownSim> loadGold() throws ConfigurationException, IOException {
    File goldFile = new File(requireString(configuration.getGold(), "path"));
    List<KnownSim> gold = KnownSim.read(goldFile);
    env.setGold(gold);
    return gold;
}
/**
 * Creates the link-based similarity metric configured under {@code name}.
 *
 * <p>Required settings are {@code "field"} and {@code "lucene"} (the name of
 * the index to load). The optional {@code "similarity"} setting selects the
 * similarity function ({@code tfidf}, {@code google}, {@code logodds},
 * {@code jacard} or {@code lucene}); the optional {@code "minDocFreq"}
 * setting is passed to the metric as an integer.
 *
 * @param name key of the metric in the configuration
 * @return the configured metric
 * @throws ConfigurationException if raised by a configuration helper
 * @throws IOException if raised while loading the mapper or index
 * @throws IllegalArgumentException if the similarity setting is not recognized
 */
private SimilarityMetric createLinkSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject params = configuration.getMetric(name);
    String field = requireString(params, "field");
    LinkSimilarity linkMetric = new LinkSimilarity(
            loadMainMapper(),
            loadIndex(requireString(params, "lucene")),
            env.getMainIndex(),
            field);

    if (params.containsKey("similarity")) {
        String sim = requireString(params, "similarity");
        LinkSimilarity.SimFn fn;
        if (sim.equals("tfidf")) {
            fn = LinkSimilarity.SimFn.TFIDF;
        } else if (sim.equals("google")) {
            fn = LinkSimilarity.SimFn.GOOGLE;
        } else if (sim.equals("logodds")) {
            fn = LinkSimilarity.SimFn.LOGODDS;
        } else if (sim.equals("jacard")) {
            fn = LinkSimilarity.SimFn.JACARD;
        } else if (sim.equals("lucene")) {
            fn = LinkSimilarity.SimFn.LUCENE;
        } else {
            throw new IllegalArgumentException("unknown similarity: " + sim);
        }
        linkMetric.setSimilarity(fn);
    }

    if (params.containsKey("minDocFreq")) {
        linkMetric.setMinDocFreq(requireInteger(params, "minDocFreq"));
    }
    return linkMetric;
}
/**
 * Creates a {@link DictionaryMapper} from the mapper configuration stored
 * under {@code name}, using its {@code "dictionary"} directory and the index
 * named by its {@code "indexName"} setting.
 *
 * @param name key of the mapper in the configuration
 * @return the new mapper
 * @throws IOException if raised directly, or wrapping any
 *         {@code DatabaseException} thrown while creating the mapper
 * @throws ConfigurationException if raised by a configuration helper
 */
private ConceptMapper getDictionaryMapper(String name) throws IOException, ConfigurationException {
    try {
        JSONObject mapperConfig = configuration.getMapper(name);
        File dictionaryDir = requireDirectory(mapperConfig, "dictionary");
        IndexHelper index = loadIndex(requireString(mapperConfig, "indexName"));
        return new DictionaryMapper(dictionaryDir, index);
    } catch (DatabaseException e) {
        // Callers only deal with IOException; keep the original as the cause.
        throw new IOException(e);
    }
}
/**
 * Creates the category-based similarity metric configured under {@code name}.
 *
 * <p>The index named by the {@code "lucene"} setting is loaded, a
 * {@link CategoryGraph} is created on it and initialized, and both are
 * handed to the new {@link CategorySimilarity} together with the main mapper.
 *
 * @param name key of the metric in the configuration
 * @return the new metric
 * @throws ConfigurationException if raised by a configuration helper
 * @throws IOException if raised while loading the index or mapper
 */
private SimilarityMetric createCategorySimilarity(String name) throws ConfigurationException, IOException {
    JSONObject params = configuration.getMetric(name);
    IndexHelper index = loadIndex(requireString(params, "lucene"));

    CategoryGraph categories = new CategoryGraph(index);
    categories.init();

    return new CategorySimilarity(loadMainMapper(), categories, index);
}
/**
 * Creates the ESA similarity metric configured under {@code name}.
 *
 * <p>The metric is created from the main mapper and the index named by the
 * {@code "lucene"} setting. When a {@code "textLucene"} setting is present,
 * the index it names is loaded and set as the metric's text helper.
 *
 * @param name key of the metric in the configuration
 * @return the new metric
 * @throws ConfigurationException if raised by a configuration helper
 * @throws IOException if raised while loading the mapper or an index
 */
private SimilarityMetric createEsaSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject params = configuration.getMetric(name);
    ESASimilarity esa = new ESASimilarity(
            loadMainMapper(),
            loadIndex(requireString(params, "lucene")));

    if (!params.containsKey("textLucene")) {
        return esa;
    }
    esa.setTextHelper(loadIndex(requireString(params, "textLucene")));
    return esa;
}
/**
 * Loads indexes and puts them in the environment.
 *
 * <p>Every key of the indexes configuration is treated as an index name,
 * except {@code "inputDir"} and {@code "outputDir"}, which are skipped.
 *
 * @throws ConfigurationException if raised while loading an index
 * @throws IOException if raised while loading an index
 */
public void loadIndexes() throws ConfigurationException, IOException {
    info("loading indexes");
    Collection<String> reservedKeys = Arrays.asList("inputDir", "outputDir");
    Set<String> keys = (Set<String>) configuration.getIndexes().keySet();
    for (String key : keys) {
        if (!reservedKeys.contains(key)) {
            loadIndex(key);
        }
    }
}
/** * Loads metrics and puts them in the environment. * * @return * @throws IOException * @throws ConfigurationException */ public List<SimilarityMetric> loadMetrics() throws IOException, ConfigurationException { info("loading metrics"); Set<String> ensembleKeys = new HashSet<String>(); List<SimilarityMetric> metrics = new ArrayList<SimilarityMetric>(); for (String key : (Set<String>) configuration.getMetrics().keySet()) { String type = requireString(configuration.getMetric(key), "type"); if (type.equals("ensemble")) { ensembleKeys.add(key); } else if (type.equals("pairwise") && !doPairwise) { // do nothing } else { metrics.add(loadMetric(key)); } } if (doEnsembles) { for (String key : ensembleKeys) { metrics.add(loadMetric(key)); } } return metrics; }
/**
 * Creates the pairwise cosine similarity metric configured under {@code name}.
 *
 * <p>Two sparse matrices are created from the files given by the
 * {@code "matrix"} and {@code "transpose"} settings. The optional
 * {@code "basedOn"} setting names another metric, which is loaded and set on
 * the new metric; the optional {@code "buildPhraseVectors"} setting is
 * passed on as a boolean.
 *
 * @param name key of the metric in the configuration
 * @return the new metric
 * @throws IOException if raised while reading the matrices or loading
 *         dependencies
 * @throws ConfigurationException if raised by a configuration helper
 */
private SimilarityMetric createPairwiseSimilarity(String name) throws IOException, ConfigurationException {
    JSONObject params = configuration.getMetric(name);
    SparseMatrix matrix = new SparseMatrix(requireFile(params, "matrix"));
    SparseMatrix transpose = new SparseMatrix(requireFile(params, "transpose"));

    PairwiseCosineSimilarity pairwise =
            new PairwiseCosineSimilarity(loadMainMapper(), loadMainIndex(), matrix, transpose);

    if (params.containsKey("basedOn")) {
        String baseName = requireString(params, "basedOn");
        pairwise.setBasedOn(loadMetric(baseName));
    }
    if (params.containsKey("buildPhraseVectors")) {
        pairwise.setBuildPhraseVectors(requireBoolean(params, "buildPhraseVectors"));
    }
    return pairwise;
}
/**
 * Creates the ensemble similarity metric configured under {@code key}.
 *
 * <p>The ensemble's components are whatever {@link #loadMetrics()} returns
 * after ensembles have been switched off via {@code setDoEnsembles(false)}.
 * The ensemble is then read from the {@code "model"} directory, and the
 * optional {@code "minComponents"} setting is applied.
 *
 * <p>NOTE(review): this method does not turn the ensembles flag back on
 * afterwards. Switching it off is presumably what keeps the nested
 * {@code loadMetrics()} call from loading ensembles again; confirm whether
 * callers rely on the flag staying off.
 *
 * @param key key of the metric in the configuration
 * @return the new ensemble metric
 * @throws IOException if raised while loading components or reading the model
 * @throws ConfigurationException if raised by a configuration helper
 */
private SimilarityMetric loadEnsembleMetric(String key) throws IOException, ConfigurationException {
    info("loading ensemble metric " + key);
    Map<String, Object> params = (Map<String, Object>) configuration.getMetric(key);

    setDoEnsembles(false);
    List<SimilarityMetric> components = loadMetrics();

    EnsembleSimilarity ensemble =
            new EnsembleSimilarity(new SvmEnsemble(), loadMainMapper(), env.getMainIndex());
    ensemble.setComponents(components);
    ensemble.read(requireDirectory(params, "model"));
    ensemble.setName(key);

    if (params.containsKey("minComponents")) {
        ensemble.setMinComponents(requireInteger(params, "minComponents"));
    }
    return ensemble;
}
/**
 * Creates the text similarity metric configured under {@code name}.
 *
 * <p>Required settings are {@code "lucene"} (the name of the index to load)
 * and {@code "field"}. The optional settings {@code "maxPercentage"},
 * {@code "minTermFreq"}, {@code "minDocFreq"} (integers) and
 * {@code "useInternalMapper"} (boolean) are passed to the metric when present.
 *
 * @param name key of the metric in the configuration
 * @return the new metric
 * @throws ConfigurationException if raised by a configuration helper
 * @throws IOException if raised while loading the index or mapper
 */
private SimilarityMetric createTextSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject params = configuration.getMetric(name);
    IndexHelper index = loadIndex(requireString(params, "lucene"));
    String field = requireString(params, "field");

    // Keep the concrete type so the optional setters need no casts.
    TextSimilarity textMetric = new TextSimilarity(loadMainMapper(), index, field);

    if (params.containsKey("maxPercentage")) {
        textMetric.setMaxPercentage(requireInteger(params, "maxPercentage"));
    }
    if (params.containsKey("minTermFreq")) {
        textMetric.setMinTermFreq(requireInteger(params, "minTermFreq"));
    }
    if (params.containsKey("minDocFreq")) {
        textMetric.setMinDocFreq(requireInteger(params, "minDocFreq"));
    }
    if (params.containsKey("useInternalMapper")) {
        textMetric.setUseInternalMapper(requireBoolean(params, "useInternalMapper"));
    }
    return textMetric;
}
/**
 * Loads the mapper with the specified name if it is not already loaded.
 *
 * <p>A mapper already present in the environment is returned as is.
 * Otherwise one is created according to the {@code "type"} entry of its
 * configuration ({@code dictionary}, {@code lucene}, {@code ensemble} or
 * {@code title}) and added to the environment.
 *
 * @param name key of the mapper in the configuration
 * @return The requested mapper.
 * @throws ConfigurationException if the mapper type is not recognized, or
 *         if raised by a configuration helper
 * @throws IOException if raised while creating the mapper
 */
public synchronized ConceptMapper loadMapper(String name) throws ConfigurationException, IOException {
    if (env.hasMapper(name)) {
        return env.getMapper(name);
    }
    info("loading mapper " + name);

    JSONObject mapperConfig = configuration.getMapper(name);
    String type = requireString(mapperConfig, "type");

    ConceptMapper created;
    if (type.equals("dictionary")) {
        created = getDictionaryMapper(name);
    } else if (type.equals("lucene")) {
        created = getLuceneMapper(name);
    } else if (type.equals("ensemble")) {
        created = getEnsembleMapper(name);
    } else if (type.equals("title")) {
        created = getTitleMapper(name);
    } else {
        throw new ConfigurationException("unknown type for mapper " + name + ": " + type);
    }

    env.addMapper(name, created);
    return created;
}
/**
 * Loads mappers and puts them in the environment.
 *
 * <p>Every key of the mappers configuration is passed to
 * {@link #loadMapper(String)}.
 *
 * @throws IOException if raised while loading a mapper
 * @throws ConfigurationException if raised while loading a mapper
 */
public void loadMappers() throws IOException, ConfigurationException {
    info("loading mappers");
    Set<String> mapperNames = (Set<String>) configuration.getMappers().keySet();
    for (String mapperName : mapperNames) {
        loadMapper(mapperName);
    }
}
/**
 * Logs {@code message} at info level, prefixed with the path of the
 * configuration this configurator works on.
 *
 * @param message the text to log
 */
private void info(String message) {
    String prefix = "configurator for " + configuration.getPath() + ": ";
    LOG.info(prefix + message);
}
/**
 * Creates a {@link TitleMapper} on the index named by the
 * {@code "indexName"} setting of the mapper configured under {@code name}.
 *
 * @param name key of the mapper in the configuration
 * @return the new mapper
 * @throws IOException if raised while loading the index
 * @throws ConfigurationException if raised by a configuration helper
 */
private ConceptMapper getTitleMapper(String name) throws IOException, ConfigurationException {
    String indexName = requireString(configuration.getMapper(name), "indexName");
    return new TitleMapper(loadIndex(indexName));
}