/**
 * Loads the similarity metric registered under {@code name}, unless the
 * environment already holds one, in which case that instance is returned.
 *
 * <p>The {@code type} entry of the metric's configuration selects which
 * factory method builds it. After creation the metric is given its name,
 * an optional {@code normalizer} from the configuration is attached, and
 * the metric is registered with the environment.
 *
 * @param name key of the metric in the configuration
 * @return the already-registered or newly created metric
 * @throws ConfigurationException if the metric type is not recognized, or
 *         if one of the configuration helpers rejects the settings
 * @throws IOException propagated from the metric-specific loaders
 */
public SimilarityMetric loadMetric(String name) throws ConfigurationException, IOException {
    if (env.hasMetric(name)) {
        return env.getMetric(name);
    }
    info("loading metric " + name);

    // Fetch the metric's settings once; they are needed for both the type
    // dispatch and the optional normalizer below.
    JSONObject params = configuration.getMetric(name);
    String type = requireString(params, "type");

    SimilarityMetric metric;
    if (type.equals("category")) {
        metric = createCategorySimilarity(name);
    } else if (type.equals("text")) {
        metric = createTextSimilarity(name);
    } else if (type.equals("esa")) {
        metric = createEsaSimilarity(name);
    } else if (type.equals("links")) {
        metric = createLinkSimilarity(name);
    } else if (type.equals("pairwise")) {
        metric = createPairwiseSimilarity(name);
    } else if (type.equals("ensemble")) {
        metric = loadEnsembleMetric(name);
    } else {
        throw new ConfigurationException("Unknown metric type: " + type);
    }
    metric.setName(name);

    if (params.containsKey("normalizer")) {
        Normalizer norm = parseNormalizer(requireString(params, "normalizer"));
        // Must be a reference comparison: calling norm.equals(null) throws a
        // NullPointerException when norm is null and is never true otherwise,
        // so it cannot serve as a null guard.
        if (norm != null) {
            metric.setNormalizer(norm);
        }
    }

    env.addMetric(name, metric);
    return metric;
}
/**
 * Returns the index helper registered under {@code name}, creating and
 * registering it first when the environment does not have it yet.
 *
 * <p>The helper is opened on the sub-directory {@code name} of the
 * {@code outputDir} configured for all indexes. Optional
 * {@code similarity} and {@code analyzer} entries in the index's own
 * configuration are applied to the helper; {@code "ESA"} is the only
 * value accepted for either.
 *
 * @param name the name of the index to load
 * @return the already-registered or newly created index helper
 * @throws IOException declared for the underlying index operations
 * @throws ConfigurationException if the similarity or analyzer type is
 *         not recognized, or if a configuration helper rejects the settings
 */
private IndexHelper loadIndex(String name) throws IOException, ConfigurationException {
    if (env.hasIndex(name)) {
        return env.getIndex(name);
    }
    info("loading index " + name);

    JSONObject settings = configuration.getIndex(name);
    File indexRoot = requireDirectory(configuration.getIndexes(), "outputDir");
    File indexDir = new File(indexRoot, name);
    IndexHelper index = new IndexHelper(indexDir, true);

    if (settings.containsKey("similarity")) {
        String similarityType = requireString(settings, "similarity");
        if (!similarityType.equals("ESA")) {
            throw new ConfigurationException("unknown similarity type: " + similarityType);
        }
        index.getSearcher().setSimilarity(new ESASimilarity.LuceneSimilarity());
    }

    if (settings.containsKey("analyzer")) {
        String analyzerType = requireString(settings, "analyzer");
        if (!analyzerType.equals("ESA")) {
            throw new ConfigurationException("unknown analyzer type: " + analyzerType);
        }
        index.setAnalyzer(new ESAAnalyzer());
    }

    env.addIndex(name, index);
    return index;
}
/**
 * Reads the known similarities from the file named by the {@code path}
 * entry of the gold configuration and stores them in the environment.
 *
 * @return the list that was read and stored
 * @throws ConfigurationException declared for the configuration lookup
 * @throws IOException declared for reading the file
 */
private List<KnownSim> loadGold() throws ConfigurationException, IOException {
    String goldPath = requireString(configuration.getGold(), "path");
    List<KnownSim> gold = KnownSim.read(new File(goldPath));
    env.setGold(gold);
    return gold;
}
/**
 * Builds a link-based similarity metric from the configuration stored
 * under {@code name}.
 *
 * <p>Required entries: {@code field} and {@code lucene} (the name of the
 * index to load). Optional entries: {@code similarity}, one of
 * {@code tfidf}, {@code google}, {@code logodds}, {@code jacard} or
 * {@code lucene}; and {@code minDocFreq}.
 *
 * @param name key of the metric in the configuration
 * @return the configured link similarity
 * @throws ConfigurationException declared for the configuration helpers
 * @throws IOException declared for loading the mapper and the index
 * @throws IllegalArgumentException if {@code similarity} is present but
 *         is not one of the values listed above
 */
private SimilarityMetric createLinkSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject settings = configuration.getMetric(name);
    String linkField = requireString(settings, "field");

    LinkSimilarity links = new LinkSimilarity(
            loadMainMapper(),
            loadIndex(requireString(settings, "lucene")),
            env.getMainIndex(),
            linkField);

    if (settings.containsKey("similarity")) {
        String fn = requireString(settings, "similarity");
        if (fn.equals("tfidf")) {
            links.setSimilarity(LinkSimilarity.SimFn.TFIDF);
        } else if (fn.equals("google")) {
            links.setSimilarity(LinkSimilarity.SimFn.GOOGLE);
        } else if (fn.equals("logodds")) {
            links.setSimilarity(LinkSimilarity.SimFn.LOGODDS);
        } else if (fn.equals("jacard")) {
            links.setSimilarity(LinkSimilarity.SimFn.JACARD);
        } else if (fn.equals("lucene")) {
            links.setSimilarity(LinkSimilarity.SimFn.LUCENE);
        } else {
            throw new IllegalArgumentException("unknown similarity: " + fn);
        }
    }

    if (settings.containsKey("minDocFreq")) {
        int minDocFreq = requireInteger(settings, "minDocFreq");
        links.setMinDocFreq(minDocFreq);
    }

    return links;
}
/**
 * Returns the concept mapper registered under {@code name}, creating and
 * registering it first when the environment does not have it yet.
 *
 * <p>The {@code type} entry of the mapper's configuration selects the
 * implementation: {@code dictionary}, {@code lucene}, {@code ensemble}
 * or {@code title}.
 *
 * @param name key of the mapper in the configuration
 * @return the requested mapper
 * @throws ConfigurationException if the mapper type is not recognized, or
 *         if a configuration helper rejects the settings
 * @throws IOException declared for the mapper-specific loaders
 */
public synchronized ConceptMapper loadMapper(String name) throws ConfigurationException, IOException {
    if (env.hasMapper(name)) {
        return env.getMapper(name);
    }
    info("loading mapper " + name);

    JSONObject settings = configuration.getMapper(name);
    String mapperType = requireString(settings, "type");

    // The branches are mutually exclusive, so their order does not matter.
    final ConceptMapper created;
    if (mapperType.equals("title")) {
        created = getTitleMapper(name);
    } else if (mapperType.equals("ensemble")) {
        created = getEnsembleMapper(name);
    } else if (mapperType.equals("lucene")) {
        created = getLuceneMapper(name);
    } else if (mapperType.equals("dictionary")) {
        created = getDictionaryMapper(name);
    } else {
        throw new ConfigurationException("unknown type for mapper " + name + ": " + mapperType);
    }

    env.addMapper(name, created);
    return created;
}
/**
 * Builds the ensemble metric configured under {@code key}.
 *
 * <p>The ensemble combines the metrics returned by {@code loadMetrics()}
 * through an {@link SvmEnsemble}, reads its model from the directory named
 * by the required {@code model} entry, and applies the optional
 * {@code minComponents} entry.
 *
 * @param key key of the ensemble metric in the configuration
 * @return the configured ensemble similarity
 * @throws IOException declared for loading the components and the model
 * @throws ConfigurationException declared for the configuration helpers
 */
private SimilarityMetric loadEnsembleMetric(String key) throws IOException, ConfigurationException {
    info("loading ensemble metric " + key);
    Map<String, Object> settings = (Map<String, Object>) configuration.getMetric(key);

    // NOTE(review): presumably this stops loadMetrics() from loading ensemble
    // metrics itself, which would recurse into this method. Confirm.
    // The flag is not restored anywhere in this method.
    setDoEnsembles(false);
    List<SimilarityMetric> components = loadMetrics();

    EnsembleSimilarity ensemble =
            new EnsembleSimilarity(new SvmEnsemble(), loadMainMapper(), env.getMainIndex());
    ensemble.setComponents(components);
    ensemble.read(requireDirectory(settings, "model"));
    ensemble.setName(key);

    if (settings.containsKey("minComponents")) {
        int minComponents = requireInteger(settings, "minComponents");
        ensemble.setMinComponents(minComponents);
    }
    return ensemble;
}