/**
 * Loads a similarity metric if it isn't already loaded.
 *
 * <p>When the environment already has a metric registered under {@code name}, that
 * instance is returned as-is. Otherwise the metric is created according to the
 * {@code "type"} entry of its configuration, given its name, optionally given a
 * normalizer (when the configuration has a {@code "normalizer"} entry), and added
 * to the environment before being returned.
 *
 * @param name key of the metric in the configuration
 * @return the already-registered metric, or the newly created one
 * @throws ConfigurationException if the configured type is not one of
 *         {@code category}, {@code text}, {@code esa}, {@code links},
 *         {@code pairwise} or {@code ensemble}; may also be propagated from helpers
 * @throws IOException propagated from the metric factory methods
 */
public SimilarityMetric loadMetric(String name) throws ConfigurationException, IOException {
    if (env.hasMetric(name)) {
        return env.getMetric(name);
    }
    info("loading metric " + name);

    // Look the configuration entry up once; it is needed for both the type and the normalizer.
    JSONObject params = configuration.getMetric(name);
    String type = requireString(params, "type");

    SimilarityMetric metric;
    if (type.equals("category")) {
        metric = createCategorySimilarity(name);
    } else if (type.equals("text")) {
        metric = createTextSimilarity(name);
    } else if (type.equals("esa")) {
        metric = createEsaSimilarity(name);
    } else if (type.equals("links")) {
        metric = createLinkSimilarity(name);
    } else if (type.equals("pairwise")) {
        metric = createPairwiseSimilarity(name);
    } else if (type.equals("ensemble")) {
        metric = loadEnsembleMetric(name);
    } else {
        throw new ConfigurationException("Unknown metric type: " + type);
    }
    metric.setName(name);

    if (params.containsKey("normalizer")) {
        Normalizer norm = parseNormalizer(requireString(params, "normalizer"));
        // Reference comparison on purpose: norm.equals(null) would throw a
        // NullPointerException for a null normalizer instead of skipping it.
        if (norm != null) {
            metric.setNormalizer(norm);
        }
    }

    env.addMetric(name, metric);
    return metric;
}
/**
 * Builds a {@link LinkSimilarity} from the configuration entry for {@code name}.
 *
 * <p>Required entries: {@code "field"} and {@code "lucene"}. Optional entries:
 * {@code "similarity"} (one of {@code tfidf}, {@code google}, {@code logodds},
 * {@code jacard}, {@code lucene}) and {@code "minDocFreq"}.
 *
 * @param name key of the metric in the configuration
 * @return the configured link similarity metric
 * @throws ConfigurationException propagated from the configuration helpers
 * @throws IOException propagated from the loaders
 * @throws IllegalArgumentException if {@code "similarity"} has an unrecognized value
 */
private SimilarityMetric createLinkSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject config = configuration.getMetric(name);
    String linkField = requireString(config, "field");
    LinkSimilarity linkMetric = new LinkSimilarity(
            loadMainMapper(),
            loadIndex(requireString(config, "lucene")),
            env.getMainIndex(),
            linkField);

    if (config.containsKey("similarity")) {
        String simName = requireString(config, "similarity");
        // Resolve the function first, then apply it in a single place.
        LinkSimilarity.SimFn simFn;
        if (simName.equals("tfidf")) {
            simFn = LinkSimilarity.SimFn.TFIDF;
        } else if (simName.equals("google")) {
            simFn = LinkSimilarity.SimFn.GOOGLE;
        } else if (simName.equals("logodds")) {
            simFn = LinkSimilarity.SimFn.LOGODDS;
        } else if (simName.equals("jacard")) {
            simFn = LinkSimilarity.SimFn.JACARD;
        } else if (simName.equals("lucene")) {
            simFn = LinkSimilarity.SimFn.LUCENE;
        } else {
            throw new IllegalArgumentException("unknown similarity: " + simName);
        }
        linkMetric.setSimilarity(simFn);
    }

    if (config.containsKey("minDocFreq")) {
        linkMetric.setMinDocFreq(requireInteger(config, "minDocFreq"));
    }
    return linkMetric;
}
/**
 * Builds a {@link CategorySimilarity} from the configuration entry for {@code name}.
 *
 * <p>The index named by the required {@code "lucene"} entry is loaded, a
 * {@link CategoryGraph} is created on top of it and initialized, and both are
 * handed to the new metric together with the main mapper.
 *
 * @param name key of the metric in the configuration
 * @return the category similarity metric
 * @throws ConfigurationException propagated from the configuration helpers
 * @throws IOException propagated from the loaders
 */
private SimilarityMetric createCategorySimilarity(String name) throws ConfigurationException, IOException {
    String indexName = requireString(configuration.getMetric(name), "lucene");
    IndexHelper categoryIndex = loadIndex(indexName);

    // The graph is initialized before the metric is constructed.
    CategoryGraph categoryGraph = new CategoryGraph(categoryIndex);
    categoryGraph.init();

    return new CategorySimilarity(loadMainMapper(), categoryGraph, categoryIndex);
}
/**
 * Builds an {@link ESASimilarity} from the configuration entry for {@code name}.
 *
 * <p>The required {@code "lucene"} entry names the index passed to the constructor.
 * If a {@code "textLucene"} entry is present, that index is loaded as well and set
 * as the metric's text helper.
 *
 * @param name key of the metric in the configuration
 * @return the ESA similarity metric
 * @throws ConfigurationException propagated from the configuration helpers
 * @throws IOException propagated from the loaders
 */
private SimilarityMetric createEsaSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject config = configuration.getMetric(name);
    ESASimilarity esa = new ESASimilarity(
            loadMainMapper(),
            loadIndex(requireString(config, "lucene")));

    if (!config.containsKey("textLucene")) {
        return esa;
    }
    String textIndexName = requireString(config, "textLucene");
    esa.setTextHelper(loadIndex(textIndexName));
    return esa;
}
/**
 * Builds a {@link PairwiseCosineSimilarity} from the configuration entry for {@code name}.
 *
 * <p>Required entries: {@code "matrix"} and {@code "transpose"}, each resolved to a
 * file from which a {@link SparseMatrix} is constructed. Optional entries:
 * {@code "basedOn"} (name of another metric, loaded through {@code loadMetric}) and
 * {@code "buildPhraseVectors"}.
 *
 * @param name key of the metric in the configuration
 * @return the pairwise similarity metric
 * @throws IOException propagated from the loaders and matrix construction
 * @throws ConfigurationException propagated from the configuration helpers
 */
private SimilarityMetric createPairwiseSimilarity(String name) throws IOException, ConfigurationException {
    JSONObject config = configuration.getMetric(name);

    // Matrices are opened before the mapper and index are loaded.
    SparseMatrix matrix = new SparseMatrix(requireFile(config, "matrix"));
    SparseMatrix transpose = new SparseMatrix(requireFile(config, "transpose"));
    PairwiseCosineSimilarity pairwise =
            new PairwiseCosineSimilarity(loadMainMapper(), loadMainIndex(), matrix, transpose);

    if (config.containsKey("basedOn")) {
        String baseName = requireString(config, "basedOn");
        pairwise.setBasedOn(loadMetric(baseName));
    }
    if (config.containsKey("buildPhraseVectors")) {
        pairwise.setBuildPhraseVectors(requireBoolean(config, "buildPhraseVectors"));
    }
    return pairwise;
}
/**
 * Builds an {@link EnsembleSimilarity} from the configuration entry for {@code key}.
 *
 * <p>Ensemble loading is switched off via {@code setDoEnsembles(false)} before
 * {@code loadMetrics()} is called to obtain the component metrics. The flag is not
 * switched back on by this method. The ensemble then reads its model from the
 * directory given by the required {@code "model"} entry; {@code "minComponents"}
 * is applied when present.
 *
 * @param key key of the ensemble metric in the configuration
 * @return the ensemble metric, already named {@code key}
 * @throws IOException propagated from the loaders and from reading the model
 * @throws ConfigurationException propagated from the configuration helpers
 */
private SimilarityMetric loadEnsembleMetric(String key) throws IOException, ConfigurationException {
    info("loading ensemble metric " + key);
    Map<String, Object> config = (Map<String, Object>) configuration.getMetric(key);

    // NOTE(review): presumably disabled so the nested loadMetrics() call does not
    // try to load ensembles again - confirm that leaving it disabled is intended.
    setDoEnsembles(false);
    List<SimilarityMetric> components = loadMetrics();

    EnsembleSimilarity ensemble = new EnsembleSimilarity(
            new SvmEnsemble(),
            loadMainMapper(),
            env.getMainIndex());
    ensemble.setComponents(components);
    ensemble.read(requireDirectory(config, "model"));
    ensemble.setName(key);

    if (config.containsKey("minComponents")) {
        ensemble.setMinComponents(requireInteger(config, "minComponents"));
    }
    return ensemble;
}
/**
 * Builds a {@link TextSimilarity} from the configuration entry for {@code name}.
 *
 * <p>Required entries: {@code "lucene"} (index to load) and {@code "field"}.
 * Optional entries, each applied only when present: {@code "maxPercentage"},
 * {@code "minTermFreq"}, {@code "minDocFreq"} and {@code "useInternalMapper"}.
 *
 * @param name key of the metric in the configuration
 * @return the text similarity metric
 * @throws ConfigurationException propagated from the configuration helpers
 * @throws IOException propagated from the loaders
 */
private SimilarityMetric createTextSimilarity(String name) throws ConfigurationException, IOException {
    JSONObject config = configuration.getMetric(name);
    IndexHelper textIndex = loadIndex(requireString(config, "lucene"));
    String textField = requireString(config, "field");

    // Keep the concrete type so the optional setters need no casts.
    TextSimilarity text = new TextSimilarity(loadMainMapper(), textIndex, textField);

    if (config.containsKey("maxPercentage")) {
        text.setMaxPercentage(requireInteger(config, "maxPercentage"));
    }
    if (config.containsKey("minTermFreq")) {
        text.setMinTermFreq(requireInteger(config, "minTermFreq"));
    }
    if (config.containsKey("minDocFreq")) {
        text.setMinDocFreq(requireInteger(config, "minDocFreq"));
    }
    if (config.containsKey("useInternalMapper")) {
        text.setUseInternalMapper(requireBoolean(config, "useInternalMapper"));
    }
    return text;
}
/** * Loads metrics and puts them in the environment. * * @return * @throws IOException * @throws ConfigurationException */ public List<SimilarityMetric> loadMetrics() throws IOException, ConfigurationException { info("loading metrics"); Set<String> ensembleKeys = new HashSet<String>(); List<SimilarityMetric> metrics = new ArrayList<SimilarityMetric>(); for (String key : (Set<String>) configuration.getMetrics().keySet()) { String type = requireString(configuration.getMetric(key), "type"); if (type.equals("ensemble")) { ensembleKeys.add(key); } else if (type.equals("pairwise") && !doPairwise) { // do nothing } else { metrics.add(loadMetric(key)); } } if (doEnsembles) { for (String key : ensembleKeys) { metrics.add(loadMetric(key)); } } return metrics; }