/**
 * Returns the pages most similar to the page identified by {@code wpId}.
 *
 * <p>A cached answer is returned when {@code hasCachedMostSimilar(wpId)} reports one.
 * Otherwise a "more like this" query is built, either from the page's document id in
 * the ESA index or, when the page is absent there but present in the text index, from
 * the text stored under {@code Page.FIELD_TEXT}. The query is run against
 * {@code searcher}, filtered by {@code esaHelper.getWpIdFilter(validIds)}.
 *
 * @param wpId       id of the page to find neighbours for
 * @param maxResults maximum number of hits requested from the searcher
 * @param validIds   ids passed to {@code esaHelper.getWpIdFilter} to restrict the search
 * @return the result of {@code normalize} applied to the collected hits, or
 *         {@code null} when the page is found in neither index
 * @throws IOException if the underlying index access fails
 */
@Override
public DocScoreList mostSimilar(int wpId, int maxResults, TIntSet validIds) throws IOException {
    if (hasCachedMostSimilar(wpId)) {
        return getCachedMostSimilar(wpId, maxResults, validIds);
    }

    MoreLikeThis moreLikeThis = getMoreLikeThis();
    int esaDocId = esaHelper.wpIdToLuceneId(wpId);

    Query likeQuery;
    if (esaDocId >= 0) {
        // The page is in the ESA index: query by its document id directly.
        likeQuery = moreLikeThis.like(esaDocId);
    } else {
        // Fall back to the text index; give up if the page is unknown there too.
        if (textHelper == null || textHelper.wpIdToLuceneId(wpId) < 0) {
            return null;
        }
        Document page = textHelper.wpIdToLuceneDoc(wpId);
        likeQuery = moreLikeThis.like(
                new StringReader(page.get(Page.FIELD_TEXT)), Page.FIELD_TEXT);
    }

    TopDocs similarDocs =
            searcher.search(likeQuery, esaHelper.getWpIdFilter(validIds), maxResults);
    pruneSimilar(similarDocs);

    // Read the hits only after pruning, since pruneSimilar receives the TopDocs itself.
    ScoreDoc[] hits = similarDocs.scoreDocs;
    DocScoreList result = new DocScoreList(hits.length);
    int slot = 0;
    for (ScoreDoc hit : hits) {
        // Hits carry Lucene document ids; translate them back to page ids.
        result.set(slot, esaHelper.luceneIdToWpId(hit.doc), hit.score);
        slot++;
    }
    return normalize(result);
}
/**
 * Returns the pages most similar to a free-text phrase.
 *
 * <p>The phrase is converted to a concept vector by {@code getConceptVector}. The
 * vector's keys are ordered by descending score, and the first {@code maxResults}
 * of them (or all, if there are fewer) are copied into the result. Keys are
 * translated with {@code esaHelper.luceneIdToWpId} before being stored.
 *
 * @param phrase     text to find similar pages for
 * @param maxResults upper bound on the number of entries in the result
 * @param validIds   ids forwarded to {@code getConceptVector}
 * @return the result of {@code normalize} applied to the top-scoring entries
 * @throws IOException if building the concept vector fails on index access
 */
@Override
public DocScoreList mostSimilar(String phrase, int maxResults, TIntSet validIds) throws IOException {
    final TIntDoubleHashMap scores = getConceptVector(phrase, validIds);

    // Box the keys so they can be sorted with a custom comparator.
    Integer[] luceneIds = ArrayUtils.toObject(scores.keys());
    Arrays.sort(luceneIds, new Comparator<Integer>() {
        @Override
        public int compare(Integer id1, Integer id2) {
            // Arguments are swapped to sort highest score first; Double.compare
            // avoids allocating a Double wrapper on every comparison.
            return Double.compare(scores.get(id2), scores.get(id1));
        }
    });

    DocScoreList result = new DocScoreList(Math.min(luceneIds.length, maxResults));
    for (int i = 0; i < result.numDocs(); i++) {
        Integer luceneId = luceneIds[i];
        result.set(i, esaHelper.luceneIdToWpId(luceneId), scores.get(luceneId));
    }
    return normalize(result);
}