/**
 * Create a PriorityQueue from a word->tf map.
 *
 * @param words a map of words keyed on the word (String) with Int objects as the values.
 * @param fieldNames an array of field names to override defaults.
 */
private PriorityQueue<ScoreTerm> createQueue(Map<String, Int> words, String... fieldNames)
    throws IOException {
  // have collected all words in doc and their freqs
  int numDocs = ir.numDocs();
  final int limit = Math.min(maxQueryTerms, words.size());
  FreqQ queue = new FreqQ(limit); // will order words by score
  for (String word : words.keySet()) { // for every word
    int tf = words.get(word).x; // term freq in the source doc
    if (minTermFreq > 0 && tf < minTermFreq) {
      continue; // filter out words that don't occur enough times in the source
    }

    // go through all the fields and find the largest document frequency
    String topField = fieldNames[0];
    int docFreq = 0;
    for (String fieldName : fieldNames) {
      int freq = ir.docFreq(new Term(fieldName, word));
      topField = (freq > docFreq) ? fieldName : topField;
      docFreq = (freq > docFreq) ? freq : docFreq;
    }

    if (minDocFreq > 0 && docFreq < minDocFreq) {
      continue; // filter out words that don't occur in enough docs
    }

    if (docFreq > maxDocFreq) {
      continue; // filter out words that occur in too many docs
    }

    if (docFreq == 0) {
      continue; // index update problem?
    }

    float idf = similarity.idf(docFreq, numDocs);
    float score = tf * idf;

    if (queue.size() < limit) {
      // there is still space in the queue
      queue.add(new ScoreTerm(word, topField, score, idf, docFreq, tf));
    } else {
      ScoreTerm term = queue.top();
      if (term.score < score) { // update the smallest in the queue in place and update the queue.
        term.update(word, topField, score, idf, docFreq, tf);
        queue.updateTop();
      }
    }
  }
  return queue;
}
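
// The method above relies on two helpers that are not shown in this excerpt: FreqQ and
// ScoreTerm. The sketch below is an assumption about their minimal shape, inferred only
// from how they are used here: FreqQ keeps the lowest-scoring term at top() so it can be
// replaced in place, and ScoreTerm is a mutable holder for the per-term statistics.
// Names, field layout, and the use of org.apache.lucene.util.PriorityQueue are assumptions.

/** Minimal sketch (assumed): a bounded queue ordered so the lowest score sits on top. */
private static class FreqQ extends org.apache.lucene.util.PriorityQueue<ScoreTerm> {
  FreqQ(int maxSize) {
    super(maxSize);
  }

  @Override
  protected boolean lessThan(ScoreTerm a, ScoreTerm b) {
    return a.score < b.score; // smallest score stays at top(), ready to be evicted
  }
}

/** Minimal sketch (assumed): mutable holder so the queue's top can be updated in place. */
private static class ScoreTerm {
  String word;
  String topField;
  float score;
  float idf;
  int docFreq;
  int tf;

  ScoreTerm(String word, String topField, float score, float idf, int docFreq, int tf) {
    update(word, topField, score, idf, docFreq, tf);
  }

  void update(String word, String topField, float score, float idf, int docFreq, int tf) {
    this.word = word;
    this.topField = topField;
    this.score = score;
    this.idf = idf;
    this.docFreq = docFreq;
    this.tf = tf;
  }
}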