public Accumulator_cascade[] rank_cascade() { // point to next position in keptDocs array that hasn't been filled int indexCntKeptDocs = 0; // Clear priority queue. mSortedAccumulators.clear(); // Cliques associated with the MRF. List<Clique> cliques = mMRF.getCliques(); if (cliques.size() == 0) { System.out.println("Shouldn't have size 0"); System.exit(-1); } // Current accumulator. Accumulator_cascade a = mAccumulators[0]; /* // Initialize the MRF. try { mMRF.initialize(); } catch (ConfigurationException e) { sLogger.error("Error initializing MRF. Aborting ranking!"); return null; } */ // Maximum possible score that this MRF can achieve. float mrfMaxScore = 0.0f; for (Clique c : cliques) { if (!((((Clique_cascade) c).getParamID()).equals("termWt"))) { System.out.println( "In this faster cascade implementation, first stage must be term in order to get positions[] values! " + ((Clique_cascade) c).getParamID()); System.exit(-1); } mrfMaxScore += c.getMaxScore(); } // Sort cliques according to their max scores. Collections.sort(cliques, mMaxScoreComparator); // Score that must be achieved to enter result set. double scoreThreshold = Double.NEGATIVE_INFINITY; // Offset into document set we're currently at (if applicable). int docsetOffset = 0; int docno = 0; if (mDocSet != null) { docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE; } else { if (cascadeStage != 0) { System.out.println("Shouldn't happen. Cascade stage " + cascadeStage); System.exit(-1); } docno = mMRF.getNextCandidate(); } boolean firstTime = true; long startTime = System.currentTimeMillis(); while (docno < Integer.MAX_VALUE) { for (DocumentNode documentNode : mDocNodes) { documentNode.setDocno(docno); } // Document-at-a-time scoring. float docMaxScore = mrfMaxScore; boolean skipped = false; float score = 0.0f; // Lidan: accumulate document scores across the cascade stages if (mDocSet != null && cascadeStage != 0) { score = accumulated_scores[docsetOffset - 1]; } // for each query term, its position in a document int[][] termPositions = new int[cliques.size()][]; int document_length = -1; for (int i = 0; i < cliques.size(); i++) { // Current clique that we're scoring. Clique c = cliques.get(i); // If there's no way that this document can enter the result set // then exit. if (firstTime) { term_to_cliqueNumber.put(c.getConcept().trim().toLowerCase(), i + ""); term_to_termCollectionFrequency.put( c.getConcept().trim().toLowerCase(), ((Clique_cascade) c).termCollectionCF() + ""); term_to_termDF.put( c.getConcept().trim().toLowerCase(), ((Clique_cascade) c).termCollectionDF() + ""); } if (score + docMaxScore <= scoreThreshold) { // Advance postings readers (but don't score). for (int j = i; j < cliques.size(); j++) { cliques.get(j).setNextCandidate(docno + 1); } skipped = true; break; } // Document independent cliques do not affect the ranking. if (!c.isDocDependent()) { continue; } // Update document score. float cliqueScore = c.getPotential(); score += c.getWeight() * cliqueScore; // Update the max score for the rest of the cliques. docMaxScore -= c.getMaxScore(); // stuff needed for document evaluation in the next stage int[] p = ((Clique_cascade) c).getPositions(); if (p != null) { termPositions[i] = Arrays.copyOf(p, p.length); document_length = ((Clique_cascade) c).getDocLen(); } } firstTime = false; // Keep track of mNumResults best accumulators. if (!skipped && score > scoreThreshold) { a.docno = docno; a.score = score; a.index_into_keptDocs = indexCntKeptDocs; keptDocLengths[indexCntKeptDocs] = document_length; mSortedAccumulators.add(a); // save positional information for each query term in the document for (int j = 0; j < termPositions.length; j++) { if (termPositions[j] != null) { keptDocs[indexCntKeptDocs][j] = Arrays.copyOf(termPositions[j], termPositions[j].length); } } if (mSortedAccumulators.size() == mNumResults + 1) { a = mSortedAccumulators.poll(); // Re-use the accumulator of the removed document // After maximum # docs been put into queue, each time a new document is added, an old // document will be ejected, use the spot freed by the ejected document to store the new // document positional info in keptDocs indexCntKeptDocs = a.index_into_keptDocs; keptDocs[indexCntKeptDocs] = new int[numQueryTerms][]; scoreThreshold = mSortedAccumulators.peek().score; } else { a = mAccumulators[ mSortedAccumulators.size()]; // Next non-used accumulator in the accumulator pool indexCntKeptDocs++; } } if (mDocSet != null) { docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE; } else { if (cascadeStage != 0) { System.out.println("Shouldn't happen. Cascade stage " + cascadeStage); System.exit(-1); } docno = mMRF.getNextCandidate(); } } // Grab the accumulators off the stack, in (reverse) order. Accumulator_cascade[] results_tmp = new Accumulator_cascade[Math.min(mNumResults, mSortedAccumulators.size())]; for (int i = 0; i < results_tmp.length; i++) { results_tmp[results_tmp.length - 1 - i] = mSortedAccumulators.poll(); meanScore += results_tmp[results_tmp.length - 1 - i].score; } meanScore /= results_tmp.length; Accumulator_cascade[] results = results_tmp; /* Do the sorting in rank() //if there are more stages, should sort by docno if (cnt!=cliques_all.size()){ int [] order = new int[results_tmp.length]; double [] docnos = new double[results_tmp.length]; for (int i=0; i<order.length; i++){ order[i] = i; docnos[i] = results_tmp[i].docno; } ivory.smrf.model.constrained.ConstraintModel.Quicksort(docnos, order, 0, results.length-1); results = new Accumulator_cascade[results_tmp.length]; for (int i=0; i<order.length; i++){ results[i] = results_tmp[order[i]]; } } */ long endTime = System.currentTimeMillis(); return results; }