public QryResult evaluateIndri(RetrievalModelIndri r) throws IOException { QryResult result = args.get(0).evaluate(r); double score = 0; double lam = r.lambda; double mu = r.mu; this.field = result.invertedList.field; this.C = (double) QryEval.corpus.get(field); this.ctf = (double) result.invertedList.ctf; long doclen; InvList.DocPosting docp; double Pmle = this.ctf / this.C; for (int i = 0; i < result.invertedList.df; i++) { docp = result.invertedList.postings.get(i); doclen = QryEval.dls.getDocLength(this.field, docp.docid); score = (1.0 - lam) * ((double) docp.tf + mu * Pmle) / ((double) doclen + mu) + lam * Pmle; result.docScores.add(docp.docid, score); } // The SCORE operator should not return a populated inverted list. // If there is one, replace it with an empty inverted list. if (result.invertedList.df > 0) result.invertedList = new InvList(); return result; }
// for Ranked model public QryResult evaluateBooleanRanked(RetrievalModel r) throws IOException { // Evaluate the query argument. QryResult result = args.get(0).evaluate(r); // Each pass of the loop computes a score for one document. Note: // If the evaluate operation above returned a score list (which is // very possible), this loop gets skipped. for (int i = 0; i < result.invertedList.df; i++) { // DIFFERENT RETRIEVAL MODELS IMPLEMENT THIS DIFFERENTLY. // Unranked Boolean. All matching documents get a score of 1.0. result.docScores.add( result.invertedList.postings.get(i).docid, result.invertedList.postings.get(i).tf); } // The SCORE operator should not return a populated inverted list. // If there is one, replace it with an empty inverted list. if (result.invertedList.df > 0) result.invertedList = new InvList(); return result; }
// for BM25 public QryResult evaluateBM25(RetrievalModelBM25 r) throws IOException { // Evaluate the query argument. QryResult result = args.get(0).evaluate(r); DocLengthStore dls = new DocLengthStore(QryEval.READER); int N = QryEval.READER.getDocCount(result.invertedList.field); long doclen; float avg_doclen = QryEval.READER.getSumTotalTermFreq(result.invertedList.field) / (float) QryEval.READER.getDocCount(result.invertedList.field); double rsj = Math.log((N - result.invertedList.df + 0.5) / (result.invertedList.df + 0.5)); double score = 0; // Each pass of the loop computes a score for one document. Note: // If the evaluate operation above returned a score list (which is // very possible), this loop gets skipped. for (int i = 0; i < result.invertedList.df; i++) { doclen = dls.getDocLength(result.invertedList.field, result.invertedList.postings.get(i).docid); score = rsj * result.invertedList.postings.get(i).tf / (result.invertedList.postings.get(i).tf + r.k1 * ((1 - r.b) + r.b * doclen / avg_doclen)); result.docScores.add(result.invertedList.postings.get(i).docid, score); } // The SCORE operator should not return a populated inverted list. // If there is one, replace it with an empty inverted list. if (result.invertedList.df > 0) result.invertedList = new InvList(); return result; }