Example #1
0
  /**
   * Scores candidate documents document-at-a-time using max-score pruning and returns the top
   * {@code mNumResults} accumulators, ordered by ascending score (the priority queue is drained
   * smallest-first into the tail of the result array).
   *
   * <p>Side effects: repopulates {@code mSortedAccumulators}, {@code keptDocs},
   * {@code keptDocLengths}, the {@code term_to_*} lookup maps, and sets {@code meanScore} to the
   * mean score of the returned results (left unchanged when no document qualifies).
   *
   * <p>NOTE(review): on configuration errors this method calls {@code System.exit(-1)}, matching
   * the file's existing error-handling style; consider throwing instead if callers can recover.
   *
   * @return the best accumulators found, at most {@code mNumResults} of them
   */
  public Accumulator_cascade[] rank_cascade() {

    // Next free slot in the keptDocs positional-info array.
    int indexCntKeptDocs = 0;

    // Clear priority queue.
    mSortedAccumulators.clear();

    // Cliques associated with the MRF.
    List<Clique> cliques = mMRF.getCliques();

    if (cliques.size() == 0) {
      System.out.println("Shouldn't have size 0");
      System.exit(-1);
    }

    // Current accumulator, drawn from the pre-allocated pool.
    Accumulator_cascade a = mAccumulators[0];

    // Maximum possible score that this MRF can achieve; used for pruning below.
    float mrfMaxScore = 0.0f;
    for (Clique c : cliques) {
      // First cascade stage must consist of term cliques so positions[] is available
      // for later stages.
      if (!((((Clique_cascade) c).getParamID()).equals("termWt"))) {
        System.out.println(
            "In this faster cascade implementation, first stage must be term in order to get positions[] values! "
                + ((Clique_cascade) c).getParamID());
        System.exit(-1);
      }
      mrfMaxScore += c.getMaxScore();
    }

    // Sort cliques according to their max scores so high-impact cliques score first.
    Collections.sort(cliques, mMaxScoreComparator);

    // Score that must be achieved to enter the result set.
    double scoreThreshold = Double.NEGATIVE_INFINITY;

    // Offset into the document set we're currently at (if applicable).
    int docsetOffset = 0;

    int docno = 0;
    if (mDocSet != null) {
      docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE;
    } else {
      // Only the first cascade stage may iterate candidates straight off the MRF.
      if (cascadeStage != 0) {
        System.out.println("Shouldn't happen. Cascade stage " + cascadeStage);
        System.exit(-1);
      }

      docno = mMRF.getNextCandidate();
    }

    // Populate the term_to_* maps only on the first document processed.
    boolean firstTime = true;

    while (docno < Integer.MAX_VALUE) {
      for (DocumentNode documentNode : mDocNodes) {
        documentNode.setDocno(docno);
      }

      // Document-at-a-time scoring with running upper bound for early termination.
      float docMaxScore = mrfMaxScore;
      boolean skipped = false;

      float score = 0.0f;

      // Lidan: accumulate document scores across the cascade stages.
      // docsetOffset was already advanced past this docno, hence the -1.
      if (mDocSet != null && cascadeStage != 0) {
        score = accumulated_scores[docsetOffset - 1];
      }

      // For each query term, its positions within the current document.
      int[][] termPositions = new int[cliques.size()][];
      int document_length = -1;

      for (int i = 0; i < cliques.size(); i++) {

        // Current clique that we're scoring.
        Clique c = cliques.get(i);

        if (firstTime) {
          // Record per-term statistics once; the normalized term is the map key.
          String term = c.getConcept().trim().toLowerCase();
          term_to_cliqueNumber.put(term, i + "");
          term_to_termCollectionFrequency.put(term, ((Clique_cascade) c).termCollectionCF() + "");
          term_to_termDF.put(term, ((Clique_cascade) c).termCollectionDF() + "");
        }

        // If even the remaining best case cannot reach the threshold, stop scoring
        // this document.
        if (score + docMaxScore <= scoreThreshold) {
          // Advance postings readers (but don't score).
          for (int j = i; j < cliques.size(); j++) {
            cliques.get(j).setNextCandidate(docno + 1);
          }
          skipped = true;

          break;
        }

        // Document-independent cliques do not affect the ranking.
        if (!c.isDocDependent()) {
          continue;
        }

        // Update document score.
        float cliqueScore = c.getPotential();
        score += c.getWeight() * cliqueScore;

        // Tighten the upper bound for the remaining cliques.
        docMaxScore -= c.getMaxScore();

        // Positional info needed for document evaluation in the next cascade stage.
        int[] p = ((Clique_cascade) c).getPositions();

        if (p != null) {

          termPositions[i] = Arrays.copyOf(p, p.length);

          document_length = ((Clique_cascade) c).getDocLen();
        }
      }

      firstTime = false;

      // Keep track of the mNumResults best accumulators.
      if (!skipped && score > scoreThreshold) {
        a.docno = docno;
        a.score = score;
        a.index_into_keptDocs = indexCntKeptDocs;
        keptDocLengths[indexCntKeptDocs] = document_length;

        mSortedAccumulators.add(a);

        // Save positional information for each query term in the document.
        for (int j = 0; j < termPositions.length; j++) {

          if (termPositions[j] != null) {
            keptDocs[indexCntKeptDocs][j] =
                Arrays.copyOf(termPositions[j], termPositions[j].length);
          }
        }

        if (mSortedAccumulators.size() == mNumResults + 1) {
          a = mSortedAccumulators.poll(); // Re-use the accumulator of the removed document.

          // After the maximum # of docs has been put into the queue, each newly added
          // document ejects an old one; reuse the ejected document's slot in keptDocs
          // for the new document's positional info.

          indexCntKeptDocs = a.index_into_keptDocs;
          keptDocs[indexCntKeptDocs] = new int[numQueryTerms][];

          // The smallest retained score becomes the new entry threshold.
          scoreThreshold = mSortedAccumulators.peek().score;

        } else {
          a =
              mAccumulators[
                  mSortedAccumulators.size()]; // Next unused accumulator in the pool.
          indexCntKeptDocs++;
        }
      }

      if (mDocSet != null) {
        docno = docsetOffset < mDocSet.length ? mDocSet[docsetOffset++] : Integer.MAX_VALUE;
      } else {
        if (cascadeStage != 0) {
          System.out.println("Shouldn't happen. Cascade stage " + cascadeStage);
          System.exit(-1);
        }

        docno = mMRF.getNextCandidate();
      }
    }

    // Drain the accumulators off the queue, in (reverse) order: the queue yields
    // smallest-first, so fill the array from the tail.
    Accumulator_cascade[] results_tmp =
        new Accumulator_cascade[Math.min(mNumResults, mSortedAccumulators.size())];

    for (int i = 0; i < results_tmp.length; i++) {
      results_tmp[results_tmp.length - 1 - i] = mSortedAccumulators.poll();
      meanScore += results_tmp[results_tmp.length - 1 - i].score;
    }

    // Guard against division by zero when no document qualified; previously this
    // produced NaN in the meanScore field.
    if (results_tmp.length > 0) {
      meanScore /= results_tmp.length;
    }

    return results_tmp;
  }