/**
   * Decides whether the current state constitutes a match.
   *
   * <p>When there are no positions to test the answer is always {@code true}.
   * Otherwise a match requires that more documents are available, that
   * {@code min}, {@code max} and {@code rootDoc} all refer to the same document,
   * and that {@link #check(int, int)} accepts at least one of the ranges derived
   * from the root's positions in that document.
   *
   * @return {@code true} if a match has been found, {@code false} otherwise
   * @throws IOException if reading positions from the root fails
   */
  private boolean found() throws IOException {
    // Nothing to test against: with no positions there is no predicate to apply.
    if (positions.length == 0) {
      return true;
    }

    // No match when the documents are exhausted, when min and max sit on
    // different documents, or when the root is not on that same document.
    if (!more || min != max || rootDoc != max) {
      return false;
    }

    // We have duplicate entries - support should be improved but it is not used
    // at the moment. This should work akin to the leaf scorer; it would compact
    // the index. The match must be in a known term range.
    //
    // Each root position closes a range that opens one past the previous root
    // position; the first range opens at zero (hence the -1 seed).
    final int occurrences = root.freq();
    int rangeEnd = -1;
    for (int seen = 0; seen < occurrences; seen++) {
      final int rangeStart = rangeEnd + 1;
      rangeEnd = root.nextPosition();
      if (check(rangeStart, rangeEnd)) {
        return true;
      }
    }

    // Every candidate range was checked and rejected.
    return false;
  }
    /**
     * Seeks the wrapped postings to the given term and buffers them for
     * re-ordered reading.
     *
     * <p>For every document the wrapped enumerator yields, the remapped document
     * id (via {@code oldToNew}) and the buffer offset are recorded in a
     * {@code PostingMap}; the term frequency and delta-encoded positions are
     * written to {@code out}. The recorded entries are then sorted and the
     * temporary file is reopened for reading through {@code in}.
     *
     * @param terms the term enumeration positioned on the term to seek to
     * @throws IOException if reading the postings or writing the buffer fails
     */
    public void seek(TermEnum terms) throws IOException {
      original.seek(terms);

      docFreq = terms.docFreq();
      pointer = -1;

      // Grow the reusable PostingMap pool so it has one entry per document.
      final int oldCapacity = postingMaps.length;
      if (docFreq > oldCapacity) {
        PostingMap[] grown = new PostingMap[docFreq];
        System.arraycopy(postingMaps, 0, grown, 0, oldCapacity);
        for (int slot = oldCapacity; slot < docFreq; slot++) {
          grown[slot] = new PostingMap();
        }
        postingMaps = grown;
      }

      out.reset();

      int buffered = 0;
      while (original.next()) {
        PostingMap entry = postingMaps[buffered];
        buffered++;
        entry.newDoc = oldToNew[original.doc()]; // document id after remapping
        entry.offset = out.getFilePointer(); // where this document's data starts

        // Buffer the term frequency followed by the positions, each position
        // stored as the difference from the one before it.
        final int termFreq = original.freq();
        out.writeVInt(termFreq);
        int lastPosition = 0;
        for (int k = 0; k < termFreq; k++) {
          final int position = original.nextPosition();
          out.writeVInt(position - lastPosition);
          lastPosition = position;
        }
      }
      out.flush();

      // Fewer documents than terms.docFreq() may have been seen (deletions).
      docFreq = buffered;

      // Re-sort the buffered entries by their mapped document ids.
      Arrays.sort(postingMaps, 0, docFreq);
      // HeapSorter.sort(postingMaps,docFreq); // TODO MC - due to the lack of space

      // NOTE: this might be substantially faster if RAMInputStream were public
      // and supported a reset() operation.
      in = tempDir.openInput(TEMP_FILE);
    }
    /**
     * Reads the per-document size payload stored on {@code _sizeTerm} and
     * allocates space for each document accordingly.
     *
     * <p>For every document with at least one occurrence of the term, the
     * payload at the first position is decoded as an int and passed, capped at
     * {@code _maxItems}, to {@code allocate}. Does nothing if the reader
     * returns no term positions for the term.
     *
     * @throws Exception if reading the postings or the payload fails
     */
    @Override
    public void load() throws Exception {
      TermPositions tp = null;
      byte[] payloadBuffer = new byte[4]; // four bytes for an int
      try {
        tp = _reader.termPositions(_sizeTerm);

        if (tp == null) return;

        while (tp.next()) {
          if (tp.freq() > 0) {
            // The payload is only valid after nextPosition() has been called.
            tp.nextPosition();
            // getPayload() fills the supplied buffer only when the payload fits;
            // otherwise it returns a newly allocated array. Decode whatever it
            // returns rather than assuming the data landed in payloadBuffer.
            byte[] payload = tp.getPayload(payloadBuffer, 0);
            int len = bytesToInt(payload);
            allocate(tp.doc(), Math.min(len, _maxItems), true);
          }
        }
      } finally {
        if (tp != null) tp.close();
      }
    }
Example #4
0
  /**
   * Prints every term of the index that passes {@code wantThisTerm}, together
   * with its postings.
   *
   * <p>Terms are visited in the order returned by the index reader's term
   * enumeration. For each accepted term a header line
   * ({@code order field: text}) is printed, followed by one line per document
   * of the form {@code docNum, freq, <pos pos ...>}. When filters are active
   * the documents of each accepted term are collected and handed to
   * {@code computeDocsIntersection}. A debug message is logged whenever an
   * accepted term's ordinal is a multiple of 1000.
   *
   * <p>Both the term enumeration and each per-term positions enumeration are
   * closed even if an exception is thrown while dumping.
   *
   * @throws IOException if reading from the index fails
   */
  private void dumpTerms() throws IOException {
    outputBanner("Terms (in Term.compareTo() order)");

    TermEnum terms = mIndexReader.terms();
    try {
      int order = 0;

      while (terms.next()) {
        // The ordinal counts every term, including the ones filtered out below.
        order++;
        Term term = terms.term();
        String field = term.field();
        String text = term.text();

        if (!wantThisTerm(field, text)) {
          continue;
        }

        outputLn(order + " " + field + ": " + text);

        /*
         * for each term, print the
         * <document, frequency, <position>* > tuples for a term.
         *
         * document:  document in which the Term appears
         * frequency: number of time the Term appears in the document
         * position:  position for each appearance in the document
         *
         * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED));
         *      then the tuple for Term("field", "two") in this document would be like:
         *      88, 2, <2, 4>
         *      where
         *      88 is the document number
         *      2  is the frequency this term appear in the document
         *      <2, 4> are the positions for each appearance in the document
         */
        // by TermPositions
        outputLn("    document, frequency, <position>*");

        // keep track of docs that appear in all terms that are filtered in.
        Set<Integer> docNums = null;
        if (hasFilters()) {
          docNums = new HashSet<Integer>();
        }

        TermPositions termPos = mIndexReader.termPositions(term);
        try {
          while (termPos.next()) {
            int docNum = termPos.doc();
            int freq = termPos.freq();

            if (docNums != null) {
              docNums.add(docNum);
            }

            output("    " + docNum + ", " + freq + ", <");

            for (int f = 0; f < freq; f++) {
              int positionInDoc = termPos.nextPosition();
              // Separate positions with a single space, none before the first.
              if (f > 0) {
                output(" ");
              }
              output(positionInDoc + "");
            }
            outputLn(">");
          }
        } finally {
          // Release the postings even if reading or printing failed.
          termPos.close();
        }

        if (docNums != null) {
          computeDocsIntersection(docNums);
        }

        outputLn();

        if (order % 1000 == 0) {
          mConsole.debug("Dumped " + order + " terms");
        }
      }
    } finally {
      // Release the term enumeration on every exit path.
      terms.close();
    }
  }