Java TermPositions示例，org.apache.lucene.index.TermPositions Java示例

示例#1

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

  /*
   * Advance every term position enumerator, and then the root enumerator, to
   * its next document. If any of them is exhausted there can be no further
   * match, so "more" is cleared and the method returns immediately.
   *
   * Used both for initialisation and for moving off an existing match.
   *
   * Side effects: may clear "more", updates "rootDoc", and reports each term
   * document to adjustMinMax (the first one reported is flagged as such).
   */
  private void doNextOnAll() throws IOException {
    // Terms first; entries without an enumerator are skipped
    boolean firstTerm = true;
    for (StructuredFieldPosition position : positions) {
      CachingTermPositions termPositions = position.getCachingTermPositions();
      if (termPositions == null) {
        continue;
      }
      if (!termPositions.next()) {
        more = false;
        return;
      }
      adjustMinMax(termPositions.doc(), firstTerm);
      firstTerm = false;
    }

    // The root term must always exist, as a path may well have multiple entries.
    // An index entry without a root terminal is broken.
    if (!root.next()) {
      more = false;
      return;
    }
    rootDoc = root.doc();

    // Root is already at or beyond the furthest term document: nothing to catch up
    if (root.doc() >= max) {
      return;
    }
    if (root.skipTo(max)) {
      rootDoc = root.doc();
    } else {
      more = false;
    }
  }

示例#2

0

显示文件

文件： PayloadBoostTermQuery.java 项目： chtrinh/sunspot

      /**
       * Folds the payload at the current term position into the running payload score.
       *
       * <p>When a payload is available it is loaded into {@code payload}, scored through
       * {@code similarity.scorePayload(...)}, combined with the running score by
       * {@code function.currentScore(...)}, and {@code payloadsSeen} is incremented. When no
       * payload is available nothing is changed.
       *
       * @param similarity the similarity used to score the payload bytes
       * @throws IOException if the payload cannot be read
       */
      protected void processPayload(Similarity similarity) throws IOException {
        if (!positions.isPayloadAvailable()) {
          // zero out the payload?
          return;
        }

        payload = positions.getPayload(payload, 0);
        payloadScore =
            function.currentScore(
                doc,
                term.field(),
                spans.start(),
                spans.end(),
                payloadsSeen,
                payloadScore,
                similarity.scorePayload(
                    doc,
                    term.field(),
                    spans.start(),
                    spans.end(),
                    payload,
                    0,
                    positions.getPayloadLength()));
        payloadsSeen++;
      }

示例#3

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

  /*
   * Try to bring every term position enumerator that is behind the current
   * max up to max, then do the same for the root enumerator.
   *
   * This will often leave (min != max); that is fine, the caller simply tries
   * again. max may also grow while the terms are processed, in which case the
   * later terms are skipped to the larger value.
   *
   * Clears "more" and returns as soon as any enumerator is exhausted.
   */
  private void skipToMax() throws IOException {
    // min restarts from max whenever there is at least one position entry
    if (positions.length > 0) {
      min = max;
    }

    for (StructuredFieldPosition position : positions) {
      CachingTermPositions termPositions = position.getCachingTermPositions();
      // Nothing to do for missing enumerators or those already at/after max
      if (termPositions == null || termPositions.doc() >= max) {
        continue;
      }
      if (!termPositions.skipTo(max)) {
        more = false;
        return;
      }
      adjustMinMax(termPositions.doc(), false);
    }

    // Finally bring the root up to max as well
    if (root.doc() < max) {
      if (root.skipTo(max)) {
        rootDoc = root.doc();
      } else {
        more = false;
      }
    }
  }

示例#4

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

  /*
   * (non-Javadoc)
   *
   * Moves to the next matching document. Returns true when a match was found
   * and false once any of the underlying enumerators is exhausted.
   *
   * @see org.apache.lucene.search.Scorer#next()
   */
  public boolean next() throws IOException {
    if (allContainers()) {
      // No filtering: walk containers and root in lock step.
      // They must stay in sync or the index is broken.
      while (more) {
        boolean advanced = containers.next() && root.next();
        if (!advanced) {
          doClose();
          more = false;
          return false;
        }
        if (check(0, root.nextPosition())) {
          return true;
        }
      }
    }

    // One of the search terms has no more documents
    if (!more) {
      return false;
    }

    // max == 0 means nothing has been read yet: advance everything once and
    // test the first document before entering the normal find sequence
    if (max == 0) {
      doNextOnAll();
      if (found()) {
        return true;
      }
    }

    return findNext();
  }

示例#5

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

 /**
  * Closes the root enumerator, the containers enumerator and every cached term position
  * enumerator held in {@code positions}.
  *
  * <p>Every enumerator is closed even if an earlier close fails with an
  * {@link IOException}; previously the first failure left the remaining enumerators open.
  * The first {@link IOException} encountered is rethrown once all closes have been attempted.
  *
  * @throws IOException the first I/O failure raised while closing, if any
  */
 private void doClose() throws IOException {
   IOException firstFailure = null;

   if (root != null) {
     try {
       root.close();
     } catch (IOException e) {
       firstFailure = e;
     }
   }

   if (containers != null) {
     try {
       containers.close();
     } catch (IOException e) {
       if (firstFailure == null) {
         firstFailure = e;
       }
     }
   }

   if (positions != null) {
     for (StructuredFieldPosition position : positions) {
       CachingTermPositions ctp = position.getCachingTermPositions();
       if (ctp == null) {
         continue;
       }
       try {
         ctp.close();
       } catch (IOException e) {
         // Keep going so the remaining enumerators are still released
         if (firstFailure == null) {
           firstFailure = e;
         }
       }
     }
   }

   if (firstFailure != null) {
     throw firstFailure;
   }
 }

示例#6

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

  /**
   * Tests whether the current state is a match.
   *
   * <p>With no positions there is no predicate to test and the result is always true.
   * Otherwise there must be more documents, min, max and rootDoc must all refer to the same
   * document, and at least one position range of the root term must pass {@code check}.
   *
   * @return true if the current document matches
   * @throws IOException if reading the root term positions fails
   */
  private boolean found() throws IOException {
    // No predicate test if there are no positions
    if (positions.length == 0) {
      return true;
    }

    // Exhausted, or the terms and the root are not all on the same document
    if (!more || min != max || rootDoc != max) {
      return false;
    }

    // Duplicate entries: support should be improved but it is not used at the moment.
    // This should work akin to the leaf scorer, which would compact the index.
    // The match must be in a known term range: each range starts one past the end of
    // the previous one, and the first range starts at zero.
    int previousEnd = -1;
    for (int remaining = root.freq(); remaining > 0; remaining--) {
      int start = previousEnd + 1;
      int end = root.nextPosition();
      if (check(start, end)) {
        return true;
      }
      previousEnd = end;
    }

    // We had checks to do and they all failed.
    return false;
  }

示例#7

0

显示文件

文件： IndexSorterArquivoWeb.java 项目： arquivo/pwa-technologies

    /**
     * Positions the wrapped enumerator on the given term and buffers its postings, remapped
     * to the new document numbering and sorted by the new document id.
     *
     * <p>For every document of the term, the remapped id and the buffer offset are recorded
     * in {@code postingMaps}, and the term frequency followed by delta-encoded positions is
     * written to {@code out}. Afterwards {@code in} is reopened on the temporary file.
     *
     * @param terms the term enumeration positioned on the term to read
     * @throws IOException if reading the postings or writing the buffer fails
     */
    public void seek(TermEnum terms) throws IOException {
      original.seek(terms);

      docFreq = terms.docFreq();
      pointer = -1;

      // Grow the posting map array when this term has more documents than it can hold
      final int capacity = postingMaps.length;
      if (capacity < docFreq) {
        PostingMap[] grown = new PostingMap[docFreq];
        System.arraycopy(postingMaps, 0, grown, 0, capacity);
        for (int slot = capacity; slot < docFreq; slot++) {
          grown[slot] = new PostingMap();
        }
        postingMaps = grown;
      }

      out.reset();

      int written = 0;
      for (; original.next(); written++) {
        PostingMap entry = postingMaps[written];
        entry.newDoc = oldToNew[original.doc()]; // document id in the new numbering
        entry.offset = out.getFilePointer(); // where this document's data starts in the buffer

        // Buffer the term frequency followed by the delta-encoded positions
        final int tf = original.freq();
        out.writeVInt(tf);
        int last = 0;
        for (int k = 0; k < tf; k++) {
          int position = original.nextPosition();
          out.writeVInt(position - last);
          last = position;
        }
      }
      out.flush();
      docFreq = written; // fewer than terms.docFreq() when documents were deleted

      // Re-sort the filled entries by their remapped document ids
      Arrays.sort(postingMaps, 0, docFreq);
      // TODO MC - HeapSorter.sort(postingMaps, docFreq) is an alternative, due to the lack of space

      // NOTE: this might be substantially faster if RAMInputStream were public
      // and supported a reset() operation.
      in = tempDir.openInput(TEMP_FILE);
    }

示例#8

0

显示文件

文件： PhraseFilterMatchList.java 项目： Cue/greplin-lucene-utils

  /**
   * Narrows this match list to the documents that also appear in the given enumerator and
   * whose stored positions intersect with it at the given offset. The list is compacted in
   * place as an optimization.
   *
   * @param termPositions the term positions enumerator
   * @param offset the offset of the given term in the phrase
   * @throws java.io.IOException if IO problems occur within Lucene
   */
  void intersect(final TermPositions termPositions, final int offset) throws IOException {
    int kept = 0;
    int cursorDoc = -1;
    for (int idx = 0; idx < this.count; idx++) {
      final int wanted = this.docIds[idx];

      // Advance the enumerator until it reaches or passes the wanted document
      while (cursorDoc < wanted) {
        if (!termPositions.next()) {
          // Enumerator exhausted: nothing further can match
          this.count = kept;
          return;
        }
        cursorDoc = termPositions.doc();
      }

      if (cursorDoc != wanted) {
        continue;
      }

      final PhraseFilterIntList candidate = this.positions[idx];
      if (candidate.intersect(termPositions, offset)) {
        this.docIds[kept] = wanted;
        this.positions[kept] = candidate;
        kept++;
      }
    }
    this.count = kept;
  }

示例#9

0

显示文件

文件： MultiValueFacetDataCache.java 项目： jbijoux/bobo

    /**
     * Reads the per-document size payload stored on {@code _sizeTerm} and allocates storage
     * for each document, capped at {@code _maxItems}.
     *
     * <p>Does nothing when the reader returns no enumerator for the term. The enumerator is
     * always closed before returning.
     *
     * @throws Exception if reading the term positions or payloads fails
     */
    @Override
    public void load() throws Exception {
      TermPositions tp = null;
      byte[] payloadBuffer = new byte[4]; // four bytes for an int
      try {
        tp = _reader.termPositions(_sizeTerm);

        if (tp == null) return;

        while (tp.next()) {
          if (tp.freq() > 0) {
            // The payload belongs to a position, so one must be consumed first
            tp.nextPosition();
            // getPayload may hand back a newly allocated array when the supplied buffer is
            // too small, so the returned array must be used rather than the argument
            payloadBuffer = tp.getPayload(payloadBuffer, 0);
            int len = bytesToInt(payloadBuffer);
            allocate(tp.doc(), Math.min(len, _maxItems), true);
          }
        }
      } finally {
        if (tp != null) tp.close();
      }
    }

示例#10

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

  /*
   * (non-Javadoc)
   *
   * Skips to the first match at or beyond the target document. Returns true
   * when a match was found and false when the enumerators are exhausted.
   *
   * In the unfiltered (all containers) case the results of the underlying
   * skipTo calls are checked: if either enumerator has no entry at or beyond
   * the target, the scorer is closed and marked exhausted, as next() does,
   * rather than reading doc()/nextPosition() from an exhausted enumerator.
   *
   * @see org.apache.lucene.search.Scorer#skipTo(int)
   */
  public boolean skipTo(int target) throws IOException {
    if (allContainers()) {
      // Already exhausted: the enumerators may have been closed, do not touch them
      if (!more) {
        return false;
      }

      // containers and roots must be in sync or the index is broken
      if (!containers.skipTo(target) || !root.skipTo(containers.doc())) {
        doClose();
        more = false;
        return false;
      }
      if (check(0, root.nextPosition())) {
        return true;
      }

      while (more) {
        if (containers.next() && root.next()) {
          if (check(0, root.nextPosition())) {
            return true;
          }
        } else {
          doClose();
          more = false;
          return false;
        }
      }
    }

    max = target;
    return findNext();
  }

示例#11

0

显示文件

文件： LuceneStore.java 项目： nickl-/zoie

    /**
     * Builds the UID to document number map for the given reader and records the smallest
     * and largest UID seen.
     *
     * <p>For an empty reader ({@code maxDoc() == 0}) both bounds are set to
     * {@code Long.MIN_VALUE} and no postings are read. Otherwise every document of
     * {@code ZoieSegmentReader.UID_TERM} is visited and its UID is decoded from the payload
     * at the first position.
     *
     * @param reader the index reader to scan
     * @throws IOException if reading the term positions or payloads fails
     */
    ReaderData(IndexReader reader) throws IOException {
      this.reader = reader;
      long minUID = Long.MAX_VALUE;
      long maxUID = Long.MIN_VALUE;

      uidMap = new Long2IntRBTreeMap();
      uidMap.defaultReturnValue(-1); // lookups of unknown UIDs yield -1
      int maxDoc = reader.maxDoc();
      if (maxDoc == 0) {
        _minUID = Long.MIN_VALUE;
        _maxUID = Long.MIN_VALUE;
        return;
      }
      TermPositions tp = null;
      byte[] payloadBuffer = new byte[8]; // eight bytes for a long
      try {
        tp = reader.termPositions(ZoieSegmentReader.UID_TERM);
        while (tp.next()) {
          int doc = tp.doc();
          assert doc < maxDoc;

          // The payload belongs to a position, so one must be consumed first
          tp.nextPosition();
          // getPayload may hand back a newly allocated array when the supplied buffer is
          // too small, so the returned array must be used rather than the argument
          payloadBuffer = tp.getPayload(payloadBuffer, 0);
          long uid = ZoieSegmentReader.bytesToLong(payloadBuffer);
          if (uid < minUID) minUID = uid;
          if (uid > maxUID) maxUID = uid;
          uidMap.put(uid, doc);
        }
      } finally {
        if (tp != null) {
          tp.close();
        }
      }

      _minUID = minUID;
      _maxUID = maxUID;
    }

示例#12

0

显示文件

文件： LuceneViewer.java 项目： nullin/zimbra-sources

  /**
   * Writes every wanted term of the index, in {@code Term.compareTo()} order, followed by
   * its {@code document, frequency, <position>*} tuples.
   *
   * <p>Both the term enumeration and each per-term positions enumerator are closed in
   * {@code finally} blocks, so they are released even when reading or output fails part way
   * through the dump.
   *
   * @throws IOException if reading the index fails
   */
  private void dumpTerms() throws IOException {
    outputBanner("Terms (in Term.compareTo() order)");

    TermEnum terms = mIndexReader.terms();
    try {
      int order = 0;

      while (terms.next()) {
        order++;
        Term term = terms.term();
        String field = term.field();
        String text = term.text();

        if (!wantThisTerm(field, text)) {
          continue;
        }

        outputLn(order + " " + field + ": " + text);

        /*
         * For each term, print the
         * <document, frequency, <position>* > tuples for the term.
         *
         * document:  document in which the Term appears
         * frequency: number of times the Term appears in the document
         * position:  position of each appearance in the document
         *
         * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED));
         *      then the tuple for Term("field", "two") in this document would be like:
         *      88, 2, <2, 4>
         *      where
         *      88 is the document number
         *      2  is the number of times this term appears in the document
         *      <2, 4> are the positions of each appearance in the document
         */
        // by TermPositions
        outputLn("    document, frequency, <position>*");

        // keep track of docs that appear in all terms that are filtered in.
        Set<Integer> docNums = null;
        if (hasFilters()) {
          docNums = new HashSet<Integer>();
        }

        TermPositions termPos = mIndexReader.termPositions(term);
        try {
          while (termPos.next()) {
            int docNum = termPos.doc();
            int freq = termPos.freq();

            if (docNums != null) {
              docNums.add(docNum);
            }

            output("    " + docNum + ", " + freq + ", <");

            for (int f = 0; f < freq; f++) {
              int positionInDoc = termPos.nextPosition();
              // Positions are separated by a single space, with none before the first
              if (f > 0) {
                output(" ");
              }
              output(String.valueOf(positionInDoc));
            }
            outputLn(">");
          }
        } finally {
          termPos.close();
        }

        if (docNums != null) {
          computeDocsIntersection(docNums);
        }

        outputLn();

        // Progress message every 1000 terms (only reached for terms that were dumped)
        if (order % 1000 == 0) {
          mConsole.debug("Dumped " + order + " terms");
        }
      }
    } finally {
      terms.close();
    }
  }

示例#13

0

显示文件

文件： ContainerScorer.java 项目： bulias/community-edition

 /*
  * (non-Javadoc)
  *
  * Returns the current document: the containers enumerator's document when
  * there is no filtering, otherwise the current max.
  *
  * @see org.apache.lucene.search.Scorer#doc()
  */
 public int doc() {
   return allContainers() ? containers.doc() : max;
 }

示例#14

0

显示文件

文件： IndexSorterArquivoWeb.java 项目： arquivo/pwa-technologies

 /**
  * Closes the wrapped {@code original} enumerator.
  *
  * <p>NOTE(review): only {@code original} is closed here; if this class also owns other
  * resources (for example an input opened during seek), confirm they are released elsewhere.
  *
  * @throws IOException if closing the wrapped enumerator fails
  */
 public void close() throws IOException {
   original.close();
 }