예제 #1
0
 private final SnippetVector checkValue(
     SnippetVector currentVector,
     Iterator<SnippetVector> vectorIterator,
     int startOffset,
     Fragment fragment) {
   if (currentVector == null) return null;
   StringBuilder result = new StringBuilder();
   String originalText = fragment.getOriginalText();
   int originalTextLength = originalText.length();
   int endOffset = startOffset + originalTextLength;
   int pos = 0;
   while (currentVector != null) {
     int end = currentVector.end - fragment.vectorOffset;
     if (end > endOffset) break;
     int start = currentVector.start - fragment.vectorOffset;
     if (start >= startOffset) {
       appendSubString(originalText, pos, start - startOffset, result);
       if (tags != null) result.append(tags[0]);
       appendSubString(originalText, start - startOffset, end - startOffset, result);
       if (tags != null) result.append(tags[1]);
       pos = end - startOffset;
     }
     currentVector = vectorIterator.hasNext() ? vectorIterator.next() : null;
   }
   if (result.length() == 0) return currentVector;
   if (pos < originalTextLength) appendSubString(originalText, pos, originalTextLength, result);
   fragment.setHighlightedText(result.toString());
   return currentVector;
 }
예제 #2
0
  public boolean getSnippets(
      final int docId,
      final ReaderInterface reader,
      final List<FieldValueItem> values,
      final List<FieldValueItem> snippets,
      final Timer parentTimer)
      throws IOException, ParseException, SyntaxError, SearchLibException {

    if (values == null) return false;

    final Timer timer = new Timer(parentTimer, "SnippetField " + this.name);
    final long halfTimeExpiration =
        this.timeLimit == 0 ? 0 : timer.getStartOffset(this.timeLimit / 2);
    final long expiration = this.timeLimit == 0 ? 0 : timer.getStartOffset(this.timeLimit);

    FragmenterAbstract fragmenter = fragmenterTemplate.newInstance();
    SnippetVector currentVector = null;

    Timer t = new Timer(timer, "extractTermVectorIterator");

    Iterator<SnippetVector> vectorIterator =
        SnippetVectors.extractTermVectorIterator(
            docId, reader, snippetQueries, name, values, indexAnalyzer, t, halfTimeExpiration);
    if (vectorIterator != null)
      currentVector = vectorIterator.hasNext() ? vectorIterator.next() : null;

    t.end(null);

    t = new Timer(timer, "getFraments");

    int startOffset = 0;
    FragmentList fragments = new FragmentList();
    int vectorOffset = 0;
    for (FieldValueItem valueItem : values) {
      String value = valueItem.getValue();
      if (value != null) {
        // VectorOffset++ depends of EndOffset bug #patch Lucene 579 and
        // 1458
        fragmenter.getFragments(value, fragments, vectorOffset++);
      }
    }

    t.end(null);

    if (fragments.size() == 0) {
      timer.end(null);
      return false;
    }

    t = new Timer(timer, "checkValue");

    Fragment fragment = fragments.first();
    while (fragment != null) {
      currentVector = checkValue(currentVector, vectorIterator, startOffset, fragment);
      startOffset += fragment.getOriginalText().length();
      fragment = fragment.next();
    }

    t.end(null);

    Timer sbTimer = new Timer(timer, "snippetBuilder");

    boolean result = false;
    int snippetCounter = maxSnippetNumber;
    int scoredFragment = 0;
    while (snippetCounter-- != 0) {
      Fragment bestScoreFragment = null;
      fragment = Fragment.findNextHighlightedFragment(fragments.first());
      List<Fragment> scoreFragments = new ArrayList<Fragment>(0);
      double maxSearchScore = 0;

      t = new Timer(sbTimer, "fragmentScore");
      boolean expired = false;

      while (fragment != null) {
        double sc = fragment.searchScore(name, queryAnalyzer, query);
        if (sc > maxSearchScore) maxSearchScore = sc;
        scoreFragments.add(fragment);
        fragment = Fragment.findNextHighlightedFragment(fragment.next());
        scoredFragment++;
        if (expiration != 0) {
          if (System.currentTimeMillis() > expiration) {
            expired = true;
            break;
          }
        }
      }

      t.end("fragmentScore " + scoredFragment + " " + expired);

      for (Fragment frag : scoreFragments)
        bestScoreFragment =
            Fragment.bestScore(bestScoreFragment, frag, maxSearchScore, maxSnippetSize);

      if (bestScoreFragment != null) {
        SnippetBuilder snippetBuilder =
            new SnippetBuilder(maxSnippetSize, unescapedSeparator, tags, bestScoreFragment);
        if (snippetBuilder.length() > 0)
          snippets.add(new FieldValueItem(FieldValueOriginEnum.SNIPPET, snippetBuilder.toString()));
        fragments.remove(snippetBuilder.getFragments());
        result = true;
        continue;
      }

      if (fragments.first() == null) break;
      SnippetBuilder snippetBuilder =
          new SnippetBuilder(maxSnippetSize, unescapedSeparator, tags, fragments.first());
      if (snippetBuilder.length() > 0) {
        snippets.add(new FieldValueItem(FieldValueOriginEnum.SNIPPET, snippetBuilder.toString()));
        fragments.remove(snippetBuilder.getFragments());
      }
    }

    sbTimer.end(null);

    timer.end(null);

    return result;
  }