private void getPrefixTerms(
      ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment
    // into one Terms instance, which is very expensive. Therefore I think it is better to iterate
    // over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
      Terms _terms = leaf.reader().terms(field);
      if (_terms == null) {
        continue;
      }

      TermsEnum termsEnum = _terms.iterator();
      TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
      if (TermsEnum.SeekStatus.END == seekStatus) {
        continue;
      }

      for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
        if (!StringHelper.startsWith(term, prefix.bytes())) {
          break;
        }

        terms.add(new Term(field, BytesRef.deepCopyOf(term)));
        if (terms.size() >= maxExpansions) {
          return;
        }
      }
    }
  }
Example #2
 /**
  * Returns {@link PostingsEnum} for the specified term. This will return null if either the field
  * or term does not exist.
  *
  * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs.
  *
  * @see TermsEnum#postings(PostingsEnum)
  */
 public final PostingsEnum postings(Term term, int flags) throws IOException {
   assert term.field() != null;
   assert term.bytes() != null;
   final Terms terms = terms(term.field());
   if (terms != null) {
     final TermsEnum termsEnum = terms.iterator();
     if (termsEnum.seekExact(term.bytes())) {
       return termsEnum.postings(null, flags);
     }
   }
   return null;
 }
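A minimal usage sketch for the postings API above (not part of the original source; the helper name, the LeafReader parameter, and the field/term values are illustrative assumptions):

 static void printTermFreqs(LeafReader reader, String field, String text) throws IOException {
   PostingsEnum postings = reader.postings(new Term(field, text), PostingsEnum.FREQS);
   if (postings == null) {
     return; // field or term does not exist
   }
   int doc;
   while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
     // NOTE: deleted documents may still be returned here
     System.out.println("doc=" + doc + " freq=" + postings.freq());
   }
 }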
Example #3
 /**
  * Returns {@link DocsAndPositionsEnum} for the specified term. This will return null if the field
  * or term does not exist or positions weren't indexed.
  *
  * @deprecated use {@link #postings(Term, int)} instead
  */
 @Deprecated
 public final DocsAndPositionsEnum termPositionsEnum(Term term) throws IOException {
   assert term.field() != null;
   assert term.bytes() != null;
   final Terms terms = terms(term.field());
   if (terms != null) {
     final TermsEnum termsEnum = terms.iterator();
     if (termsEnum.seekExact(term.bytes())) {
       return termsEnum.docsAndPositions(getLiveDocs(), null);
     }
   }
   return null;
 }
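A minimal sketch of the non-deprecated replacement using postings(Term, int) with position flags (not part of the original source; the helper name and parameters are illustrative, and the field is assumed to have been indexed with positions):

 static void printFirstDocPositions(LeafReader reader, String field, String text) throws IOException {
   PostingsEnum postings = reader.postings(new Term(field, text), PostingsEnum.POSITIONS);
   if (postings == null || postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
     return; // field or term does not exist, or no documents contain the term
   }
   // walk the positions of the term in the first matching document
   for (int i = 0; i < postings.freq(); i++) {
     System.out.println("position=" + postings.nextPosition());
   }
 }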
 void processQuery(Query query, ParseContext context) {
   ParseContext.Document doc = context.doc();
   FieldType pft = (FieldType) this.fieldType();
   QueryAnalyzer.Result result;
   try {
     result = QueryAnalyzer.analyze(query);
   } catch (QueryAnalyzer.UnsupportedQueryException e) {
     doc.add(
         new Field(
             pft.extractionResultField.name(),
             EXTRACTION_FAILED,
             extractionResultField.fieldType()));
     return;
   }
   for (Term term : result.terms) {
     BytesRefBuilder builder = new BytesRefBuilder();
     builder.append(new BytesRef(term.field()));
     builder.append(FIELD_VALUE_SEPARATOR);
     builder.append(term.bytes());
     doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
   }
   if (result.verified) {
     doc.add(
         new Field(
             extractionResultField.name(),
             EXTRACTION_COMPLETE,
             extractionResultField.fieldType()));
   } else {
     doc.add(
         new Field(
             extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
   }
 }
 /**
  * Split an index based on a given primary key term and a 'middle' term. If the middle term is
  * present, it's sent to dir2.
  */
 public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
   this(
       input,
       dir1,
       dir2,
       new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false));
 }
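A minimal usage sketch for this constructor (not from the original source; the directory variables, the "id" primary-key field, and the middle value are illustrative, and it assumes the splitter's standard split() entry point):

  static void splitById(Directory input, Directory dir1, Directory dir2) throws IOException {
    PKIndexSplitter splitter = new PKIndexSplitter(input, dir1, dir2, new Term("id", "5000"));
    // documents whose id term sorts before "5000" go to dir1; "5000" and above go to dir2
    splitter.split();
  }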
Example #6
 @Override
 protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
   if (maxEdits == 0 || prefixLength >= term.text().length()) { // can only match if it's exact
     return new SingleTermsEnum(terms.iterator(), term.bytes());
   }
   return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
 }
Example #7
  /**
   * Returns the total number of occurrences of <code>term</code> across all documents (the sum of
   * the freq() for each document that contains the term). This method returns 0 if the term or
   * field does not exist. This method does not take into account deleted documents that have not
   * yet been merged away.
   */
 @Override
 public final long totalTermFreq(Term term) throws IOException {
   final Terms terms = terms(term.field());
   if (terms == null) {
     return 0;
   }
   final TermsEnum termsEnum = terms.iterator();
   if (termsEnum.seekExact(term.bytes())) {
     return termsEnum.totalTermFreq();
   } else {
     return 0;
   }
 }
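A short sketch contrasting this method with docFreq (not from the original source; the reader variable and the field/term values are illustrative):

  static void printTermStats(IndexReader reader, String field, String text) throws IOException {
    Term t = new Term(field, text);
    long occurrences = reader.totalTermFreq(t); // total occurrences summed over all documents
    long documents = reader.docFreq(t); // number of documents containing the term
    System.out.println(t + ": totalTermFreq=" + occurrences + " docFreq=" + documents);
  }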
 public PKIndexSplitter(
     Directory input,
     Directory dir1,
     Directory dir2,
     Term midTerm,
     IndexWriterConfig config1,
     IndexWriterConfig config2) {
   this(
       input,
       dir1,
       dir2,
       new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false),
       config1,
       config2);
 }
Example #9
 protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
   if (fuzziness != null) {
     if (fieldType != null) {
       try {
         Query query =
             fieldType.fuzzyQuery(
                 term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions);
         if (query instanceof FuzzyQuery) {
           QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod);
         }
         return query;
       } catch (RuntimeException e) {
         return new TermQuery(term);
         // See long comment below about why we're lenient here.
       }
     }
     int edits = fuzziness.asDistance(term.text());
     FuzzyQuery query =
         new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions);
     QueryParsers.setRewriteMethod(query, fuzzyRewriteMethod);
     return query;
   }
   if (fieldType != null) {
      /*
       * It's a bit weird to default to lenient here, but it's the
       * backwards-compatible choice. It makes some sense when you think
       * about what we are doing here: at this point the user has forced
       * an analyzer and passed some string to the match query. We cut it
       * up using the analyzer and then tried to cram whatever we get into
       * the field. lenient=true here means that we try the terms in the
       * query on the off chance that they are actually valid terms.
       * lenient=false would mean that we blow up the query if they aren't
       * valid terms. "valid" in this context means "parses properly to
       * something of the type being queried." So "1" is a valid number,
       * etc.
       *
       * We use the text form here because we've received the term from
       * an analyzer that cut some string into text.
       */
     Query query = termQuery(fieldType, term.bytes(), true);
     if (query != null) {
       return query;
     }
   }
   return new TermQuery(term);
 }
 private Query newTermQuery(IndexReader reader, Term term) throws IOException {
   if (ignoreTF) {
     return new ConstantScoreQuery(new TermQuery(term));
   } else {
     // we build an artificial TermContext that will give an overall df and ttf
     // equal to 1
     TermContext context = new TermContext(reader.getContext());
     for (LeafReaderContext leafContext : reader.leaves()) {
       Terms terms = leafContext.reader().terms(term.field());
       if (terms != null) {
         TermsEnum termsEnum = terms.iterator();
         if (termsEnum.seekExact(term.bytes())) {
           int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
           context.register(termsEnum.termState(), leafContext.ord, freq, freq);
         }
       }
     }
     return new TermQuery(term, context);
   }
 }
Example #11
  public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
    DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
    int maxDoc = searcher.getIndexReader().maxDoc();
    int smallSetSize = smallSetSize(maxDoc);

    String field = term.field();
    BytesRef termVal = term.bytes();

    int maxCount = 0;
    int firstReader = -1;
    List<LeafReaderContext> leaves = reader.leaves();
     // use an array for slightly higher scanning cost, but fewer memory allocations
     PostingsEnum[] postList = new PostingsEnum[leaves.size()];
    for (LeafReaderContext ctx : leaves) {
      assert leaves.get(ctx.ord) == ctx;
      LeafReader r = ctx.reader();
      Fields f = r.fields();
      Terms t = f.terms(field);
      if (t == null) continue; // field is missing
      TermsEnum te = t.iterator();
      if (te.seekExact(termVal)) {
        maxCount += te.docFreq();
        postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
        if (firstReader < 0) firstReader = ctx.ord;
      }
    }

    if (maxCount == 0) {
      return DocSet.EMPTY;
    }

    if (maxCount <= smallSetSize) {
      return createSmallSet(leaves, postList, maxCount, firstReader);
    }

    return createBigSet(leaves, postList, maxDoc, firstReader);
  }
Example #12
  public void collectTermContext(
      IndexReader reader,
      List<LeafReaderContext> leaves,
      TermContext[] contextArray,
      Term[] queryTerms)
      throws IOException {
    TermsEnum termsEnum = null;
    for (LeafReaderContext context : leaves) {
      final Fields fields = context.reader().fields();
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        TermContext termContext = contextArray[i];
        final Terms terms = fields.terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
        }
        termsEnum = terms.iterator();
        assert termsEnum != null;

        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes())) {
          if (termContext == null) {
            contextArray[i] =
                new TermContext(
                    reader.getContext(),
                    termsEnum.termState(),
                    context.ord,
                    termsEnum.docFreq(),
                    termsEnum.totalTermFreq());
          } else {
            termContext.register(
                termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
        }
      }
    }
  }
Example #13
  /** @see #toString(Query,IndexSchema) */
  public static void toString(Query query, IndexSchema schema, Appendable out, int flags)
      throws IOException {
    boolean writeBoost = true;

    if (query instanceof TermQuery) {
      TermQuery q = (TermQuery) query;
      Term t = q.getTerm();
      FieldType ft = writeFieldName(t.field(), schema, out, flags);
      writeFieldVal(t.bytes(), ft, out, flags);
    } else if (query instanceof TermRangeQuery) {
      TermRangeQuery q = (TermRangeQuery) query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append(q.includesLower() ? '[' : '{');
      BytesRef lt = q.getLowerTerm();
      BytesRef ut = q.getUpperTerm();
      if (lt == null) {
        out.append('*');
      } else {
        writeFieldVal(lt, ft, out, flags);
      }

      out.append(" TO ");

      if (ut == null) {
        out.append('*');
      } else {
        writeFieldVal(ut, ft, out, flags);
      }

      out.append(q.includesUpper() ? ']' : '}');
    } else if (query instanceof NumericRangeQuery) {
      NumericRangeQuery q = (NumericRangeQuery) query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append(q.includesMin() ? '[' : '{');
      Number lt = q.getMin();
      Number ut = q.getMax();
      if (lt == null) {
        out.append('*');
      } else {
        out.append(lt.toString());
      }

      out.append(" TO ");

      if (ut == null) {
        out.append('*');
      } else {
        out.append(ut.toString());
      }

      out.append(q.includesMax() ? ']' : '}');
    } else if (query instanceof BooleanQuery) {
      BooleanQuery q = (BooleanQuery) query;
      boolean needParens = false;

      if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) {
        needParens = true;
      }
      if (needParens) {
        out.append('(');
      }
      boolean first = true;
      for (BooleanClause c : q.clauses()) {
        if (!first) {
          out.append(' ');
        } else {
          first = false;
        }

        if (c.isProhibited()) {
          out.append('-');
        } else if (c.isRequired()) {
          out.append('+');
        }
        Query subQuery = c.getQuery();
        boolean wrapQuery = false;

        // TODO: may need to put parens around other types
        // of queries too, depending on future syntax.
        if (subQuery instanceof BooleanQuery) {
          wrapQuery = true;
        }

        if (wrapQuery) {
          out.append('(');
        }

        toString(subQuery, schema, out, flags);

        if (wrapQuery) {
          out.append(')');
        }
      }

      if (needParens) {
        out.append(')');
      }
      if (q.getMinimumNumberShouldMatch() > 0) {
        out.append('~');
        out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
      }
      if (q.isCoordDisabled()) {
        out.append("/no_coord");
      }

    } else if (query instanceof PrefixQuery) {
      PrefixQuery q = (PrefixQuery) query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof WildcardQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof FuzzyQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof ConstantScoreQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof WrappedQuery) {
      WrappedQuery q = (WrappedQuery) query;
      out.append(q.getOptions());
      toString(q.getWrappedQuery(), schema, out, flags);
      writeBoost = false; // we don't use the boost on wrapped queries
    } else {
      out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')');
      writeBoost = false;
    }

    if (writeBoost && query.getBoost() != 1.0f) {
      out.append("^");
      out.append(Float.toString(query.getBoost()));
    }
  }
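A minimal usage sketch for the writer above (not from the original source; it assumes this helper sits alongside the method, e.g. in Solr's QueryParsing, that a populated IndexSchema is available, and that the flags value is illustrative):

   static String render(Query query, IndexSchema schema) throws IOException {
     StringBuilder out = new StringBuilder(); // StringBuilder implements Appendable
     toString(query, schema, out, 0);
     return out.toString();
   }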
  /**
   * Expert: highlights the top-N passages from multiple fields, for the provided int[] docids, to
   * custom Object as returned by the {@link PassageFormatter}. Use this API to render to something
   * other than String.
   *
   * @param fieldsIn field names to highlight. Must have a stored string value and also be indexed
   *     with offsets.
   * @param query query to highlight.
   * @param searcher searcher that was previously used to execute the query.
   * @param docidsIn the document IDs to highlight.
   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to form the
   *     highlighted snippets.
   * @return Map keyed on field name, containing the array of formatted snippets corresponding to
   *     the documents in <code>docidsIn</code>. If no highlights were found for a document, the
   *     first {@code maxPassages} sentences from the field will be returned.
   * @throws IOException if an I/O error occurred during processing
   * @throws IllegalArgumentException if <code>field</code> was indexed without {@link
   *     IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
   */
  protected Map<String, Object[]> highlightFieldsAsObjects(
      String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[])
      throws IOException {
    if (fieldsIn.length < 1) {
      throw new IllegalArgumentException("fieldsIn must not be empty");
    }
    if (fieldsIn.length != maxPassagesIn.length) {
      throw new IllegalArgumentException("invalid number of maxPassagesIn");
    }
    final IndexReader reader = searcher.getIndexReader();
    Query rewritten = rewrite(query);
    SortedSet<Term> queryTerms = new TreeSet<>();
    rewritten.extractTerms(queryTerms);

    IndexReaderContext readerContext = reader.getContext();
    List<AtomicReaderContext> leaves = readerContext.leaves();

    // Make our own copies because we sort in-place:
    int[] docids = new int[docidsIn.length];
    System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
    final String fields[] = new String[fieldsIn.length];
    System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
    final int maxPassages[] = new int[maxPassagesIn.length];
    System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);

    // sort for sequential io
    Arrays.sort(docids);
    new InPlaceMergeSorter() {

      @Override
      protected void swap(int i, int j) {
        String tmp = fields[i];
        fields[i] = fields[j];
        fields[j] = tmp;
        int tmp2 = maxPassages[i];
        maxPassages[i] = maxPassages[j];
        maxPassages[j] = tmp2;
      }

      @Override
      protected int compare(int i, int j) {
        return fields[i].compareTo(fields[j]);
      }
    }.sort(0, fields.length);

    // pull stored data:
    String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);

    Map<String, Object[]> highlights = new HashMap<>();
    for (int i = 0; i < fields.length; i++) {
      String field = fields[i];
      int numPassages = maxPassages[i];
      Term floor = new Term(field, "");
      Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
      SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
      // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

      // Strip off the redundant field:
      BytesRef terms[] = new BytesRef[fieldTerms.size()];
      int termUpto = 0;
      for (Term term : fieldTerms) {
        terms[termUpto++] = term.bytes();
      }
      Map<Integer, Object> fieldHighlights =
          highlightField(
              field,
              contents[i],
              getBreakIterator(field),
              terms,
              docids,
              leaves,
              numPassages,
              query);

      Object[] result = new Object[docids.length];
      for (int j = 0; j < docidsIn.length; j++) {
        result[j] = fieldHighlights.get(docidsIn[j]);
      }
      highlights.put(field, result);
    }
    return highlights;
  }
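For reference, a hedged sketch of how the public highlighting entry point that builds on this method is typically called (not from the original source; it assumes the enclosing class is Lucene's PostingsHighlighter, and the field name and passage count are illustrative):

   static Map<String, String[]> highlightBody(Query query, IndexSearcher searcher, int[] docids)
       throws IOException {
     PostingsHighlighter highlighter = new PostingsHighlighter();
     return highlighter.highlightFields(
         new String[] {"body"}, query, searcher, docids, new int[] {1});
   }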
  // NumericDocValues Updates
  // If otherFieldUpdates != null, we need to merge the updates into them
  private synchronized Map<String, NumericFieldUpdates> applyNumericDocValuesUpdates(
      Iterable<NumericUpdate> updates,
      ReadersAndUpdates rld,
      SegmentReader reader,
      Map<String, NumericFieldUpdates> otherFieldUpdates)
      throws IOException {
    Fields fields = reader.fields();
    if (fields == null) {
      // This reader has no postings
      return Collections.emptyMap();
    }

    // TODO: we can process the updates per DV field, from last to first so that
    // if multiple terms affect same document for the same field, we add an update
    // only once (that of the last term). To do that, we can keep a bitset which
    // marks which documents have already been updated. So e.g. if term T1
    // updates doc 7, and then we process term T2 and it updates doc 7 as well,
    // we don't apply the update since we know T1 came last and therefore wins
    // the update.
    // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
    // that these documents aren't even returned.

    String currentField = null;
    TermsEnum termsEnum = null;
    DocsEnum docs = null;
    final Map<String, NumericFieldUpdates> result =
        otherFieldUpdates == null ? new HashMap<String, NumericFieldUpdates>() : otherFieldUpdates;
    // System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
    for (NumericUpdate update : updates) {
      Term term = update.term;
      int limit = update.docIDUpto;

      // TODO: we traverse the terms in update order (not term order) so that we
      // apply the updates in the correct order, i.e. if two terms update the
      // same document, the last one that came in wins, irrespective of the
      // terms' lexical order.
      // we can apply the updates in terms order if we keep an updatesGen (and
      // increment it with every update) and attach it to each NumericUpdate. Note
      // that we cannot rely only on docIDUpto because an app may send two updates
      // which will get the same docIDUpto, yet will still need to respect the order
      // in which those updates arrived.

      if (!term.field().equals(currentField)) {
        // if we change the code to process updates in terms order, enable this assert
        //        assert currentField == null || currentField.compareTo(term.field()) < 0;
        currentField = term.field();
        Terms terms = fields.terms(currentField);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
        } else {
          termsEnum = null;
          continue; // no terms in that field
        }
      }

      if (termsEnum == null) {
        continue;
      }
      // System.out.println("  term=" + term);

      if (termsEnum.seekExact(term.bytes())) {
        // we don't need term frequencies for this
        DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);

        // System.out.println("BDS: got docsEnum=" + docsEnum);

        NumericFieldUpdates fieldUpdates = result.get(update.field);
        if (fieldUpdates == null) {
          fieldUpdates = new NumericFieldUpdates.PackedNumericFieldUpdates(reader.maxDoc());
          result.put(update.field, fieldUpdates);
        }
        int doc;
        while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          // System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term +
          // " doc=" + docID);
          if (doc >= limit) {
            break; // no more docs that can be updated for this term
          }
          fieldUpdates.add(doc, update.value);
        }
      }
    }
    return result;
  }
  // Delete by Term
  private synchronized long applyTermDeletes(
      Iterable<Term> termsIter, ReadersAndUpdates rld, SegmentReader reader) throws IOException {
    long delCount = 0;
    Fields fields = reader.fields();
    if (fields == null) {
      // This reader has no postings
      return 0;
    }

    TermsEnum termsEnum = null;

    String currentField = null;
    DocsEnum docs = null;

    assert checkDeleteTerm(null);

    boolean any = false;

    // System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
    for (Term term : termsIter) {
      // Since we visit terms sorted, we gain performance
      // by re-using the same TermsEnum and seeking only
      // forwards
      if (!term.field().equals(currentField)) {
        assert currentField == null || currentField.compareTo(term.field()) < 0;
        currentField = term.field();
        Terms terms = fields.terms(currentField);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
        } else {
          termsEnum = null;
        }
      }

      if (termsEnum == null) {
        continue;
      }
      assert checkDeleteTerm(term);

      // System.out.println("  term=" + term);

      if (termsEnum.seekExact(term.bytes())) {
        // we don't need term frequencies for this
        DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
        // System.out.println("BDS: got docsEnum=" + docsEnum);

        if (docsEnum != null) {
          while (true) {
            final int docID = docsEnum.nextDoc();
            // System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" +
            // docID);
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            if (!any) {
              rld.initWritableLiveDocs();
              any = true;
            }
            // NOTE: there is no limit check on the docID
            // when deleting by Term (unlike by Query)
            // because on flush we apply all Term deletes to
            // each segment.  So all Term deleting here is
            // against prior segments:
            if (rld.delete(docID)) {
              delCount++;
            }
          }
        }
      }
    }

    return delCount;
  }
    @Override
    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext)
        throws IOException {
      if (termArrays.size() == 0) { // optimize zero-term case
        return null;
      }
      final IndexReader reader = context.reader;
      final Bits delDocs = reader.getDeletedDocs();

      PhraseQuery.PostingsAndFreq[] postingsFreqs =
          new PhraseQuery.PostingsAndFreq[termArrays.size()];

      for (int pos = 0; pos < postingsFreqs.length; pos++) {
        Term[] terms = termArrays.get(pos);

        final DocsAndPositionsEnum postingsEnum;
        int docFreq;

        if (terms.length > 1) {
          postingsEnum = new UnionDocsAndPositionsEnum(reader, terms);

          // coarse -- this overcounts since a given doc can
          // have more than one of the terms:
          docFreq = 0;
          for (int termIdx = 0; termIdx < terms.length; termIdx++) {
            docFreq += reader.docFreq(terms[termIdx]);
          }
        } else {
          final Term term = terms[0];
          postingsEnum = reader.termPositionsEnum(delDocs, term.field(), term.bytes());

          if (postingsEnum == null) {
            if (reader.termDocsEnum(delDocs, term.field(), term.bytes()) != null) {
              // term does exist, but has no positions
              throw new IllegalStateException(
                  "field \""
                      + term.field()
                      + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term="
                      + term.text()
                      + ")");
            } else {
              // term does not exist
              return null;
            }
          }

          docFreq = reader.docFreq(term.field(), term.bytes());
        }

        postingsFreqs[pos] =
            new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
      }

      // sort by increasing docFreq order
      if (slop == 0) {
        ArrayUtil.quickSort(postingsFreqs);
      }

      if (slop == 0) {
        ExactPhraseScorer s =
            new ExactPhraseScorer(this, postingsFreqs, similarity, reader.norms(field));
        if (s.noDocs) {
          return null;
        } else {
          return s;
        }
      } else {
        return new SloppyPhraseScorer(this, postingsFreqs, similarity, slop, reader.norms(field));
      }
    }
Example #18
 private Object dirtyLock(Term uid) {
   return dirtyLock(uid.bytes());
 }