Example #1
0
    @Override
    public SeekStatus seekCeil(BytesRef target) throws IOException {

      // already here
      if (term != null && term.equals(target)) {
        return SeekStatus.FOUND;
      }

      int startIdx = Arrays.binarySearch(indexedTermsArray, target);

      if (startIdx >= 0) {
        // we hit the term exactly... lucky us!
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
        assert seekStatus == TermsEnum.SeekStatus.FOUND;
        ord = startIdx << indexIntervalBits;
        setTerm();
        assert term != null;
        return SeekStatus.FOUND;
      }

      // we didn't hit the term exactly
      startIdx = -startIdx - 1;

      if (startIdx == 0) {
        // our target occurs *before* the first term
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
        assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
        ord = 0;
        setTerm();
        assert term != null;
        return SeekStatus.NOT_FOUND;
      }

      // back up to the start of the block
      startIdx--;

      if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
        // we are already in the right block and the current term is before the term we want,
        // so we don't need to seek.
      } else {
        // seek to the right block
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
        assert seekStatus == TermsEnum.SeekStatus.FOUND;
        ord = startIdx << indexIntervalBits;
        setTerm();
        assert term != null; // should be non-null since it's in the index
      }

      while (term != null && term.compareTo(target) < 0) {
        next();
      }

      if (term == null) {
        return SeekStatus.END;
      } else if (term.compareTo(target) == 0) {
        return SeekStatus.FOUND;
      } else {
        return SeekStatus.NOT_FOUND;
      }
    }
 /**
  * Compares two terms, returning a negative integer if this term belongs before the argument, zero
  * if this term is equal to the argument, and a positive integer if this term belongs after the
  * argument.
  *
  * <p>The ordering of terms is first by field, then by text.
  */
 @Override
 public final int compareTo(Term other) {
   if (field.equals(other.field)) {
     return bytes.compareTo(other.bytes);
   } else {
     return field.compareTo(other.field);
   }
 }
 @Override
 public int compareSameType(Object other) {
   MutableValueStr b = (MutableValueStr) other;
   int c = value.compareTo(b.value);
   if (c != 0) return c;
   if (exists == b.exists) return 0;
   return exists ? 1 : -1;
 }
Example #4
0
  public void testTermMinMaxRandom() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int numDocs = atLeast(100);
    BytesRef minTerm = null;
    BytesRef maxTerm = null;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      Field field = new TextField("field", "", Field.Store.NO);
      doc.add(field);
      // System.out.println("  doc " + i);
      CannedBinaryTokenStream.BinaryToken[] tokens =
          new CannedBinaryTokenStream.BinaryToken[atLeast(10)];
      for (int j = 0; j < tokens.length; j++) {
        byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 20)];
        random().nextBytes(bytes);
        BytesRef tokenBytes = new BytesRef(bytes);
        // System.out.println("    token " + tokenBytes);
        if (minTerm == null || tokenBytes.compareTo(minTerm) < 0) {
          // System.out.println("      ** new min");
          minTerm = tokenBytes;
        }
        if (maxTerm == null || tokenBytes.compareTo(maxTerm) > 0) {
          // System.out.println("      ** new max");
          maxTerm = tokenBytes;
        }
        tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes);
      }
      field.setTokenStream(new CannedBinaryTokenStream(tokens));
      w.addDocument(doc);
    }

    IndexReader r = w.getReader();
    Terms terms = MultiFields.getTerms(r, "field");
    assertEquals(minTerm, terms.getMin());
    assertEquals(maxTerm, terms.getMax());

    r.close();
    w.close();
    dir.close();
  }
  @Override
  protected final BytesRef nextSeekTerm(BytesRef term) {
    while (hasNext()) {
      if (currentRange == null) {
        nextRange();
      }
      // if the new upper bound is before the term parameter, the sub-range is never a hit
      if (term != null && term.compareTo(currentCell) > 0) {
        nextRange();
        if (!rangeBounds.isEmpty()) {
          continue;
        }
      }
      // never seek backwards, so use current term if lower bound is smaller
      return (term != null && term.compareTo(currentCell) > 0) ? term : currentCell;
    }

    // no more sub-range enums available
    assert rangeBounds.isEmpty();
    return null;
  }
  public int compareBottom(int doc) {
    assert bottomSlot != -1;
    int order = termsIndex.getOrd(doc);
    int ord = (order == 0) ? NULL_ORD : order;
    final int cmp = bottomOrd - ord;
    if (cmp != 0) {
      return cmp;
    }

    // take care of the case where both vals are null
    if (bottomOrd == NULL_ORD) return 0;
    return bottomValue.compareTo(termsIndex.lookup(order, tempBR));
  }
 @Override
 public int compareToNoLeaf(Cell fromCell) {
   final NRCell nrCell = (NRCell) fromCell;
   assert term != nrCell.term;
   // trick to re-use bytesref; provided that we re-instate it
   int myLastLen = term.length;
   int otherLastLen = nrCell.term.length;
   term.length = termLenByLevel[getLevel()];
   nrCell.term.length = termLenByLevel[nrCell.getLevel()];
   int answer = term.compareTo(nrCell.term);
   term.length = myLastLen;
   nrCell.term.length = otherLastLen;
   return answer;
 }
 private int countTerms(MultiTermQuery q) throws Exception {
   final Terms terms = MultiFields.getTerms(reader, q.getField());
   if (terms == null) return 0;
   final TermsEnum termEnum = q.getTermsEnum(terms);
   assertNotNull(termEnum);
   int count = 0;
   BytesRef cur, last = null;
   while ((cur = termEnum.next()) != null) {
     count++;
     if (last != null) {
       assertTrue(last.compareTo(cur) < 0);
     }
     last = BytesRef.deepCopyOf(cur);
   }
   // LUCENE-3314: the results after next() already returned null are undefined,
   // assertNull(termEnum.next());
   return count;
 }
 @Override
 public void seekExact(BytesRef target, TermState otherState) {
   // if (DEBUG) {
   //   System.out.println("BTTR.seekExact termState seg=" + segment + " target=" +
   // target.utf8ToString() + " " + target + " state=" + otherState);
   // }
   assert clearEOF();
   if (target.compareTo(term.get()) != 0 || !termExists) {
     assert otherState != null && otherState instanceof BlockTermState;
     currentFrame = staticFrame;
     currentFrame.state.copyFrom(otherState);
     term.copyBytes(target);
     currentFrame.metaDataUpto = currentFrame.getTermBlockOrd();
     assert currentFrame.metaDataUpto > 0;
     validIndexPrefix = 0;
   } else {
     // if (DEBUG) {
     //   System.out.println("  skip seek: already on target state=" + currentFrame.state);
     // }
   }
 }
  @Override
  public int compare(int slot1, int slot2) {
    if (readerGen[slot1] == readerGen[slot2]) {
      int cmp = ords[slot1] - ords[slot2];
      if (cmp != 0) {
        return cmp;
      }
    }

    final BytesRef val1 = values[slot1];
    final BytesRef val2 = values[slot2];

    if (val1 == null) {
      if (val2 == null) {
        return 0;
      }
      return 1;
    } else if (val2 == null) {
      return -1;
    }
    return val1.compareTo(val2);
  }
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false)) return;

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0) return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
      limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort =
        !TermsParams.TERMS_SORT_INDEX.equals(
            params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
      freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
      NamedList<Integer> fieldTerms = new NamedList<>();
      termsResult.add(field, fieldTerms);

      Terms terms = lfields == null ? null : lfields.terms(field);
      if (terms == null) {
        // no terms for this field
        continue;
      }

      FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
      if (ft == null) ft = new StrField();

      // prefix must currently be text
      BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

      BytesRef upperBytes = null;
      if (upperStr != null) {
        upperBytes = new BytesRef();
        ft.readableToIndexed(upperStr, upperBytes);
      }

      BytesRef lowerBytes;
      if (lowerStr == null) {
        // If no lower bound was specified, use the prefix
        lowerBytes = prefixBytes;
      } else {
        lowerBytes = new BytesRef();
        if (raw) {
          // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
          // perhaps we detect if the FieldType is non-character and expect hex if so?
          lowerBytes = new BytesRef(lowerStr);
        } else {
          lowerBytes = new BytesRef();
          ft.readableToIndexed(lowerStr, lowerBytes);
        }
      }

      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term = null;

      if (lowerBytes != null) {
        if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
          termsEnum = null;
        } else {
          term = termsEnum.term();
          // Only advance the enum if we are excluding the lower bound and the lower Term actually
          // matches
          if (lowerIncl == false && term.equals(lowerBytes)) {
            term = termsEnum.next();
          }
        }
      } else {
        // position termsEnum on first term
        term = termsEnum.next();
      }

      int i = 0;
      BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
          (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
      CharsRef external = new CharsRef();
      while (term != null && (i < limit || sort)) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // stop if the prefix doesn't match
        if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

        if (pattern != null) {
          // indexed text or external text?
          // TODO: support "raw" mode?
          ft.indexedToReadable(term, external);
          externalized = true;
          if (!pattern.matcher(external).matches()) {
            term = termsEnum.next();
            continue;
          }
        }

        if (upperBytes != null) {
          int upperCmp = term.compareTo(upperBytes);
          // if we are past the upper term, or equal to it (when don't include upper) then stop.
          if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
        }

        // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        if (docFreq >= freqmin && docFreq <= freqmax) {
          // add the term to the list
          if (sort) {
            queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
          } else {

            // TODO: handle raw somehow
            if (!externalized) {
              ft.indexedToReadable(term, external);
            }
            fieldTerms.add(external.toString(), docFreq);
            i++;
          }
        }

        term = termsEnum.next();
      }

      if (sort) {
        for (CountPair<BytesRef, Integer> item : queue) {
          if (i >= limit) break;
          ft.indexedToReadable(item.key, external);
          fieldTerms.add(external.toString(), item.val);
          i++;
        }
      }
    }
  }
 public int compare(BytesRef a, BytesRef b) {
   return a.compareTo(b);
 }