@Override public SeekStatus seekCeil(BytesRef target) throws IOException { // already here if (term != null && term.equals(target)) { return SeekStatus.FOUND; } int startIdx = Arrays.binarySearch(indexedTermsArray, target); if (startIdx >= 0) { // we hit the term exactly... lucky us! TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target); assert seekStatus == TermsEnum.SeekStatus.FOUND; ord = startIdx << indexIntervalBits; setTerm(); assert term != null; return SeekStatus.FOUND; } // we didn't hit the term exactly startIdx = -startIdx - 1; if (startIdx == 0) { // our target occurs *before* the first term TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target); assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND; ord = 0; setTerm(); assert term != null; return SeekStatus.NOT_FOUND; } // back up to the start of the block startIdx--; if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) { // we are already in the right block and the current term is before the term we want, // so we don't need to seek. } else { // seek to the right block TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]); assert seekStatus == TermsEnum.SeekStatus.FOUND; ord = startIdx << indexIntervalBits; setTerm(); assert term != null; // should be non-null since it's in the index } while (term != null && term.compareTo(target) < 0) { next(); } if (term == null) { return SeekStatus.END; } else if (term.compareTo(target) == 0) { return SeekStatus.FOUND; } else { return SeekStatus.NOT_FOUND; } }
/**
 * Orders this term relative to {@code other}.
 *
 * <p>Terms are ordered by field first; terms in the same field are ordered by their bytes.
 *
 * @param other the term to compare against
 * @return a negative integer, zero, or a positive integer as this term sorts before, equal to,
 *     or after {@code other}
 */
@Override
public final int compareTo(Term other) {
  // Different fields: the field ordering alone decides.
  if (!field.equals(other.field)) {
    return field.compareTo(other.field);
  }
  // Same field: fall back to the term bytes.
  return bytes.compareTo(other.bytes);
}
/**
 * Compares this value with another {@code MutableValueStr}.
 *
 * <p>The wrapped values are compared first; on a tie, the instance whose {@code exists} flag is
 * set sorts after the one whose flag is clear.
 *
 * @param other the value to compare against; cast to {@code MutableValueStr}
 * @return a negative integer, zero, or a positive integer
 */
@Override
public int compareSameType(Object other) {
  final MutableValueStr that = (MutableValueStr) other;
  final int valueCmp = value.compareTo(that.value);
  if (valueCmp != 0) {
    return valueCmp;
  }
  // Equal values: break the tie on the exists flag.
  return (exists == that.exists) ? 0 : (exists ? 1 : -1);
}
public void testTermMinMaxRandom() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); int numDocs = atLeast(100); BytesRef minTerm = null; BytesRef maxTerm = null; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); Field field = new TextField("field", "", Field.Store.NO); doc.add(field); // System.out.println(" doc " + i); CannedBinaryTokenStream.BinaryToken[] tokens = new CannedBinaryTokenStream.BinaryToken[atLeast(10)]; for (int j = 0; j < tokens.length; j++) { byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 20)]; random().nextBytes(bytes); BytesRef tokenBytes = new BytesRef(bytes); // System.out.println(" token " + tokenBytes); if (minTerm == null || tokenBytes.compareTo(minTerm) < 0) { // System.out.println(" ** new min"); minTerm = tokenBytes; } if (maxTerm == null || tokenBytes.compareTo(maxTerm) > 0) { // System.out.println(" ** new max"); maxTerm = tokenBytes; } tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes); } field.setTokenStream(new CannedBinaryTokenStream(tokens)); w.addDocument(doc); } IndexReader r = w.getReader(); Terms terms = MultiFields.getTerms(r, "field"); assertEquals(minTerm, terms.getMin()); assertEquals(maxTerm, terms.getMax()); r.close(); w.close(); dir.close(); }
@Override protected final BytesRef nextSeekTerm(BytesRef term) { while (hasNext()) { if (currentRange == null) { nextRange(); } // if the new upper bound is before the term parameter, the sub-range is never a hit if (term != null && term.compareTo(currentCell) > 0) { nextRange(); if (!rangeBounds.isEmpty()) { continue; } } // never seek backwards, so use current term if lower bound is smaller return (term != null && term.compareTo(currentCell) > 0) ? term : currentCell; } // no more sub-range enums available assert rangeBounds.isEmpty(); return null; }
public int compareBottom(int doc) { assert bottomSlot != -1; int order = termsIndex.getOrd(doc); int ord = (order == 0) ? NULL_ORD : order; final int cmp = bottomOrd - ord; if (cmp != 0) { return cmp; } // take care of the case where both vals are null if (bottomOrd == NULL_ORD) return 0; return bottomValue.compareTo(termsIndex.lookup(order, tempBR)); }
@Override public int compareToNoLeaf(Cell fromCell) { final NRCell nrCell = (NRCell) fromCell; assert term != nrCell.term; // trick to re-use bytesref; provided that we re-instate it int myLastLen = term.length; int otherLastLen = nrCell.term.length; term.length = termLenByLevel[getLevel()]; nrCell.term.length = termLenByLevel[nrCell.getLevel()]; int answer = term.compareTo(nrCell.term); term.length = myLastLen; nrCell.term.length = otherLastLen; return answer; }
private int countTerms(MultiTermQuery q) throws Exception { final Terms terms = MultiFields.getTerms(reader, q.getField()); if (terms == null) return 0; final TermsEnum termEnum = q.getTermsEnum(terms); assertNotNull(termEnum); int count = 0; BytesRef cur, last = null; while ((cur = termEnum.next()) != null) { count++; if (last != null) { assertTrue(last.compareTo(cur) < 0); } last = BytesRef.deepCopyOf(cur); } // LUCENE-3314: the results after next() already returned null are undefined, // assertNull(termEnum.next()); return count; }
@Override public void seekExact(BytesRef target, TermState otherState) { // if (DEBUG) { // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + // target.utf8ToString() + " " + target + " state=" + otherState); // } assert clearEOF(); if (target.compareTo(term.get()) != 0 || !termExists) { assert otherState != null && otherState instanceof BlockTermState; currentFrame = staticFrame; currentFrame.state.copyFrom(otherState); term.copyBytes(target); currentFrame.metaDataUpto = currentFrame.getTermBlockOrd(); assert currentFrame.metaDataUpto > 0; validIndexPrefix = 0; } else { // if (DEBUG) { // System.out.println(" skip seek: already on target state=" + currentFrame.state); // } } }
/**
 * Compares the entries held in two slots.
 *
 * <p>When both slots share a reader generation, differing ordinals decide the result. Otherwise,
 * or when the ordinals are equal, the stored values are compared, with a {@code null} value
 * sorting after any non-null value.
 *
 * @param slot1 index of the first slot
 * @param slot2 index of the second slot
 * @return a negative integer, zero, or a positive integer
 */
@Override
public int compare(int slot1, int slot2) {
  if (readerGen[slot1] == readerGen[slot2]) {
    final int ordDiff = ords[slot1] - ords[slot2];
    if (ordDiff != 0) {
      return ordDiff;
    }
  }

  final BytesRef left = values[slot1];
  final BytesRef right = values[slot2];

  if (left == null) {
    return (right == null) ? 0 : 1;
  }
  if (right == null) {
    return -1;
  }
  return left.compareTo(right);
}
@Override public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(TermsParams.TERMS, false)) return; String[] fields = params.getParams(TermsParams.TERMS_FIELD); NamedList<Object> termsResult = new SimpleOrderedMap<>(); rb.rsp.add("terms", termsResult); if (fields == null || fields.length == 0) return; int limit = params.getInt(TermsParams.TERMS_LIMIT, 10); if (limit < 0) { limit = Integer.MAX_VALUE; } String lowerStr = params.get(TermsParams.TERMS_LOWER); String upperStr = params.get(TermsParams.TERMS_UPPER); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); boolean sort = !TermsParams.TERMS_SORT_INDEX.equals( params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT)); int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); if (freqmax < 0) { freqmax = Integer.MAX_VALUE; } String prefix = params.get(TermsParams.TERMS_PREFIX_STR); String regexp = params.get(TermsParams.TERMS_REGEXP_STR); Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null; boolean raw = params.getBool(TermsParams.TERMS_RAW, false); final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader(); Fields lfields = indexReader.fields(); for (String field : fields) { NamedList<Integer> fieldTerms = new NamedList<>(); termsResult.add(field, fieldTerms); Terms terms = lfields == null ? null : lfields.terms(field); if (terms == null) { // no terms for this field continue; } FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field); if (ft == null) ft = new StrField(); // prefix must currently be text BytesRef prefixBytes = prefix == null ? 
null : new BytesRef(prefix); BytesRef upperBytes = null; if (upperStr != null) { upperBytes = new BytesRef(); ft.readableToIndexed(upperStr, upperBytes); } BytesRef lowerBytes; if (lowerStr == null) { // If no lower bound was specified, use the prefix lowerBytes = prefixBytes; } else { lowerBytes = new BytesRef(); if (raw) { // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists // perhaps we detect if the FieldType is non-character and expect hex if so? lowerBytes = new BytesRef(lowerStr); } else { lowerBytes = new BytesRef(); ft.readableToIndexed(lowerStr, lowerBytes); } } TermsEnum termsEnum = terms.iterator(null); BytesRef term = null; if (lowerBytes != null) { if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) { termsEnum = null; } else { term = termsEnum.term(); // Only advance the enum if we are excluding the lower bound and the lower Term actually // matches if (lowerIncl == false && term.equals(lowerBytes)) { term = termsEnum.next(); } } } else { // position termsEnum on first term term = termsEnum.next(); } int i = 0; BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null); CharsRef external = new CharsRef(); while (term != null && (i < limit || sort)) { boolean externalized = false; // did we fill in "external" yet for this term? // stop if the prefix doesn't match if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break; if (pattern != null) { // indexed text or external text? // TODO: support "raw" mode? ft.indexedToReadable(term, external); externalized = true; if (!pattern.matcher(external).matches()) { term = termsEnum.next(); continue; } } if (upperBytes != null) { int upperCmp = term.compareTo(upperBytes); // if we are past the upper term, or equal to it (when don't include upper) then stop. if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break; } // This is a good term in the range. 
Check if mincount/maxcount conditions are satisfied. int docFreq = termsEnum.docFreq(); if (docFreq >= freqmin && docFreq <= freqmax) { // add the term to the list if (sort) { queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq)); } else { // TODO: handle raw somehow if (!externalized) { ft.indexedToReadable(term, external); } fieldTerms.add(external.toString(), docFreq); i++; } } term = termsEnum.next(); } if (sort) { for (CountPair<BytesRef, Integer> item : queue) { if (i >= limit) break; ft.indexedToReadable(item.key, external); fieldTerms.add(external.toString(), item.val); i++; } } } }
/**
 * Compares two {@code BytesRef} instances by delegating to {@code a.compareTo(b)}.
 *
 * @param a the first value
 * @param b the second value
 * @return the result of {@code a.compareTo(b)}
 */
public int compare(BytesRef a, BytesRef b) {
  final int result = a.compareTo(b);
  return result;
}