@Override public SeekStatus seekCeil(final BytesRef target) throws IOException { if (fr.index == null) { throw new IllegalStateException("terms index was not loaded"); } term.grow(1 + target.length); assert clearEOF(); // if (DEBUG) { // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + // termExists + ") validIndexPrefix= " + validIndexPrefix); // printSeekState(); // } FST.Arc<Pair<BytesRef, Long>> arc; int targetUpto; Pair<BytesRef, Long> output; targetBeforeCurrentLength = currentFrame.ord; if (currentFrame != staticFrame) { // We are already seek'd; find the common // prefix of new seek term vs current term and // re-use the corresponding seek state. For // example, if app first seeks to foobar, then // seeks to foobaz, we can re-use the seek state // for the first 5 bytes. // if (DEBUG) { // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix); // } arc = arcs[0]; assert arc.isFinal(); output = arc.output; targetUpto = 0; IDVersionSegmentTermsEnumFrame lastFrame = stack[0]; assert validIndexPrefix <= term.length(); final int targetLimit = Math.min(target.length, validIndexPrefix); int cmp = 0; // TODO: we should write our vLong backwards (MSB // first) to get better sharing from the FST // First compare up to valid seek frames: while (targetUpto < targetLimit) { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // " output=" + output); // } if (cmp != 0) { break; } arc = arcs[1 + targetUpto]; assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF) : "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); // TODO: we could save the outputs in local // byte[][] instead of making new objs ever // seek; but, often the FST doesn't have any // shared bytes (but this could change if we // reverse vLong byte order) if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); } if (arc.isFinal()) { lastFrame = stack[1 + lastFrame.ord]; } targetUpto++; } if (cmp == 0) { final int targetUptoMid = targetUpto; // Second compare the rest of the term, but // don't save arc/output/frame: final int targetLimit2 = Math.min(target.length, term.length()); while (targetUpto < targetLimit2) { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) // + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"); // } if (cmp != 0) { break; } targetUpto++; } if (cmp == 0) { cmp = term.length() - target.length; } targetUpto = targetUptoMid; } if (cmp < 0) { // Common case: target term is after current // term, ie, app is seeking multiple terms // in sorted order // if (DEBUG) { // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); // clear frame.scanned ord=" + lastFrame.ord); // } currentFrame = lastFrame; } else if (cmp > 0) { // Uncommon case: target term // is before current term; this means we can // keep the currentFrame but we must rewind it // (so we scan from the start) targetBeforeCurrentLength = 0; // if (DEBUG) { // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); // rewind frame ord=" + lastFrame.ord); // } currentFrame = lastFrame; currentFrame.rewind(); } else { // Target is exactly the same as current term assert term.length() == target.length; if (termExists) { // if (DEBUG) { // System.out.println(" target is same as current; return FOUND"); // } return SeekStatus.FOUND; } else { // if (DEBUG) { // System.out.println(" target is same as current but term doesn't exist"); // } } } } else { targetBeforeCurrentLength = -1; arc = fr.index.getFirstArc(arcs[0]); // Empty string prefix must have an output (block) in the index! assert arc.isFinal(); assert arc.output != null; // if (DEBUG) { // System.out.println(" no seek state; push root frame"); // } output = arc.output; currentFrame = staticFrame; // term.length = 0; targetUpto = 0; currentFrame = pushFrame( arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); } // if (DEBUG) { // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } // We are done sharing the common prefix with the incoming target and where we are currently // seek'd; now continue walking the index: while (targetUpto < target.length) { final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF; final FST.Arc<Pair<BytesRef, Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader); if (nextArc == null) { // Index is exhausted // if (DEBUG) { // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + // toHex(targetLabel)); // } validIndexPrefix = currentFrame.prefix; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); currentFrame.loadBlock(); final SeekStatus result = currentFrame.scanToTerm(target, false); if (result == SeekStatus.END) { term.copyBytes(target); termExists = false; if (next() != null) { // if (DEBUG) { // System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term); // } return SeekStatus.NOT_FOUND; } else { // if (DEBUG) { // System.out.println(" return END"); // } return SeekStatus.END; } } else { // if (DEBUG) { // System.out.println(" return " + result + " term=" + brToString(term) + " " + term); // } return result; } } else { // Follow this arc term.setByteAt(targetUpto, (byte) targetLabel); arc = nextArc; // Aggregate output as we go: assert arc.output != null; if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); } // if (DEBUG) { // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput); // } targetUpto++; if (arc.isFinal()) { // if (DEBUG) System.out.println(" arc is final!"); currentFrame = pushFrame( arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + // currentFrame.hasTerms); } } } // validIndexPrefix = targetUpto; validIndexPrefix = currentFrame.prefix; currentFrame.scanToFloorFrame(target); currentFrame.loadBlock(); final SeekStatus result = currentFrame.scanToTerm(target, false); if (result == SeekStatus.END) { term.copyBytes(target); termExists = false; if (next() != null) { // if (DEBUG) { // System.out.println(" return NOT_FOUND term=" + term.utf8ToString() + " " + term); // } return SeekStatus.NOT_FOUND; } else { // if (DEBUG) { // System.out.println(" return END"); // } return SeekStatus.END; } } else { return result; } }
/** * Optimized version of {@link #seekExact(BytesRef)} that can sometimes fail-fast if the version * indexed with the requested ID is less than the specified minIDVersion. Applications that index * a monotonically increasing global version with each document can use this for fast optimistic * concurrency. */ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException { if (fr.index == null) { throw new IllegalStateException("terms index was not loaded"); } term.grow(1 + target.length); assert clearEOF(); // if (DEBUG) { // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + // fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" // + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } FST.Arc<Pair<BytesRef, Long>> arc; int targetUpto; Pair<BytesRef, Long> output; long startFrameFP = currentFrame.fp; targetBeforeCurrentLength = currentFrame.ord; boolean changed = false; // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can // check? if (currentFrame != staticFrame) { // We are already seek'd; find the common // prefix of new seek term vs current term and // re-use the corresponding seek state. For // example, if app first seeks to foobar, then // seeks to foobaz, we can re-use the seek state // for the first 5 bytes. // if (DEBUG) { // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix); // } arc = arcs[0]; assert arc.isFinal(); output = arc.output; targetUpto = 0; IDVersionSegmentTermsEnumFrame lastFrame = stack[0]; assert validIndexPrefix <= term.length() : "validIndexPrefix=" + validIndexPrefix + " term.length=" + term.length() + " seg=" + fr.parent; final int targetLimit = Math.min(target.length, validIndexPrefix); int cmp = 0; // TODO: reverse vLong byte order for better FST // prefix output sharing // First compare up to valid seek frames: while (targetUpto < targetLimit) { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output // + " output=" + output); // } if (cmp != 0) { break; } arc = arcs[1 + targetUpto]; // if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) { // System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) // (target.bytes[target.offset + targetUpto] & 0xFF)); // } assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF) : "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); } if (arc.isFinal()) { lastFrame = stack[1 + lastFrame.ord]; } targetUpto++; } if (cmp == 0) { final int targetUptoMid = targetUpto; // Second compare the rest of the term, but // don't save arc/output/frame; we only do this // to find out if the target term is before, // equal or after the current term final int targetLimit2 = Math.min(target.length, term.length()); while (targetUpto < targetLimit2) { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + // targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + // targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"); // } if (cmp != 0) { break; } targetUpto++; } if (cmp == 0) { cmp = term.length() - target.length; } targetUpto = targetUptoMid; } if (cmp < 0) { // Common case: target term is after current // term, ie, app is seeking multiple terms // in sorted order // if (DEBUG) { // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); // frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto); // } currentFrame = lastFrame; } else if (cmp > 0) { // Uncommon case: target term // is before current term; this means we can // keep the currentFrame but we must rewind it // (so we scan from the start) targetBeforeCurrentLength = 0; changed = true; // if (DEBUG) { // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); // rewind frame ord=" + lastFrame.ord); // } currentFrame = lastFrame; currentFrame.rewind(); } else { // Target is exactly the same as current term assert term.length() == target.length; if (termExists) { if (currentFrame.maxIDVersion < minIDVersion) { // The max version for all terms in this block is lower than the minVersion // if (DEBUG) { // System.out.println(" target is same as current maxIDVersion=" + // currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false"); // } return false; } currentFrame.decodeMetaData(); if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) { // This term's version is lower than the minVersion // if (DEBUG) { // System.out.println(" target is same as current but version=" + // ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" + // minIDVersion + "; return false"); // } return false; } // System.out.println(" term version=" + ((IDVersionTermState) // currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame // ord=" + currentFrame.ord); // if (DEBUG) { // System.out.println(" target is same as current; return true"); // } return true; } else { // if (DEBUG) { // System.out.println(" target is same as current but term doesn't exist"); // } } // validIndexPrefix = currentFrame.depth; // term.length = target.length; // return termExists; } } else { targetBeforeCurrentLength = -1; arc = fr.index.getFirstArc(arcs[0]); // System.out.println("first arc=" + arc); // Empty string prefix must have an output (block) in the index! assert arc.isFinal(); assert arc.output != null; // if (DEBUG) { // System.out.println(" no seek state; push root frame"); // } output = arc.output; currentFrame = staticFrame; // term.length = 0; targetUpto = 0; currentFrame = pushFrame( arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0); } // if (DEBUG) { // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength + " termExists=" + termExists); // } // We are done sharing the common prefix with the incoming target and where we are currently // seek'd; now continue walking the index: while (targetUpto < target.length) { final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF; final FST.Arc<Pair<BytesRef, Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader); if (nextArc == null) { // Index is exhausted // if (DEBUG) { // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + // Integer.toHexString(targetLabel) + " termExists=" + termExists); // } validIndexPrefix = currentFrame.prefix; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); if (!currentFrame.hasTerms) { termExists = false; term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // } return false; } // System.out.println(" check maxVersion=" + currentFrame.maxIDVersion + " vs " + // minIDVersion); // if (DEBUG) { // System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs // minIDVersion=" + minIDVersion); // } if (currentFrame.maxIDVersion < minIDVersion) { // The max version for all terms in this block is lower than the minVersion if (currentFrame.fp != startFrameFP || changed) { // if (targetUpto+1 > term.length) { termExists = false; term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { // System.out.println(" reset current term"); // } validIndexPrefix = Math.min(validIndexPrefix, term.length()); } // if (currentFrame.ord != startFrameOrd) { // termExists = false; // } // if (DEBUG) { // System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " // targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + // " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + // currentFrame.fp + " termExists=" + termExists); // } return false; } currentFrame.loadBlock(); // if (DEBUG) { // System.out.println(" scan currentFrame ord=" + currentFrame.ord); // } final SeekStatus result = currentFrame.scanToTerm(target, true); if (result == SeekStatus.FOUND) { currentFrame.decodeMetaData(); if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) { // This term's version is lower than the minVersion // if (DEBUG) { // System.out.println(" return NOT_FOUND: idVersion=" + ((IDVersionTermState) // currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion); // } return false; } // if (DEBUG) { // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term); // } return true; } else { // if (DEBUG) { // System.out.println(" got " + result + "; return NOT_FOUND term=" + // brToString(term)); // } return false; } } else { // Follow this arc arc = nextArc; if (term.byteAt(targetUpto) != (byte) targetLabel) { // if (DEBUG) { // System.out.println(" now set termExists=false targetUpto=" + targetUpto + " term=" + // term.bytes[targetUpto] + " targetLabel=" + targetLabel); // } changed = true; term.setByteAt(targetUpto, (byte) targetLabel); termExists = false; } // Aggregate output as we go: assert arc.output != null; if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) { output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output); } // if (DEBUG) { // System.out.println(" index: follow label=" + (char) ((target.bytes[target.offset + // targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput); // } targetUpto++; if (arc.isFinal()) { // if (DEBUG) System.out.println(" arc is final!"); currentFrame = pushFrame( arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto); // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + // currentFrame.hasTerms); } } } // validIndexPrefix = targetUpto; validIndexPrefix = currentFrame.prefix; currentFrame.scanToFloorFrame(target); // Target term is entirely contained in the index: if (!currentFrame.hasTerms) { termExists = false; term.setLength(targetUpto); // if (DEBUG) { // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); // } return false; } // if (DEBUG) { // System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs // minIDVersion=" + minIDVersion); // } if (currentFrame.maxIDVersion < minIDVersion) { // The max version for all terms in this block is lower than the minVersion termExists = false; term.setLength(targetUpto); return false; } currentFrame.loadBlock(); final SeekStatus result = currentFrame.scanToTerm(target, true); if (result == SeekStatus.FOUND) { // if (DEBUG) { // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term); // } currentFrame.decodeMetaData(); if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) { // This term's version is lower than the minVersion return false; } return true; } else { // if (DEBUG) { // System.out.println(" got result " + result + "; return NOT_FOUND term=" + // term.utf8ToString()); // } return false; } }