private void decodeMetaData() throws IOException { // System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + // state.termBlockOrd + " state=" + state); if (!seekPending) { // TODO: cutover to random-access API // here.... really stupid that we have to decode N // wasted term metadata just to get to the N+1th // that we really need... // lazily catch up on metadata decode: final int limit = state.termBlockOrd; boolean absolute = metaDataUpto == 0; // TODO: better API would be "jump straight to term=N"??? while (metaDataUpto < limit) { // System.out.println(" decode mdUpto=" + metaDataUpto); // TODO: we could make "tiers" of metadata, ie, // decode docFreq/totalTF but don't decode postings // metadata; this way caller could get // docFreq/totalTF w/o paying decode cost for // postings // TODO: if docFreq were bulk decoded we could // just skipN here: // docFreq, totalTermFreq state.docFreq = freqReader.readVInt(); // System.out.println(" dF=" + state.docFreq); if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) { state.totalTermFreq = state.docFreq + freqReader.readVLong(); // System.out.println(" totTF=" + state.totalTermFreq); } // metadata for (int i = 0; i < longs.length; i++) { longs[i] = bytesReader.readVLong(); } postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute); metaDataUpto++; absolute = false; } } else { // System.out.println(" skip! seekPending"); } }
// Pushes a frame we seek'd to IDVersionSegmentTermsEnumFrame pushFrame( FST.Arc<Pair<BytesRef, Long>> arc, Pair<BytesRef, Long> frameData, int length) throws IOException { scratchReader.reset( frameData.output1.bytes, frameData.output1.offset, frameData.output1.length); final long code = scratchReader.readVLong(); final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS; final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); f.maxIDVersion = Long.MAX_VALUE - frameData.output2; f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0; f.hasTermsOrig = f.hasTerms; f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0; if (f.isFloor) { f.setFloorData(scratchReader, frameData.output1); } pushFrame(arc, fpSeek, length); return f; }
/**
 * Debug helper: prints one line per frame, from ord 0 through {@code currentFrame}, and when
 * {@code fr.index} is non-null cross-checks each frame against the index.
 *
 * <p>Frames are labelled "seek" up to and including the first one whose prefix equals {@code
 * validIndexPrefix}; later frames are labelled "next". A frame whose {@code nextEnt} is not -1
 * is additionally labelled "loaded" and gets the extra {@code nextEnt}/{@code lastSubFP} fields.
 *
 * @param out stream that receives the dump
 * @throws IOException if one of the underlying lookups fails
 * @throws RuntimeException ("seek state is broken") if a frame disagrees with the index
 */
@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
  if (currentFrame == staticFrame) {
    out.println(" no prior seek");
    return;
  }

  out.println(" prior seek state:");
  boolean isSeekFrame = true;
  for (int ord = 0; ; ord++) {
    final IDVersionSegmentTermsEnumFrame f = getFrame(ord);
    assert f != null;
    final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
    final boolean loaded = f.nextEnt != -1;

    // Assemble the frame line piece by piece, in the same order for both frame kinds.
    final StringBuilder line = new StringBuilder();
    line.append(" frame ");
    if (loaded) {
      line.append(isSeekFrame ? "(seek, loaded)" : "(next, loaded)");
    } else {
      line.append(isSeekFrame ? "(seek)" : "(next)");
    }
    line.append(" ord=").append(ord);
    line.append(" fp=").append(f.fp);
    if (f.isFloor) {
      line.append(" (fpOrig=").append(f.fpOrig).append(")");
    }
    line.append(" prefixLen=").append(f.prefix);
    line.append(" prefix=").append(brToString(prefix));
    if (loaded) {
      // Entry position is only reported once the frame has a current entry.
      line.append(" nextEnt=").append(f.nextEnt);
      line.append(" (of ").append(f.entCount).append(")");
    }
    line.append(" hasTerms=").append(f.hasTerms);
    line.append(" isFloor=").append(f.isFloor);
    line.append(" code=")
        .append(
            (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0));
    if (loaded) {
      line.append(" lastSubFP=").append(f.lastSubFP);
    }
    line.append(" isLastInFloor=").append(f.isLastInFloor);
    line.append(" mdUpto=").append(f.metaDataUpto);
    line.append(" tbOrd=").append(f.getTermBlockOrd());
    out.println(line.toString());

    if (fr.index != null) {
      assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;

      // For seek frames, the arc label must equal the last byte of the frame's prefix.
      if (f.prefix > 0 && isSeekFrame) {
        if (f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
          out.println(
              " broken seek state: arc.label="
                  + (char) f.arc.label
                  + " vs term byte="
                  + (char) (term.byteAt(f.prefix - 1) & 0xFF));
          throw new RuntimeException("seek state is broken");
        }
      }

      // The prefix must resolve to an output in the index.
      final Pair<BytesRef, Long> output = Util.get(fr.index, prefix);
      if (output == null) {
        out.println(" broken seek state: prefix is not final in index");
        throw new RuntimeException("seek state is broken");
      }

      // For non-floor seek frames, the code stored in the index must match the frame's state.
      if (isSeekFrame && !f.isFloor) {
        final BytesRef indexed = output.output1;
        final ByteArrayDataInput reader =
            new ByteArrayDataInput(indexed.bytes, indexed.offset, indexed.length);
        final long indexCode = reader.readVLong();
        final long frameCode =
            (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                | (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                | (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
        if (indexCode != frameCode) {
          out.println(
              " broken seek state: output code="
                  + indexCode
                  + " doesn't match frame code="
                  + frameCode);
          throw new RuntimeException("seek state is broken");
        }
      }
    }

    if (f == currentFrame) {
      break;
    }
    if (f.prefix == validIndexPrefix) {
      // Every frame after this one is labelled "next".
      isSeekFrame = false;
    }
  }
}