/*
 * (non-Javadoc)
 *
 * @see org.apache.lucene.search.Scorer#next()
 */
public boolean next() throws IOException
{
    // If there is no filtering, every container doc is a candidate:
    // walk the two enumerations in lock-step.
    if (allContainers())
    {
        // containers and roots must be in sync or the index is broken
        while (more)
        {
            if (containers.next() && root.next())
            {
                // check(0, end) tests the structured-path predicate for this doc;
                // on failure we simply advance to the next candidate.
                if (check(0, root.nextPosition()))
                {
                    return true;
                }
            }
            else
            {
                // One enumeration is exhausted - release resources and stop.
                doClose();
                more = false;
                return false;
            }
        }
    }
    if (!more)
    {
        // One of the search terms has no more documents
        return false;
    }
    if (max == 0)
    {
        // We need to initialise
        // Just do a next on all terms and check if the first doc matches
        doNextOnAll();
        if (found())
        {
            return true;
        }
        // drop through to the normal find sequence
    }
    return findNext();
}
/** * Check if we have found a match * * @return boolean * @throws IOException */ private boolean found() throws IOException { // No predicate test if there are no positions if (positions.length == 0) { return true; } // no more documents - no match if (!more) { return false; } // min and max must point to the same document if (min != max) { return false; } if (rootDoc != max) { return false; } // We have duplicate entries - suport should be improved but it is not used at the moment // This shuld work akin to the leaf scorer // It would compact the index // The match must be in a known term range int count = root.freq(); int start = 0; int end = -1; for (int i = 0; i < count; i++) { if (i == 0) { // First starts at zero start = 0; end = root.nextPosition(); } else { start = end + 1; end = root.nextPosition(); } if (check(start, end)) { return true; } } // We had checks to do and they all failed. return false; }
/* * (non-Javadoc) * * @see org.apache.lucene.search.Scorer#skipTo(int) */ public boolean skipTo(int target) throws IOException { if (allContainers()) { containers.skipTo(target); root.skipTo(containers.doc()); // must match if (check(0, root.nextPosition())) { return true; } while (more) { if (containers.next() && root.next()) { if (check(0, root.nextPosition())) { return true; } } else { more = false; return false; } } } max = target; return findNext(); }
public void seek(TermEnum terms) throws IOException { original.seek(terms); docFreq = terms.docFreq(); pointer = -1; if (docFreq > postingMaps.length) { // grow postingsMap PostingMap[] newMap = new PostingMap[docFreq]; System.arraycopy(postingMaps, 0, newMap, 0, postingMaps.length); for (int i = postingMaps.length; i < docFreq; i++) { newMap[i] = new PostingMap(); } postingMaps = newMap; } out.reset(); int i = 0; while (original.next()) { PostingMap map = postingMaps[i++]; map.newDoc = oldToNew[original.doc()]; // remap the newDoc id map.offset = out.getFilePointer(); // save pointer to buffer final int tf = original.freq(); // buffer tf & positions out.writeVInt(tf); int prevPosition = 0; for (int j = tf; j > 0; j--) { // delta encode positions int p = original.nextPosition(); out.writeVInt(p - prevPosition); prevPosition = p; } } out.flush(); docFreq = i; // allow for deletions Arrays.sort(postingMaps, 0, docFreq); // resort by mapped doc ids // HeapSorter.sort(postingMaps,docFreq); // TODO MC - due to the lack of space // NOTE: this might be substantially faster if RAMInputStream were public // and supported a reset() operation. in = tempDir.openInput(TEMP_FILE); }
@Override public void load() throws Exception { TermPositions tp = null; byte[] payloadBuffer = new byte[4]; // four bytes for an int try { tp = _reader.termPositions(_sizeTerm); if (tp == null) return; while (tp.next()) { if (tp.freq() > 0) { tp.nextPosition(); tp.getPayload(payloadBuffer, 0); int len = bytesToInt(payloadBuffer); allocate(tp.doc(), Math.min(len, _maxItems), true); } } } finally { if (tp != null) tp.close(); } }
ReaderData(IndexReader reader) throws IOException { this.reader = reader; long minUID = Long.MAX_VALUE; long maxUID = Long.MIN_VALUE; uidMap = new Long2IntRBTreeMap(); uidMap.defaultReturnValue(-1); int maxDoc = reader.maxDoc(); if (maxDoc == 0) { _minUID = Long.MIN_VALUE; _maxUID = Long.MIN_VALUE; return; } TermPositions tp = null; byte[] payloadBuffer = new byte[8]; // four bytes for a long try { tp = reader.termPositions(ZoieSegmentReader.UID_TERM); while (tp.next()) { int doc = tp.doc(); assert doc < maxDoc; tp.nextPosition(); tp.getPayload(payloadBuffer, 0); long uid = ZoieSegmentReader.bytesToLong(payloadBuffer); if (uid < minUID) minUID = uid; if (uid > maxUID) maxUID = uid; uidMap.put(uid, doc); } } finally { if (tp != null) { tp.close(); } } _minUID = minUID; _maxUID = maxUID; }
private void dumpTerms() throws IOException { outputBanner("Terms (in Term.compareTo() order)"); TermEnum terms = mIndexReader.terms(); int order = 0; while (terms.next()) { order++; Term term = terms.term(); String field = term.field(); String text = term.text(); if (!wantThisTerm(field, text)) { continue; } outputLn(order + " " + field + ": " + text); /* * for each term, print the * <document, frequency, <position>* > tuples for a term. * * document: document in which the Term appears * frequency: number of time the Term appears in the document * position: position for each appearance in the document * * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED)); * then the tuple for Term("field", "two") in this document would be like: * 88, 2, <2, 4> * where * 88 is the document number * 2 is the frequency this term appear in the document * <2, 4> are the positions for each appearance in the document */ // by TermPositions outputLn(" document, frequency, <position>*"); // keep track of docs that appear in all terms that are filtered in. Set<Integer> docNums = null; if (hasFilters()) { docNums = new HashSet<Integer>(); } TermPositions termPos = mIndexReader.termPositions(term); while (termPos.next()) { int docNum = termPos.doc(); int freq = termPos.freq(); if (docNums != null) { docNums.add(docNum); } output(" " + docNum + ", " + freq + ", <"); boolean first = true; for (int f = 0; f < freq; f++) { int positionInDoc = termPos.nextPosition(); if (!first) { output(" "); } else { first = false; } output(positionInDoc + ""); } outputLn(">"); } termPos.close(); if (docNums != null) { computeDocsIntersection(docNums); } outputLn(); if (order % 1000 == 0) { mConsole.debug("Dumped " + order + " terms"); } } terms.close(); }