/**
 * Copies every int field and every string field of {@code fdx} into {@code w},
 * term by term and doc by doc, then closes {@code w}.
 *
 * <p>The reader-side doc id stream and term iterators, as well as the per-field
 * writers, are closed even when an exception is thrown part-way through.
 * {@code w} itself is closed only after all fields were written successfully;
 * it is not closed here when an exception propagates.
 *
 * @param fdx source of fields, terms and doc ids
 * @param w   destination writer; closed by this method on success
 * @throws IOException if reading from {@code fdx} or writing to {@code w} fails
 */
public static void writeFlamdex(final FlamdexReader fdx, final FlamdexWriter w) throws IOException {
    final DocIdStream dis = fdx.getDocIdStream();
    try {
        final int[] docIdBuf = new int[DOC_ID_BUFFER_SIZE];

        for (final String intField : fdx.getIntFields()) {
            final IntFieldWriter ifw = w.getIntFieldWriter(intField);
            try {
                final IntTermIterator iter = fdx.getIntTermIterator(intField);
                try {
                    while (iter.next()) {
                        ifw.nextTerm(iter.term());
                        dis.reset(iter);
                        while (true) {
                            final int n = dis.fillDocIdBuffer(docIdBuf);
                            for (int i = 0; i < n; ++i) {
                                ifw.nextDoc(docIdBuf[i]);
                            }
                            // A partially filled buffer ends the doc list of the current term.
                            if (n < docIdBuf.length) break;
                        }
                    }
                } finally {
                    iter.close();
                }
            } finally {
                // Runs after iter.close(), matching the close order of the non-failing path.
                ifw.close();
            }
        }

        for (final String stringField : fdx.getStringFields()) {
            final StringFieldWriter sfw = w.getStringFieldWriter(stringField);
            try {
                final StringTermIterator iter = fdx.getStringTermIterator(stringField);
                try {
                    while (iter.next()) {
                        sfw.nextTerm(iter.term());
                        dis.reset(iter);
                        while (true) {
                            final int n = dis.fillDocIdBuffer(docIdBuf);
                            for (int i = 0; i < n; ++i) {
                                sfw.nextDoc(docIdBuf[i]);
                            }
                            // A partially filled buffer ends the doc list of the current term.
                            if (n < docIdBuf.length) break;
                        }
                    }
                } finally {
                    iter.close();
                }
            } finally {
                sfw.close();
            }
        }
    } finally {
        dis.close();
    }
    // Only reached when every field was copied without an exception.
    w.close();
}
/**
 * Merges the int and string fields of all {@code readers} into {@code w}.
 *
 * <p>Readers are treated as consecutive segments: the doc ids of reader {@code i}
 * are shifted by the sum of {@code getNumDocs()} of the readers before it. For each
 * field, terms are emitted in the order produced by the priority queue over the
 * readers' term iterators, and a term that occurs in several readers is written
 * once with the doc ids of all of those readers.
 *
 * <p>The doc id streams, term iterators and field writers opened here are closed
 * even when an exception is thrown. {@code w} is not closed by this method.
 *
 * @param readers the segments to merge, in the order their docs should appear
 * @param w       destination writer; left open
 * @throws IOException if reading from a reader or writing to {@code w} fails
 */
public static void merge(FlamdexReader[] readers, FlamdexWriter w) throws IOException {
    final DocIdStream[] docIdStreams = new DocIdStream[readers.length];
    try {
        final int[] segmentStartDocs = new int[readers.length];
        int totalNumDocs = 0;
        for (int i = 0; i < readers.length; ++i) {
            docIdStreams[i] = readers[i].getDocIdStream();
            segmentStartDocs[i] = totalNumDocs;
            totalNumDocs += readers[i].getNumDocs();
        }

        log.info("merging " + readers.length + " readers with a total of " + totalNumDocs + " docs");

        final int[] indexBuf = new int[readers.length];
        final int[] docIdBuf = new int[64];

        for (final String intField : mergeIntFields(readers)) {
            final IntFieldWriter ifw = w.getIntFieldWriter(intField);
            try {
                mergeIntFieldTerms(readers, intField, ifw, docIdStreams, segmentStartDocs, indexBuf, docIdBuf);
            } finally {
                ifw.close();
            }
        }

        for (final String stringField : mergeStringFields(readers)) {
            final StringFieldWriter sfw = w.getStringFieldWriter(stringField);
            try {
                mergeStringFieldTerms(readers, stringField, sfw, docIdStreams, segmentStartDocs, indexBuf, docIdBuf);
            } finally {
                sfw.close();
            }
        }
    } finally {
        // Slots may still be null if opening the streams failed part-way through.
        for (final DocIdStream dis : docIdStreams) {
            if (dis != null) {
                dis.close();
            }
        }
    }
}

/** Writes all terms of one int field, merged across the readers that contain it, to {@code ifw}. */
private static void mergeIntFieldTerms(final FlamdexReader[] readers,
                                       final String intField,
                                       final IntFieldWriter ifw,
                                       final DocIdStream[] docIdStreams,
                                       final int[] segmentStartDocs,
                                       final int[] indexBuf,
                                       final int[] docIdBuf) throws IOException {
    final IntTermIteratorWrapper[] iterators = new IntTermIteratorWrapper[readers.length];
    // open[i] is true while iterators[i].it still has to be closed by this method.
    final boolean[] open = new boolean[readers.length];
    final IndirectPriorityQueue<IntTermIteratorWrapper> pq =
            new ObjectHeapSemiIndirectPriorityQueue<IntTermIteratorWrapper>(iterators, iterators.length);
    try {
        for (int i = 0; i < readers.length; ++i) {
            if (!readers[i].getIntFields().contains(intField)) continue;
            final IntTermIterator it = readers[i].getIntTermIterator(intField);
            boolean hasTerm = false;
            try {
                hasTerm = it.next();
            } finally {
                // No first term (or next() failed): this iterator never enters the queue.
                if (!hasTerm) it.close();
            }
            if (hasTerm) {
                iterators[i] = new IntTermIteratorWrapper(it, i);
                open[i] = true;
                pq.enqueue(i);
            }
        }

        while (!pq.isEmpty()) {
            final long term = iterators[pq.first()].it.term();
            int numIndexes = 0;
            IntTermIteratorWrapper wrap;
            // Collect every reader currently positioned on this term.
            while (!pq.isEmpty() && (wrap = iterators[pq.first()]).it.term() == term) {
                final int index = wrap.index;
                // The stream is reset to the term before the iterator is advanced; the docs are read below.
                // NOTE(review): relies on the stream staying usable after the iterator moves on or is
                // closed -- confirm against the DocIdStream contract.
                docIdStreams[index].reset(wrap.it);
                indexBuf[numIndexes++] = index;
                if (wrap.it.next()) {
                    pq.changed();
                } else {
                    open[index] = false;
                    wrap.it.close();
                    pq.dequeue();
                }
            }

            ifw.nextTerm(term);
            for (int i = 0; i < numIndexes; ++i) {
                final int index = indexBuf[i];
                final int startDoc = segmentStartDocs[index];
                final DocIdStream dis = docIdStreams[index];
                while (true) {
                    final int n = dis.fillDocIdBuffer(docIdBuf);
                    for (int j = 0; j < n; ++j) {
                        ifw.nextDoc(docIdBuf[j] + startDoc);
                    }
                    // A partially filled buffer ends the doc list of the current term.
                    if (n < docIdBuf.length) break;
                }
            }
        }
    } finally {
        // After a complete merge every flag is false; this only closes iterators left over by a failure.
        for (int i = 0; i < open.length; ++i) {
            if (open[i]) {
                iterators[i].it.close();
            }
        }
    }
}

/** Writes all terms of one string field, merged across the readers that contain it, to {@code sfw}. */
private static void mergeStringFieldTerms(final FlamdexReader[] readers,
                                          final String stringField,
                                          final StringFieldWriter sfw,
                                          final DocIdStream[] docIdStreams,
                                          final int[] segmentStartDocs,
                                          final int[] indexBuf,
                                          final int[] docIdBuf) throws IOException {
    final StringTermIteratorWrapper[] iterators = new StringTermIteratorWrapper[readers.length];
    // open[i] is true while iterators[i].it still has to be closed by this method.
    final boolean[] open = new boolean[readers.length];
    final IndirectPriorityQueue<StringTermIteratorWrapper> pq =
            new ObjectHeapSemiIndirectPriorityQueue<StringTermIteratorWrapper>(iterators, iterators.length);
    try {
        for (int i = 0; i < readers.length; ++i) {
            if (!readers[i].getStringFields().contains(stringField)) continue;
            final StringTermIterator it = readers[i].getStringTermIterator(stringField);
            boolean hasTerm = false;
            try {
                hasTerm = it.next();
            } finally {
                // No first term (or next() failed): this iterator never enters the queue.
                if (!hasTerm) it.close();
            }
            if (hasTerm) {
                iterators[i] = new StringTermIteratorWrapper(it, i);
                open[i] = true;
                pq.enqueue(i);
            }
        }

        while (!pq.isEmpty()) {
            final String term = iterators[pq.first()].it.term();
            int numIndexes = 0;
            StringTermIteratorWrapper wrap;
            // Collect every reader currently positioned on this term.
            while (!pq.isEmpty() && (wrap = iterators[pq.first()]).it.term().equals(term)) {
                final int index = wrap.index;
                // The stream is reset to the term before the iterator is advanced; the docs are read below.
                // NOTE(review): relies on the stream staying usable after the iterator moves on or is
                // closed -- confirm against the DocIdStream contract.
                docIdStreams[index].reset(wrap.it);
                indexBuf[numIndexes++] = index;
                if (wrap.it.next()) {
                    pq.changed();
                } else {
                    open[index] = false;
                    wrap.it.close();
                    pq.dequeue();
                }
            }

            sfw.nextTerm(term);
            for (int i = 0; i < numIndexes; ++i) {
                final int index = indexBuf[i];
                final int startDoc = segmentStartDocs[index];
                final DocIdStream dis = docIdStreams[index];
                while (true) {
                    final int n = dis.fillDocIdBuffer(docIdBuf);
                    for (int j = 0; j < n; ++j) {
                        sfw.nextDoc(docIdBuf[j] + startDoc);
                    }
                    // A partially filled buffer ends the doc list of the current term.
                    if (n < docIdBuf.length) break;
                }
            }
        }
    } finally {
        // After a complete merge every flag is false; this only closes iterators left over by a failure.
        for (int i = 0; i < open.length; ++i) {
            if (open[i]) {
                iterators[i].it.close();
            }
        }
    }
}