/** Construct reading the named set of readers. */
DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy,
    boolean readOnly, int termInfosIndexDivisor,
    Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
  this.directory = directory;
  this.readOnly = readOnly;
  this.segmentInfos = sis;
  this.deletionPolicy = deletionPolicy;
  this.termInfosIndexDivisor = termInfosIndexDivisor;

  if (readerFinishedListeners == null) {
    this.readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(
        new ConcurrentHashMap<ReaderFinishedListener, Boolean>());
  } else {
    this.readerFinishedListeners = readerFinishedListeners;
  }
  applyAllDeletes = false;

  // To reduce the chance of hitting FileNotFound
  // (and having to retry), we open segments in
  // reverse because IndexWriter merges & deletes
  // the newest segments first.
  SegmentReader[] readers = new SegmentReader[sis.size()];
  for (int i = sis.size() - 1; i >= 0; i--) {
    boolean success = false;
    try {
      readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
      readers[i].readerFinishedListeners = this.readerFinishedListeners;
      success = true;
    } finally {
      if (!success) {
        // Close all readers we had opened:
        for (i++; i < sis.size(); i++) {
          try {
            readers[i].close();
          } catch (Throwable ignore) {
            // keep going - we want to clean up as much as possible
          }
        }
      }
    }
  }

  initialize(readers);
}
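/*
 * Usage sketch (illustrative; not part of the original source). In the 3.x API this
 * constructor is normally reached indirectly through IndexReader.open(Directory, boolean),
 * which reads the current SegmentInfos and builds the per-segment readers as above.
 * The index path below is a hypothetical example.
 */
static void openReaderSketch() throws IOException {
  Directory dir = FSDirectory.open(new File("/path/to/index"));
  IndexReader reader = IndexReader.open(dir, true); // true = read-only DirectoryReader
  try {
    System.out.println("docs=" + reader.numDocs() + " deletions=" + reader.hasDeletions());
  } finally {
    reader.close();
  }
}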
public void listSegments() throws IOException {
  DecimalFormat formatter = new DecimalFormat("###,###.###");
  for (int x = 0; x < infos.size(); x++) {
    SegmentInfo info = infos.info(x);
    String sizeStr = formatter.format(info.sizeInBytes(true));
    System.out.println(info.name + " " + sizeStr);
  }
}
ReaderCommit(SegmentInfos infos, Directory dir) throws IOException {
  segmentsFileName = infos.getCurrentSegmentFileName();
  this.dir = dir;
  userData = infos.getUserData();
  files = Collections.unmodifiableCollection(infos.files(dir, true));
  version = infos.getVersion();
  generation = infos.getGeneration();
  segmentCount = infos.size();
}
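/*
 * Usage sketch (illustrative; not part of the original source). ReaderCommit instances
 * surface through the public IndexCommit API, for example via the static
 * IndexReader.listCommits(Directory) call. The dir parameter is assumed to be an
 * already-open Directory.
 */
static void listCommitsSketch(Directory dir) throws IOException {
  for (IndexCommit commit : IndexReader.listCommits(dir)) {
    System.out.println(commit.getSegmentsFileName()
        + " generation=" + commit.getGeneration()
        + " userData=" + commit.getUserData());
  }
}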
// Used by near real-time search
DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor,
    boolean applyAllDeletes) throws IOException {
  this.directory = writer.getDirectory();
  this.readOnly = true;
  this.applyAllDeletes = applyAllDeletes;  // saved for reopen
  this.termInfosIndexDivisor = termInfosIndexDivisor;
  readerFinishedListeners = writer.getReaderFinishedListeners();

  // IndexWriter synchronizes externally before calling
  // us, which ensures infos will not change; so there's
  // no need to process segments in reverse order
  final int numSegments = infos.size();

  List<SegmentReader> readers = new ArrayList<SegmentReader>();
  final Directory dir = writer.getDirectory();

  segmentInfos = (SegmentInfos) infos.clone();
  int infosUpto = 0;
  for (int i = 0; i < numSegments; i++) {
    boolean success = false;
    try {
      final SegmentInfo info = infos.info(i);
      assert info.dir == dir;
      final SegmentReader reader = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
      if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
        reader.readerFinishedListeners = readerFinishedListeners;
        readers.add(reader);
        infosUpto++;
      } else {
        reader.close();
        segmentInfos.remove(infosUpto);
      }
      success = true;
    } finally {
      if (!success) {
        // Close all readers we had opened:
        for (SegmentReader reader : readers) {
          try {
            reader.close();
          } catch (Throwable ignore) {
            // keep going - we want to clean up as much as possible
          }
        }
      }
    }
  }

  this.writer = writer;

  initialize(readers.toArray(new SegmentReader[readers.size()]));
}
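/*
 * Usage sketch (illustrative; not part of the original source). The near real-time
 * constructor above is normally reached via IndexReader.open(IndexWriter, boolean),
 * letting searches see documents that were indexed but not yet committed. The writer
 * parameter is assumed to be an already-open IndexWriter.
 */
static void nrtReaderSketch(IndexWriter writer) throws IOException {
  IndexReader nrtReader = IndexReader.open(writer, true); // true = apply all deletes
  try {
    System.out.println("NRT reader sees " + nrtReader.numDocs() + " docs");
  } finally {
    nrtReader.close();
  }
}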
@Override
public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
  SegmentInfos sinfos = new SegmentInfos();
  sinfos.read(_dir);
  int size = sinfos.size();

  if (in instanceof MultiReader) {
    // setup current reader list
    List<IndexReader> boboReaderList = new LinkedList<IndexReader>();
    ReaderUtil.gatherSubReaders((List<IndexReader>) boboReaderList, in);
    Map<String, BoboIndexReader> readerMap = new HashMap<String, BoboIndexReader>();
    for (IndexReader reader : boboReaderList) {
      BoboIndexReader boboReader = (BoboIndexReader) reader;
      SegmentReader sreader = (SegmentReader) (boboReader.in);
      readerMap.put(sreader.getSegmentName(), boboReader);
    }

    ArrayList<BoboIndexReader> currentReaders = new ArrayList<BoboIndexReader>(size);
    boolean isNewReader = false;
    for (int i = 0; i < size; ++i) {
      SegmentInfo sinfo = (SegmentInfo) sinfos.info(i);
      BoboIndexReader breader = readerMap.remove(sinfo.name);
      if (breader != null) {
        // should use SegmentReader.reopen
        // TODO: see LUCENE-2559
        BoboIndexReader newReader = (BoboIndexReader) breader.reopen(true);
        if (newReader != breader) {
          isNewReader = true;
        }
        if (newReader != null) {
          currentReaders.add(newReader);
        }
      } else {
        isNewReader = true;
        SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
        breader = BoboIndexReader.getInstanceAsSubReader(
            newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
        breader._dir = _dir;
        currentReaders.add(breader);
      }
    }
    isNewReader = isNewReader || (readerMap.size() != 0);
    if (!isNewReader) {
      return this;
    } else {
      MultiReader newMreader = new MultiReader(
          currentReaders.toArray(new BoboIndexReader[currentReaders.size()]), false);
      BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(
          newMreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
      newReader._dir = _dir;
      return newReader;
    }
  } else if (in instanceof SegmentReader) {
    // should use SegmentReader.reopen
    // TODO: see LUCENE-2559
    SegmentReader sreader = (SegmentReader) in;
    int numDels = sreader.numDeletedDocs();

    SegmentInfo sinfo = null;
    boolean sameSeg = false;
    // get SegmentInfo instance
    for (int i = 0; i < size; ++i) {
      SegmentInfo sinfoTmp = (SegmentInfo) sinfos.info(i);
      if (sinfoTmp.name.equals(sreader.getSegmentName())) {
        int numDels2 = sinfoTmp.getDelCount();
        sameSeg = numDels == numDels2;
        sinfo = sinfoTmp;
        break;
      }
    }

    if (sinfo == null) {
      // segment no longer exists
      return null;
    }
    if (sameSeg) {
      return this;
    } else {
      SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
      return BoboIndexReader.getInstanceAsSubReader(
          newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
    }
  } else {
    // should not reach here, a catch-all default case
    IndexReader newInner = in.reopen(true);
    if (in != newInner) {
      return BoboIndexReader.getInstance(
          newInner, _facetHandlers, _runtimeFacetHandlerFactories, _workArea);
    } else {
      return this;
    }
  }
}
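/*
 * Usage sketch (illustrative; not part of the original source). Callers of the reopen()
 * above must handle all three outcomes: the same instance (index unchanged), a new
 * instance (the old one should be closed), or null (the wrapped segment no longer
 * exists). The boboReader parameter is assumed to be an already-open BoboIndexReader.
 */
static BoboIndexReader refreshBoboSketch(BoboIndexReader boboReader) throws IOException {
  IndexReader refreshed = boboReader.reopen();
  if (refreshed == boboReader) {
    return boboReader;               // index unchanged; keep the same instance
  }
  if (refreshed == null) {
    return null;                     // the wrapped segment no longer exists
  }
  boboReader.close();                // a new reader was returned; release the old one
  return (BoboIndexReader) refreshed;
}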
@Deprecated
@Override
public boolean isOptimized() {
  ensureOpen();
  return segmentInfos.size() == 1 && !hasDeletions();
}
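/*
 * Usage sketch (illustrative; not part of the original source). In the 3.x API a caller
 * could consult isOptimized() to skip a redundant optimize during offline maintenance.
 * Both parameters are assumed to be already open on the same index.
 */
static void maybeOptimizeSketch(IndexReader reader, IndexWriter writer) throws IOException {
  if (!reader.isOptimized()) {   // more than one segment, or deletions present
    writer.optimize();           // deprecated in 3.5 in favor of forceMerge(1)
  }
}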
/** This constructor is only used for {@link #doOpenIfChanged()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders,
    int[] oldStarts, Map<String, byte[]> oldNormsCache, boolean readOnly, boolean doClone,
    int termInfosIndexDivisor,
    Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
  this.directory = directory;
  this.readOnly = readOnly;
  this.segmentInfos = infos;
  this.termInfosIndexDivisor = termInfosIndexDivisor;
  assert readerFinishedListeners != null;
  this.readerFinishedListeners = readerFinishedListeners;
  applyAllDeletes = false;

  // we put the old SegmentReaders in a map, that allows us
  // to lookup a reader using its segment name
  Map<String, Integer> segmentReaders = new HashMap<String, Integer>();

  if (oldReaders != null) {
    // create a Map SegmentName->SegmentReader
    for (int i = 0; i < oldReaders.length; i++) {
      segmentReaders.put(oldReaders[i].getSegmentName(), Integer.valueOf(i));
    }
  }

  SegmentReader[] newReaders = new SegmentReader[infos.size()];

  // remember which readers are shared between the old and the re-opened
  // DirectoryReader - we have to incRef those readers
  boolean[] readerShared = new boolean[infos.size()];

  for (int i = infos.size() - 1; i >= 0; i--) {
    // find SegmentReader for this segment
    Integer oldReaderIndex = segmentReaders.get(infos.info(i).name);
    if (oldReaderIndex == null) {
      // this is a new segment, no old SegmentReader can be reused
      newReaders[i] = null;
    } else {
      // there is an old reader for this segment - we'll try to reopen it
      newReaders[i] = oldReaders[oldReaderIndex.intValue()];
    }

    boolean success = false;
    try {
      SegmentReader newReader;
      if (newReaders[i] == null
          || infos.info(i).getUseCompoundFile() != newReaders[i].getSegmentInfo().getUseCompoundFile()) {

        // We should never see a totally new segment during cloning
        assert !doClone;

        // this is a new reader; in case we hit an exception we can close it safely
        newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
        newReader.readerFinishedListeners = readerFinishedListeners;
        readerShared[i] = false;
        newReaders[i] = newReader;
      } else {
        newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
        if (newReader == null) {
          // this reader will be shared between the old and the new one,
          // so we must incRef it
          readerShared[i] = true;
          newReaders[i].incRef();
        } else {
          assert newReader.readerFinishedListeners == readerFinishedListeners;
          readerShared[i] = false;
          // Steal ref returned to us by reopenSegment:
          newReaders[i] = newReader;
        }
      }
      success = true;
    } finally {
      if (!success) {
        for (i++; i < infos.size(); i++) {
          if (newReaders[i] != null) {
            try {
              if (!readerShared[i]) {
                // this is a new subReader that is not used by the old one,
                // we can close it
                newReaders[i].close();
              } else {
                // this subReader is also used by the old reader, so instead
                // closing we must decRef it
                newReaders[i].decRef();
              }
            } catch (IOException ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }
  }

  // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
  initialize(newReaders);

  // try to copy unchanged norms from the old normsCache to the new one
  if (oldNormsCache != null) {
    for (Map.Entry<String, byte[]> entry : oldNormsCache.entrySet()) {
      String field = entry.getKey();
      if (!hasNorms(field)) {
        continue;
      }

      byte[] oldBytes = entry.getValue();
      byte[] bytes = new byte[maxDoc()];

      for (int i = 0; i < subReaders.length; i++) {
        Integer oldReaderIndex = segmentReaders.get(subReaders[i].getSegmentName());

        // this SegmentReader was not re-opened, we can copy all of its norms
        if (oldReaderIndex != null
            && (oldReaders[oldReaderIndex.intValue()] == subReaders[i]
                || oldReaders[oldReaderIndex.intValue()].norms.get(field) == subReaders[i].norms.get(field))) {
          // we don't have to synchronize here: either this constructor is called from a
          // SegmentReader, in which case no old norms cache is present, or it is called
          // from MultiReader.reopen(), which is synchronized
          System.arraycopy(oldBytes, oldStarts[oldReaderIndex.intValue()], bytes, starts[i],
              starts[i + 1] - starts[i]);
        } else {
          subReaders[i].norms(field, bytes, starts[i]);
        }
      }

      normsCache.put(field, bytes);  // update cache
    }
  }
}
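/*
 * Usage sketch (illustrative; not part of the original source). The re-open constructor
 * above backs the public IndexReader.openIfChanged(IndexReader) call (3.5+), which
 * returns null when the index is unchanged and otherwise a new reader that shares the
 * unchanged SegmentReaders with the old one.
 */
static IndexReader reopenIfChangedSketch(IndexReader current) throws IOException {
  IndexReader newReader = IndexReader.openIfChanged(current);
  if (newReader == null) {
    return current;      // nothing changed; keep using the existing reader
  }
  current.close();       // a new reader was returned; release the old one
  return newReader;
}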
private int getIdx(String name) {
  for (int x = 0; x < infos.size(); x++) {
    if (name.equals(infos.info(x).name)) return x;
  }
  return -1;
}
private SegmentInfo getInfo(String name) {
  for (int x = 0; x < infos.size(); x++) {
    if (name.equals(infos.info(x).name)) return infos.info(x);
  }
  return null;
}
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
  if (verbose(writer)) {
    message("findMerges: " + infos.size() + " segments", writer);
  }
  if (infos.size() == 0) {
    return null;
  }
  final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
  final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>();

  final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
  Collections.sort(infosSorted, new SegmentByteSizeDescending(writer));

  // Compute total index bytes & print details about the index
  long totIndexBytes = 0;
  long minSegmentBytes = Long.MAX_VALUE;
  for (SegmentCommitInfo info : infosSorted) {
    final long segBytes = size(info, writer);
    if (verbose(writer)) {
      String extra = merging.contains(info) ? " [merging]" : "";
      if (segBytes >= maxMergedSegmentBytes / 2.0) {
        extra += " [skip: too large]";
      } else if (segBytes < floorSegmentBytes) {
        extra += " [floored]";
      }
      message(" seg=" + writer.segString(info) + " size="
          + String.format(Locale.ROOT, "%.3f", segBytes / 1024 / 1024.) + " MB" + extra, writer);
    }

    minSegmentBytes = Math.min(segBytes, minSegmentBytes);
    // Accum total byte size
    totIndexBytes += segBytes;
  }

  // If we have too-large segments, grace them out
  // of the maxSegmentCount:
  int tooBigCount = 0;
  while (tooBigCount < infosSorted.size()) {
    long segBytes = size(infosSorted.get(tooBigCount), writer);
    if (segBytes < maxMergedSegmentBytes / 2.0) {
      break;
    }
    totIndexBytes -= segBytes;
    tooBigCount++;
  }

  minSegmentBytes = floorSize(minSegmentBytes);

  // Compute max allowed segs in the index
  long levelSize = minSegmentBytes;
  long bytesLeft = totIndexBytes;
  double allowedSegCount = 0;
  while (true) {
    final double segCountLevel = bytesLeft / (double) levelSize;
    if (segCountLevel < segsPerTier) {
      allowedSegCount += Math.ceil(segCountLevel);
      break;
    }
    allowedSegCount += segsPerTier;
    bytesLeft -= segsPerTier * levelSize;
    levelSize *= maxMergeAtOnce;
  }
  int allowedSegCountInt = (int) allowedSegCount;

  MergeSpecification spec = null;

  // Cycle to possibly select more than one merge:
  while (true) {

    long mergingBytes = 0;

    // Gather eligible segments for merging, ie segments
    // not already being merged and not already picked (by
    // prior iteration of this loop) for merging:
    final List<SegmentCommitInfo> eligible = new ArrayList<>();
    for (int idx = tooBigCount; idx < infosSorted.size(); idx++) {
      final SegmentCommitInfo info = infosSorted.get(idx);
      if (merging.contains(info)) {
        mergingBytes += size(info, writer);
      } else if (!toBeMerged.contains(info)) {
        eligible.add(info);
      }
    }

    final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;

    if (verbose(writer)) {
      message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size()
          + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount, writer);
    }

    if (eligible.size() == 0) {
      return spec;
    }

    if (eligible.size() > allowedSegCountInt) {

      // OK we are over budget -- find best merge!
      MergeScore bestScore = null;
      List<SegmentCommitInfo> best = null;
      boolean bestTooLarge = false;
      long bestMergeBytes = 0;

      // Consider all merge starts:
      for (int startIdx = 0; startIdx <= eligible.size() - maxMergeAtOnce; startIdx++) {

        long totAfterMergeBytes = 0;

        final List<SegmentCommitInfo> candidate = new ArrayList<>();
        boolean hitTooLarge = false;
        for (int idx = startIdx; idx < eligible.size() && candidate.size() < maxMergeAtOnce; idx++) {
          final SegmentCommitInfo info = eligible.get(idx);
          final long segBytes = size(info, writer);

          if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
            hitTooLarge = true;
            // NOTE: we continue, so that we can try
            // "packing" smaller segments into this merge
            // to see if we can get closer to the max
            // size; this in general is not perfect since
            // this is really "bin packing" and we'd have
            // to try different permutations.
            continue;
          }
          candidate.add(info);
          totAfterMergeBytes += segBytes;
        }

        // We should never see an empty candidate: we iterated over maxMergeAtOnce
        // segments, and already pre-excluded the too-large segments:
        assert candidate.size() > 0;

        final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
        if (verbose(writer)) {
          message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " "
              + score.getExplanation() + " tooLarge=" + hitTooLarge + " size="
              + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. / 1024.), writer);
        }

        // If we are already running a max sized merge
        // (maxMergeIsRunning), don't allow another max
        // sized merge to kick off:
        if ((bestScore == null || score.getScore() < bestScore.getScore())
            && (!hitTooLarge || !maxMergeIsRunning)) {
          best = candidate;
          bestScore = score;
          bestTooLarge = hitTooLarge;
          bestMergeBytes = totAfterMergeBytes;
        }
      }

      if (best != null) {
        if (spec == null) {
          spec = new MergeSpecification();
        }
        final OneMerge merge = new OneMerge(best);
        spec.add(merge);
        for (SegmentCommitInfo info : merge.segments) {
          toBeMerged.add(info);
        }

        if (verbose(writer)) {
          message(" add merge=" + writer.segString(merge.segments) + " size="
              + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.) + " score="
              + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " "
              + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""), writer);
        }
      } else {
        return spec;
      }
    } else {
      return spec;
    }
  }
}
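/*
 * Configuration sketch (illustrative; not part of the original source). The thresholds
 * read by findMerges above (maxMergedSegmentBytes, floorSegmentBytes, segsPerTier,
 * maxMergeAtOnce) are set through the public TieredMergePolicy setters, typically on the
 * IndexWriterConfig. Assumes the 5.x-era API matching the findMerges signature above;
 * the analyzer parameter is whatever Analyzer the application already uses, and the
 * values are arbitrary examples.
 */
static IndexWriterConfig tieredMergePolicySketch(Analyzer analyzer) {
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setMaxMergedSegmentMB(5 * 1024);   // cap merged segments at roughly 5 GB
  tmp.setFloorSegmentMB(2);              // treat very small segments as at least 2 MB
  tmp.setSegmentsPerTier(10);            // aim for about 10 segments per size tier
  tmp.setMaxMergeAtOnce(10);             // merge at most 10 segments in one go

  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  iwc.setMergePolicy(tmp);
  return iwc;
}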