public void testSizeInBytesCache() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.setInfoStream(VERBOSE ? System.out : null);
  Document doc = new Document();
  doc.add(new Field("a", "value", Store.YES, Index.ANALYZED));
  writer.addDocument(doc);
  writer.close();

  SegmentInfos sis = new SegmentInfos();
  sis.read(dir);
  SegmentInfo si = sis.info(0);
  long sizeInBytesNoStore = si.sizeInBytes(false);
  long sizeInBytesWithStore = si.sizeInBytes(true);
  assertTrue("sizeInBytesNoStore=" + sizeInBytesNoStore
      + " sizeInBytesWithStore=" + sizeInBytesWithStore,
      sizeInBytesWithStore > sizeInBytesNoStore);
  dir.close();
}
/** Prints the name and formatted size (in bytes, including the doc store) of each segment. */
public void listSegments() throws IOException {
  DecimalFormat formatter = new DecimalFormat("###,###.###");
  for (int x = 0; x < infos.size(); x++) {
    SegmentInfo info = infos.info(x);
    String sizeStr = formatter.format(info.sizeInBytes(true));
    System.out.println(info.name + " " + sizeStr);
  }
}
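// Usage sketch (illustrative; not from the original source). Assumes `infos`
// is the SegmentInfos field this class populates from the index directory,
// loaded the same way as in the test above.
public void listSegmentsExample() throws IOException {
  Directory dir = FSDirectory.open(new File("/path/to/index"));
  infos = new SegmentInfos();
  infos.read(dir); // loads the current segments_N file
  listSegments();  // prints one "name size" line per segment
  dir.close();
}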
// Used by near real-time search
DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor,
    boolean applyAllDeletes) throws IOException {
  this.directory = writer.getDirectory();
  this.readOnly = true;
  this.applyAllDeletes = applyAllDeletes; // saved for reopen
  this.termInfosIndexDivisor = termInfosIndexDivisor;
  readerFinishedListeners = writer.getReaderFinishedListeners();

  // IndexWriter synchronizes externally before calling
  // us, which ensures infos will not change; so there's
  // no need to process segments in reverse order
  final int numSegments = infos.size();

  List<SegmentReader> readers = new ArrayList<SegmentReader>();
  final Directory dir = writer.getDirectory();

  segmentInfos = (SegmentInfos) infos.clone();
  int infosUpto = 0;
  for (int i = 0; i < numSegments; i++) {
    boolean success = false;
    try {
      final SegmentInfo info = infos.info(i);
      assert info.dir == dir;
      final SegmentReader reader =
          writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
      if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
        reader.readerFinishedListeners = readerFinishedListeners;
        readers.add(reader);
        infosUpto++;
      } else {
        reader.close();
        segmentInfos.remove(infosUpto);
      }
      success = true;
    } finally {
      if (!success) {
        // Close all readers we had opened:
        for (SegmentReader reader : readers) {
          try {
            reader.close();
          } catch (Throwable ignore) {
            // keep going - we want to clean up as much as possible
          }
        }
      }
    }
  }

  this.writer = writer;

  initialize(readers.toArray(new SegmentReader[readers.size()]));
}
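// Usage sketch (illustrative; not from the original source). Applications do
// not call this constructor directly; they reach it through the Lucene 3.x
// near-real-time API, either IndexWriter.getReader() on early 3.x releases or
// IndexReader.open(writer, applyAllDeletes) on later ones:
IndexReader openNrtReaderExample(IndexWriter writer) throws IOException {
  return IndexReader.open(writer, true); // true = applyAllDeletes
}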
/** Construct reading the named set of readers. */
DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy,
    boolean readOnly, int termInfosIndexDivisor,
    Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
  this.directory = directory;
  this.readOnly = readOnly;
  this.segmentInfos = sis;
  this.deletionPolicy = deletionPolicy;
  this.termInfosIndexDivisor = termInfosIndexDivisor;
  if (readerFinishedListeners == null) {
    this.readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(
        new ConcurrentHashMap<ReaderFinishedListener, Boolean>());
  } else {
    this.readerFinishedListeners = readerFinishedListeners;
  }
  applyAllDeletes = false;

  // To reduce the chance of hitting FileNotFound
  // (and having to retry), we open segments in
  // reverse because IndexWriter merges & deletes
  // the newest segments first.
  SegmentReader[] readers = new SegmentReader[sis.size()];
  for (int i = sis.size() - 1; i >= 0; i--) {
    boolean success = false;
    try {
      readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
      readers[i].readerFinishedListeners = this.readerFinishedListeners;
      success = true;
    } finally {
      if (!success) {
        // Close all readers we had opened:
        for (i++; i < sis.size(); i++) {
          try {
            readers[i].close();
          } catch (Throwable ignore) {
            // keep going - we want to clean up as much as possible
          }
        }
      }
    }
  }

  initialize(readers);
}
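// Usage sketch (illustrative; not from the original source). This
// package-private constructor is reached through the public entry point:
IndexReader openReaderExample() throws IOException {
  Directory dir = FSDirectory.open(new File("/path/to/index"));
  return IndexReader.open(dir, true); // true = read-only
}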
@Override
public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
  SegmentInfos sinfos = new SegmentInfos();
  sinfos.read(_dir);
  int size = sinfos.size();

  if (in instanceof MultiReader) {
    // setup current reader list
    List<IndexReader> boboReaderList = new LinkedList<IndexReader>();
    ReaderUtil.gatherSubReaders(boboReaderList, in);

    Map<String, BoboIndexReader> readerMap = new HashMap<String, BoboIndexReader>();
    for (IndexReader reader : boboReaderList) {
      BoboIndexReader boboReader = (BoboIndexReader) reader;
      SegmentReader sreader = (SegmentReader) (boboReader.in);
      readerMap.put(sreader.getSegmentName(), boboReader);
    }

    ArrayList<BoboIndexReader> currentReaders = new ArrayList<BoboIndexReader>(size);
    boolean isNewReader = false;
    for (int i = 0; i < size; ++i) {
      SegmentInfo sinfo = (SegmentInfo) sinfos.info(i);
      BoboIndexReader breader = readerMap.remove(sinfo.name);
      if (breader != null) {
        // should use SegmentReader.reopen
        // TODO: see LUCENE-2559
        BoboIndexReader newReader = (BoboIndexReader) breader.reopen(true);
        if (newReader != breader) {
          isNewReader = true;
        }
        if (newReader != null) {
          currentReaders.add(newReader);
        }
      } else {
        isNewReader = true;
        SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
        breader = BoboIndexReader.getInstanceAsSubReader(
            newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
        breader._dir = _dir;
        currentReaders.add(breader);
      }
    }

    isNewReader = isNewReader || (readerMap.size() != 0);
    if (!isNewReader) {
      return this;
    } else {
      MultiReader newMreader = new MultiReader(
          currentReaders.toArray(new BoboIndexReader[currentReaders.size()]), false);
      BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(
          newMreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
      newReader._dir = _dir;
      return newReader;
    }
  } else if (in instanceof SegmentReader) {
    // should use SegmentReader.reopen
    // TODO: see LUCENE-2559
    SegmentReader sreader = (SegmentReader) in;
    int numDels = sreader.numDeletedDocs();

    // get SegmentInfo instance
    SegmentInfo sinfo = null;
    boolean sameSeg = false;
    for (int i = 0; i < size; ++i) {
      SegmentInfo sinfoTmp = (SegmentInfo) sinfos.info(i);
      if (sinfoTmp.name.equals(sreader.getSegmentName())) {
        int numDels2 = sinfoTmp.getDelCount();
        sameSeg = numDels == numDels2;
        sinfo = sinfoTmp;
        break;
      }
    }

    if (sinfo == null) {
      // segment no longer exists
      return null;
    }
    if (sameSeg) {
      return this;
    } else {
      SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
      return BoboIndexReader.getInstanceAsSubReader(
          newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
    }
  } else {
    // should not reach here, a catch-all default case
    IndexReader reader = in.reopen(true);
    if (in != reader) {
      // wrap the freshly reopened inner reader
      return BoboIndexReader.getInstance(
          reader, _facetHandlers, _runtimeFacetHandlerFactories, _workArea);
    } else {
      return this;
    }
  }
}
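// Usage sketch (illustrative; not from the original source). The standard
// Lucene reopen() contract applies: if a different instance is returned, the
// caller closes the old reader. Note this implementation may also return null
// when the underlying segment no longer exists.
BoboIndexReader refreshExample(BoboIndexReader boboReader) throws IOException {
  IndexReader newReader = boboReader.reopen();
  if (newReader != null && newReader != boboReader) {
    boboReader.close();
    return (BoboIndexReader) newReader;
  }
  return boboReader;
}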
/** This constructor is only used for {@link #doOpenIfChanged()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders,
    int[] oldStarts, Map<String, byte[]> oldNormsCache, boolean readOnly, boolean doClone,
    int termInfosIndexDivisor,
    Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
  this.directory = directory;
  this.readOnly = readOnly;
  this.segmentInfos = infos;
  this.termInfosIndexDivisor = termInfosIndexDivisor;
  assert readerFinishedListeners != null;
  this.readerFinishedListeners = readerFinishedListeners;
  applyAllDeletes = false;

  // we put the old SegmentReaders in a map that allows us
  // to look up a reader using its segment name
  Map<String, Integer> segmentReaders = new HashMap<String, Integer>();

  if (oldReaders != null) {
    // create a Map SegmentName->SegmentReader
    for (int i = 0; i < oldReaders.length; i++) {
      segmentReaders.put(oldReaders[i].getSegmentName(), Integer.valueOf(i));
    }
  }

  SegmentReader[] newReaders = new SegmentReader[infos.size()];

  // remember which readers are shared between the old and the re-opened
  // DirectoryReader - we have to incRef those readers
  boolean[] readerShared = new boolean[infos.size()];

  for (int i = infos.size() - 1; i >= 0; i--) {
    // find SegmentReader for this segment
    Integer oldReaderIndex = segmentReaders.get(infos.info(i).name);
    if (oldReaderIndex == null) {
      // this is a new segment, no old SegmentReader can be reused
      newReaders[i] = null;
    } else {
      // there is an old reader for this segment - we'll try to reopen it
      newReaders[i] = oldReaders[oldReaderIndex.intValue()];
    }

    boolean success = false;
    try {
      SegmentReader newReader;
      if (newReaders[i] == null
          || infos.info(i).getUseCompoundFile()
              != newReaders[i].getSegmentInfo().getUseCompoundFile()) {

        // We should never see a totally new segment during cloning
        assert !doClone;

        // this is a new reader; in case we hit an exception we can close it safely
        newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
        newReader.readerFinishedListeners = readerFinishedListeners;
        readerShared[i] = false;
        newReaders[i] = newReader;
      } else {
        newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
        if (newReader == null) {
          // this reader will be shared between the old and the new one,
          // so we must incRef it
          readerShared[i] = true;
          newReaders[i].incRef();
        } else {
          assert newReader.readerFinishedListeners == readerFinishedListeners;
          readerShared[i] = false;
          // Steal ref returned to us by reopenSegment:
          newReaders[i] = newReader;
        }
      }
      success = true;
    } finally {
      if (!success) {
        for (i++; i < infos.size(); i++) {
          if (newReaders[i] != null) {
            try {
              if (!readerShared[i]) {
                // this is a new subReader that is not used by the old one,
                // we can close it
                newReaders[i].close();
              } else {
                // this subReader is also used by the old reader, so instead
                // of closing it we must decRef it
                newReaders[i].decRef();
              }
            } catch (IOException ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }
  }

  // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
  initialize(newReaders);

  // try to copy unchanged norms from the old normsCache to the new one
  if (oldNormsCache != null) {
    for (Map.Entry<String, byte[]> entry : oldNormsCache.entrySet()) {
      String field = entry.getKey();
      if (!hasNorms(field)) {
        continue;
      }

      byte[] oldBytes = entry.getValue();
      byte[] bytes = new byte[maxDoc()];

      for (int i = 0; i < subReaders.length; i++) {
        Integer oldReaderIndex = segmentReaders.get(subReaders[i].getSegmentName());

        // this SegmentReader was not re-opened, we can copy all of its norms
        if (oldReaderIndex != null
            && (oldReaders[oldReaderIndex.intValue()] == subReaders[i]
                || oldReaders[oldReaderIndex.intValue()].norms.get(field)
                    == subReaders[i].norms.get(field))) {
          // we don't have to synchronize here: either this constructor is called from a
          // SegmentReader, in which case no old norms cache is present, or it is called
          // from MultiReader.reopen(), which is synchronized
          System.arraycopy(oldBytes, oldStarts[oldReaderIndex.intValue()],
              bytes, starts[i], starts[i + 1] - starts[i]);
        } else {
          subReaders[i].norms(field, bytes, starts[i]);
        }
      }

      normsCache.put(field, bytes); // update cache
    }
  }
}
/** Returns the index of the segment with the given name, or -1 if it does not exist. */
private int getIdx(String name) {
  for (int x = 0; x < infos.size(); x++) {
    if (name.equals(infos.info(x).name)) {
      return x;
    }
  }
  return -1;
}
/** Returns the SegmentInfo with the given name, or null if it does not exist. */
private SegmentInfo getInfo(String name) {
  for (int x = 0; x < infos.size(); x++) {
    if (name.equals(infos.info(x).name)) {
      return infos.info(x);
    }
  }
  return null;
}
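// Usage sketch (illustrative; not from the original source): resolving a
// segment by name before inspecting it ("_0" is a hypothetical segment name).
void printSegmentSizeExample() throws IOException {
  SegmentInfo info = getInfo("_0");
  if (info != null) {
    System.out.println(info.name + " " + info.sizeInBytes(true));
  }
}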