예제 #1
0
  /** Construct reading the named set of readers. */
  DirectoryReader(
      Directory directory,
      SegmentInfos sis,
      IndexDeletionPolicy deletionPolicy,
      boolean readOnly,
      int termInfosIndexDivisor,
      Collection<ReaderFinishedListener> readerFinishedListeners)
      throws IOException {
    this.directory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = sis;
    this.deletionPolicy = deletionPolicy;
    this.termInfosIndexDivisor = termInfosIndexDivisor;

    if (readerFinishedListeners == null) {
      this.readerFinishedListeners =
          new MapBackedSet<ReaderFinishedListener>(
              new ConcurrentHashMap<ReaderFinishedListener, Boolean>());
    } else {
      this.readerFinishedListeners = readerFinishedListeners;
    }
    applyAllDeletes = false;

    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.

    SegmentReader[] readers = new SegmentReader[sis.size()];
    for (int i = sis.size() - 1; i >= 0; i--) {
      boolean success = false;
      try {
        readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
        readers[i].readerFinishedListeners = this.readerFinishedListeners;
        success = true;
      } finally {
        if (!success) {
          // Close all readers we had opened:
          for (i++; i < sis.size(); i++) {
            try {
              readers[i].close();
            } catch (Throwable ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }

    initialize(readers);
  }
예제 #2
0
 public void listSegments() throws IOException {
   DecimalFormat formatter = new DecimalFormat("###,###.###");
   for (int x = 0; x < infos.size(); x++) {
     SegmentInfo info = infos.info(x);
     String sizeStr = formatter.format(info.sizeInBytes(true));
     System.out.println(info.name + " " + sizeStr);
   }
 }
예제 #3
0
 ReaderCommit(SegmentInfos infos, Directory dir) throws IOException {
   segmentsFileName = infos.getCurrentSegmentFileName();
   this.dir = dir;
   userData = infos.getUserData();
   files = Collections.unmodifiableCollection(infos.files(dir, true));
   version = infos.getVersion();
   generation = infos.getGeneration();
   segmentCount = infos.size();
 }
예제 #4
0
  // Used by near real-time search
  DirectoryReader(
      IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, boolean applyAllDeletes)
      throws IOException {
    this.directory = writer.getDirectory();
    this.readOnly = true;
    this.applyAllDeletes = applyAllDeletes; // saved for reopen

    this.termInfosIndexDivisor = termInfosIndexDivisor;
    readerFinishedListeners = writer.getReaderFinishedListeners();

    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    final int numSegments = infos.size();

    List<SegmentReader> readers = new ArrayList<SegmentReader>();
    final Directory dir = writer.getDirectory();

    segmentInfos = (SegmentInfos) infos.clone();
    int infosUpto = 0;
    for (int i = 0; i < numSegments; i++) {
      boolean success = false;
      try {
        final SegmentInfo info = infos.info(i);
        assert info.dir == dir;
        final SegmentReader reader =
            writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
        if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
          reader.readerFinishedListeners = readerFinishedListeners;
          readers.add(reader);
          infosUpto++;
        } else {
          reader.close();
          segmentInfos.remove(infosUpto);
        }
        success = true;
      } finally {
        if (!success) {
          // Close all readers we had opened:
          for (SegmentReader reader : readers) {
            try {
              reader.close();
            } catch (Throwable ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }

    this.writer = writer;

    initialize(readers.toArray(new SegmentReader[readers.size()]));
  }
예제 #5
0
  @Override
  public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
    IndexReader newInner = null;

    SegmentInfos sinfos = new SegmentInfos();
    sinfos.read(_dir);
    int size = sinfos.size();

    if (in instanceof MultiReader) {
      // setup current reader list
      List<IndexReader> boboReaderList = new LinkedList<IndexReader>();
      ReaderUtil.gatherSubReaders((List<IndexReader>) boboReaderList, in);
      Map<String, BoboIndexReader> readerMap = new HashMap<String, BoboIndexReader>();
      for (IndexReader reader : boboReaderList) {
        BoboIndexReader boboReader = (BoboIndexReader) reader;
        SegmentReader sreader = (SegmentReader) (boboReader.in);
        readerMap.put(sreader.getSegmentName(), boboReader);
      }

      ArrayList<BoboIndexReader> currentReaders = new ArrayList<BoboIndexReader>(size);
      boolean isNewReader = false;
      for (int i = 0; i < size; ++i) {
        SegmentInfo sinfo = (SegmentInfo) sinfos.info(i);
        BoboIndexReader breader = readerMap.remove(sinfo.name);
        if (breader != null) {
          // should use SegmentReader.reopen
          // TODO: see LUCENE-2559
          BoboIndexReader newReader = (BoboIndexReader) breader.reopen(true);
          if (newReader != breader) {
            isNewReader = true;
          }
          if (newReader != null) {
            currentReaders.add(newReader);
          }
        } else {
          isNewReader = true;
          SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
          breader =
              BoboIndexReader.getInstanceAsSubReader(
                  newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
          breader._dir = _dir;
          currentReaders.add(breader);
        }
      }
      isNewReader = isNewReader || (readerMap.size() != 0);
      if (!isNewReader) {
        return this;
      } else {
        MultiReader newMreader =
            new MultiReader(
                currentReaders.toArray(new BoboIndexReader[currentReaders.size()]), false);
        BoboIndexReader newReader =
            BoboIndexReader.getInstanceAsSubReader(
                newMreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
        newReader._dir = _dir;
        return newReader;
      }
    } else if (in instanceof SegmentReader) {
      // should use SegmentReader.reopen
      // TODO: see LUCENE-2559

      SegmentReader sreader = (SegmentReader) in;
      int numDels = sreader.numDeletedDocs();

      SegmentInfo sinfo = null;
      boolean sameSeg = false;
      // get SegmentInfo instance
      for (int i = 0; i < size; ++i) {
        SegmentInfo sinfoTmp = (SegmentInfo) sinfos.info(i);
        if (sinfoTmp.name.equals(sreader.getSegmentName())) {
          int numDels2 = sinfoTmp.getDelCount();
          sameSeg = numDels == numDels2;
          sinfo = sinfoTmp;
          break;
        }
      }

      if (sinfo == null) {
        // segment no longer exists
        return null;
      }
      if (sameSeg) {
        return this;
      } else {
        SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
        return BoboIndexReader.getInstanceAsSubReader(
            newSreader, this._facetHandlers, this._runtimeFacetHandlerFactories);
      }
    } else {
      // should not reach here, a catch-all default case
      IndexReader reader = in.reopen(true);
      if (in != reader) {
        return BoboIndexReader.getInstance(
            newInner, _facetHandlers, _runtimeFacetHandlerFactories, _workArea);
      } else {
        return this;
      }
    }
  }
예제 #6
0
 @Deprecated
 @Override
 public boolean isOptimized() {
   ensureOpen();
   return segmentInfos.size() == 1 && !hasDeletions();
 }
예제 #7
0
  /** This constructor is only used for {@link #doOpenIfChanged()} */
  DirectoryReader(
      Directory directory,
      SegmentInfos infos,
      SegmentReader[] oldReaders,
      int[] oldStarts,
      Map<String, byte[]> oldNormsCache,
      boolean readOnly,
      boolean doClone,
      int termInfosIndexDivisor,
      Collection<ReaderFinishedListener> readerFinishedListeners)
      throws IOException {
    this.directory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = infos;
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    assert readerFinishedListeners != null;
    this.readerFinishedListeners = readerFinishedListeners;
    applyAllDeletes = false;

    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    Map<String, Integer> segmentReaders = new HashMap<String, Integer>();

    if (oldReaders != null) {
      // create a Map SegmentName->SegmentReader
      for (int i = 0; i < oldReaders.length; i++) {
        segmentReaders.put(oldReaders[i].getSegmentName(), Integer.valueOf(i));
      }
    }

    SegmentReader[] newReaders = new SegmentReader[infos.size()];

    // remember which readers are shared between the old and the re-opened
    // DirectoryReader - we have to incRef those readers
    boolean[] readerShared = new boolean[infos.size()];

    for (int i = infos.size() - 1; i >= 0; i--) {
      // find SegmentReader for this segment
      Integer oldReaderIndex = segmentReaders.get(infos.info(i).name);
      if (oldReaderIndex == null) {
        // this is a new segment, no old SegmentReader can be reused
        newReaders[i] = null;
      } else {
        // there is an old reader for this segment - we'll try to reopen it
        newReaders[i] = oldReaders[oldReaderIndex.intValue()];
      }

      boolean success = false;
      try {
        SegmentReader newReader;
        if (newReaders[i] == null
            || infos.info(i).getUseCompoundFile()
                != newReaders[i].getSegmentInfo().getUseCompoundFile()) {

          // We should never see a totally new segment during cloning
          assert !doClone;

          // this is a new reader; in case we hit an exception we can close it safely
          newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
          newReader.readerFinishedListeners = readerFinishedListeners;
          readerShared[i] = false;
          newReaders[i] = newReader;
        } else {
          newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
          if (newReader == null) {
            // this reader will be shared between the old and the new one,
            // so we must incRef it
            readerShared[i] = true;
            newReaders[i].incRef();
          } else {
            assert newReader.readerFinishedListeners == readerFinishedListeners;
            readerShared[i] = false;
            // Steal ref returned to us by reopenSegment:
            newReaders[i] = newReader;
          }
        }
        success = true;
      } finally {
        if (!success) {
          for (i++; i < infos.size(); i++) {
            if (newReaders[i] != null) {
              try {
                if (!readerShared[i]) {
                  // this is a new subReader that is not used by the old one,
                  // we can close it
                  newReaders[i].close();
                } else {
                  // this subReader is also used by the old reader, so instead
                  // closing we must decRef it
                  newReaders[i].decRef();
                }
              } catch (IOException ignore) {
                // keep going - we want to clean up as much as possible
              }
            }
          }
        }
      }
    }

    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    initialize(newReaders);

    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null) {
      for (Map.Entry<String, byte[]> entry : oldNormsCache.entrySet()) {
        String field = entry.getKey();
        if (!hasNorms(field)) {
          continue;
        }

        byte[] oldBytes = entry.getValue();

        byte[] bytes = new byte[maxDoc()];

        for (int i = 0; i < subReaders.length; i++) {
          Integer oldReaderIndex = segmentReaders.get(subReaders[i].getSegmentName());

          // this SegmentReader was not re-opened, we can copy all of its norms
          if (oldReaderIndex != null
              && (oldReaders[oldReaderIndex.intValue()] == subReaders[i]
                  || oldReaders[oldReaderIndex.intValue()].norms.get(field)
                      == subReaders[i].norms.get(field))) {
            // we don't have to synchronize here: either this constructor is called from a
            // SegmentReader,
            // in which case no old norms cache is present, or it is called from
            // MultiReader.reopen(),
            // which is synchronized
            System.arraycopy(
                oldBytes,
                oldStarts[oldReaderIndex.intValue()],
                bytes,
                starts[i],
                starts[i + 1] - starts[i]);
          } else {
            subReaders[i].norms(field, bytes, starts[i]);
          }
        }

        normsCache.put(field, bytes); // update cache
      }
    }
  }
예제 #8
0
 private int getIdx(String name) {
   for (int x = 0; x < infos.size(); x++) {
     if (name.equals(infos.info(x).name)) return x;
   }
   return -1;
 }
예제 #9
0
 private SegmentInfo getInfo(String name) {
   for (int x = 0; x < infos.size(); x++) {
     if (name.equals(infos.info(x).name)) return infos.info(x);
   }
   return null;
 }
  @Override
  public MergeSpecification findMerges(
      MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
    if (verbose(writer)) {
      message("findMerges: " + infos.size() + " segments", writer);
    }
    if (infos.size() == 0) {
      return null;
    }
    final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
    final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>();

    final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
    Collections.sort(infosSorted, new SegmentByteSizeDescending(writer));

    // Compute total index bytes & print details about the index
    long totIndexBytes = 0;
    long minSegmentBytes = Long.MAX_VALUE;
    for (SegmentCommitInfo info : infosSorted) {
      final long segBytes = size(info, writer);
      if (verbose(writer)) {
        String extra = merging.contains(info) ? " [merging]" : "";
        if (segBytes >= maxMergedSegmentBytes / 2.0) {
          extra += " [skip: too large]";
        } else if (segBytes < floorSegmentBytes) {
          extra += " [floored]";
        }
        message(
            "  seg="
                + writer.segString(info)
                + " size="
                + String.format(Locale.ROOT, "%.3f", segBytes / 1024 / 1024.)
                + " MB"
                + extra,
            writer);
      }

      minSegmentBytes = Math.min(segBytes, minSegmentBytes);
      // Accum total byte size
      totIndexBytes += segBytes;
    }

    // If we have too-large segments, grace them out
    // of the maxSegmentCount:
    int tooBigCount = 0;
    while (tooBigCount < infosSorted.size()) {
      long segBytes = size(infosSorted.get(tooBigCount), writer);
      if (segBytes < maxMergedSegmentBytes / 2.0) {
        break;
      }
      totIndexBytes -= segBytes;
      tooBigCount++;
    }

    minSegmentBytes = floorSize(minSegmentBytes);

    // Compute max allowed segs in the index
    long levelSize = minSegmentBytes;
    long bytesLeft = totIndexBytes;
    double allowedSegCount = 0;
    while (true) {
      final double segCountLevel = bytesLeft / (double) levelSize;
      if (segCountLevel < segsPerTier) {
        allowedSegCount += Math.ceil(segCountLevel);
        break;
      }
      allowedSegCount += segsPerTier;
      bytesLeft -= segsPerTier * levelSize;
      levelSize *= maxMergeAtOnce;
    }
    int allowedSegCountInt = (int) allowedSegCount;

    MergeSpecification spec = null;

    // Cycle to possibly select more than one merge:
    while (true) {

      long mergingBytes = 0;

      // Gather eligible segments for merging, ie segments
      // not already being merged and not already picked (by
      // prior iteration of this loop) for merging:
      final List<SegmentCommitInfo> eligible = new ArrayList<>();
      for (int idx = tooBigCount; idx < infosSorted.size(); idx++) {
        final SegmentCommitInfo info = infosSorted.get(idx);
        if (merging.contains(info)) {
          mergingBytes += size(info, writer);
        } else if (!toBeMerged.contains(info)) {
          eligible.add(info);
        }
      }

      final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;

      if (verbose(writer)) {
        message(
            "  allowedSegmentCount="
                + allowedSegCountInt
                + " vs count="
                + infosSorted.size()
                + " (eligible count="
                + eligible.size()
                + ") tooBigCount="
                + tooBigCount,
            writer);
      }

      if (eligible.size() == 0) {
        return spec;
      }

      if (eligible.size() > allowedSegCountInt) {

        // OK we are over budget -- find best merge!
        MergeScore bestScore = null;
        List<SegmentCommitInfo> best = null;
        boolean bestTooLarge = false;
        long bestMergeBytes = 0;

        // Consider all merge starts:
        for (int startIdx = 0; startIdx <= eligible.size() - maxMergeAtOnce; startIdx++) {

          long totAfterMergeBytes = 0;

          final List<SegmentCommitInfo> candidate = new ArrayList<>();
          boolean hitTooLarge = false;
          for (int idx = startIdx;
              idx < eligible.size() && candidate.size() < maxMergeAtOnce;
              idx++) {
            final SegmentCommitInfo info = eligible.get(idx);
            final long segBytes = size(info, writer);

            if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
              hitTooLarge = true;
              // NOTE: we continue, so that we can try
              // "packing" smaller segments into this merge
              // to see if we can get closer to the max
              // size; this in general is not perfect since
              // this is really "bin packing" and we'd have
              // to try different permutations.
              continue;
            }
            candidate.add(info);
            totAfterMergeBytes += segBytes;
          }

          // We should never see an empty candidate: we iterated over maxMergeAtOnce
          // segments, and already pre-excluded the too-large segments:
          assert candidate.size() > 0;

          final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
          if (verbose(writer)) {
            message(
                "  maybe="
                    + writer.segString(candidate)
                    + " score="
                    + score.getScore()
                    + " "
                    + score.getExplanation()
                    + " tooLarge="
                    + hitTooLarge
                    + " size="
                    + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. / 1024.),
                writer);
          }

          // If we are already running a max sized merge
          // (maxMergeIsRunning), don't allow another max
          // sized merge to kick off:
          if ((bestScore == null || score.getScore() < bestScore.getScore())
              && (!hitTooLarge || !maxMergeIsRunning)) {
            best = candidate;
            bestScore = score;
            bestTooLarge = hitTooLarge;
            bestMergeBytes = totAfterMergeBytes;
          }
        }

        if (best != null) {
          if (spec == null) {
            spec = new MergeSpecification();
          }
          final OneMerge merge = new OneMerge(best);
          spec.add(merge);
          for (SegmentCommitInfo info : merge.segments) {
            toBeMerged.add(info);
          }

          if (verbose(writer)) {
            message(
                "  add merge="
                    + writer.segString(merge.segments)
                    + " size="
                    + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes / 1024. / 1024.)
                    + " score="
                    + String.format(Locale.ROOT, "%.3f", bestScore.getScore())
                    + " "
                    + bestScore.getExplanation()
                    + (bestTooLarge ? " [max merge]" : ""),
                writer);
          }
        } else {
          return spec;
        }
      } else {
        return spec;
      }
    }
  }