Example #1
 /** Returns a copy of this instance, also copying each SegmentInfo. */
 @Override
 public SegmentInfos clone() {
   try {
     final SegmentInfos sis = (SegmentInfos) super.clone();
     // deep clone, first recreate all collections:
     sis.segments = new ArrayList<>(size());
     for (final SegmentCommitInfo info : this) {
       assert info.info.getCodec() != null;
        // don't access segments directly, use the add method!
       sis.add(info.clone());
     }
     sis.userData = new HashMap<>(userData);
     return sis;
   } catch (CloneNotSupportedException e) {
     throw new RuntimeException("should not happen", e);
   }
 }
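A short usage sketch follows (imports omitted, as in the excerpts above; the index path and the use of SegmentInfos.readLatestCommit and FSDirectory.open are assumptions based on the 5.x+ API this example appears to target). It shows why the deep clone matters: the copy can be changed without touching the committed state.

 // Hedged sketch, not part of the example above: clone before mutating.
 Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // hypothetical path
 SegmentInfos committed = SegmentInfos.readLatestCommit(dir);
 SegmentInfos working = committed.clone();
 // Because clone() recreates the segments list and copies each SegmentCommitInfo,
 // adding or removing entries on "working" leaves "committed" untouched.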
Example #2
 /** Returns a copy of this instance, also copying each SegmentInfo. */
 @Override
 public Object clone() {
   try {
     final SegmentInfos sis = (SegmentInfos) super.clone();
     // deep clone, first recreate all collections:
     sis.segments = new ArrayList<SegmentInfo>(size());
     sis.segmentSet = new HashSet<SegmentInfo>(size());
     sis.cachedUnmodifiableList = null;
     sis.cachedUnmodifiableSet = null;
     for (final SegmentInfo info : this) {
        // don't access segments directly, use the add method!
       sis.add((SegmentInfo) info.clone());
     }
     sis.userData = new HashMap<String, String>(userData);
     return sis;
   } catch (CloneNotSupportedException e) {
     throw new RuntimeException("should not happen", e);
   }
 }
Example #3
 public void split(File destDir, String[] segs) throws IOException {
   destDir.mkdirs();
   FSDirectory destFSDir = FSDirectory.open(destDir);
   SegmentInfos destInfos = new SegmentInfos();
   destInfos.counter = infos.counter;
   for (String n : segs) {
     SegmentInfo info = getInfo(n);
     destInfos.add(info);
     // now copy files over
     List<String> files = info.files();
     for (final String srcName : files) {
       File srcFile = new File(dir, srcName);
       File destFile = new File(destDir, srcName);
       copyFile(srcFile, destFile);
     }
   }
   destInfos.changed();
   destInfos.commit(destFSDir);
   // System.out.println("destDir:"+destDir.getAbsolutePath());
 }
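A hedged sketch of how a split like this might be driven; the class name IndexSplitter matches the Lucene misc tool this method appears to come from, but the constructor argument, paths, and segment names here are illustrative assumptions.

 // Hypothetical invocation (paths and segment names are placeholders):
 IndexSplitter splitter = new IndexSplitter(new File("/path/to/source/index"));
 // Copy only the listed segments into a fresh index at the destination directory.
 splitter.split(new File("/path/to/dest/index"), new String[] {"_0", "_3"});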
Example #4
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public final void read(Directory directory, String segmentFileName)
      throws CorruptIndexException, IOException {
    boolean success = false;

    // Clear any previous segments:
    this.clear();

    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));

    generation = generationFromSegmentsFileName(segmentFileName);

    lastGeneration = generation;

    try {
      int format = input.readInt();
      if (format < 0) { // file contains explicit format info
        // check that it is a format we can understand
        if (format < CURRENT_FORMAT)
          throw new CorruptIndexException("Unknown format version: " + format);
        version = input.readLong(); // read version
        counter = input.readInt(); // read counter
      } else { // file is in old format without explicit format info
        counter = format;
      }

      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
        SegmentInfo si = new SegmentInfo(directory, format, input);
        if (si.getVersion() == null) {
          // It's a pre-3.1 segment, upgrade its version to either 3.0 or 2.x
          Directory dir = directory;
          if (si.getDocStoreOffset() != -1) {
            if (si.getDocStoreIsCompoundFile()) {
              dir =
                  new CompoundFileReader(
                      dir,
                      IndexFileNames.segmentFileName(
                          si.getDocStoreSegment(), IndexFileNames.COMPOUND_FILE_STORE_EXTENSION),
                      1024);
            }
          } else if (si.getUseCompoundFile()) {
            dir =
                new CompoundFileReader(
                    dir,
                    IndexFileNames.segmentFileName(si.name, IndexFileNames.COMPOUND_FILE_EXTENSION),
                    1024);
          }

          try {
            String store = si.getDocStoreOffset() != -1 ? si.getDocStoreSegment() : si.name;
            si.setVersion(FieldsReader.detectCodeVersion(dir, store));
          } finally {
            // If we opened the directory, close it
            if (dir != directory) dir.close();
          }
        }
        add(si);
      }

      if (format >= 0) { // in old format the version number may be at the end of the file
        if (input.getFilePointer() >= input.length())
          version = System.currentTimeMillis(); // old file format without version number
        else version = input.readLong(); // read version
      }

      if (format <= FORMAT_USER_DATA) {
        if (format <= FORMAT_DIAGNOSTICS) {
          userData = input.readStringStringMap();
        } else if (0 != input.readByte()) {
          userData = Collections.singletonMap("userData", input.readString());
        } else {
          userData = Collections.<String, String>emptyMap();
        }
      } else {
        userData = Collections.<String, String>emptyMap();
      }

      if (format <= FORMAT_CHECKSUM) {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen)
          throw new CorruptIndexException("checksum mismatch in segments file");
      }
      success = true;
    } finally {
      input.close();
      if (!success) {
        // Clear any segment infos we had loaded so we
        // have a clean slate on retry:
        this.clear();
      }
    }
  }
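A brief sketch of calling this pre-4.0 instance method; the directory setup and the segments_N file name are assumptions for illustration, and imports are omitted as in the excerpt above.

   // Hedged usage sketch for the 3.x-style API shown above:
   Directory directory = FSDirectory.open(new File("/path/to/index")); // hypothetical path
   SegmentInfos infos = new SegmentInfos();
   infos.read(directory, "segments_4"); // hypothetical generation; clears and reloads "infos"
   System.out.println("segments=" + infos.size() + " version=" + infos.getVersion());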
Example #5
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
      throws IOException {

    long generation = generationFromSegmentsFileName(segmentFileName);
    try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
      // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
      // to read the magic ourselves.
      int magic = input.readInt();
      if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(
            input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
      }
      // 4.0+
      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
      // 5.0+
      byte[] id = null;
      if (format >= VERSION_50) {
        id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
      }

      SegmentInfos infos = new SegmentInfos();
      infos.id = id;
      infos.generation = generation;
      infos.lastGeneration = generation;
      if (format >= VERSION_53) {
        // TODO: in the future (7.0?  sigh) we can use this to throw IndexFormatTooOldException ...
        // or just rely on the
        // minSegmentLuceneVersion check instead:
        infos.luceneVersion =
            Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else compute the min version down below in the for loop
      }

      infos.version = input.readLong();
      infos.counter = input.readInt();
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (format >= VERSION_53) {
        if (numSegments > 0) {
          infos.minSegmentLuceneVersion =
              Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
          if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
            throw new IndexFormatTooOldException(
                input,
                "this index contains a too-old segment (version: "
                    + infos.minSegmentLuceneVersion
                    + ")");
          }
        } else {
          // else leave as null: no segments
        }
      } else {
        // else we recompute it below as we visit segments; it can't be used for throwing
        // IndexFormatTooOldExc, but consumers of
        // SegmentInfos can maybe still use it for other reasons
      }

      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte[] segmentID;
        if (format >= VERSION_50) {
          byte hasID = input.readByte();
          if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
          } else if (hasID == 0) {
            segmentID = null; // 4.x segment, doesn't have an ID
          } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
          }
        } else {
          segmentID = null;
        }
        Codec codec = readCodec(input, format < VERSION_53);
        SegmentInfo info =
            codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException(
              "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = -1;
        if (format >= VERSION_46) {
          fieldInfosGen = input.readLong();
        }
        long dvGen = -1;
        if (format >= VERSION_49) {
          dvGen = input.readLong();
        } else {
          dvGen = fieldInfosGen;
        }
        SegmentCommitInfo siPerCommit =
            new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        if (format >= VERSION_46) {
          if (format < VERSION_49) {
            // Recorded per-generation files, which were buggy (see
            // LUCENE-5636). We need to read and keep them so we continue to
            // reference those files. Unfortunately it means that the files will
            // be referenced even if the fields are updated again, until the
            // segment is merged.
            final int numGensUpdatesFiles = input.readInt();
            final Map<Long, Set<String>> genUpdatesFiles;
            if (numGensUpdatesFiles == 0) {
              genUpdatesFiles = Collections.emptyMap();
            } else {
              genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
              for (int i = 0; i < numGensUpdatesFiles; i++) {
                genUpdatesFiles.put(input.readLong(), input.readStringSet());
              }
            }
            siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
          } else {
            if (format >= VERSION_51) {
              siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
            } else {
              siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
            }
            final Map<Integer, Set<String>> dvUpdateFiles;
            final int numDVFields = input.readInt();
            if (numDVFields == 0) {
              dvUpdateFiles = Collections.emptyMap();
            } else {
              Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
              for (int i = 0; i < numDVFields; i++) {
                if (format >= VERSION_51) {
                  map.put(input.readInt(), input.readSetOfStrings());
                } else {
                  map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
                }
              }
              dvUpdateFiles = Collections.unmodifiableMap(map);
            }
            siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
          }
        }
        infos.add(siPerCommit);

        Version segmentVersion = info.getVersion();
        if (format < VERSION_53) {
          if (infos.minSegmentLuceneVersion == null
              || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            infos.minSegmentLuceneVersion = segmentVersion;
          }
        } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException(
              "segments file recorded minSegmentLuceneVersion="
                  + infos.minSegmentLuceneVersion
                  + " but segment="
                  + info
                  + " has older version="
                  + segmentVersion,
              input);
        }
      }

      if (format >= VERSION_51) {
        infos.userData = input.readMapOfStrings();
      } else {
        infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
      }

      if (format >= VERSION_48) {
        CodecUtil.checkFooter(input);
      } else {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen) {
          throw new CorruptIndexException(
              "checksum failed (hardware problem?) : expected="
                  + Long.toHexString(checksumThen)
                  + " actual="
                  + Long.toHexString(checksumNow),
              input);
        }
        CodecUtil.checkEOF(input);
      }

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException(
            "Too many documents: an index cannot exceed "
                + IndexWriter.getActualMaxDocs()
                + " but readers have total maxDoc="
                + totalDocs,
            input);
      }

      return infos;
    }
  }
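Finally, a hedged sketch of reading the most recent commit through this static API; getLastCommitSegmentsFileName is the companion helper I would expect to use here, but treat the exact setup as an assumption, and imports are omitted as in the excerpt above.

   // Hedged usage sketch for the 5.x+ API shown above:
   String segmentsFile = SegmentInfos.getLastCommitSegmentsFileName(directory); // e.g. "segments_5"
   SegmentInfos infos = SegmentInfos.readCommit(directory, segmentsFile);
   for (SegmentCommitInfo sci : infos) {
     System.out.println(sci.info.name + " maxDoc=" + sci.info.maxDoc()
         + " delCount=" + sci.getDelCount());
   }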