private final void init(Version matchVersion) {
   // best effort NPE if you dont call reset
   if (matchVersion.onOrAfter(Version.LUCENE_40)) {
     this.scanner = new StandardTokenizerImpl(input);
   } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
     this.scanner = new StandardTokenizerImpl34(input);
   } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
     this.scanner = new StandardTokenizerImpl31(input);
   } else {
     this.scanner = new ClassicTokenizerImpl(input);
   }
 }
 public RussianAnalyzer(Version matchVersion) {
   this(
       matchVersion,
       matchVersion.onOrAfter(Version.LUCENE_31)
           ? DefaultSetHolder.DEFAULT_STOP_SET
           : DefaultSetHolder.DEFAULT_STOP_SET_30);
 }
 private void init(Reader input, Version matchVersion) {
   if (matchVersion.onOrAfter(Version.LUCENE_24)) {
     replaceInvalidAcronym = true;
   } else {
     replaceInvalidAcronym = false;
   }
   this.input = input;
   termAtt = addAttribute(TermAttribute.class);
   offsetAtt = addAttribute(OffsetAttribute.class);
   posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   typeAtt = addAttribute(TypeAttribute.class);
 }
 public StoreFileMetaData(
     String name, long length, String checksum, Version writtenBy, BytesRef hash) {
   // its possible here to have a _na_ checksum or an unsupported writtenBy version, if the
   // file is a segments_N file, but that is fine in the case of a segments_N file because
   // we handle that case upstream
   assert name.startsWith("segments_")
           || (writtenBy != null && writtenBy.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION))
       : "index version less that "
           + FIRST_LUCENE_CHECKSUM_VERSION
           + " are not supported but got: "
           + writtenBy;
   this.name = Objects.requireNonNull(name, "name must not be null");
   this.length = length;
   this.checksum = Objects.requireNonNull(checksum, "checksum must not be null");
   this.writtenBy = Objects.requireNonNull(writtenBy, "writtenBy must not be null");
   this.hash = hash == null ? new BytesRef() : hash;
 }
Beispiel #5
0
 /**
  * Builds an analyzer with the given stop words.
  *
  * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>}
  * @param stopWords stop words
  */
 public PhaidraAnalyzer(Version matchVersion, Set<?> stopWords) {
   super(matchVersion, stopWords);
   replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_33);
 }
  private void write(Directory directory) throws IOException {

    long nextGeneration = getNextPendingGeneration();
    String segmentFileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

    // Always advance the generation on write:
    generation = nextGeneration;

    IndexOutput segnOutput = null;
    boolean success = false;

    try {
      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
      CodecUtil.writeIndexHeader(
          segnOutput,
          "segments",
          VERSION_CURRENT,
          StringHelper.randomId(),
          Long.toString(nextGeneration, Character.MAX_RADIX));
      segnOutput.writeVInt(Version.LATEST.major);
      segnOutput.writeVInt(Version.LATEST.minor);
      segnOutput.writeVInt(Version.LATEST.bugfix);

      segnOutput.writeLong(version);
      segnOutput.writeInt(counter); // write counter
      segnOutput.writeInt(size());

      if (size() > 0) {

        Version minSegmentVersion = null;

        // We do a separate loop up front so we can write the minSegmentVersion before
        // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
        for (SegmentCommitInfo siPerCommit : this) {
          Version segmentVersion = siPerCommit.info.getVersion();
          if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
            minSegmentVersion = segmentVersion;
          }
        }

        segnOutput.writeVInt(minSegmentVersion.major);
        segnOutput.writeVInt(minSegmentVersion.minor);
        segnOutput.writeVInt(minSegmentVersion.bugfix);
      }

      // write infos
      for (SegmentCommitInfo siPerCommit : this) {
        SegmentInfo si = siPerCommit.info;
        segnOutput.writeString(si.name);
        byte segmentID[] = si.getId();
        // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
        if (segmentID == null) {
          segnOutput.writeByte((byte) 0);
        } else {
          if (segmentID.length != StringHelper.ID_LENGTH) {
            throw new IllegalStateException(
                "cannot write segment: invalid id segment="
                    + si.name
                    + "id="
                    + StringHelper.idToString(segmentID));
          }
          segnOutput.writeByte((byte) 1);
          segnOutput.writeBytes(segmentID, segmentID.length);
        }
        segnOutput.writeString(si.getCodec().getName());
        segnOutput.writeLong(siPerCommit.getDelGen());
        int delCount = siPerCommit.getDelCount();
        if (delCount < 0 || delCount > si.maxDoc()) {
          throw new IllegalStateException(
              "cannot write segment: invalid maxDoc segment="
                  + si.name
                  + " maxDoc="
                  + si.maxDoc()
                  + " delCount="
                  + delCount);
        }
        segnOutput.writeInt(delCount);
        segnOutput.writeLong(siPerCommit.getFieldInfosGen());
        segnOutput.writeLong(siPerCommit.getDocValuesGen());
        segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
        final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
        segnOutput.writeInt(dvUpdatesFiles.size());
        for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
          segnOutput.writeInt(e.getKey());
          segnOutput.writeSetOfStrings(e.getValue());
        }
      }
      segnOutput.writeMapOfStrings(userData);
      CodecUtil.writeFooter(segnOutput);
      segnOutput.close();
      directory.sync(Collections.singleton(segmentFileName));
      success = true;
    } finally {
      if (success) {
        pendingCommit = true;
      } else {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        IOUtils.closeWhileHandlingException(segnOutput);
        // Try not to leave a truncated segments_N file in
        // the index:
        IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
      }
    }
  }
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
      throws IOException {

    long generation = generationFromSegmentsFileName(segmentFileName);
    try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
      // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
      // to read the magic ourselves.
      int magic = input.readInt();
      if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(
            input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
      }
      // 4.0+
      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
      // 5.0+
      byte id[] = null;
      if (format >= VERSION_50) {
        id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
      }

      SegmentInfos infos = new SegmentInfos();
      infos.id = id;
      infos.generation = generation;
      infos.lastGeneration = generation;
      if (format >= VERSION_53) {
        // TODO: in the future (7.0?  sigh) we can use this to throw IndexFormatTooOldException ...
        // or just rely on the
        // minSegmentLuceneVersion check instead:
        infos.luceneVersion =
            Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else compute the min version down below in the for loop
      }

      infos.version = input.readLong();
      infos.counter = input.readInt();
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (format >= VERSION_53) {
        if (numSegments > 0) {
          infos.minSegmentLuceneVersion =
              Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
          if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
            throw new IndexFormatTooOldException(
                input,
                "this index contains a too-old segment (version: "
                    + infos.minSegmentLuceneVersion
                    + ")");
          }
        } else {
          // else leave as null: no segments
        }
      } else {
        // else we recompute it below as we visit segments; it can't be used for throwing
        // IndexFormatTooOldExc, but consumers of
        // SegmentInfos can maybe still use it for other reasons
      }

      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte segmentID[];
        if (format >= VERSION_50) {
          byte hasID = input.readByte();
          if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
          } else if (hasID == 0) {
            segmentID = null; // 4.x segment, doesn't have an ID
          } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
          }
        } else {
          segmentID = null;
        }
        Codec codec = readCodec(input, format < VERSION_53);
        SegmentInfo info =
            codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException(
              "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = -1;
        if (format >= VERSION_46) {
          fieldInfosGen = input.readLong();
        }
        long dvGen = -1;
        if (format >= VERSION_49) {
          dvGen = input.readLong();
        } else {
          dvGen = fieldInfosGen;
        }
        SegmentCommitInfo siPerCommit =
            new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        if (format >= VERSION_46) {
          if (format < VERSION_49) {
            // Recorded per-generation files, which were buggy (see
            // LUCENE-5636). We need to read and keep them so we continue to
            // reference those files. Unfortunately it means that the files will
            // be referenced even if the fields are updated again, until the
            // segment is merged.
            final int numGensUpdatesFiles = input.readInt();
            final Map<Long, Set<String>> genUpdatesFiles;
            if (numGensUpdatesFiles == 0) {
              genUpdatesFiles = Collections.emptyMap();
            } else {
              genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
              for (int i = 0; i < numGensUpdatesFiles; i++) {
                genUpdatesFiles.put(input.readLong(), input.readStringSet());
              }
            }
            siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
          } else {
            if (format >= VERSION_51) {
              siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
            } else {
              siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
            }
            final Map<Integer, Set<String>> dvUpdateFiles;
            final int numDVFields = input.readInt();
            if (numDVFields == 0) {
              dvUpdateFiles = Collections.emptyMap();
            } else {
              Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
              for (int i = 0; i < numDVFields; i++) {
                if (format >= VERSION_51) {
                  map.put(input.readInt(), input.readSetOfStrings());
                } else {
                  map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
                }
              }
              dvUpdateFiles = Collections.unmodifiableMap(map);
            }
            siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
          }
        }
        infos.add(siPerCommit);

        Version segmentVersion = info.getVersion();
        if (format < VERSION_53) {
          if (infos.minSegmentLuceneVersion == null
              || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            infos.minSegmentLuceneVersion = segmentVersion;
          }
        } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException(
              "segments file recorded minSegmentLuceneVersion="
                  + infos.minSegmentLuceneVersion
                  + " but segment="
                  + info
                  + " has older version="
                  + segmentVersion,
              input);
        }
      }

      if (format >= VERSION_51) {
        infos.userData = input.readMapOfStrings();
      } else {
        infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
      }

      if (format >= VERSION_48) {
        CodecUtil.checkFooter(input);
      } else {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen) {
          throw new CorruptIndexException(
              "checksum failed (hardware problem?) : expected="
                  + Long.toHexString(checksumThen)
                  + " actual="
                  + Long.toHexString(checksumNow),
              input);
        }
        CodecUtil.checkEOF(input);
      }

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException(
            "Too many documents: an index cannot exceed "
                + IndexWriter.getActualMaxDocs()
                + " but readers have total maxDoc="
                + totalDocs,
            input);
      }

      return infos;
    }
  }
 /**
  * Returns a {@link CharacterUtils} implementation according to the given {@link Version}
  * instance.
  *
  * @param matchVersion a version instance
  * @return a {@link CharacterUtils} implementation according to the given {@link Version}
  *     instance.
  */
 public static CharacterUtils getInstance(final Version matchVersion) {
   return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
 }