private final void init(Version matchVersion) { // best effort NPE if you dont call reset if (matchVersion.onOrAfter(Version.LUCENE_40)) { this.scanner = new StandardTokenizerImpl(input); } else if (matchVersion.onOrAfter(Version.LUCENE_34)) { this.scanner = new StandardTokenizerImpl34(input); } else if (matchVersion.onOrAfter(Version.LUCENE_31)) { this.scanner = new StandardTokenizerImpl31(input); } else { this.scanner = new ClassicTokenizerImpl(input); } }
public RussianAnalyzer(Version matchVersion) { this( matchVersion, matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET : DefaultSetHolder.DEFAULT_STOP_SET_30); }
private void init(Reader input, Version matchVersion) { if (matchVersion.onOrAfter(Version.LUCENE_24)) { replaceInvalidAcronym = true; } else { replaceInvalidAcronym = false; } this.input = input; termAtt = addAttribute(TermAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); typeAtt = addAttribute(TypeAttribute.class); }
public StoreFileMetaData( String name, long length, String checksum, Version writtenBy, BytesRef hash) { // its possible here to have a _na_ checksum or an unsupported writtenBy version, if the // file is a segments_N file, but that is fine in the case of a segments_N file because // we handle that case upstream assert name.startsWith("segments_") || (writtenBy != null && writtenBy.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION)) : "index version less that " + FIRST_LUCENE_CHECKSUM_VERSION + " are not supported but got: " + writtenBy; this.name = Objects.requireNonNull(name, "name must not be null"); this.length = length; this.checksum = Objects.requireNonNull(checksum, "checksum must not be null"); this.writtenBy = Objects.requireNonNull(writtenBy, "writtenBy must not be null"); this.hash = hash == null ? new BytesRef() : hash; }
/** * Builds an analyzer with the given stop words. * * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>} * @param stopWords stop words */ public PhaidraAnalyzer(Version matchVersion, Set<?> stopWords) { super(matchVersion, stopWords); replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_33); }
private void write(Directory directory) throws IOException { long nextGeneration = getNextPendingGeneration(); String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration); // Always advance the generation on write: generation = nextGeneration; IndexOutput segnOutput = null; boolean success = false; try { segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); CodecUtil.writeIndexHeader( segnOutput, "segments", VERSION_CURRENT, StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX)); segnOutput.writeVInt(Version.LATEST.major); segnOutput.writeVInt(Version.LATEST.minor); segnOutput.writeVInt(Version.LATEST.bugfix); segnOutput.writeLong(version); segnOutput.writeInt(counter); // write counter segnOutput.writeInt(size()); if (size() > 0) { Version minSegmentVersion = null; // We do a separate loop up front so we can write the minSegmentVersion before // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time: for (SegmentCommitInfo siPerCommit : this) { Version segmentVersion = siPerCommit.info.getVersion(); if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) { minSegmentVersion = segmentVersion; } } segnOutput.writeVInt(minSegmentVersion.major); segnOutput.writeVInt(minSegmentVersion.minor); segnOutput.writeVInt(minSegmentVersion.bugfix); } // write infos for (SegmentCommitInfo siPerCommit : this) { SegmentInfo si = siPerCommit.info; segnOutput.writeString(si.name); byte segmentID[] = si.getId(); // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore if (segmentID == null) { segnOutput.writeByte((byte) 0); } else { if (segmentID.length != StringHelper.ID_LENGTH) { throw new IllegalStateException( "cannot write segment: invalid id segment=" + si.name + "id=" + StringHelper.idToString(segmentID)); } segnOutput.writeByte((byte) 1); segnOutput.writeBytes(segmentID, segmentID.length); } segnOutput.writeString(si.getCodec().getName()); segnOutput.writeLong(siPerCommit.getDelGen()); int delCount = siPerCommit.getDelCount(); if (delCount < 0 || delCount > si.maxDoc()) { throw new IllegalStateException( "cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " delCount=" + delCount); } segnOutput.writeInt(delCount); segnOutput.writeLong(siPerCommit.getFieldInfosGen()); segnOutput.writeLong(siPerCommit.getDocValuesGen()); segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); segnOutput.writeInt(dvUpdatesFiles.size()); for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) { segnOutput.writeInt(e.getKey()); segnOutput.writeSetOfStrings(e.getValue()); } } segnOutput.writeMapOfStrings(userData); CodecUtil.writeFooter(segnOutput); segnOutput.close(); directory.sync(Collections.singleton(segmentFileName)); success = true; } finally { if (success) { pendingCommit = true; } else { // We hit an exception above; try to close the file // but suppress any exception: IOUtils.closeWhileHandlingException(segnOutput); // Try not to leave a truncated segments_N file in // the index: IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName); } } }
/** * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in * process. * * @param directory -- directory containing the segments file * @param segmentFileName -- segment file to load * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException { long generation = generationFromSegmentsFileName(segmentFileName); try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) { // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need // to read the magic ourselves. int magic = input.readInt(); if (magic != CodecUtil.CODEC_MAGIC) { throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } // 4.0+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT); // 5.0+ byte id[] = null; if (format >= VERSION_50) { id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); } SegmentInfos infos = new SegmentInfos(); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; if (format >= VERSION_53) { // TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... // or just rely on the // minSegmentLuceneVersion check instead: infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); } else { // else compute the min version down below in the for loop } infos.version = input.readLong(); infos.counter = input.readInt(); int numSegments = input.readInt(); if (numSegments < 0) { throw new CorruptIndexException("invalid segment count: " + numSegments, input); } if (format >= VERSION_53) { if (numSegments > 0) { infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt()); if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) { throw new IndexFormatTooOldException( input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")"); } } else { // else leave as null: no segments } } else { // else we recompute it below as we visit segments; it can't be used for throwing // IndexFormatTooOldExc, but consumers of // SegmentInfos can maybe still use it for other reasons } long totalDocs = 0; for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); final byte segmentID[]; if (format >= VERSION_50) { byte hasID = input.readByte(); if (hasID == 1) { segmentID = new byte[StringHelper.ID_LENGTH]; input.readBytes(segmentID, 0, segmentID.length); } else if (hasID == 0) { segmentID = null; // 4.x segment, doesn't have an ID } else { throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input); } } else { segmentID = null; } Codec codec = readCodec(input, format < VERSION_53); SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ); info.setCodec(codec); totalDocs += info.maxDoc(); long delGen = input.readLong(); int delCount = input.readInt(); if (delCount < 0 || delCount > info.maxDoc()) { throw new CorruptIndexException( "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input); } long fieldInfosGen = -1; if (format >= VERSION_46) { fieldInfosGen = input.readLong(); } long dvGen = -1; if (format >= VERSION_49) { dvGen = input.readLong(); } else { dvGen = fieldInfosGen; } SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen); if (format >= VERSION_46) { if (format < VERSION_49) { // Recorded per-generation files, which were buggy (see // LUCENE-5636). We need to read and keep them so we continue to // reference those files. Unfortunately it means that the files will // be referenced even if the fields are updated again, until the // segment is merged. final int numGensUpdatesFiles = input.readInt(); final Map<Long, Set<String>> genUpdatesFiles; if (numGensUpdatesFiles == 0) { genUpdatesFiles = Collections.emptyMap(); } else { genUpdatesFiles = new HashMap<>(numGensUpdatesFiles); for (int i = 0; i < numGensUpdatesFiles; i++) { genUpdatesFiles.put(input.readLong(), input.readStringSet()); } } siPerCommit.setGenUpdatesFiles(genUpdatesFiles); } else { if (format >= VERSION_51) { siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); } else { siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet())); } final Map<Integer, Set<String>> dvUpdateFiles; final int numDVFields = input.readInt(); if (numDVFields == 0) { dvUpdateFiles = Collections.emptyMap(); } else { Map<Integer, Set<String>> map = new HashMap<>(numDVFields); for (int i = 0; i < numDVFields; i++) { if (format >= VERSION_51) { map.put(input.readInt(), input.readSetOfStrings()); } else { map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet())); } } dvUpdateFiles = Collections.unmodifiableMap(map); } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); } } infos.add(siPerCommit); Version segmentVersion = info.getVersion(); if (format < VERSION_53) { if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { infos.minSegmentLuceneVersion = segmentVersion; } } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { throw new CorruptIndexException( "segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input); } } if (format >= VERSION_51) { infos.userData = input.readMapOfStrings(); } else { infos.userData = Collections.unmodifiableMap(input.readStringStringMap()); } if (format >= VERSION_48) { CodecUtil.checkFooter(input); } else { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); if (checksumNow != checksumThen) { throw new CorruptIndexException( "checksum failed (hardware problem?) : expected=" + Long.toHexString(checksumThen) + " actual=" + Long.toHexString(checksumNow), input); } CodecUtil.checkEOF(input); } // LUCENE-6299: check we are in bounds if (totalDocs > IndexWriter.getActualMaxDocs()) { throw new CorruptIndexException( "Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); } return infos; } }
/** * Returns a {@link CharacterUtils} implementation according to the given {@link Version} * instance. * * @param matchVersion a version instance * @return a {@link CharacterUtils} implementation according to the given {@link Version} * instance. */ public static CharacterUtils getInstance(final Version matchVersion) { return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4; }