/**
 * Picks the scanner implementation that corresponds to {@code matchVersion} and binds it to
 * the current {@code input}.
 *
 * @param matchVersion version whose tokenization behavior should be reproduced
 */
private final void init(Version matchVersion) {
   // best effort NPE if you dont call reset
   // Walk the supported versions from oldest to newest; the first bound that matchVersion
   // falls short of decides which implementation is used.
   if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
     this.scanner = new ClassicTokenizerImpl(input);
   } else if (!matchVersion.onOrAfter(Version.LUCENE_34)) {
     this.scanner = new StandardTokenizerImpl31(input);
   } else if (!matchVersion.onOrAfter(Version.LUCENE_40)) {
     this.scanner = new StandardTokenizerImpl34(input);
   } else {
     this.scanner = new StandardTokenizerImpl(input);
   }
 }
 /**
  * Creates an analyzer for the given version using a default stop word set.
  *
  * <p>Delegates to the two-argument constructor, passing {@code
  * DefaultSetHolder.DEFAULT_STOP_SET} when {@code matchVersion} is on or after {@code
  * Version.LUCENE_31} and {@code DefaultSetHolder.DEFAULT_STOP_SET_30} otherwise.
  *
  * @param matchVersion version used to select the default stop set
  */
 public RussianAnalyzer(Version matchVersion) {
   this(
       matchVersion,
       matchVersion.onOrAfter(Version.LUCENE_31)
           ? DefaultSetHolder.DEFAULT_STOP_SET
           : DefaultSetHolder.DEFAULT_STOP_SET_30);
 }
 /**
  * Writes this object's fields to the given stream.
  *
  * <p>The fields are written in a fixed order: {@code name}, {@code length} (via {@code
  * writeVLong}), {@code checksum}, the string form of {@code writtenBy}, and finally {@code
  * hash}.
  *
  * @param out stream to write to
  * @throws IOException declared by the stream write calls
  */
 @Override
 public void writeTo(StreamOutput out) throws IOException {
   out.writeString(name);
   out.writeVLong(length);
   out.writeString(checksum);
   // writtenBy is dereferenced here, so it must be non-null at this point
   out.writeString(writtenBy.toString());
   out.writeBytesRef(hash);
 }
Пример #4
0
 /**
  * Stores the reader, derives the acronym-handling flag from the version and registers the
  * token attributes used by this tokenizer.
  *
  * @param input reader to take characters from
  * @param matchVersion version to match; on or after {@code LUCENE_24} invalid acronyms are
  *     replaced
  */
 private void init(Reader input, Version matchVersion) {
   // The flag is simply "is the requested version at least 2.4".
   replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
   this.input = input;
   termAtt = addAttribute(TermAttribute.class);
   offsetAtt = addAttribute(OffsetAttribute.class);
   posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   typeAtt = addAttribute(TypeAttribute.class);
 }
Пример #5
0
 /**
  * Creates the factory from its key-value configuration.
  *
  * <p>An unmodifiable snapshot of {@code args} is kept in {@code originalArgs}. The match
  * version is looked up under {@code LUCENE_MATCH_VERSION_PARAM} and defaults to {@code
  * Version.LATEST} when absent. The {@code CLASS_NAME} entry is removed from {@code args}.
  *
  * @param args configuration arguments; modified by this constructor
  * @throws IllegalArgumentException if the configured version string cannot be parsed
  */
 protected AbstractAnalysisFactory(Map<String, String> args) {
   originalArgs = Collections.unmodifiableMap(new HashMap<>(args));
   final String requestedVersion = get(args, LUCENE_MATCH_VERSION_PARAM);
   if (requestedVersion != null) {
     try {
       luceneMatchVersion = Version.parseLeniently(requestedVersion);
     } catch (ParseException parseFailure) {
       throw new IllegalArgumentException(parseFailure);
     }
   } else {
     luceneMatchVersion = Version.LATEST;
   }
   args.remove(CLASS_NAME); // consume the class arg
 }
Пример #6
0
 // Resolves LUCENE_VERSION once at class-load time: the system property named by
 // LUCENE_VERSION_PROPERTY wins when it holds a valid Version name, otherwise the default
 // is used (with a warning if the property value was not recognized).
 static {
   final String requested = System.getProperty(LUCENE_VERSION_PROPERTY);
   Version resolved = DEFAULT_LUCENE_VERSION;
   if (requested != null && !requested.isEmpty()) {
     try {
       resolved = Version.valueOf(requested);
     } catch (IllegalArgumentException e) {
       // Unknown version name: keep the default and tell the user about it.
       logger.warn(
           "Illegal Lucene query language version property '"
               + requested
               + "', defaulting to "
               + DEFAULT_LUCENE_VERSION);
     }
   }
   assert resolved != null;
   LUCENE_VERSION = resolved;
 }
 /**
  * Restores this request from the given stream.
  *
  * <p>Values are consumed in the order they appear on the wire; the Lucene version that wrote
  * the file is only present when the sending node is on or after {@code V_1_3_0}, and may be
  * absent even then.
  *
  * @param in stream to read from
  * @throws IOException declared by the stream read calls
  */
 @Override
 public void readFrom(StreamInput in) throws IOException {
   super.readFrom(in);
   recoveryId = in.readLong();
   shardId = ShardId.readShardId(in);
   final String fileName = in.readString();
   position = in.readVLong();
   final long fileLength = in.readVLong();
   final String fileChecksum = in.readOptionalString();
   content = in.readBytesReference();
   Version luceneVersion = null;
   if (in.getVersion().onOrAfter(org.elasticsearch.Version.V_1_3_0)) {
     final String serializedVersion = in.readOptionalString();
     if (serializedVersion != null) {
       luceneVersion = Version.parseLeniently(serializedVersion);
     }
   }
   metaData = new StoreFileMetaData(fileName, fileLength, fileChecksum, luceneVersion);
 }
 public StoreFileMetaData(
     String name, long length, String checksum, Version writtenBy, BytesRef hash) {
   // its possible here to have a _na_ checksum or an unsupported writtenBy version, if the
   // file is a segments_N file, but that is fine in the case of a segments_N file because
   // we handle that case upstream
   assert name.startsWith("segments_")
           || (writtenBy != null && writtenBy.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION))
       : "index version less that "
           + FIRST_LUCENE_CHECKSUM_VERSION
           + " are not supported but got: "
           + writtenBy;
   this.name = Objects.requireNonNull(name, "name must not be null");
   this.length = length;
   this.checksum = Objects.requireNonNull(checksum, "checksum must not be null");
   this.writtenBy = Objects.requireNonNull(writtenBy, "writtenBy must not be null");
   this.hash = hash == null ? new BytesRef() : hash;
 }
Пример #9
0
 /*
  * Simple test that ensures that we bump the version on upgrade.
  */
 @Test
 public void testVersion() {
   final ESLogger logger = ESLoggerFactory.getLogger(LuceneTest.class.getName());
   final Version[] allVersions = Version.values();
   final int lastIndex = allVersions.length - 1;
   // The last constant must be LUCENE_CURRENT and the one before it the version in use.
   assertThat(Version.LUCENE_CURRENT, equalTo(allVersions[lastIndex]));
   assertThat(
       "Latest Lucene Version is not set after upgrade",
       Lucene.VERSION,
       equalTo(allVersions[lastIndex - 1]));
   assertThat(Lucene.parseVersion(null, Lucene.VERSION, null), equalTo(Lucene.VERSION));
   // Every constant except the last one must round-trip through its "major.minor" form.
   for (int idx = 0; idx < lastIndex; idx++) {
     final Version expected = allVersions[idx];
     final String dotted = expected.name().replaceFirst("^LUCENE_(\\d)(\\d)$", "$1.$2");
     // this should fail if the lucene version is not mapped as a string in Lucene.java
     assertThat(Lucene.parseVersion(dotted, Version.LUCENE_CURRENT, logger), equalTo(expected));
   }
 }
Пример #10
0
 /**
  * Determines the Lucene match version from the configuration.
  *
  * <p>When the {@code Environment.LUCENE_MATCH_VERSION} property is empty, a recommendation to
  * configure it is logged and the default version is returned. Otherwise the property value
  * is parsed leniently.
  *
  * @param cfg configuration to read the property from
  * @return the configured version, or the default when none is configured
  */
 private Version getLuceneMatchVersion(SearchConfiguration cfg) {
   final String configured = cfg.getProperty(Environment.LUCENE_MATCH_VERSION);
   if (StringHelper.isEmpty(configured)) {
     log.recommendConfiguringLuceneVersion();
     return Environment.DEFAULT_LUCENE_MATCH_VERSION;
   }
   try {
     final Version parsed = Version.parseLeniently(configured);
     if (log.isDebugEnabled()) {
       log.debug("Setting Lucene compatibility to Version " + parsed);
     }
     return parsed;
   } catch (IllegalArgumentException | ParseException e) {
     // Both failure modes are reported identically, using the offending value and cause text.
     throw log.illegalLuceneVersionFormat(configured, e.getMessage());
   }
 }
Пример #11
0
  /**
   * Parses a {@code luceneMatchVersion} string leniently.
   *
   * <p>The first time the result is {@code Version.LATEST}, a warning is logged about relying
   * on that setting; later calls stay silent.
   *
   * @param matchVersion version string to parse
   * @return the parsed version
   * @throws SolrException with {@code SERVER_ERROR} if the string cannot be parsed
   */
  public static final Version parseLuceneVersionString(final String matchVersion) {
    final Version parsed;
    try {
      parsed = Version.parseLeniently(matchVersion);
    } catch (ParseException parseFailure) {
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR,
          "Invalid luceneMatchVersion.  Should be of the form 'V.V.V' (e.g. 4.8.0)",
          parseFailure);
    }

    if (parsed == Version.LATEST) {
      // Only flip the flag when LATEST was actually requested, and only warn on the first flip.
      final boolean alreadyWarned = versionWarningAlreadyLogged.getAndSet(true);
      if (!alreadyWarned) {
        log.warn(
            "You should not use LATEST as luceneMatchVersion property: "
                + "if you use this setting, and then Solr upgrades to a newer release of Lucene, "
                + "sizable changes may happen. If precise back compatibility is important "
                + "then you should instead explicitly specify an actual Lucene version.");
      }
    }

    return parsed;
  }
Пример #12
0
 /**
  * Builds an analyzer with the given stop words.
  *
  * <p>{@code replaceInvalidAcronym} is enabled when {@code matchVersion} is on or after {@code
  * Version.LUCENE_33}.
  *
  * @param matchVersion Lucene version to match; see <a href="#version">above</a>
  * @param stopWords stop words
  */
 public PhaidraAnalyzer(Version matchVersion, Set<?> stopWords) {
   super(matchVersion, stopWords);
   replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_33);
 }
Пример #13
0
    /**
     * Validates the builder state and assembles the {@link IndexMetaData}.
     *
     * <p>The default mapping (if present) is propagated to all mappings, the shard and replica
     * counts are read from the settings and validated, missing active-allocation-id slots are
     * filled with empty sets, the routing filters are derived from the settings, and the
     * created / upgraded / minimum-compatible versions are resolved.
     *
     * @return the assembled index metadata
     * @throws IllegalArgumentException if the number of shards or replicas is missing, the number
     *     of shards is not positive, or the number of replicas is negative
     * @throws IllegalStateException if the minimum compatible Lucene version setting cannot be
     *     parsed
     */
    public IndexMetaData build() {
      ImmutableOpenMap.Builder<String, AliasMetaData> tmpAliases = aliases;
      Settings tmpSettings = settings;

      // update default mapping on the MappingMetaData
      if (mappings.containsKey(MapperService.DEFAULT_MAPPING)) {
        MappingMetaData defaultMapping = mappings.get(MapperService.DEFAULT_MAPPING);
        for (ObjectCursor<MappingMetaData> cursor : mappings.values()) {
          cursor.value.updateDefaultMapping(defaultMapping);
        }
      }

      Integer maybeNumberOfShards = settings.getAsInt(SETTING_NUMBER_OF_SHARDS, null);
      if (maybeNumberOfShards == null) {
        throw new IllegalArgumentException("must specify numberOfShards for index [" + index + "]");
      }
      int numberOfShards = maybeNumberOfShards;
      if (numberOfShards <= 0) {
        throw new IllegalArgumentException(
            "must specify positive number of shards for index [" + index + "]");
      }

      Integer maybeNumberOfReplicas = settings.getAsInt(SETTING_NUMBER_OF_REPLICAS, null);
      if (maybeNumberOfReplicas == null) {
        throw new IllegalArgumentException(
            "must specify numberOfReplicas for index [" + index + "]");
      }
      int numberOfReplicas = maybeNumberOfReplicas;
      if (numberOfReplicas < 0) {
        // The message names replicas: this branch validates the replica count, not the shards.
        throw new IllegalArgumentException(
            "must specify non-negative number of replicas for index [" + index + "]");
      }

      // fill missing slots in activeAllocationIds with empty set if needed and make all entries
      // immutable
      ImmutableOpenIntMap.Builder<Set<String>> filledActiveAllocationIds =
          ImmutableOpenIntMap.builder();
      for (int i = 0; i < numberOfShards; i++) {
        if (activeAllocationIds.containsKey(i)) {
          filledActiveAllocationIds.put(
              i, Collections.unmodifiableSet(new HashSet<>(activeAllocationIds.get(i))));
        } else {
          filledActiveAllocationIds.put(i, Collections.emptySet());
        }
      }

      // Routing filters: an empty settings group yields a null filter; "require" entries are
      // combined with AND, "include" and "exclude" entries with OR.
      final Map<String, String> requireMap =
          INDEX_ROUTING_REQUIRE_GROUP_SETTING.get(settings).getAsMap();
      final DiscoveryNodeFilters requireFilters;
      if (requireMap.isEmpty()) {
        requireFilters = null;
      } else {
        requireFilters = DiscoveryNodeFilters.buildFromKeyValue(AND, requireMap);
      }
      Map<String, String> includeMap = INDEX_ROUTING_INCLUDE_GROUP_SETTING.get(settings).getAsMap();
      final DiscoveryNodeFilters includeFilters;
      if (includeMap.isEmpty()) {
        includeFilters = null;
      } else {
        includeFilters = DiscoveryNodeFilters.buildFromKeyValue(OR, includeMap);
      }
      Map<String, String> excludeMap = INDEX_ROUTING_EXCLUDE_GROUP_SETTING.get(settings).getAsMap();
      final DiscoveryNodeFilters excludeFilters;
      if (excludeMap.isEmpty()) {
        excludeFilters = null;
      } else {
        excludeFilters = DiscoveryNodeFilters.buildFromKeyValue(OR, excludeMap);
      }

      // The upgraded version falls back to the created version when it is not set explicitly.
      Version indexCreatedVersion = Version.indexCreated(settings);
      Version indexUpgradedVersion =
          settings.getAsVersion(IndexMetaData.SETTING_VERSION_UPGRADED, indexCreatedVersion);
      String stringLuceneVersion = settings.get(SETTING_VERSION_MINIMUM_COMPATIBLE);
      final org.apache.lucene.util.Version minimumCompatibleLuceneVersion;
      if (stringLuceneVersion != null) {
        try {
          minimumCompatibleLuceneVersion =
              org.apache.lucene.util.Version.parse(stringLuceneVersion);
        } catch (ParseException ex) {
          throw new IllegalStateException(
              "Cannot parse lucene version ["
                  + stringLuceneVersion
                  + "] in the ["
                  + SETTING_VERSION_MINIMUM_COMPATIBLE
                  + "] setting",
              ex);
        }
      } else {
        minimumCompatibleLuceneVersion = null;
      }

      final String uuid = settings.get(SETTING_INDEX_UUID, INDEX_UUID_NA_VALUE);
      return new IndexMetaData(
          new Index(index, uuid),
          version,
          state,
          numberOfShards,
          numberOfReplicas,
          tmpSettings,
          mappings.build(),
          tmpAliases.build(),
          customs.build(),
          filledActiveAllocationIds.build(),
          requireFilters,
          includeFilters,
          excludeFilters,
          indexCreatedVersion,
          indexUpgradedVersion,
          minimumCompatibleLuceneVersion);
    }
Пример #14
0
  /**
   * Writes these segment infos to a new pending segments file in {@code directory} and syncs it.
   *
   * <p>The generation is always advanced, even if the write fails. On success {@code
   * pendingCommit} is set; on failure the output is closed and the partially written file is
   * deleted, both on a best-effort basis.
   *
   * @param directory directory to create the pending segments file in
   * @throws IOException if creating, writing or syncing the file fails
   * @throws IllegalStateException if a segment has an id of the wrong length or an out-of-range
   *     deletion count
   */
  private void write(Directory directory) throws IOException {

    long nextGeneration = getNextPendingGeneration();
    String segmentFileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

    // Always advance the generation on write:
    generation = nextGeneration;

    IndexOutput segnOutput = null;
    boolean success = false;

    try {
      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
      CodecUtil.writeIndexHeader(
          segnOutput,
          "segments",
          VERSION_CURRENT,
          StringHelper.randomId(),
          Long.toString(nextGeneration, Character.MAX_RADIX));
      // Version of the Lucene release writing this commit.
      segnOutput.writeVInt(Version.LATEST.major);
      segnOutput.writeVInt(Version.LATEST.minor);
      segnOutput.writeVInt(Version.LATEST.bugfix);

      segnOutput.writeLong(version);
      segnOutput.writeInt(counter); // write counter
      segnOutput.writeInt(size());

      if (size() > 0) {

        Version minSegmentVersion = null;

        // We do a separate loop up front so we can write the minSegmentVersion before
        // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
        for (SegmentCommitInfo siPerCommit : this) {
          Version segmentVersion = siPerCommit.info.getVersion();
          if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
            minSegmentVersion = segmentVersion;
          }
        }

        segnOutput.writeVInt(minSegmentVersion.major);
        segnOutput.writeVInt(minSegmentVersion.minor);
        segnOutput.writeVInt(minSegmentVersion.bugfix);
      }

      // write infos
      for (SegmentCommitInfo siPerCommit : this) {
        SegmentInfo si = siPerCommit.info;
        segnOutput.writeString(si.name);
        byte segmentID[] = si.getId();
        // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
        if (segmentID == null) {
          segnOutput.writeByte((byte) 0);
        } else {
          if (segmentID.length != StringHelper.ID_LENGTH) {
            // Keep a separator before "id=" so the segment name and the id stay distinguishable.
            throw new IllegalStateException(
                "cannot write segment: invalid id segment="
                    + si.name
                    + " id="
                    + StringHelper.idToString(segmentID));
          }
          segnOutput.writeByte((byte) 1);
          segnOutput.writeBytes(segmentID, segmentID.length);
        }
        segnOutput.writeString(si.getCodec().getName());
        segnOutput.writeLong(siPerCommit.getDelGen());
        int delCount = siPerCommit.getDelCount();
        if (delCount < 0 || delCount > si.maxDoc()) {
          throw new IllegalStateException(
              "cannot write segment: invalid maxDoc segment="
                  + si.name
                  + " maxDoc="
                  + si.maxDoc()
                  + " delCount="
                  + delCount);
        }
        segnOutput.writeInt(delCount);
        segnOutput.writeLong(siPerCommit.getFieldInfosGen());
        segnOutput.writeLong(siPerCommit.getDocValuesGen());
        segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
        final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
        segnOutput.writeInt(dvUpdatesFiles.size());
        for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
          segnOutput.writeInt(e.getKey());
          segnOutput.writeSetOfStrings(e.getValue());
        }
      }
      segnOutput.writeMapOfStrings(userData);
      CodecUtil.writeFooter(segnOutput);
      segnOutput.close();
      directory.sync(Collections.singleton(segmentFileName));
      success = true;
    } finally {
      if (success) {
        pendingCommit = true;
      } else {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        IOUtils.closeWhileHandlingException(segnOutput);
        // Try not to leave a truncated segments_N file in
        // the index:
        IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
      }
    }
  }
Пример #15
0
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * <p>The file is read through a checksumming input. Which fields are present depends on the
   * format version found in the header, so older formats take the fallback branches below.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @return the segment infos read from the file
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
      throws IOException {

    // The generation comes from the file name; for 5.0+ formats it is also verified against
    // the suffix stored in the index header.
    long generation = generationFromSegmentsFileName(segmentFileName);
    try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
      // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
      // to read the magic ourselves.
      int magic = input.readInt();
      if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(
            input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
      }
      // 4.0+
      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
      // 5.0+
      byte id[] = null;
      if (format >= VERSION_50) {
        id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
      }

      SegmentInfos infos = new SegmentInfos();
      infos.id = id;
      infos.generation = generation;
      infos.lastGeneration = generation;
      if (format >= VERSION_53) {
        // TODO: in the future (7.0?  sigh) we can use this to throw IndexFormatTooOldException ...
        // or just rely on the
        // minSegmentLuceneVersion check instead:
        infos.luceneVersion =
            Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else compute the min version down below in the for loop
      }

      infos.version = input.readLong();
      infos.counter = input.readInt();
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (format >= VERSION_53) {
        if (numSegments > 0) {
          // The minimum segment version is only stored when there is at least one segment.
          infos.minSegmentLuceneVersion =
              Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
          if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
            throw new IndexFormatTooOldException(
                input,
                "this index contains a too-old segment (version: "
                    + infos.minSegmentLuceneVersion
                    + ")");
          }
        } else {
          // else leave as null: no segments
        }
      } else {
        // else we recompute it below as we visit segments; it can't be used for throwing
        // IndexFormatTooOldExc, but consumers of
        // SegmentInfos can maybe still use it for other reasons
      }

      // Sum of maxDoc over all segments, checked against the index-wide limit after the loop.
      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte segmentID[];
        if (format >= VERSION_50) {
          // A marker byte says whether a segment id follows: 1 = id present, 0 = no id.
          byte hasID = input.readByte();
          if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
          } else if (hasID == 0) {
            segmentID = null; // 4.x segment, doesn't have an ID
          } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
          }
        } else {
          segmentID = null;
        }
        Codec codec = readCodec(input, format < VERSION_53);
        SegmentInfo info =
            codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException(
              "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        // fieldInfosGen stays at -1 for formats that predate VERSION_46.
        long fieldInfosGen = -1;
        if (format >= VERSION_46) {
          fieldInfosGen = input.readLong();
        }
        // Formats before VERSION_49 have no separate doc-values generation; reuse fieldInfosGen.
        long dvGen = -1;
        if (format >= VERSION_49) {
          dvGen = input.readLong();
        } else {
          dvGen = fieldInfosGen;
        }
        SegmentCommitInfo siPerCommit =
            new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        if (format >= VERSION_46) {
          if (format < VERSION_49) {
            // Recorded per-generation files, which were buggy (see
            // LUCENE-5636). We need to read and keep them so we continue to
            // reference those files. Unfortunately it means that the files will
            // be referenced even if the fields are updated again, until the
            // segment is merged.
            final int numGensUpdatesFiles = input.readInt();
            final Map<Long, Set<String>> genUpdatesFiles;
            if (numGensUpdatesFiles == 0) {
              genUpdatesFiles = Collections.emptyMap();
            } else {
              genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
              for (int i = 0; i < numGensUpdatesFiles; i++) {
                genUpdatesFiles.put(input.readLong(), input.readStringSet());
              }
            }
            siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
          } else {
            // From VERSION_51 on, sets are read with readSetOfStrings; earlier formats use
            // readStringSet and the result is wrapped as unmodifiable here.
            if (format >= VERSION_51) {
              siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
            } else {
              siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
            }
            final Map<Integer, Set<String>> dvUpdateFiles;
            final int numDVFields = input.readInt();
            if (numDVFields == 0) {
              dvUpdateFiles = Collections.emptyMap();
            } else {
              Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
              for (int i = 0; i < numDVFields; i++) {
                if (format >= VERSION_51) {
                  map.put(input.readInt(), input.readSetOfStrings());
                } else {
                  map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
                }
              }
              dvUpdateFiles = Collections.unmodifiableMap(map);
            }
            siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
          }
        }
        infos.add(siPerCommit);

        // Pre-5.3 formats: track the minimum segment version while iterating.
        // 5.3+ formats: verify each segment against the minimum recorded in the file.
        Version segmentVersion = info.getVersion();
        if (format < VERSION_53) {
          if (infos.minSegmentLuceneVersion == null
              || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            infos.minSegmentLuceneVersion = segmentVersion;
          }
        } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException(
              "segments file recorded minSegmentLuceneVersion="
                  + infos.minSegmentLuceneVersion
                  + " but segment="
                  + info
                  + " has older version="
                  + segmentVersion,
              input);
        }
      }

      if (format >= VERSION_51) {
        infos.userData = input.readMapOfStrings();
      } else {
        infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
      }

      // Formats from VERSION_48 on end with a codec footer; older ones store the checksum as a
      // plain long that must match the running checksum, followed by end of file.
      if (format >= VERSION_48) {
        CodecUtil.checkFooter(input);
      } else {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen) {
          throw new CorruptIndexException(
              "checksum failed (hardware problem?) : expected="
                  + Long.toHexString(checksumThen)
                  + " actual="
                  + Long.toHexString(checksumNow),
              input);
        }
        CodecUtil.checkEOF(input);
      }

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException(
            "Too many documents: an index cannot exceed "
                + IndexWriter.getActualMaxDocs()
                + " but readers have total maxDoc="
                + totalDocs,
            input);
      }

      return infos;
    }
  }
 /**
  * Selects the {@link CharacterUtils} implementation that matches a {@link Version}.
  *
  * @param matchVersion a version instance
  * @return {@code JAVA_5} when {@code matchVersion} is on or after {@code Version.LUCENE_31},
  *     {@code JAVA_4} otherwise
  */
 public static CharacterUtils getInstance(final Version matchVersion) {
   if (matchVersion.onOrAfter(Version.LUCENE_31)) {
     return JAVA_5;
   }
   return JAVA_4;
 }