// we don't actually write a .fdx-like index, instead we read the
 // stored fields file in entirety up-front and save the offsets
 // so we can seek to the documents later.
 private void readIndex(int size) throws IOException {
   ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
   offsets = new long[size];
   int upto = 0;
   while (!scratch.get().equals(END)) {
     SimpleTextUtil.readLine(input, scratch);
     if (StringHelper.startsWith(scratch.get(), DOC)) {
       offsets[upto] = input.getFilePointer();
       upto++;
     }
   }
   SimpleTextUtil.checkFooter(input);
   assert upto == offsets.length;
 }
Ejemplo n.º 2
0
  @Override
  public FieldInfos read(
      Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
      int codecVersion =
          CodecUtil.checkHeader(
              input,
              Lucene46FieldInfosFormat.CODEC_NAME,
              Lucene46FieldInfosFormat.FORMAT_START,
              Lucene46FieldInfosFormat.FORMAT_CURRENT);

      final int size = input.readVInt(); // read in the size
      FieldInfo infos[] = new FieldInfo[size];

      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException(
              "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
        boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
        boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions;
        if (!isIndexed) {
          indexOptions = IndexOptions.NONE;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS;
        } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
        } else {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
        }

        // DV Types are packed in one byte
        byte val = input.readByte();
        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
        final long dvGen = input.readLong();
        final Map<String, String> attributes = input.readStringStringMap();

        if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) {
          // Undead norms!  Lucene42NormsProducer will check this and bring norms back from the
          // dead:
          UndeadNormsProducer.setUndead(attributes);
        }

        infos[i] =
            new FieldInfo(
                name,
                fieldNumber,
                storeTermVector,
                omitNorms,
                storePayloads,
                indexOptions,
                docValuesType,
                dvGen,
                Collections.unmodifiableMap(attributes));
        infos[i].checkConsistency();
      }

      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
        CodecUtil.checkFooter(input);
      } else {
        CodecUtil.checkEOF(input);
      }
      return new FieldInfos(infos);
    }
  }
Ejemplo n.º 3
0
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
Ejemplo n.º 4
0
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public final void read(Directory directory, String segmentFileName)
      throws CorruptIndexException, IOException {
    boolean success = false;

    // Clear any previous segments:
    this.clear();

    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));

    generation = generationFromSegmentsFileName(segmentFileName);

    lastGeneration = generation;

    try {
      int format = input.readInt();
      if (format < 0) { // file contains explicit format info
        // check that it is a format we can understand
        if (format < CURRENT_FORMAT)
          throw new CorruptIndexException("Unknown format version: " + format);
        version = input.readLong(); // read version
        counter = input.readInt(); // read counter
      } else { // file is in old format without explicit format info
        counter = format;
      }

      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
        SegmentInfo si = new SegmentInfo(directory, format, input);
        if (si.getVersion() == null) {
          // It's a pre-3.1 segment, upgrade its version to either 3.0 or 2.x
          Directory dir = directory;
          if (si.getDocStoreOffset() != -1) {
            if (si.getDocStoreIsCompoundFile()) {
              dir =
                  new CompoundFileReader(
                      dir,
                      IndexFileNames.segmentFileName(
                          si.getDocStoreSegment(), IndexFileNames.COMPOUND_FILE_STORE_EXTENSION),
                      1024);
            }
          } else if (si.getUseCompoundFile()) {
            dir =
                new CompoundFileReader(
                    dir,
                    IndexFileNames.segmentFileName(si.name, IndexFileNames.COMPOUND_FILE_EXTENSION),
                    1024);
          }

          try {
            String store = si.getDocStoreOffset() != -1 ? si.getDocStoreSegment() : si.name;
            si.setVersion(FieldsReader.detectCodeVersion(dir, store));
          } finally {
            // If we opened the directory, close it
            if (dir != directory) dir.close();
          }
        }
        add(si);
      }

      if (format >= 0) { // in old format the version number may be at the end of the file
        if (input.getFilePointer() >= input.length())
          version = System.currentTimeMillis(); // old file format without version number
        else version = input.readLong(); // read version
      }

      if (format <= FORMAT_USER_DATA) {
        if (format <= FORMAT_DIAGNOSTICS) {
          userData = input.readStringStringMap();
        } else if (0 != input.readByte()) {
          userData = Collections.singletonMap("userData", input.readString());
        } else {
          userData = Collections.<String, String>emptyMap();
        }
      } else {
        userData = Collections.<String, String>emptyMap();
      }

      if (format <= FORMAT_CHECKSUM) {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen)
          throw new CorruptIndexException("checksum mismatch in segments file");
      }
      success = true;
    } finally {
      input.close();
      if (!success) {
        // Clear any segment infos we had loaded so we
        // have a clean slate on retry:
        this.clear();
      }
    }
  }
Ejemplo n.º 5
0
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
      throws IOException {

    long generation = generationFromSegmentsFileName(segmentFileName);
    try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
      // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
      // to read the magic ourselves.
      int magic = input.readInt();
      if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(
            input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
      }
      // 4.0+
      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
      // 5.0+
      byte id[] = null;
      if (format >= VERSION_50) {
        id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
      }

      SegmentInfos infos = new SegmentInfos();
      infos.id = id;
      infos.generation = generation;
      infos.lastGeneration = generation;
      if (format >= VERSION_53) {
        // TODO: in the future (7.0?  sigh) we can use this to throw IndexFormatTooOldException ...
        // or just rely on the
        // minSegmentLuceneVersion check instead:
        infos.luceneVersion =
            Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else compute the min version down below in the for loop
      }

      infos.version = input.readLong();
      infos.counter = input.readInt();
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (format >= VERSION_53) {
        if (numSegments > 0) {
          infos.minSegmentLuceneVersion =
              Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
          if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
            throw new IndexFormatTooOldException(
                input,
                "this index contains a too-old segment (version: "
                    + infos.minSegmentLuceneVersion
                    + ")");
          }
        } else {
          // else leave as null: no segments
        }
      } else {
        // else we recompute it below as we visit segments; it can't be used for throwing
        // IndexFormatTooOldExc, but consumers of
        // SegmentInfos can maybe still use it for other reasons
      }

      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte segmentID[];
        if (format >= VERSION_50) {
          byte hasID = input.readByte();
          if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
          } else if (hasID == 0) {
            segmentID = null; // 4.x segment, doesn't have an ID
          } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
          }
        } else {
          segmentID = null;
        }
        Codec codec = readCodec(input, format < VERSION_53);
        SegmentInfo info =
            codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException(
              "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = -1;
        if (format >= VERSION_46) {
          fieldInfosGen = input.readLong();
        }
        long dvGen = -1;
        if (format >= VERSION_49) {
          dvGen = input.readLong();
        } else {
          dvGen = fieldInfosGen;
        }
        SegmentCommitInfo siPerCommit =
            new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        if (format >= VERSION_46) {
          if (format < VERSION_49) {
            // Recorded per-generation files, which were buggy (see
            // LUCENE-5636). We need to read and keep them so we continue to
            // reference those files. Unfortunately it means that the files will
            // be referenced even if the fields are updated again, until the
            // segment is merged.
            final int numGensUpdatesFiles = input.readInt();
            final Map<Long, Set<String>> genUpdatesFiles;
            if (numGensUpdatesFiles == 0) {
              genUpdatesFiles = Collections.emptyMap();
            } else {
              genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
              for (int i = 0; i < numGensUpdatesFiles; i++) {
                genUpdatesFiles.put(input.readLong(), input.readStringSet());
              }
            }
            siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
          } else {
            if (format >= VERSION_51) {
              siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
            } else {
              siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
            }
            final Map<Integer, Set<String>> dvUpdateFiles;
            final int numDVFields = input.readInt();
            if (numDVFields == 0) {
              dvUpdateFiles = Collections.emptyMap();
            } else {
              Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
              for (int i = 0; i < numDVFields; i++) {
                if (format >= VERSION_51) {
                  map.put(input.readInt(), input.readSetOfStrings());
                } else {
                  map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
                }
              }
              dvUpdateFiles = Collections.unmodifiableMap(map);
            }
            siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
          }
        }
        infos.add(siPerCommit);

        Version segmentVersion = info.getVersion();
        if (format < VERSION_53) {
          if (infos.minSegmentLuceneVersion == null
              || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            infos.minSegmentLuceneVersion = segmentVersion;
          }
        } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException(
              "segments file recorded minSegmentLuceneVersion="
                  + infos.minSegmentLuceneVersion
                  + " but segment="
                  + info
                  + " has older version="
                  + segmentVersion,
              input);
        }
      }

      if (format >= VERSION_51) {
        infos.userData = input.readMapOfStrings();
      } else {
        infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
      }

      if (format >= VERSION_48) {
        CodecUtil.checkFooter(input);
      } else {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen) {
          throw new CorruptIndexException(
              "checksum failed (hardware problem?) : expected="
                  + Long.toHexString(checksumThen)
                  + " actual="
                  + Long.toHexString(checksumNow),
              input);
        }
        CodecUtil.checkEOF(input);
      }

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException(
            "Too many documents: an index cannot exceed "
                + IndexWriter.getActualMaxDocs()
                + " but readers have total maxDoc="
                + totalDocs,
            input);
      }

      return infos;
    }
  }