Lucene50NormsConsumer(
     SegmentWriteState state,
     String dataCodec,
     String dataExtension,
     String metaCodec,
     String metaExtension)
     throws IOException {
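    // create the norms data and metadata outputs and write their index headers; on any
    // failure before success is set, close this consumer so partially created files are released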
   boolean success = false;
   try {
     String dataName =
         IndexFileNames.segmentFileName(
             state.segmentInfo.name, state.segmentSuffix, dataExtension);
     data = state.directory.createOutput(dataName, state.context);
     CodecUtil.writeIndexHeader(
         data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
     String metaName =
         IndexFileNames.segmentFileName(
             state.segmentInfo.name, state.segmentSuffix, metaExtension);
     meta = state.directory.createOutput(metaName, state.context);
     CodecUtil.writeIndexHeader(
         meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(this);
     }
   }
 }
  public SepPostingsReader(
      Directory dir,
      FieldInfos fieldInfos,
      SegmentInfo segmentInfo,
      IOContext context,
      IntStreamFactory intFactory,
      String segmentSuffix)
      throws IOException {
    boolean success = false;
    try {
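       // the doc and skip streams always exist; the freq, pos, and payload streams are
       // only opened when the segment's fields actually index those stats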

      final String docFileName =
          IndexFileNames.segmentFileName(
              segmentInfo.name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION);
      docIn = intFactory.openInput(dir, docFileName, context);

      skipIn =
          dir.openInput(
              IndexFileNames.segmentFileName(
                  segmentInfo.name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION),
              context);

      if (fieldInfos.hasFreq()) {
        freqIn =
            intFactory.openInput(
                dir,
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION),
                context);
      } else {
        freqIn = null;
      }
      if (fieldInfos.hasProx()) {
        posIn =
            intFactory.openInput(
                dir,
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.POS_EXTENSION),
                context);
        payloadIn =
            dir.openInput(
                IndexFileNames.segmentFileName(
                    segmentInfo.name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION),
                context);
      } else {
        posIn = null;
        payloadIn = null;
      }
      success = true;
    } finally {
      if (!success) {
        close();
      }
    }
  }
 public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
   this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
   String suggestFSTFile =
       IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
   IndexOutput output = null;
   boolean success = false;
   try {
     output = state.directory.createOutput(suggestFSTFile, state.context);
     CodecUtil.writeIndexHeader(
         output,
         CODEC_NAME,
         SUGGEST_VERSION_CURRENT,
         state.segmentInfo.getId(),
         state.segmentSuffix);
     /*
      * we write the delegate postings format name so we can load it
      * without getting an instance in the ctor
      */
     output.writeString(delegatePostingsFormat.getName());
     output.writeString(writeProvider.getName());
     this.suggestFieldsConsumer = writeProvider.consumer(output);
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(output);
     }
   }
 }
Example #4
 /** Full constructor */
 public Lucene60PointsWriter(
     SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap)
     throws IOException {
   assert writeState.fieldInfos.hasPointValues();
   this.writeState = writeState;
   this.maxPointsInLeafNode = maxPointsInLeafNode;
   this.maxMBSortInHeap = maxMBSortInHeap;
   String dataFileName =
       IndexFileNames.segmentFileName(
           writeState.segmentInfo.name,
           writeState.segmentSuffix,
           Lucene60PointsFormat.DATA_EXTENSION);
   dataOut = writeState.directory.createOutput(dataFileName, writeState.context);
   boolean success = false;
   try {
     CodecUtil.writeIndexHeader(
         dataOut,
         Lucene60PointsFormat.DATA_CODEC_NAME,
         Lucene60PointsFormat.DATA_VERSION_CURRENT,
         writeState.segmentInfo.getId(),
         writeState.segmentSuffix);
     success = true;
   } finally {
     if (success == false) {
       IOUtils.closeWhileHandlingException(dataOut);
     }
   }
 }
    public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {

      String bloomFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
      IndexInput bloomIn = null;
      boolean success = false;
      try {
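        // the bloom file records the delegate postings format name followed by one
        // serialized FuzzySet (bloom filter) per field that has one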
        bloomIn = state.directory.openInput(bloomFileName, state.context);
        CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
        // // Load the hash function used in the BloomFilter
        // hashFunction = HashFunction.forName(bloomIn.readString());
        // Load the delegate postings format
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());

        this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
        int numBlooms = bloomIn.readInt();
        for (int i = 0; i < numBlooms; i++) {
          int fieldNum = bloomIn.readInt();
          FuzzySet bloom = FuzzySet.deserialize(bloomIn);
          FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
          bloomsByFieldName.put(fieldInfo.name, bloom);
        }
        IOUtils.close(bloomIn);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
        }
      }
    }
Example #6
 @Override
 public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
   String filename =
       IndexFileNames.segmentFileName(
           state.segmentInfo.name, "nrm", IndexFileNames.COMPOUND_FILE_EXTENSION);
   return new Lucene40DocValuesWriter(
       state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
 }
 public CompletionFieldsProducer(SegmentReadState state) throws IOException {
   String suggestFSTFile =
       IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
   IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
   if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
     // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
     // ordinary codec header:
     version =
         CodecUtil.checkIndexHeader(
             input,
             CODEC_NAME,
             SUGGEST_CODEC_VERSION,
             SUGGEST_VERSION_CURRENT,
             state.segmentInfo.getId(),
             state.segmentSuffix);
   } else {
     version =
         CodecUtil.checkHeader(
             input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
   }
   FieldsProducer delegateProducer = null;
   boolean success = false;
   try {
     PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
     String providerName = input.readString();
     CompletionLookupProvider completionLookupProvider = providers.get(providerName);
     if (completionLookupProvider == null) {
       throw new IllegalStateException(
           "no provider with name [" + providerName + "] registered");
     }
      // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
      // unnecessary heap usage?
     delegateProducer = delegatePostingsFormat.fieldsProducer(state);
      /*
       * If we are merging we don't load the FSTs at all, so that we
       * don't consume as much memory during the merge.
       */
      if (state.context.context != Context.MERGE) {
        // TODO: maybe we can do this in a fully lazy fashion based on some configuration.
        // Eventually we should have some kind of circuit breaker that prevents us from
        // going OOM here with some configuration.
       this.lookupFactory = completionLookupProvider.load(input);
     } else {
       this.lookupFactory = null;
     }
     this.delegateProducer = delegateProducer;
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(delegateProducer, input);
     } else {
       IOUtils.close(input);
     }
   }
 }
  @Override
  public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException {
    String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);

    int numFiles = si.files().size();
    String names[] = si.files().toArray(new String[numFiles]);
    Arrays.sort(names);
    long startOffsets[] = new long[numFiles];
    long endOffsets[] = new long[numFiles];

    BytesRefBuilder scratch = new BytesRefBuilder();

    try (IndexOutput out = dir.createOutput(dataFile, context)) {
      for (int i = 0; i < names.length; i++) {
        // write header for file
        SimpleTextUtil.write(out, HEADER);
        SimpleTextUtil.write(out, names[i], scratch);
        SimpleTextUtil.writeNewline(out);

        // write bytes for file
        startOffsets[i] = out.getFilePointer();
        try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) {
          out.copyBytes(in, in.length());
        }
        endOffsets[i] = out.getFilePointer();
      }

      long tocPos = out.getFilePointer();

      // write CFS table
      SimpleTextUtil.write(out, TABLE);
      SimpleTextUtil.write(out, Integer.toString(numFiles), scratch);
      SimpleTextUtil.writeNewline(out);

      for (int i = 0; i < names.length; i++) {
        SimpleTextUtil.write(out, TABLENAME);
        SimpleTextUtil.write(out, names[i], scratch);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, TABLESTART);
        SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, TABLEEND);
        SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch);
        SimpleTextUtil.writeNewline(out);
      }

      DecimalFormat df =
          new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
      SimpleTextUtil.write(out, TABLEPOS);
      SimpleTextUtil.write(out, df.format(tocPos), scratch);
      SimpleTextUtil.writeNewline(out);
    }
  }
Example #9
  @Override
  public void write(
      Directory directory,
      SegmentInfo segmentInfo,
      String segmentSuffix,
      FieldInfos infos,
      IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
      CodecUtil.writeHeader(
          output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        IndexOptions indexOptions = fi.getIndexOptions();
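        // pack the per-field flags (term vectors, norms, payloads, index options)
        // into a single byte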
        byte bits = 0x0;
        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
              || !fi.hasPayloads();
          if (indexOptions == IndexOptions.DOCS) {
            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
          }
        }
        output.writeString(fi.name);
        output.writeVInt(fi.number);
        output.writeByte(bits);

        // pack the DV types in one byte
        final byte dv = docValuesByte(fi.getDocValuesType());
        final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
        byte val = (byte) (0xff & ((nrm << 4) | dv));
        output.writeByte(val);
        output.writeLong(fi.getDocValuesGen());
        output.writeStringStringMap(fi.attributes());
      }
      CodecUtil.writeFooter(output);
    }
  }
 public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy)
     throws IOException {
   final String indexFileName =
       IndexFileNames.segmentFileName(
           state.segmentName, state.segmentSuffix, TERMS_INDEX_EXTENSION);
   out = state.directory.createOutput(indexFileName, state.context);
   boolean success = false;
   try {
     fieldInfos = state.fieldInfos;
     this.policy = policy;
     writeHeader(out);
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(out);
     }
   }
 }
 public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
     throws IOException {
   boolean success = false;
   try {
     in =
         directory.openInput(
             IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
     success = true;
   } finally {
     if (!success) {
        try {
          close();
        } catch (Throwable t) {
          // ensure we throw our original exception
        }
     }
   }
   readIndex(si.maxDoc());
 }
Example #12
  public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
      throws IOException {
    final String termsFileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    final IndexInput in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      version = readHeader(in);
      if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
        CodecUtil.checksumEntireFile(in);
      }
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
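       // one summary block per field: field number, term count, frequency totals,
       // doc count, and the number of metadata longs per term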
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        TermsReader current =
            new TermsReader(
                fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        TermsReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, in, current, previous);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }
  }
 public SimpleTextStoredFieldsReader(
     Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
   this.fieldInfos = fn;
   boolean success = false;
   try {
     in =
         directory.openInput(
             IndexFileNames.segmentFileName(
                 si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION),
             context);
     success = true;
   } finally {
     if (!success) {
        try {
          close();
        } catch (Throwable t) {
          // ensure we throw our original exception
        }
     }
   }
   readIndex(si.maxDoc());
 }
 @Override
 public void files(SegmentInfo segmentInfo, String segmentSuffix, Set<String> files)
     throws IOException {
   final String seedFileName =
       IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SEED_EXT);
   files.add(seedFileName);
   SepPostingsReader.files(segmentInfo, segmentSuffix, files);
   Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files);
   BlockTermsReader.files(segmentInfo, segmentSuffix, files);
   BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files);
   FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files);
   VariableGapTermsIndexReader.files(segmentInfo, segmentSuffix, files);
   // hackish!
   Iterator<String> it = files.iterator();
   while (it.hasNext()) {
     final String file = it.next();
     if (!segmentInfo.dir.fileExists(file)) {
       it.remove();
     }
   }
   // System.out.println("MockRandom.files return " + files);
 }
  @Override
  public IndexOutput createTempOutput(String prefix, String suffix, IOContext context)
      throws IOException {
    ensureOpen();

    // Make the file first...
    RAMFile file = newRAMFile();

    // ... then try to find a unique name for it:
    while (true) {
      String name =
          IndexFileNames.segmentFileName(
              prefix,
              suffix
                  + "_"
                  + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX),
              "tmp");
      if (fileMap.putIfAbsent(name, file) == null) {
        return new RAMOutputStream(name, file, true);
      }
    }
  }
Example #16
    @Override
    public void close() throws IOException {
      delegateFieldsConsumer.close();
      // Now we are done accumulating values for these fields
      List<Entry<FieldInfo, FuzzySet>> nonSaturatedBlooms =
          new ArrayList<Map.Entry<FieldInfo, FuzzySet>>();

      for (Entry<FieldInfo, FuzzySet> entry : bloomFilters.entrySet()) {
        FuzzySet bloomFilter = entry.getValue();
        if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) {
          nonSaturatedBlooms.add(entry);
        }
      }
      String bloomFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
      IndexOutput bloomOutput = null;
      try {
        bloomOutput = state.directory.createOutput(bloomFileName, state.context);
        CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION);
        // remember the name of the postings format we will delegate to
        bloomOutput.writeString(delegatePostingsFormat.getName());

        // First field in the output file is the number of fields+blooms saved
        bloomOutput.writeInt(nonSaturatedBlooms.size());
        for (Entry<FieldInfo, FuzzySet> entry : nonSaturatedBlooms) {
          FieldInfo fieldInfo = entry.getKey();
          FuzzySet bloomFilter = entry.getValue();
          bloomOutput.writeInt(fieldInfo.number);
          saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
        }
      } finally {
        IOUtils.close(bloomOutput);
      }
      // We are done with large bitsets so no need to keep them hanging around
      bloomFilters.clear();
    }
Example #17
  @Override
  public void finish() throws IOException {
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    finished = true;
    CodecUtil.writeFooter(dataOut);

    String indexFileName =
        IndexFileNames.segmentFileName(
            writeState.segmentInfo.name,
            writeState.segmentSuffix,
            Lucene60PointsFormat.INDEX_EXTENSION);
    // Write index file
    try (IndexOutput indexOut =
        writeState.directory.createOutput(indexFileName, writeState.context)) {
      CodecUtil.writeIndexHeader(
          indexOut,
          Lucene60PointsFormat.META_CODEC_NAME,
          Lucene60PointsFormat.INDEX_VERSION_CURRENT,
          writeState.segmentInfo.getId(),
          writeState.segmentSuffix);
      int count = indexFPs.size();
      indexOut.writeVInt(count);
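      // one (field number, data file pointer) entry per point field that was written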
      for (Map.Entry<String, Long> ent : indexFPs.entrySet()) {
        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(ent.getKey());
        if (fieldInfo == null) {
          throw new IllegalStateException(
              "wrote field=\"" + ent.getKey() + "\" but that field doesn't exist in FieldInfos");
        }
        indexOut.writeVInt(fieldInfo.number);
        indexOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(indexOut);
    }
  }
  /**
   * Prunes the given set of index files so that only the latest delete-generation (.liv) file
   * per segment is retained.
   */
 private void pruneOldDeleteGenerations(Set<Path> files) {
   final TreeSet<Path> delFiles = new TreeSet<>();
   for (Path file : files) {
     if (file.getFileName().toString().endsWith(".liv")) {
       delFiles.add(file);
     }
   }
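    // delFiles is sorted, so .liv files of the same segment are adjacent: compare each
    // file with the previous one and drop the older delete generation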
   Path last = null;
   for (Path current : delFiles) {
     if (last != null) {
       final String newSegmentName =
           IndexFileNames.parseSegmentName(current.getFileName().toString());
       final String oldSegmentName =
           IndexFileNames.parseSegmentName(last.getFileName().toString());
       if (newSegmentName.equals(oldSegmentName)) {
         int oldGen =
             Integer.parseInt(
                 IndexFileNames.stripExtension(
                         IndexFileNames.stripSegmentName(last.getFileName().toString()))
                     .replace("_", ""),
                 Character.MAX_RADIX);
         int newGen =
             Integer.parseInt(
                 IndexFileNames.stripExtension(
                         IndexFileNames.stripSegmentName(current.getFileName().toString()))
                     .replace("_", ""),
                 Character.MAX_RADIX);
         if (newGen > oldGen) {
           files.remove(last);
         } else {
           files.remove(current);
           continue;
         }
       }
     }
     last = current;
   }
 }
  @Override
  public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
      throws IOException {
    String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
    final IndexInput in = dir.openInput(dataFile, context);

    BytesRefBuilder scratch = new BytesRefBuilder();

    // first get to TOC:
    DecimalFormat df =
        new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
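     // the table-of-contents position is written as a fixed-width number at the very end
     // of the file (marker + padded offset + newline), so we can seek straight to it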
    long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1;
    in.seek(pos);
    SimpleTextUtil.readLine(in, scratch);
    assert StringHelper.startsWith(scratch.get(), TABLEPOS);
    long tablePos = -1;
    try {
      tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue();
    } catch (ParseException e) {
      throw new CorruptIndexException(
          "can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in);
    }

    // seek to TOC and read it
    in.seek(tablePos);
    SimpleTextUtil.readLine(in, scratch);
    assert StringHelper.startsWith(scratch.get(), TABLE);
    int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE));

    final String fileNames[] = new String[numEntries];
    final long startOffsets[] = new long[numEntries];
    final long endOffsets[] = new long[numEntries];

    for (int i = 0; i < numEntries; i++) {
      SimpleTextUtil.readLine(in, scratch);
      assert StringHelper.startsWith(scratch.get(), TABLENAME);
      fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME));

      if (i > 0) {
        // files must be unique and in sorted order
        assert fileNames[i].compareTo(fileNames[i - 1]) > 0;
      }

      SimpleTextUtil.readLine(in, scratch);
      assert StringHelper.startsWith(scratch.get(), TABLESTART);
      startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART));

      SimpleTextUtil.readLine(in, scratch);
      assert StringHelper.startsWith(scratch.get(), TABLEEND);
      endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND));
    }

    return new Directory() {

      private int getIndex(String name) throws IOException {
        int index = Arrays.binarySearch(fileNames, name);
        if (index < 0) {
          throw new FileNotFoundException(
              "No sub-file found (fileName="
                  + name
                  + " files: "
                  + Arrays.toString(fileNames)
                  + ")");
        }
        return index;
      }

      @Override
      public String[] listAll() throws IOException {
        ensureOpen();
        return fileNames.clone();
      }

      @Override
      public long fileLength(String name) throws IOException {
        ensureOpen();
        int index = getIndex(name);
        return endOffsets[index] - startOffsets[index];
      }

      @Override
      public IndexInput openInput(String name, IOContext context) throws IOException {
        ensureOpen();
        int index = getIndex(name);
        return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]);
      }

      @Override
      public void close() throws IOException {
        in.close();
      }

      // write methods: disabled

      @Override
      public IndexOutput createOutput(String name, IOContext context) {
        throw new UnsupportedOperationException();
      }

      @Override
      public void sync(Collection<String> names) {
        throw new UnsupportedOperationException();
      }

      @Override
      public void deleteFile(String name) {
        throw new UnsupportedOperationException();
      }

      @Override
      public void renameFile(String source, String dest) {
        throw new UnsupportedOperationException();
      }

      @Override
      public Lock makeLock(String name) {
        throw new UnsupportedOperationException();
      }
    };
  }
  Lucene42DocValuesProducer(
      SegmentReadState state,
      String dataCodec,
      String dataExtension,
      String metaCodec,
      String metaExtension)
      throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    String metaName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
    try {
      version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
      numerics = new HashMap<>();
      binaries = new HashMap<>();
      fsts = new HashMap<>();
      numEntries = readFields(in, state.fieldInfos);

      if (version >= VERSION_CHECKSUM) {
        CodecUtil.checkFooter(in);
      } else {
        CodecUtil.checkEOF(in);
      }

      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
      if (version != version2) {
        throw new CorruptIndexException(
            "Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }

      if (version >= VERSION_CHECKSUM) {
        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(data);
      }

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {

    final String seedFileName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
    final IndexInput in = state.dir.openInput(seedFileName, state.context);
    final long seed = in.readLong();
    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: reading from seg="
              + state.segmentInfo.name
              + " formatID="
              + state.segmentSuffix
              + " seed="
              + seed);
    }
    in.close();

    final Random random = new Random(seed);

    int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;

    if (random.nextBoolean()) {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Sep postings");
      }
      postingsReader =
          new SepPostingsReader(
              state.dir,
              state.segmentInfo,
              state.context,
              new MockIntStreamFactory(random),
              state.segmentSuffix);
    } else {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Standard postings");
      }
      postingsReader =
          new Lucene40PostingsReader(
              state.dir, state.segmentInfo, state.context, state.segmentSuffix);
    }

    if (random.nextBoolean()) {
      final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
      if (LuceneTestCase.VERBOSE) {
        System.out.println(
            "MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
      }
      postingsReader = new PulsingPostingsReader(postingsReader);
    }

    final FieldsProducer fields;

    if (random.nextBoolean()) {
      // Use BlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading BlockTree terms dict");
      }

      boolean success = false;
      try {
        fields =
            new BlockTreeTermsReader(
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                postingsReader,
                state.context,
                state.segmentSuffix,
                state.termsIndexDivisor);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Block terms dict");
      }
      final TermsIndexReaderBase indexReader;
      boolean success = false;
      try {
        final boolean doFixedGap = random.nextBoolean();

        // randomness diverges from writer, here:
        // if termsIndexDivisor is set to -1, we should not touch it. It means a
        // test explicitly instructed not to load the terms index.
        if (state.termsIndexDivisor != -1) {
          state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
        }

        if (doFixedGap) {
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")");
          }
          indexReader =
              new FixedGapTermsIndexReader(
                  state.dir,
                  state.fieldInfos,
                  state.segmentInfo.name,
                  state.termsIndexDivisor,
                  BytesRef.getUTF8SortedAsUnicodeComparator(),
                  state.segmentSuffix,
                  state.context);
        } else {
          final int n2 = random.nextInt(3);
          if (n2 == 1) {
            random.nextInt();
          } else if (n2 == 2) {
            random.nextLong();
          }
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: variable-gap terms index (divisor="
                    + state.termsIndexDivisor
                    + ")");
          }
          indexReader =
              new VariableGapTermsIndexReader(
                  state.dir,
                  state.fieldInfos,
                  state.segmentInfo.name,
                  state.termsIndexDivisor,
                  state.segmentSuffix,
                  state.context);
        }

        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }

      final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024);

      success = false;
      try {
        fields =
            new BlockTermsReader(
                indexReader,
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                postingsReader,
                state.context,
                termsCacheSize,
                state.segmentSuffix);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsReader.close();
          } finally {
            indexReader.close();
          }
        }
      }
    }

    return fields;
  }
Example #22
  /**
   * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be
   * between minItemsInBlock and maxItemsInBlock, though in some cases the blocks may be smaller
   * than the min.
   */
  public BlockTreeTermsWriter(
      SegmentWriteState state,
      PostingsWriterBase postingsWriter,
      int minItemsInBlock,
      int maxItemsInBlock)
      throws IOException {
    if (minItemsInBlock <= 1) {
      throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
    }
    if (maxItemsInBlock <= 0) {
      throw new IllegalArgumentException("maxItemsInBlock must be >= 1; got " + maxItemsInBlock);
    }
    if (minItemsInBlock > maxItemsInBlock) {
      throw new IllegalArgumentException(
          "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock="
              + maxItemsInBlock
              + " minItemsInBlock="
              + minItemsInBlock);
    }
    if (2 * (minItemsInBlock - 1) > maxItemsInBlock) {
      throw new IllegalArgumentException(
          "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock="
              + maxItemsInBlock
              + " minItemsInBlock="
              + minItemsInBlock);
    }

    final String termsFileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
    out = state.directory.createOutput(termsFileName, state.context);
    boolean success = false;
    IndexOutput indexOut = null;
    try {
      fieldInfos = state.fieldInfos;
      this.minItemsInBlock = minItemsInBlock;
      this.maxItemsInBlock = maxItemsInBlock;
      writeHeader(out);

      // DEBUG = state.segmentName.equals("_4a");

      final String termsIndexFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
      indexOut = state.directory.createOutput(termsIndexFileName, state.context);
      writeIndexHeader(indexOut);

      currentField = null;
      this.postingsWriter = postingsWriter;
      // segment = state.segmentName;

      // System.out.println("BTW.init seg=" + state.segmentName);

      postingsWriter.start(out); // have consumer write its format/header
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(out, indexOut);
      }
    }
    this.indexOut = indexOut;
  }
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    }

    final long seed = seedRandom.nextLong();

    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: writing to seg="
              + state.segmentName
              + " formatID="
              + state.segmentSuffix
              + " seed="
              + seed);
    }

    final String seedFileName =
        IndexFileNames.segmentFileName(state.segmentName, state.segmentSuffix, SEED_EXT);
    final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
    try {
      out.writeLong(seed);
    } finally {
      out.close();
    }

    final Random random = new Random(seed);

    random.nextInt(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    if (random.nextBoolean()) {
      postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval);
    } else {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Standard postings");
      }
      postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
    }

    if (random.nextBoolean()) {
      final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
      if (LuceneTestCase.VERBOSE) {
        System.out.println(
            "MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
      }
      postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
    }

    final FieldsConsumer fields;

    if (random.nextBoolean()) {
      // Use BlockTree terms dict

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing BlockTree terms dict");
      }

      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));

      boolean success = false;
      try {
        fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Block terms dict");
      }

      boolean success = false;

      final TermsIndexWriterBase indexWriter;
      try {
        if (random.nextBoolean()) {
          state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
          }
          indexWriter = new FixedGapTermsIndexWriter(state);
        } else {
          final VariableGapTermsIndexWriter.IndexTermSelector selector;
          final int n2 = random.nextInt(3);
          if (n2 == 0) {
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
            }
          } else if (n2 == 1) {
            final int docFreqThresh = _TestUtil.nextInt(random, 2, 100);
            final int tii = _TestUtil.nextInt(random, 1, 100);
            selector =
                new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
          } else {
            final long seed2 = random.nextLong();
            final int gap = _TestUtil.nextInt(random, 2, 40);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
            }
            selector =
                new VariableGapTermsIndexWriter.IndexTermSelector() {
                  final Random rand = new Random(seed2);

                  @Override
                  public boolean isIndexTerm(BytesRef term, TermStats stats) {
                    return rand.nextInt(gap) == gap / 2;
                  }

                  @Override
                  public void newField(FieldInfo fieldInfo) {}
                };
          }
          indexWriter = new VariableGapTermsIndexWriter(state, selector);
        }
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }

      success = false;
      try {
        fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsWriter.close();
          } finally {
            indexWriter.close();
          }
        }
      }
    }

    return fields;
  }
Example #24
  @Override
  public FieldInfos read(
      Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
      int codecVersion =
          CodecUtil.checkHeader(
              input,
              Lucene46FieldInfosFormat.CODEC_NAME,
              Lucene46FieldInfosFormat.FORMAT_START,
              Lucene46FieldInfosFormat.FORMAT_CURRENT);

      final int size = input.readVInt(); // read in the size
      FieldInfo infos[] = new FieldInfo[size];

      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException(
              "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
        boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
        boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions;
        if (!isIndexed) {
          indexOptions = IndexOptions.NONE;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS;
        } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
        } else {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
        }

        // DV Types are packed in one byte
        byte val = input.readByte();
        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
        final long dvGen = input.readLong();
        final Map<String, String> attributes = input.readStringStringMap();

        if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) {
          // Undead norms!  Lucene42NormsProducer will check this and bring norms back from the
          // dead:
          UndeadNormsProducer.setUndead(attributes);
        }

        infos[i] =
            new FieldInfo(
                name,
                fieldNumber,
                storeTermVector,
                omitNorms,
                storePayloads,
                indexOptions,
                docValuesType,
                dvGen,
                Collections.unmodifiableMap(attributes));
        infos[i].checkConsistency();
      }

      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
        CodecUtil.checkFooter(input);
      } else {
        CodecUtil.checkEOF(input);
      }
      return new FieldInfos(infos);
    }
  }
  /** Sole constructor. */
  public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;

    String termsName =
        IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
      termsIn = state.directory.openInput(termsName, state.context);
      version =
          CodecUtil.checkIndexHeader(
              termsIn,
              TERMS_CODEC_NAME,
              VERSION_START,
              VERSION_CURRENT,
              state.segmentInfo.getId(),
              state.segmentSuffix);

      String indexName =
          IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
      indexIn = state.directory.openInput(indexName, state.context);
      CodecUtil.checkIndexHeader(
          indexIn,
          TERMS_INDEX_CODEC_NAME,
          version,
          version,
          state.segmentInfo.getId(),
          state.segmentSuffix);
      CodecUtil.checksumEntireFile(indexIn);

      // Have PostingsReader init itself
      postingsReader.init(termsIn, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(termsIn);

      // Read per-field details
      seekDir(termsIn, dirOffset);
      seekDir(indexIn, indexDirOffset);

      final int numFields = termsIn.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
      }

      for (int i = 0; i < numFields; ++i) {
        final int field = termsIn.readVInt();
        final long numTerms = termsIn.readVLong();
        if (numTerms <= 0) {
          throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
        }
        final int numBytes = termsIn.readVInt();
        if (numBytes < 0) {
          throw new CorruptIndexException(
              "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
        }
        final BytesRef rootCode = new BytesRef(new byte[numBytes]);
        termsIn.readBytes(rootCode.bytes, 0, numBytes);
        rootCode.length = numBytes;
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        if (fieldInfo == null) {
          throw new CorruptIndexException("invalid field number: " + field, termsIn);
        }
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
        final long sumDocFreq = termsIn.readVLong();
        final int docCount = termsIn.readVInt();
        final int longsSize = termsIn.readVInt();
        if (longsSize < 0) {
          throw new CorruptIndexException(
              "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
              termsIn);
        }
        BytesRef minTerm = readBytesRef(termsIn);
        BytesRef maxTerm = readBytesRef(termsIn);
        if (docCount < 0
            || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(),
              termsIn);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
              termsIn);
        }
        final long indexStartFP = indexIn.readVLong();
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    this,
                    fieldInfo,
                    numTerms,
                    rootCode,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    indexStartFP,
                    longsSize,
                    indexIn,
                    minTerm,
                    maxTerm));
        if (previous != null) {
          throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
        }
      }

      indexIn.close();
      success = true;
    } finally {
      if (!success) {
        // this.close() will close in:
        IOUtils.closeWhileHandlingException(indexIn, this);
      }
    }
  }
Example #26
  public BlockTermsReader(
      TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {

    this.postingsReader = postingsReader;

    String filename =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, BlockTermsWriter.TERMS_EXTENSION);
    in = state.directory.openInput(filename, state.context);

    boolean success = false;
    try {
      CodecUtil.checkIndexHeader(
          in,
          BlockTermsWriter.CODEC_NAME,
          BlockTermsWriter.VERSION_START,
          BlockTermsWriter.VERSION_CURRENT,
          state.segmentInfo.getId(),
          state.segmentSuffix);

      // Have PostingsReader init itself
      postingsReader.init(in, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(in);

      // Read per-field details
      seekDir(in);

      final int numFields = in.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid number of fields: " + numFields, in);
      }
      for (int i = 0; i < numFields; i++) {
        final int field = in.readVInt();
        final long numTerms = in.readVLong();
        assert numTerms >= 0;
        final long termsStartPointer = in.readVLong();
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
        final long sumDocFreq = in.readVLong();
        final int docCount = in.readVInt();
        final int longsSize = in.readVInt();
        if (docCount < 0
            || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
        }
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    fieldInfo,
                    numTerms,
                    termsStartPointer,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    longsSize));
        if (previous != null) {
          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
        }
      }
      success = true;
    } finally {
      if (!success) {
        in.close();
      }
    }

    this.indexReader = indexReader;
  }
  Lucene70NormsProducer(
      SegmentReadState state,
      String dataCodec,
      String dataExtension,
      String metaCodec,
      String metaExtension)
      throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    String metaName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    int version = -1;

    // read in the entries from the metadata file.
    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
      Throwable priorE = null;
      try {
        version =
            CodecUtil.checkIndexHeader(
                in,
                metaCodec,
                VERSION_START,
                VERSION_CURRENT,
                state.segmentInfo.getId(),
                state.segmentSuffix);
        readFields(in, state.fieldInfos);
      } catch (Throwable exception) {
        priorE = exception;
      } finally {
        CodecUtil.checkFooter(in, priorE);
      }
    }

    String dataName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    data = state.directory.openInput(dataName, state.context);
    boolean success = false;
    try {
      final int version2 =
          CodecUtil.checkIndexHeader(
              data,
              dataCodec,
              VERSION_START,
              VERSION_CURRENT,
              state.segmentInfo.getId(),
              state.segmentSuffix);
      if (version != version2) {
        throw new CorruptIndexException(
            "Format versions mismatch: meta=" + version + ",data=" + version2, data);
      }

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(data);

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }