Esempio n. 1
0
  /** create a RandomIndexWriter with the provided config */
  public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
    // TODO: this should be solved in a different way; Random should not be shared (!).
    this.r = new Random(r.nextLong());
    w = mockIndexWriter(dir, c, r);
    flushAt = _TestUtil.nextInt(r, 10, 1000);
    codec = w.getConfig().getCodec();
    if (LuceneTestCase.VERBOSE) {
      System.out.println("RIW dir=" + dir + " config=" + w.getConfig());
      System.out.println("codec default=" + codec.getName());
    }

    // Make sure we sometimes test indices that don't get
    // any forced merges:
    doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
  }
Esempio n. 2
0
 /**
  * Adds the writer-side memory figures to {@code stats}: the version map's RAM usage, the index
  * writer's current RAM usage, and the writer's configured RAM buffer size converted from
  * megabytes to bytes.
  *
  * @param stats accumulator that receives the three values
  */
 @Override
 protected final void writerSegmentStats(SegmentsStats stats) {
   final long versionMapBytes = versionMap.ramBytesUsed();
   stats.addVersionMapMemoryInBytes(versionMapBytes);

   final long writerBytes = indexWriter.ramBytesUsed();
   stats.addIndexWriterMemoryInBytes(writerBytes);

   // The buffer size is configured in MB; scale to bytes and truncate to a whole number.
   final double ramBufferMb = indexWriter.getConfig().getRAMBufferSizeMB();
   stats.addIndexWriterMaxMemoryInBytes((long) (ramBufferMb * 1024 * 1024));
 }
Esempio n. 3
0
  /**
   * Indexes the given file as a single document using the given writer.
   *
   * <p>Unreadable files, files that cannot be opened, and directories are skipped silently. This
   * method does <b>not</b> recurse into directories; callers that want a whole tree indexed must
   * walk it themselves and call this method once per file.
   *
   * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput,
   * put multiple documents into your input file(s). An example of this is in the benchmark module,
   * which can create "line doc" files, one document per line, using the <a
   * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
   * >WriteLineDocTask</a>.
   *
   * @param writer Writer to the index where the given file info will be stored
   * @param file The file to index; directories and unreadable files are ignored
   * @throws IOException If there is a low-level I/O error
   */
  private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read, and do not descend into directories
    if (!file.canRead() || file.isDirectory()) {
      return;
    }

    final FileInputStream opened;
    try {
      opened = new FileInputStream(file);
    } catch (FileNotFoundException ignored) {
      // at least on windows, some temporary files raise this exception with an "access denied"
      // message
      // checking if the file can be read doesn't help
      return;
    }

    // try-with-resources closes the stream on every path and, unlike a manual finally-close,
    // does not let a failure in close() hide an exception thrown while indexing.
    try (FileInputStream fis = opened) {
      // make a new, empty document
      Document doc = new Document();

      // Add the path of the file as a field named "path".  Use a
      // field that is indexed (i.e. searchable), but don't tokenize
      // the field into separate words and don't index term frequency
      // or positional information:
      Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
      doc.add(pathField);

      // Add the last modified date of the file a field named "modified".
      // Use a LongField that is indexed (i.e. efficiently filterable with
      // NumericRangeFilter).  This indexes to milli-second resolution, which
      // is often too fine.  You could instead create a number based on
      // year/month/day/hour/minutes/seconds, down the resolution you require.
      // For example the long value 2011021714 would mean
      // February 17, 2011, 2-3 PM.
      doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

      // Add the contents of the file to a field named "contents".  Specify a Reader,
      // so that the text of the file is tokenized and indexed, but not stored.
      // The bytes are decoded as UTF-8; if the file uses a different encoding,
      // searching for special characters will fail.
      doc.add(
          new TextField(
              "contents",
              new BufferedReader(
                  new InputStreamReader(fis, java.nio.charset.StandardCharsets.UTF_8))));

      if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        System.out.println("adding " + file);
        writer.addDocument(doc);
      } else {
        // Existing index (an old copy of this document may have been indexed) so
        // we use updateDocument instead to replace the old one matching the exact
        // path, if present:
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.getPath()), doc);
      }
    }
  }
Esempio n. 4
0
  /**
   * Indexes a single file as one document.
   *
   * <p>The document gets four fields: {@code path} (the full path, stored, not tokenized),
   * {@code filename} (the last path element with a trailing {@code .txt} extension removed,
   * stored, not tokenized), {@code modified} (the supplied timestamp, not stored) and {@code
   * contents} (the file's text decoded as UTF-8, tokenized, not stored).
   *
   * @param writer writer to the index that receives the document
   * @param file the file to read and index
   * @param lastModified value indexed in the {@code modified} field
   * @throws IOException if the file cannot be opened or a low-level I/O error occurs
   */
  static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
      // make a new, empty document
      Document doc = new Document();

      // Add the path of the file as a field named "path".  Use a
      // field that is indexed (i.e. searchable), but don't tokenize
      // the field into separate words and don't index term frequency
      // or positional information:
      Field pathField = new StringField("path", file.toString(), Field.Store.YES);
      doc.add(pathField);

      // Strip the ".txt" extension only when it is the actual suffix. An unanchored
      // replaceFirst("\\.txt", "") would also cut ".txt" out of the middle of a name
      // (e.g. "a.txtfile.txt" -> "afile.txt").
      final String txtExtension = ".txt";
      String f = file.getFileName().toString();
      if (f.endsWith(txtExtension)) {
        f = f.substring(0, f.length() - txtExtension.length());
      }
      doc.add(new StringField("filename", f, Field.Store.YES));

      // Add the last modified date of the file a field named "modified".
      // Use a LongField that is indexed (i.e. efficiently filterable with
      // NumericRangeFilter).  This indexes to milli-second resolution, which
      // is often too fine.  You could instead create a number based on
      // year/month/day/hour/minutes/seconds, down the resolution you require.
      // For example the long value 2011021714 would mean
      // February 17, 2011, 2-3 PM.
      doc.add(new LongField("modified", lastModified, Field.Store.NO));

      // Add the contents of the file to a field named "contents".  Specify a Reader,
      // so that the text of the file is tokenized and indexed, but not stored.
      // The bytes are decoded as UTF-8; if the file uses a different encoding,
      // searching for special characters will fail.
      doc.add(
          new TextField(
              "contents",
              new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
      if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        writer.addDocument(doc);
      } else {
        // Existing index (an old copy of this document may have been indexed) so
        // we use updateDocument instead to replace the old one matching the exact
        // path, if present:
        writer.updateDocument(new Term("path", file.toString()), doc);
      }
    }
  }
Esempio n. 5
0
  /**
   * Builds a document from {@code indexingValue} and writes it through {@code writer}.
   *
   * <p>When the writer was opened in {@code CREATE} mode the document is simply added; otherwise
   * it replaces any existing document whose id field matches.
   *
   * @param writer index writer that receives the document
   * @param indexingValue source of the field values
   * @throws Exception if building or writing the document fails
   */
  private void addDoc(IndexWriter writer, IndexingValue indexingValue) throws Exception {
    Document doc = new Document();

    // type
    doc.add(new IntField(FIELD_LABEL_TYPE, indexingValue.getType(), Field.Store.YES));
    // id
    doc.add(new StringField(FIELD_LABEL_ID, indexingValue.getId(), Field.Store.YES));

    // title
    Field titleField = new TextField(FIELD_LABEL_TITLE, indexingValue.getTitle(), Field.Store.YES);
    doc.add(titleField);
    // contents
    Field contentsField =
        new TextField(FIELD_LABEL_CONTENTS, indexingValue.getContents(), Field.Store.YES);
    doc.add(contentsField);

    // tags
    doc.add(new TextField(FIELD_LABEL_TAGS, indexingValue.getTags(), Field.Store.YES));
    // users who may access the document
    doc.add(
        new TextField(FIELD_LABEL_USERS, indexingValue.getUsers().toString(), Field.Store.YES));
    // groups who may access the document
    doc.add(
        new TextField(FIELD_LABEL_GROUPS, indexingValue.getGroups().toString(), Field.Store.YES));

    // user who registered the document
    doc.add(new StringField(FIELD_LABEL_CREATE_USER, indexingValue.getCreator(), Field.Store.YES));

    // timestamp
    doc.add(new LongField(FIELD_LABEL_TIME, indexingValue.getTime(), Field.Store.YES));

    final boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
    if (!freshIndex) {
      // An older copy may already be indexed: replace it by id.
      log.debug("updating " + indexingValue.getId());
      writer.updateDocument(new Term(FIELD_LABEL_ID, indexingValue.getId()), doc);
      return;
    }
    // Brand-new index: nothing to replace.
    log.debug("adding " + indexingValue.getId());
    writer.addDocument(doc);
  }
Esempio n. 6
0
 /**
  * Runs a merge on the index writer while holding {@code optimizeLock}, optionally as a segment
  * upgrade, and optionally followed by a flush.
  *
  * <p>Exactly one of three merge operations is invoked:
  *
  * <ul>
  *   <li>{@code onlyExpungeDeletes == true}: {@code indexWriter.forceMergeDeletes(true)};
  *   <li>otherwise, {@code maxNumSegments <= 0}: {@code indexWriter.maybeMerge()};
  *   <li>otherwise: {@code indexWriter.forceMerge(maxNumSegments, true)}.
  * </ul>
  *
  * <p>Any {@link Throwable} raised after the lock is taken is passed to {@code
  * maybeFailEngine("force merge", t)} and then rethrown. The merge policy's upgrade flag is
  * always reset and the lock always released on exit.
  *
  * @param flush if true, after merging call {@code tryRenewSyncCommit()} and, when that returns
  *     false, {@code flush(false, true)}
  * @param maxNumSegments segment count passed to {@code indexWriter.forceMerge}; values
  *     {@code <= 0} select {@code maybeMerge()} instead
  * @param onlyExpungeDeletes if true, only {@code forceMergeDeletes} is run; asserted to be
  *     incompatible with {@code upgrade}
  * @param upgrade if true, marks an upgrade as in progress on the merge policy before merging
  * @param upgradeOnlyAncientSegments forwarded to {@code setUpgradeInProgress} when {@code
  *     upgrade} is true
  */
 @Override
 public void forceMerge(
     final boolean flush,
     int maxNumSegments,
     boolean onlyExpungeDeletes,
     final boolean upgrade,
     final boolean upgradeOnlyAncientSegments)
     throws EngineException, EngineClosedException, IOException {
   /*
    * We do NOT acquire the readlock here since we are waiting on the merges to finish
    * that's fine since the IW.rollback should stop all the threads and trigger an IOException
    * causing us to fail the forceMerge
    *
    * The way we implement upgrades is a bit hackish in the sense that we set an instance
    * variable and that this setting will thus apply to the next forced merge that will be run.
    * This is ok because (1) this is the only place we call forceMerge, (2) we have a single
    * thread for optimize, and the 'optimizeLock' guarding this code, and (3) ConcurrentMergeScheduler
    * syncs calls to findForcedMerges.
    */
   // The cast below relies on the writer having been configured with an ElasticsearchMergePolicy;
   // the assert reports the actual class name when that is not the case.
   assert indexWriter.getConfig().getMergePolicy() instanceof ElasticsearchMergePolicy
       : "MergePolicy is " + indexWriter.getConfig().getMergePolicy().getClass().getName();
   ElasticsearchMergePolicy mp =
       (ElasticsearchMergePolicy) indexWriter.getConfig().getMergePolicy();
   optimizeLock.lock();
   try {
     ensureOpen();
     if (upgrade) {
       logger.info(
           "starting segment upgrade upgradeOnlyAncientSegments={}", upgradeOnlyAncientSegments);
       // Flag is read by the next forced merge (see the block comment above).
       mp.setUpgradeInProgress(true, upgradeOnlyAncientSegments);
     }
     store.incRef(); // increment the ref just to ensure nobody closes the store while we optimize
     try {
       if (onlyExpungeDeletes) {
         // An upgrade must go through forceMerge, not forceMergeDeletes.
         assert upgrade == false;
         indexWriter.forceMergeDeletes(true /* blocks and waits for merges*/);
       } else if (maxNumSegments <= 0) {
         // No target segment count given: just give the merge policy a chance to run.
         assert upgrade == false;
         indexWriter.maybeMerge();
       } else {
         indexWriter.forceMerge(maxNumSegments, true /* blocks and waits for merges*/);
       }
       if (flush) {
         // Only do a full flush when the sync commit could not be renewed.
         if (tryRenewSyncCommit() == false) {
           flush(false, true);
         }
       }
       if (upgrade) {
         logger.info("finished segment upgrade");
       }
     } finally {
       // Balances the incRef above on both success and failure.
       store.decRef();
     }
   } catch (Throwable t) {
     // Let the engine decide whether this failure is fatal, then propagate it unchanged.
     maybeFailEngine("force merge", t);
     throw t;
   } finally {
     try {
       mp.setUpgradeInProgress(
           false, false); // reset it just to make sure we reset it in a case of an error
     } finally {
       // Nested finally: the lock is released even if resetting the flag throws.
       optimizeLock.unlock();
     }
   }
 }
Esempio n. 7
0
 /**
  * Returns the configuration object currently reported by the underlying index writer.
  *
  * @return the result of {@code indexWriter.getConfig()}
  */
 LiveIndexWriterConfig getCurrentIndexWriterConfig() {
   final LiveIndexWriterConfig currentConfig = indexWriter.getConfig();
   return currentConfig;
 }