Example #1
 private IndexWriter createWriter(boolean create) throws IOException {
   try {
     final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
     iwc.setCommitOnClose(false); // don't commit on close by default
     iwc.setOpenMode(
         create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
     iwc.setIndexDeletionPolicy(deletionPolicy);
     // when tests.verbose is set, Lucene prints its default InfoStream to stdout;
     // otherwise, route the InfoStream output to our logger
     boolean verbose = false;
     try {
       verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
     } catch (Throwable ignore) {
     }
     iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
     iwc.setMergeScheduler(mergeScheduler);
     MergePolicy mergePolicy = config().getMergePolicy();
     // Give us the opportunity to upgrade old segments while performing
     // background merges
     mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
     iwc.setMergePolicy(mergePolicy);
     iwc.setSimilarity(engineConfig.getSimilarity());
     iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
     iwc.setCodec(engineConfig.getCodec());
     // always use compound on flush - reduces the number of file handles on refresh
     iwc.setUseCompoundFile(true);
     return new IndexWriter(store.directory(), iwc);
   } catch (LockObtainFailedException ex) {
     logger.warn("could not lock IndexWriter", ex);
     throw ex;
   }
 }
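
For comparison, here is a minimal standalone sketch of the same configuration pattern. The FSDirectory path and StandardAnalyzer are assumptions for illustration; the original pulls the analyzer, codec, merge policy, and directory from engineConfig and store.

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class WriterFactory {
  // open an IndexWriter with the same create/append switch as createWriter above
  static IndexWriter open(boolean create) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setCommitOnClose(false); // as above: commits stay explicit
    iwc.setOpenMode(
        create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
    iwc.setUseCompoundFile(true); // fewer file handles per refresh
    return new IndexWriter(FSDirectory.open(Paths.get("index")), iwc);
  }
}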
Example #2
     @Override
    public void run() {
      IndexWriter writer = null;
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      for (int i = 0; i < this.numIteration; i++) {
        if (VERBOSE) {
          System.out.println("TEST: WriterThread iter=" + i);
        }

        IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));

        // we only print the IW infoStream output if an exception occurs, below:
        PrintStream printStream;
        try {
          printStream = new PrintStream(baos, true, "UTF8");
        } catch (UnsupportedEncodingException uee) {
          // shouldn't happen
          throw new RuntimeException(uee);
        }

        iwc.setInfoStream(new PrintStreamInfoStream(printStream));

        printStream.println("\nTEST: WriterThread iter=" + i);
        iwc.setOpenMode(OpenMode.APPEND);
        try {
          writer = new IndexWriter(dir, iwc);
        } catch (Throwable t) {
          if (Constants.WINDOWS && t instanceof AccessDeniedException) {
            // LUCENE-6684: suppress this: on Windows, a file in the curious "pending delete"
            // state can cause this exception on IW init, where one thread/process deleted an
            // old segments_N, but the delete hasn't finished yet because other
            // threads/processes still have it open
            printStream.println("TEST: AccessDeniedException on init writer");
            t.printStackTrace(printStream);
          } else {
            hitException = true;
            System.out.println(
                "Stress Test Index Writer: creation hit unexpected exception: " + t.toString());
            t.printStackTrace(System.out);
            System.out.println(toString(baos));
          }
          break;
        }
        if (writer != null) {
          try {
            addDoc(writer);
          } catch (Throwable t) {
            hitException = true;
            System.out.println(
                "Stress Test Index Writer: addDoc hit unexpected exception: " + t.toString());
            t.printStackTrace(System.out);
            System.out.println(toString(baos));
            break;
          }
          try {
            writer.close();
          } catch (Throwable t) {
            hitException = true;
            System.out.println(
                "Stress Test Index Writer: close hit unexpected exception: " + t.toString());
            t.printStackTrace(System.out);
            System.out.println(toString(baos));
            break;
          }
          writer = null;
        }
      }
    }
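
The pattern worth noting here is buffering the InfoStream into a ByteArrayOutputStream and printing it only when something fails. A condensed sketch of that pattern follows; the directory path and analyzer are illustrative assumptions, not taken from the test.

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.PrintStreamInfoStream;

public class QuietInfoStreamExample {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos, true, "UTF-8");
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setInfoStream(new PrintStreamInfoStream(ps)); // capture, don't print yet
    try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("index")), iwc)) {
      // ... index documents ...
    } catch (Throwable t) {
      // only on failure does the buffered diagnostic output reach the console
      System.out.println(baos.toString("UTF-8"));
      throw t;
    }
  }
}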
Example #3
     @Override
    public void close(TaskAttemptContext context) throws IOException {
       LOG.debug(
           "Task {} merging into dstDir: {}, srcDirs: {}",
           context.getTaskAttemptID(),
           workDir,
           shards);
      writeShardNumberFile(context);
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex =
            new HdfsDirectory(workDir, NoLockFactory.INSTANCE, context.getConfiguration());

        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig =
            new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE).setUseCompoundFile(false);
        // .setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
        // .setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?

        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
        //        writerConfig.setRAMBufferSizeMB(100); // improve performance
        //        writerConfig.setMaxThreadStates(1);

        // disable compound file to improve performance
        // also see
        // http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
          //          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
          //          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
          //          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);

        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] =
              new HdfsDirectory(shards.get(i), NoLockFactory.INSTANCE, context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        RTimer timer = new RTimer();

        writer.addIndexes(indexes);
        // TODO: avoid intermediate copying of files into dst directory; rename the files
        // into the dir instead (cp -> rename). This can improve performance and turns this
        // phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746

        timer.stop();
        if (LOG.isDebugEnabled()) {
          context
              .getCounter(
                  SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
              .increment((long) timer.getTime());
        }
        LOG.info("Logical merge took {}ms", timer.getTime());
        int maxSegments =
            context
                .getConfiguration()
                .getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus(
            "Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        timer = new RTimer();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments);
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
          // see
          // http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        timer.stop();
        if (LOG.isDebugEnabled()) {
          context
              .getCounter(
                  SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
              .increment((long) timer.getTime());
        }
        LOG.info(
            "Optimizing Solr: done forcing tree merge down to {} segments in {}ms",
            maxSegments,
            timer.getTime());

        timer = new RTimer();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        LOG.info("Optimizing Solr: Done closing index writer in {}ms", timer.getTime());
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }
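
Stripped of the Hadoop plumbing, the core of this close() method is Lucene's addIndexes(Directory...) followed by an optional forceMerge. A condensed sketch, using local FSDirectory paths instead of HdfsDirectory (an assumption made here for a self-contained example):

import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ShardMerger {
  // merge the given shard indexes into dstPath, then force-merge down to maxSegments
  static void merge(Path dstPath, Path[] shardPaths, int maxSegments) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(null) // no analyzer needed: no new docs
        .setOpenMode(OpenMode.CREATE)
        .setUseCompoundFile(false);
    try (Directory dst = FSDirectory.open(dstPath);
         IndexWriter writer = new IndexWriter(dst, iwc)) {
      Directory[] shards = new Directory[shardPaths.length];
      for (int i = 0; i < shardPaths.length; i++) {
        shards[i] = FSDirectory.open(shardPaths[i]);
      }
      writer.addIndexes(shards); // logical merge: copies segments, no re-indexing
      writer.forceMerge(maxSegments); // physical merge down to maxSegments
      for (Directory shard : shards) {
        shard.close();
      }
    }
  }
}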