private IndexWriter createWriter(boolean create) throws IOException {
  try {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(deletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
      verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Throwable ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    MergePolicy mergePolicy = config().getMergePolicy();
    // Give us the opportunity to upgrade old segments while performing background merges
    mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
    iwc.setMergePolicy(mergePolicy);
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return new IndexWriter(store.directory(), iwc);
  } catch (LockObtainFailedException ex) {
    logger.warn("could not lock IndexWriter", ex);
    throw ex;
  }
}
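A minimal, self-contained sketch of the same configuration pattern, using a plain FSDirectory instead of the engine-specific plumbing (engineConfig, deletionPolicy, mergeScheduler, and store belong to the surrounding Elasticsearch class and are not reproduced here). The analyzer, path handling, and buffer size below are illustrative assumptions, not the engine's actual values:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class WriterFactorySketch {
  public static IndexWriter openWriter(String path, boolean create) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); // illustrative analyzer
    iwc.setCommitOnClose(false); // caller controls when commits happen, as in the engine code
    iwc.setOpenMode(
        create
            ? IndexWriterConfig.OpenMode.CREATE // overwrite any existing index
            : IndexWriterConfig.OpenMode.APPEND); // fail if no index exists yet
    iwc.setRAMBufferSizeMB(64.0); // illustrative indexing buffer size
    iwc.setUseCompoundFile(true); // fewer open file handles per segment
    return new IndexWriter(FSDirectory.open(Paths.get(path)), iwc);
  }
}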
@Override
public void run() {
  IndexWriter writer = null;
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  for (int i = 0; i < this.numIteration; i++) {
    if (VERBOSE) {
      System.out.println("TEST: WriterThread iter=" + i);
    }
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    // We only print the IW infoStream output on exc, below:
    PrintStream printStream;
    try {
      printStream = new PrintStream(baos, true, "UTF8");
    } catch (UnsupportedEncodingException uee) {
      // shouldn't happen
      throw new RuntimeException(uee);
    }
    iwc.setInfoStream(new PrintStreamInfoStream(printStream));
    printStream.println("\nTEST: WriterThread iter=" + i);
    iwc.setOpenMode(OpenMode.APPEND);
    try {
      writer = new IndexWriter(dir, iwc);
    } catch (Throwable t) {
      if (Constants.WINDOWS && t instanceof AccessDeniedException) {
        // LUCENE-6684: suppress this: on Windows, a file in the curious "pending delete" state
        // can cause this exc on IW init, where one thread/process deleted an old
        // segments_N, but the delete hasn't finished yet because other threads/processes
        // still have it open
        printStream.println("TEST: AccessDeniedException on init writer");
        t.printStackTrace(printStream);
      } else {
        hitException = true;
        System.out.println("Stress Test Index Writer: creation hit unexpected exception: " + t.toString());
        t.printStackTrace(System.out);
        System.out.println(toString(baos));
      }
      break;
    }
    if (writer != null) {
      try {
        addDoc(writer);
      } catch (Throwable t) {
        hitException = true;
        System.out.println("Stress Test Index Writer: addDoc hit unexpected exception: " + t.toString());
        t.printStackTrace(System.out);
        System.out.println(toString(baos));
        break;
      }
      try {
        writer.close();
      } catch (Throwable t) {
        hitException = true;
        System.out.println("Stress Test Index Writer: close hit unexpected exception: " + t.toString());
        t.printStackTrace(System.out);
        System.out.println(toString(baos));
        break;
      }
      writer = null;
    }
  }
}
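The run() loop above depends on two helpers of the enclosing test class whose bodies are not shown: addDoc(writer) and toString(baos). A plausible minimal sketch of each, under the assumption that the test indexes a single trivial document and decodes the captured infoStream output as UTF-8 (the field name and content below are illustrative):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;

// hypothetical body: index one throwaway document per iteration
private void addDoc(IndexWriter writer) throws IOException {
  Document doc = new Document();
  doc.add(new TextField("content", "aaa bbb ccc", Field.Store.NO)); // illustrative field/content
  writer.addDocument(doc);
}

// hypothetical body: decode the buffered infoStream output for printing on failure
private static String toString(ByteArrayOutputStream baos) {
  try {
    return baos.toString("UTF8");
  } catch (UnsupportedEncodingException uee) {
    throw new RuntimeException(uee); // shouldn't happen: UTF-8 is always supported
  }
}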
@Override
public void close(TaskAttemptContext context) throws IOException {
  LOG.debug(
      "Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
  writeShardNumberFile(context);
  heartBeater.needHeartBeat();
  try {
    Directory mergedIndex =
        new HdfsDirectory(workDir, NoLockFactory.INSTANCE, context.getConfiguration());

    // TODO: shouldn't we pull the Version from the solrconfig.xml?
    IndexWriterConfig writerConfig =
        new IndexWriterConfig(null)
            .setOpenMode(OpenMode.CREATE)
            .setUseCompoundFile(false)
    // .setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
    // .setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
    ;
    if (LOG.isDebugEnabled()) {
      writerConfig.setInfoStream(System.out);
    }
    // writerConfig.setRAMBufferSizeMB(100); // improve performance
    // writerConfig.setMaxThreadStates(1);

    // disable compound file to improve performance
    // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
    // also see defaults in SolrIndexConfig
    MergePolicy mergePolicy = writerConfig.getMergePolicy();
    LOG.debug("mergePolicy was: {}", mergePolicy);
    if (mergePolicy instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
      // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
    } else if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    }
    LOG.info("Using mergePolicy: {}", mergePolicy);

    IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

    Directory[] indexes = new Directory[shards.size()];
    for (int i = 0; i < shards.size(); i++) {
      indexes[i] = new HdfsDirectory(shards.get(i), NoLockFactory.INSTANCE, context.getConfiguration());
    }

    context.setStatus("Logically merging " + shards.size() + " shards into one shard");
    LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
    RTimer timer = new RTimer();

    writer.addIndexes(indexes);
    // TODO: avoid intermediate copying of files into dst directory; rename the files into the
    // dir instead (cp -> rename). This can improve performance and turns this phase into a true
    // "logical" merge, completing in constant time.
    // See https://issues.apache.org/jira/browse/LUCENE-4746

    timer.stop();
    if (LOG.isDebugEnabled()) {
      context
          .getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
          .increment((long) timer.getTime());
    }
    LOG.info("Logical merge took {}ms", timer.getTime());

    int maxSegments =
        context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
    context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
    LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
    timer = new RTimer();
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments);
      // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
      // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
    }
    timer.stop();
    if (LOG.isDebugEnabled()) {
      context
          .getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
          .increment((long) timer.getTime());
    }
    LOG.info(
        "Optimizing Solr: done forcing tree merge down to {} segments in {}ms",
        maxSegments,
        timer.getTime());

    timer = new RTimer();
    LOG.info("Optimizing Solr: Closing index writer");
    writer.close();
    LOG.info("Optimizing Solr: Done closing index writer in {}ms", timer.getTime());
    context.setStatus("Done");
  } finally {
    heartBeater.cancelHeartBeat();
    heartBeater.close();
  }
}
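The essential merge pattern above (open a CREATE-mode writer over the destination, addIndexes() the shard directories, then forceMerge() down to a segment budget) can be sketched without the Hadoop, HDFS, counter, and heartbeat machinery. The local-filesystem reduction below is illustrative, not the Solr class itself:

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ShardMergeSketch {
  public static void mergeShards(Path dst, List<Path> shardDirs, int maxSegments) throws IOException {
    // null analyzer is fine here: addIndexes() copies segments, it never tokenizes documents
    IndexWriterConfig iwc =
        new IndexWriterConfig(null).setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try (IndexWriter writer = new IndexWriter(FSDirectory.open(dst), iwc)) {
      Directory[] indexes = new Directory[shardDirs.size()];
      for (int i = 0; i < shardDirs.size(); i++) {
        indexes[i] = FSDirectory.open(shardDirs.get(i));
      }
      writer.addIndexes(indexes); // "logical" merge: pulls segments from every source index
      if (maxSegments < Integer.MAX_VALUE) {
        writer.forceMerge(maxSegments); // physical merge down to the requested segment count
      }
    }
  }
}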