/**
 * Merges the given taxonomy and index directories and commits the changes to the given writers.
 */
public static void merge(
    Directory srcIndexDir,
    Directory srcTaxDir,
    OrdinalMap map,
    IndexWriter destIndexWriter,
    DirectoryTaxonomyWriter destTaxWriter)
    throws IOException {
  // merge the taxonomies
  destTaxWriter.addTaxonomy(srcTaxDir, map);

  int[] ordinalMap = map.getMap();
  FacetIndexingParams params = new DefaultFacetIndexingParams();

  DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1);
  List<AtomicReaderContext> leaves = reader.leaves();
  AtomicReader[] wrappedLeaves = new AtomicReader[leaves.size()];
  for (int i = 0; i < leaves.size(); i++) {
    wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params);
  }
  try {
    destIndexWriter.addIndexes(new MultiReader(wrappedLeaves));
    // commit changes to taxonomy and index respectively.
    destTaxWriter.commit();
    destIndexWriter.commit();
  } finally {
    reader.close();
  }
}
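A minimal sketch of how a caller might invoke this helper, under the same Lucene 4.x facet API the method itself uses. The directory paths, the Version constant, the analyzer, and the use of DirectoryTaxonomyWriter.MemoryOrdinalMap are illustrative assumptions, not taken from the original code; the usual Lucene imports are assumed.

// Hypothetical caller: all paths are placeholders; MemoryOrdinalMap is assumed to be
// an available OrdinalMap implementation in this facet version.
Directory srcIndexDir = FSDirectory.open(new File("/data/src/index"));
Directory srcTaxDir = FSDirectory.open(new File("/data/src/taxonomy"));
Directory destIndexDir = FSDirectory.open(new File("/data/dest/index"));
Directory destTaxDir = FSDirectory.open(new File("/data/dest/taxonomy"));

IndexWriter destIndexWriter =
    new IndexWriter(
        destIndexDir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir);
try {
  merge(
      srcIndexDir,
      srcTaxDir,
      new DirectoryTaxonomyWriter.MemoryOrdinalMap(),
      destIndexWriter,
      destTaxWriter);
} finally {
  destTaxWriter.close();
  destIndexWriter.close();
}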
public void mergeIndex() throws IOException {
  File indexDir = new File(FILE_INDEX);
  FSDirectory fsdir = FSDirectory.open(indexDir);
  Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_47);
  IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, luceneAnalyzer);
  config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  IndexWriter fswriter = new IndexWriter(fsdir, config);
  fswriter.addIndexes(ramdir); // merge the in-memory index into the on-disk index
  fswriter.close();
}
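For context, a sketch of how the ramdir field referenced above might be populated before mergeIndex() runs, using the same Lucene 4.7 API. The helper name, field name, and document content are assumptions for illustration only.

// Assumed setup for the in-memory index that mergeIndex() later folds into the FSDirectory.
// buildRamIndex() is a hypothetical helper; "content" and the indexed text are made up.
private Directory ramdir = new RAMDirectory();

private void buildRamIndex() throws IOException {
  IndexWriterConfig ramConfig =
      new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
  IndexWriter ramWriter = new IndexWriter(ramdir, ramConfig);
  Document doc = new Document();
  doc.add(new TextField("content", "example text indexed in memory", Field.Store.YES));
  ramWriter.addDocument(doc);
  // The RAM-side writer must be closed (or at least committed) before
  // addIndexes(Directory...) can read the source index.
  ramWriter.close();
}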
private void mergeIndexByPart(
    Path minorPath,
    Path inputPath,
    Path outputPath,
    int partId,
    int localIndexVer,
    int maxVersion,
    int partNo)
    throws CorruptIndexException, IOException {
  List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
  if (minorPath != null && PathUtil.exists(minorPath)) {
    if (PathUtil.exists(minorPath.cat(partId + ""))) {
      mergeIndexArray.add(
          IndexReader.open(
              FSDirectory.open(
                  minorPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile())));
    }
  }
  for (int i = localIndexVer + 1; i <= maxVersion; i++) {
    Path segPath = inputPath.cat(i + "");
    Path[] userPathes = segPath.listPathes();
    for (Path userPath : userPathes) {
      if (!userPath.getName().equals("built")) {
        int shouldInPart = LSUtils.genPartId(userPath.getName(), partNo);
        if (PathUtil.exists(segPath) && shouldInPart == partId) {
          mergeIndexArray.add(
              IndexReader.open(
                  FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
        }
      }
    }
  }
  IndexWriter indexWriter =
      new IndexWriter(
          FSDirectory.open(
              outputPath
                  .cat(MailConstants.PART_PRE + partId)
                  .cat(IndexBuilder.LUCENE_INDEX_DIR)
                  .asFile()),
          new IKAnalyzer(true),
          true,
          IndexWriter.MaxFieldLength.LIMITED);
  indexWriter.setMaxMergeDocs(1024);
  indexWriter.setMergeFactor(100);
  indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
  indexWriter.close();
}
public void sort(File directory) throws IOException {
  LOG.info("IndexSorter: starting.");
  Date start = new Date();
  int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128);
  IndexReader reader = IndexReader.open(new File(directory, "index"));
  Searcher searcher =
      new IndexSearcher(new File(directory, "index").getAbsolutePath()); // TODO MC
  SortingReader sorter = new SortingReader(reader, newToOld(reader, searcher)); // TODO MC
  IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"), null, true);
  writer.setTermIndexInterval(termIndexInterval);
  writer.setUseCompoundFile(false);
  writer.addIndexes(new IndexReader[] {sorter});
  writer.close();
  Date end = new Date();
  LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime()) + " total milliseconds");
}
public static void main(String[] args) {
  Date start = new Date();
  try {
    String path;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    System.out.println("Enter the merged index path:");
    path = br.readLine();
    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_30);
    File file = new File(path);
    Directory directory = new SimpleFSDirectory(file);
    IndexWriter iwriter =
        new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(Integer.MAX_VALUE));
    iwriter.setMergeFactor(10000);
    iwriter.setMaxMergeDocs(Integer.MAX_VALUE);
    iwriter.setRAMBufferSizeMB(100);
    String index_path;
    Directory index;
    while (true) {
      System.out.println("Enter the index directory or 'exit' to quit");
      index_path = br.readLine();
      if (index_path.equals("exit")) break;
      File folder = new File(index_path);
      String filepath = "";
      File[] listOfFiles = folder.listFiles();
      for (int i = 0; i < listOfFiles.length; i++) {
        if (listOfFiles[i].isDirectory()) {
          filepath = index_path + listOfFiles[i].getName();
          try {
            index = new SimpleFSDirectory(new File(filepath));
            System.out.println("Merging " + filepath + " ...");
            iwriter.addIndexes(index);
            System.out.println("Merging " + filepath + " done");
          } catch (Exception e) {
            System.out.println("Index creation/merge failed for directory " + filepath);
            e.printStackTrace();
          }
        }
      }
    }
    System.out.print("Optimizing index...");
    try {
      iwriter.optimize();
      System.out.println("Optimization successful ...");
    } catch (Exception e) {
      System.out.println("Optimization failed ...");
      e.printStackTrace();
    }
    try {
      iwriter.close();
      System.out.println("Close successful ...");
    } catch (Exception e) {
      System.out.println("Close failed ...");
    }
    System.out.println("done all merging");
    Date end = new Date();
    System.out.println(
        "Total indexing time with optimize: "
            + ((end.getTime() - start.getTime()) / 1000)
            + " seconds");
  } catch (IOException e) {
    e.printStackTrace();
  }
}
public void addIndexes(IndexReader... readers) throws IOException {
  w.addIndexes(readers);
}
public void addIndexes(Directory... dirs) throws IOException {
  w.addIndexes(dirs);
}
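The two overloads above simply delegate to a wrapped IndexWriter (w). For readers unfamiliar with the underlying call, here is a minimal, self-contained sketch of merging two on-disk indexes with addIndexes(Directory...), assuming a recent Lucene version where FSDirectory.open takes a java.nio.file.Path; the method name, paths, and analyzer are placeholders, and the usual Lucene imports are assumed.

// Hypothetical standalone helper showing the direct addIndexes(Directory...) call
// that the wrapper above forwards to. All paths are placeholders.
public static void mergeTwoIndexes(java.nio.file.Path src1Path,
                                   java.nio.file.Path src2Path,
                                   java.nio.file.Path destPath) throws IOException {
  try (Directory src1 = FSDirectory.open(src1Path);
      Directory src2 = FSDirectory.open(src2Path);
      Directory dest = FSDirectory.open(destPath);
      IndexWriter writer = new IndexWriter(dest, new IndexWriterConfig(new StandardAnalyzer()))) {
    writer.addIndexes(src1, src2); // copies the source segments into the destination index
    writer.commit();
  }
}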
@Override
public void close(TaskAttemptContext context) throws IOException {
  LOG.debug(
      "Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir
          + ", srcDirs: " + shards);
  writeShardNumberFile(context);
  heartBeater.needHeartBeat();
  try {
    Directory mergedIndex =
        new HdfsDirectory(workDir, NoLockFactory.INSTANCE, context.getConfiguration());

    // TODO: shouldn't we pull the Version from the solrconfig.xml?
    IndexWriterConfig writerConfig =
        new IndexWriterConfig(null)
            .setOpenMode(OpenMode.CREATE)
            .setUseCompoundFile(false)
            // .setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            // .setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;

    if (LOG.isDebugEnabled()) {
      writerConfig.setInfoStream(System.out);
    }
    // writerConfig.setRAMBufferSizeMB(100); // improve performance
    // writerConfig.setMaxThreadStates(1);

    // disable compound file to improve performance
    // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
    // also see defaults in SolrIndexConfig
    MergePolicy mergePolicy = writerConfig.getMergePolicy();
    LOG.debug("mergePolicy was: {}", mergePolicy);
    if (mergePolicy instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
      // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
    } else if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    }
    LOG.info("Using mergePolicy: {}", mergePolicy);

    IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

    Directory[] indexes = new Directory[shards.size()];
    for (int i = 0; i < shards.size(); i++) {
      indexes[i] =
          new HdfsDirectory(shards.get(i), NoLockFactory.INSTANCE, context.getConfiguration());
    }

    context.setStatus("Logically merging " + shards.size() + " shards into one shard");
    LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
    RTimer timer = new RTimer();

    writer.addIndexes(indexes);
    // TODO: avoid intermediate copying of files into dst directory; rename the files into the
    // dir instead (cp -> rename). This can improve performance and turns this phase into a true
    // "logical" merge, completing in constant time.
    // See https://issues.apache.org/jira/browse/LUCENE-4746

    timer.stop();
    if (LOG.isDebugEnabled()) {
      context
          .getCounter(
              SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
          .increment((long) timer.getTime());
    }
    LOG.info("Logical merge took {}ms", timer.getTime());

    int maxSegments =
        context
            .getConfiguration()
            .getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
    context.setStatus(
        "Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
    LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
    timer = new RTimer();
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments);
      // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
      // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
    }
    timer.stop();
    if (LOG.isDebugEnabled()) {
      context
          .getCounter(
              SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
          .increment((long) timer.getTime());
    }
    LOG.info(
        "Optimizing Solr: done forcing tree merge down to {} segments in {}ms",
        maxSegments,
        timer.getTime());

    timer = new RTimer();
    LOG.info("Optimizing Solr: Closing index writer");
    writer.close();
    LOG.info("Optimizing Solr: Done closing index writer in {}ms", timer.getTime());
    context.setStatus("Done");
  } finally {
    heartBeater.cancelHeartBeat();
    heartBeater.close();
  }
}
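Stripped of the Hadoop and Solr plumbing, the core pattern in the method above is addIndexes(Directory...) followed by an optional forceMerge(int). The sketch below shows just that pattern against local FSDirectory instances; it is a hedged illustration, not the real implementation, which operates on HdfsDirectory, counters, and a heartbeater. The method name, paths, analyzer, and maxSegments handling are assumptions, and standard Lucene imports are assumed.

// Minimal local-disk version of the logical merge + tree merge steps shown above.
// All names here are placeholders.
public static void logicalMerge(
    java.nio.file.Path destPath, List<java.nio.file.Path> shardPaths, int maxSegments)
    throws IOException {
  IndexWriterConfig config =
      new IndexWriterConfig(new StandardAnalyzer())
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setUseCompoundFile(false);
  try (Directory dest = FSDirectory.open(destPath);
      IndexWriter writer = new IndexWriter(dest, config)) {
    Directory[] shards = new Directory[shardPaths.size()];
    for (int i = 0; i < shardPaths.size(); i++) {
      shards[i] = FSDirectory.open(shardPaths.get(i));
    }
    writer.addIndexes(shards); // "logical" merge: pulls the shard segments into the destination
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments); // physical merge down to the requested segment count
    }
  }
}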