  /**
   * Merges the given taxonomy and index directories and commits the changes to the given writers.
   */
  public static void merge(
      Directory srcIndexDir,
      Directory srcTaxDir,
      OrdinalMap map,
      IndexWriter destIndexWriter,
      DirectoryTaxonomyWriter destTaxWriter)
      throws IOException {
    // merge the taxonomies
    destTaxWriter.addTaxonomy(srcTaxDir, map);

    int[] ordinalMap = map.getMap();
    FacetIndexingParams params = new DefaultFacetIndexingParams();

    DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1);
    List<AtomicReaderContext> leaves = reader.leaves();
    AtomicReader[] wrappedLeaves = new AtomicReader[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
      wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params);
    }
    try {
      destIndexWriter.addIndexes(new MultiReader(wrappedLeaves));

      // commit changes to taxonomy and index respectively.
      destTaxWriter.commit();
      destIndexWriter.commit();
    } finally {
      reader.close();
    }
  }
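A minimal usage sketch for the helper above (hedged: it assumes the Lucene 4.x facet API, where DirectoryTaxonomyWriter.MemoryOrdinalMap implements OrdinalMap; the source directories and destination writers are placeholders set up elsewhere):

  // Sketch only: keep the source-to-destination ordinal mapping in memory, then merge.
  OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
  merge(srcIndexDir, srcTaxDir, map, destIndexWriter, destTaxWriter);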
Example #2
 public void mergeIndex() throws IOException {
   File indexDir = new File(FILE_INDEX);
   FSDirectory fsdir = FSDirectory.open(indexDir);
   Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_47);
   IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, luceneAnalyzer);
   config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
   IndexWriter fswriter = new IndexWriter(fsdir, config);
   fswriter.addIndexes(ramdir); // merge the in-memory index (ramdir) into the on-disk index
   fswriter.close();
 }
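For context, a hedged sketch of how the in-memory ramdir referenced above might be populated before the merge (the field, document, and field names are hypothetical; same Lucene 4.7 APIs):

   // Hypothetical setup for the ramdir field used by mergeIndex().
   Directory ramdir = new RAMDirectory();
   IndexWriter ramWriter =
       new IndexWriter(
           ramdir, new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)));
   Document doc = new Document();
   doc.add(new TextField("content", "buffered in memory, merged to disk later", Field.Store.NO));
   ramWriter.addDocument(doc);
   ramWriter.close(); // flush so addIndexes can see the segments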
  private void mergeIndexByPart(
      Path minorPath,
      Path inputPath,
      Path outputPath,
      int partId,
      int localIndexVer,
      int maxVersion,
      int partNo)
      throws CorruptIndexException, IOException {
    List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
    if (minorPath != null && PathUtil.exists(minorPath)) {
      if (PathUtil.exists(minorPath.cat(partId + ""))) {
        mergeIndexArray.add(
            IndexReader.open(
                FSDirectory.open(
                    minorPath
                        .cat(MailConstants.PART_PRE + partId)
                        .cat(IndexBuilder.LUCENE_INDEX_DIR)
                        .asFile())));
      }
    }

    for (int i = localIndexVer + 1; i <= maxVersion; i++) {
      Path segPath = inputPath.cat(i + "");
      Path[] userPaths = segPath.listPathes();
      for (Path userPath : userPaths) {
        if (!userPath.getName().equals("built")) {
          int shouldInPart = LSUtils.genPartId(userPath.getName(), partNo);
          if (PathUtil.exists(segPath) && shouldInPart == partId) {
            mergeIndexArray.add(
                IndexReader.open(
                    FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
          }
        }
      }
    }
    IndexWriter indexWriter =
        new IndexWriter(
            FSDirectory.open(
                outputPath
                    .cat(MailConstants.PART_PRE + partId)
                    .cat(IndexBuilder.LUCENE_INDEX_DIR)
                    .asFile()),
            new IKAnalyzer(true),
            true,
            IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.setMaxMergeDocs(1024);
    indexWriter.setMergeFactor(100);
    indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
    indexWriter.close();
    // addIndexes does not close the source readers, so release them explicitly
    for (IndexReader mergedReader : mergeIndexArray) {
      mergedReader.close();
    }
  }
  public void sort(File directory) throws IOException {
    LOG.info("IndexSorter: starting.");
    Date start = new Date();
    int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128);
    IndexReader reader = IndexReader.open(new File(directory, "index"));
    Searcher searcher =
        new IndexSearcher(new File(directory, "index").getAbsolutePath()); // TODO MC

    SortingReader sorter = new SortingReader(reader, newToOld(reader, searcher)); // TODO MC
    IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"), null, true);
    writer.setTermIndexInterval(termIndexInterval);
    writer.setUseCompoundFile(false);
    writer.addIndexes(new IndexReader[] {sorter});
    writer.close();
    searcher.close(); // release the searcher and the reader it opened
    reader.close();
    Date end = new Date();
    LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime()) + " total milliseconds");
  }
  public static void main(String[] args) {
    Date start = new Date();
    try {

      String path;

      BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
      System.out.println("Enter the merged index path:");
      path = br.readLine();

      Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_30);
      File file = new File(path);

      Directory directory = new SimpleFSDirectory(file);

      IndexWriter iwriter =
          new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(Integer.MAX_VALUE));
      iwriter.setMergeFactor(10000);
      iwriter.setMaxMergeDocs(Integer.MAX_VALUE);
      iwriter.setRAMBufferSizeMB(100);

      String index_path;
      Directory index;

      while (true) {
        System.out.println("Enter the index Directory or 'exit' to quit");
        index_path = br.readLine();
        if (index_path.equals("exit")) break;

        File folder = new File(index_path);
        String filepath = "";
        File[] listOfFiles = folder.listFiles();
        for (int i = 0; i < listOfFiles.length; i++) {
          if (listOfFiles[i].isDirectory()) {
            filepath = new File(folder, listOfFiles[i].getName()).getPath();
            try {
              index = new SimpleFSDirectory(new File(filepath));
              System.out.println("Merging " + filepath + " optimize");
              iwriter.addIndexes(index);
              System.out.println("Merging " + filepath + " done");
            } catch (Exception e) {
              System.out.println("Index creation/merge failed for directiory " + filepath);
              e.printStackTrace();
            }
          }
        }
      }

      System.out.print("Optimizing index...");
      try {
        iwriter.optimize();
        System.out.println("Optimzation successful ...");
      } catch (Exception e) {
        System.out.println("Optimzation failed ...");
        e.printStackTrace();
      }
      try {
        iwriter.close();
        System.out.println("Close successful ...");
      } catch (Exception e) {
        System.out.println("Close failed ...");
      }
      System.out.println("done all merging");

      Date end = new Date();
      System.out.println(
          "Total indexing time with optimize: "
              + ((end.getTime() - start.getTime()) / 1000)
              + " seconds");

    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Example #6
 public void addIndexes(IndexReader... readers) throws IOException {
   w.addIndexes(readers);
 }
Example #7
 public void addIndexes(Directory... dirs) throws IOException {
   w.addIndexes(dirs);
 }
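The two delegating overloads above differ under the hood: addIndexes(Directory...) copies segment files from the source directories, while addIndexes(IndexReader...) merges documents through the readers. A hedged sketch showing each overload in turn (they would normally be alternatives, not used together; `wrapper` and `srcDir` are placeholder names, and a Lucene 4.x writer is assumed underneath):

   // Option 1: file-level copy of the source segments (fast)
   wrapper.addIndexes(srcDir);
   // Option 2: merge through a reader (slower, but allows filtering/remapping while copying)
   DirectoryReader srcReader = DirectoryReader.open(srcDir);
   try {
     wrapper.addIndexes(srcReader);
   } finally {
     srcReader.close();
   }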
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug(
          "Task "
              + context.getTaskAttemptID()
              + " merging into dstDir: "
              + workDir
              + ", srcDirs: "
              + shards);
      writeShardNumberFile(context);
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex =
            new HdfsDirectory(workDir, NoLockFactory.INSTANCE, context.getConfiguration());

        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig =
            new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            // .setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            // .setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;

        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
        //        writerConfig.setRAMBufferSizeMB(100); // improve performance
        //        writerConfig.setMaxThreadStates(1);

        // disable compound file to improve performance
        // also see
        // http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
          //          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
          //          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
          //          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);

        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] =
              new HdfsDirectory(shards.get(i), NoLockFactory.INSTANCE, context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        RTimer timer = new RTimer();

        writer.addIndexes(indexes);
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the
        // dir instead (cp -> rename)
        // This can improve performance and turns this phase into a true "logical" merge, completing
        // in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746

        timer.stop();
        if (LOG.isDebugEnabled()) {
          context
              .getCounter(
                  SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
              .increment((long) timer.getTime());
        }
        LOG.info("Logical merge took {}ms", timer.getTime());
        int maxSegments =
            context
                .getConfiguration()
                .getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus(
            "Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        timer = new RTimer();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments);
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
          // see
          // http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        timer.stop();
        if (LOG.isDebugEnabled()) {
          context
              .getCounter(
                  SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
              .increment((long) timer.getTime());
        }
        LOG.info(
            "Optimizing Solr: done forcing tree merge down to {} segments in {}ms",
            maxSegments,
            timer.getTime());

        timer = new RTimer();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        LOG.info("Optimizing Solr: Done closing index writer in {}ms", timer.getTime());
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }