/**
 * Merges the Lucene indexes selected for one partition into a newly created index under
 * {@code outputPath}.
 *
 * <p>Sources are collected in this order:
 *
 * <ol>
 *   <li>the index at {@code minorPath/<PART_PRE + partId>/<LUCENE_INDEX_DIR>}, when
 *       {@code minorPath} is non-null and the existence checks below pass;
 *   <li>for each version {@code localIndexVer + 1 .. maxVersion}, every child of
 *       {@code inputPath/<version>} not named {@code "built"} for which
 *       {@code LSUtils.genPartId(childName, partNo)} equals {@code partId}.
 * </ol>
 *
 * <p>All readers opened here, and the writer, are closed before the method returns, whether it
 * completes normally or throws.
 *
 * @param minorPath root of a previously merged index; may be {@code null}
 * @param inputPath root containing one sub-directory per version number
 * @param outputPath root under which the merged partition index is created
 * @param partId partition whose indexes are merged
 * @param localIndexVer versions up to and including this one are skipped
 * @param maxVersion last version (inclusive) to merge
 * @param partNo passed to {@code LSUtils.genPartId}; presumably the partition count — confirm
 * @throws CorruptIndexException if Lucene reports a corrupt index
 * @throws IOException on any I/O failure while opening, merging or closing an index
 */
private void mergeIndexByPart(
      Path minorPath,
      Path inputPath,
      Path outputPath,
      int partId,
      int localIndexVer,
      int maxVersion,
      int partNo)
      throws CorruptIndexException, IOException {
    List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
    IndexWriter indexWriter = null;
    try {
      if (minorPath != null && PathUtil.exists(minorPath)) {
        // NOTE(review): the existence check uses "<partId>" while the index is opened from
        // "<PART_PRE + partId>"; confirm both directories are expected to exist.
        if (PathUtil.exists(minorPath.cat(partId + ""))) {
          mergeIndexArray.add(
              IndexReader.open(
                  FSDirectory.open(
                      minorPath
                          .cat(MailConstants.PART_PRE + partId)
                          .cat(IndexBuilder.LUCENE_INDEX_DIR)
                          .asFile())));
        }
      }

      for (int i = localIndexVer + 1; i <= maxVersion; i++) {
        Path segPath = inputPath.cat(i + "");
        // Check the version directory once, before listing it, instead of once per child after
        // the listing has already been attempted.
        if (!PathUtil.exists(segPath)) {
          continue;
        }
        Path[] userPathes = segPath.listPathes();
        if (userPathes == null) {
          continue;
        }
        for (Path userPath : userPathes) {
          if (userPath.getName().equals("built")) {
            continue;
          }
          int shouldInPart = LSUtils.genPartId(userPath.getName(), partNo);
          if (shouldInPart == partId) {
            mergeIndexArray.add(
                IndexReader.open(
                    FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
          }
        }
      }

      indexWriter =
          new IndexWriter(
              FSDirectory.open(
                  outputPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile()),
              new IKAnalyzer(true),
              true,
              IndexWriter.MaxFieldLength.LIMITED);
      indexWriter.setMaxMergeDocs(1024);
      indexWriter.setMergeFactor(100);
      indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
    } finally {
      // Close the writer first; the readers are closed whether or not that succeeds.
      try {
        if (indexWriter != null) {
          indexWriter.close();
        }
      } finally {
        IOException closeFailure = null;
        for (IndexReader reader : mergeIndexArray) {
          try {
            reader.close();
          } catch (IOException e) {
            // Keep closing the remaining readers; report the first failure afterwards.
            if (closeFailure == null) {
              closeFailure = e;
            }
          }
        }
        if (closeFailure != null) {
          throw closeFailure;
        }
      }
    }
  }
  // Example no. 2
  // 0
  /**
   * Loads the spelling dictionary from the external resource named by {@code sourceLocation}
   * and stores it in {@code dictionary}.
   *
   * <p>When {@code fieldTypeName} is set and the schema guard below passes, every line of the
   * resource is indexed as one document into an in-memory index using that field type's
   * analyzer, and the dictionary is built from that index. Otherwise the resource is read
   * directly as a plain-text dictionary, decoded with {@code characterEncoding} when one is set.
   *
   * <p>An {@link IOException} is logged and not rethrown; {@code dictionary} is then left as it
   * was before the call.
   *
   * @param schema schema used to resolve {@code fieldTypeName}
   * @param loader resource loader used to read {@code sourceLocation}
   */
  @SuppressWarnings("unchecked")
  private void loadExternalFileDictionary(IndexSchema schema, SolrResourceLoader loader) {
    try {

      // Get the field's analyzer
      // NOTE(review): the guard uses getFieldTypeNoEx while the lookup uses getFieldTypes().get;
      // confirm both resolve fieldTypeName the same way, otherwise fieldType may be null here.
      if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        // Do index-time analysis using the given fieldType's analyzer
        RAMDirectory ramDir = new RAMDirectory();
        IndexWriter writer =
            new IndexWriter(
                ramDir, fieldType.getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
          writer.setMergeFactor(300);
          writer.setMaxBufferedDocs(150);

          List<String> lines = loader.getLines(sourceLocation, characterEncoding);

          for (String s : lines) {
            Document d = new Document();
            d.add(new Field(WORD_FIELD_NAME, s, Field.Store.NO, Field.Index.TOKENIZED));
            writer.addDocument(d);
          }
          writer.optimize();
        } finally {
          // Always release the writer, including when reading or indexing a line fails.
          writer.close();
        }

        dictionary = new HighFrequencyDictionary(IndexReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
      } else {
        // check if character encoding is defined
        if (characterEncoding == null) {
          dictionary = new PlainTextDictionary(loader.openResource(sourceLocation));
        } else {
          dictionary =
              new PlainTextDictionary(
                  new InputStreamReader(loader.openResource(sourceLocation), characterEncoding));
        }
      }

    } catch (IOException e) {
      log.error("Unable to load spellings", e);
    }
  }
  /**
   * Interactive console tool that merges several Lucene indexes into one.
   *
   * <p>Reads the target index path from standard input, then repeatedly reads a parent directory
   * and adds every sub-directory of it to the target index, until the user enters {@code exit}
   * or input ends. Finally the target index is optimized and closed, and the elapsed time in
   * seconds is printed.
   *
   * <p>A failure while merging one sub-directory is reported and the remaining ones are still
   * processed. The writer is closed even when reading input fails part-way through.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    Date start = new Date();
    try {

      BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
      System.out.println("Enter the merged index path:");
      String path = br.readLine();
      if (path == null) {
        // End of input before a target path was given: nothing to merge into.
        System.out.println("No merged index path given");
        return;
      }

      Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_30);
      File file = new File(path);

      Directory directory = new SimpleFSDirectory(file);

      IndexWriter iwriter =
          new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(Integer.MAX_VALUE));
      try {
        iwriter.setMergeFactor(10000);
        iwriter.setMaxMergeDocs(Integer.MAX_VALUE);
        iwriter.setRAMBufferSizeMB(100);

        while (true) {
          System.out.println("Enter the index Directory or 'exit' to quit");
          String indexPath = br.readLine();
          // readLine() returns null at end of input; treat that like "exit".
          if (indexPath == null || indexPath.equals("exit")) {
            break;
          }

          File folder = new File(indexPath);
          File[] listOfFiles = folder.listFiles();
          if (listOfFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.out.println("Cannot list directory " + indexPath);
            continue;
          }
          for (File child : listOfFiles) {
            if (!child.isDirectory()) {
              continue;
            }
            // Use the child's own path so the separator is correct whether or not the user
            // typed a trailing slash.
            String filepath = child.getPath();
            try {
              Directory index = new SimpleFSDirectory(child);
              System.out.println("Merging " + filepath + " optimize");
              iwriter.addIndexes(index);
              System.out.println("Merging " + filepath + " done");
            } catch (Exception e) {
              System.out.println("Index creation/merge failed for directiory " + filepath);
              e.printStackTrace();
            }
          }
        }

        System.out.print("Optimizing index...");
        try {
          iwriter.optimize();
          System.out.println("Optimzation successful ...");
        } catch (Exception e) {
          System.out.println("Optimzation failed ...");
          e.printStackTrace();
        }
      } finally {
        try {
          iwriter.close();
          System.out.println("Close successful ...");
        } catch (Exception e) {
          System.out.println("Close failed ...");
          e.printStackTrace();
        }
      }
      System.out.println("done all merging");

      Date end = new Date();
      System.out.println(
          "Total indexing time with optimize: " + ((end.getTime() - start.getTime()) / 1000));

    } catch (IOException e) {
      e.printStackTrace();
    }
  }