Ejemplo n.º 1
0
 /**
  * Removes every document from the index behind the given writer.
  *
  * <p>An {@link IOException} raised by the writer is not propagated: its stack trace is printed
  * and the method returns normally. Nothing here commits or closes the writer.
  *
  * @param writer the index writer whose documents are all deleted
  */
 public static void deleteAllIndex(IndexWriter writer) {
   try {
     writer.deleteAll();
   } catch (IOException deleteFailure) {
     // Best-effort: report the failure and carry on.
     deleteFailure.printStackTrace();
   }
 }
Ejemplo n.º 2
0
  /**
   * Opens a writer on {@code directory}, deletes every document and commits the deletion.
   *
   * <p>Failures are reported by printing the stack trace; nothing is propagated to the caller.
   * The writer, once opened, is always closed before the method returns.
   */
  public void delete() {
    IndexWriter writer = null;
    try {
      IndexWriterConfig config =
          new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
      writer = new IndexWriter(directory, config);

      // To remove only a subset, writer.deleteDocuments(...) accepts either a Query or a Term
      // (an exact-match value), e.g. new Term("id", "1").
      writer.deleteAll(); // delete everything
      writer.commit(); // make the deletion effective
    } catch (IOException e) {
      // CorruptIndexException and LockObtainFailedException are IOException subclasses and were
      // handled identically, so a single handler covers all three.
      e.printStackTrace();
    } finally {
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }
Ejemplo n.º 3
0
  /**
   * Rebuilds the index stored under {@code indexDir} from the supplied JSON payload.
   *
   * <p>All existing documents are deleted, then one document is added for each string in the
   * {@code "statusData"} array followed by each string in the {@code "userData"} array. The
   * writer is kept in the {@code indexWriter} field and closed before returning; closing is what
   * persists the changes, since no explicit commit is issued.
   *
   * <p>{@link IOException} and {@link JSONException} are reported by printing the stack trace and
   * are not propagated.
   *
   * @param indexData JSON object holding the {@code "statusData"} and {@code "userData"} arrays
   */
  public void buildIndex(JSONObject indexData) {
    try {
      Directory dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(dir, iwc);
      indexWriter.deleteAll();

      addTextDocuments(indexData.getJSONArray("statusData"));
      addTextDocuments(indexData.getJSONArray("userData"));

      System.out.println("Index is done");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      // The writer is still null when opening the directory or the writer itself failed;
      // closing unconditionally would raise a NullPointerException that hides the real error.
      if (indexWriter != null) {
        try {
          indexWriter.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }

  /** Adds one document per string in {@code texts}, each with a single "text" field. */
  private void addTextDocuments(JSONArray texts) throws IOException, JSONException {
    for (int i = 0; i < texts.length(); i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "text",
              texts.getString(i),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      indexWriter.addDocument(doc);
    }
  }
Ejemplo n.º 4
0
 public NewIndexer(String indexDir2) throws IOException {
   // create the index
   if (indexWriter2 == null) {
     indexWriter2 =
         new IndexWriter(
             FSDirectory.open(new File("resultsList")),
             new IndexWriterConfig(
                 Version.LUCENE_36,
                 new EnglishAnalyzer(Version.LUCENE_36, StandardAnalyzer.STOP_WORDS_SET)));
     indexWriter2.deleteAll();
   }
 }
Ejemplo n.º 5
0
 /**
  * Deletes every document from the index stored at {@code PATH}.
  *
  * <p>The deletion is committed, and the writer and directory are closed before returning, so
  * the change is persisted and the index write lock is released. {@link IOException}s are
  * reported by printing the stack trace and are not propagated.
  */
 public void deleteIndex() {
   try {
     Directory fsDir = FSDirectory.open(new File(PATH));
     NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
     try {
       IndexWriterConfig conf =
           new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
       conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
       IndexWriter writer = new IndexWriter(cachedFSDir, conf);
       try {
         writer.deleteAll();
         writer.commit();
       } finally {
         // Without this the writer keeps the index write lock and later writers cannot open.
         writer.close();
       }
     } finally {
       // Closing the caching wrapper also closes the directory it delegates to.
       cachedFSDir.close();
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
Ejemplo n.º 6
0
 /**
  * Creates the index: clears {@code directory} and adds one document per entry of {@code ids}.
  *
  * <p>Document {@code i} is filled from position {@code i} of the {@code ids}, {@code emails},
  * {@code contents}, {@code names}, {@code attachs} and {@code dates} arrays. Its boost is looked
  * up in {@code scores} by the part of the e-mail address after the last {@code '@'}, falling
  * back to {@code 0.5f} when there is no entry.
  *
  * <p>I/O failures are reported by printing the stack trace; the writer is always closed.
  */
 public void index() {
   IndexWriter writer = null;
   try {
     IndexWriterConfig config =
         new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
     writer = new IndexWriter(directory, config);
     writer.deleteAll();

     for (int i = 0; i < ids.length; i++) {
       Document doc = new Document();
       doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

       // The "email" field is added twice: the real address plus a generated one.
       String email = emails[i];
       doc.add(new Field("email", email, Field.Store.YES, Field.Index.NOT_ANALYZED));
       String generatedEmail = "test" + i + "@test.com";
       doc.add(new Field("email", generatedEmail, Field.Store.YES, Field.Index.NOT_ANALYZED));

       doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
       doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

       // Numeric value.
       NumericField attachField = new NumericField("attach", Field.Store.YES, true);
       doc.add(attachField.setIntValue(attachs[i]));
       // Date, stored as epoch milliseconds.
       NumericField dateField = new NumericField("date", Field.Store.YES, true);
       doc.add(dateField.setLongValue(dates[i].getTime()));

       String domain = email.substring(email.lastIndexOf("@") + 1);
       System.out.println(domain);
       // Lucene's default boost is 1; domains without a configured score get 0.5.
       doc.setBoost(scores.containsKey(domain) ? scores.get(domain) : 0.5f);

       writer.addDocument(doc);
     }
   } catch (IOException e) {
     // Also covers CorruptIndexException and LockObtainFailedException (IOException subclasses).
     e.printStackTrace();
   } finally {
     if (writer != null) {
       try {
         writer.close();
       } catch (IOException e) {
         e.printStackTrace();
       }
     }
   }
 }
 /**
  * Cancels pending and running population work, then deletes every document from the index.
  *
  * <p>Steps: empty {@code populateList}, raise {@code stopPopulation}, poll once per second
  * until {@code populatingClass} becomes {@code null}, lower {@code stopPopulation} again and
  * call {@code deleteAll()} on {@code iwriter}. Presumably a populating thread observes
  * {@code stopPopulation} and clears {@code populatingClass}; verify against that code.
  *
  * <p>An interrupt received while waiting does not abort the wait, but the thread's interrupt
  * status is restored before this method returns so callers can still observe it.
  *
  * @throws IcatException of type {@code INTERNAL} if the index delete fails with an I/O error
  */
 public synchronized void clear() throws IcatException {
   boolean interrupted = false;
   try {
     populateList.clear();
     stopPopulation = true;
     while (populatingClass != null) {
       try {
         Thread.sleep(1000);
       } catch (InterruptedException e) {
         // Keep waiting, but remember the interrupt instead of discarding it.
         interrupted = true;
       }
     }
     stopPopulation = false;
     iwriter.deleteAll();
   } catch (IOException e) {
     throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage());
   } finally {
     // Re-asserted only on the way out, so the wait loop and the index delete run exactly as
     // they did before with the flag cleared.
     if (interrupted) {
       Thread.currentThread().interrupt();
     }
   }
 }
 /**
  * Truncates the index: removes all {@link Document}s through {@code indexWriter} and then logs
  * an info message containing {@code logName}.
  *
  * <p>Nothing is logged when the delete fails.
  *
  * @throws IOException if the underlying writer reports an I/O error while deleting
  */
 public void truncate() throws IOException {
   indexWriter.deleteAll();
   Log.info("%s truncated", logName);
 }
Ejemplo n.º 9
0
 /**
  * Deletes all documents by delegating to {@code deleteAll()} on the wrapped writer {@code w}.
  *
  * @throws IOException if the wrapped writer fails
  */
 public void deleteAll() throws IOException {
   w.deleteAll();
 }
Ejemplo n.º 10
0
  /**
   * Builds a sentence-level index at {@code indexPath} from a JSON array of review objects.
   *
   * <p>Any existing index at that path is replaced. For each element the id ({@code idKey}) and
   * review text ({@code reviewKey}) are read. A review is skipped when it is null or empty,
   * contains any non-ASCII character, or has at most {@code TERM_MIN_THRESHOLD} tokens. Each
   * remaining review is split into sentences and every non-empty sentence becomes one document
   * with:
   *
   * <ul>
   *   <li>the review id (stored),
   *   <li>{@code "num"}: the sentence's position among the indexed sentences of its review,
   *   <li>the sentence text with {@code _ ' . ,} replaced by spaces and digits removed (stored).
   * </ul>
   *
   * <p>The writer is committed and closed on success. On any failure it is rolled back, which
   * discards the uncommitted work and releases the index write lock.
   *
   * @param jsonArray reviews to index; when {@code null} an error line is printed and nothing is
   *     indexed
   * @param indexPath file-system path of the index directory
   * @param stopwordsFile path of a stop-word file, or {@code null} to index without stop words
   * @throws Exception if reading the stop words, text processing or indexing fails
   */
  public void createSenIndex(JSONArray jsonArray, String indexPath, String stopwordsFile)
      throws Exception {

    if (jsonArray == null) {
      System.out.println("error: jsonArray is null!\n");
      return;
    }

    Analyzer analyzer;
    if (stopwordsFile == null) {
      analyzer = new SimpleAnalyzer();
    } else {
      analyzer = new StopAnalyzer(Paths.get(stopwordsFile));
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(Paths.get(indexPath)), iwc);

    long startTime = 0;
    long totalSentences = 0;
    boolean committed = false;
    try {
      indexWriter.deleteAll();

      TextUtil textUtil = new TextUtil();
      startTime = System.currentTimeMillis();

      System.out.println("jsonArray size: " + jsonArray.size());

      @SuppressWarnings("unchecked") // elements are expected to be JSON objects
      List<JSONObject> reviews = (List<JSONObject>) jsonArray;

      for (JSONObject jsonObj : reviews) {
        // Accept any numeric id representation, not only Long.
        long id = ((Number) jsonObj.get(idKey)).longValue();
        String review = (String) jsonObj.get(reviewKey);

        if (review == null || review.isEmpty()) {
          continue;
        }

        // A plain scan is used instead of String.matches(".*[^\\x00-\\x7F].*"): '.' does not
        // match line terminators, so that regex let multi-line non-ASCII reviews through.
        if (containsNonAscii(review)) {
          continue;
        }

        String[] tokens = textUtil.tokenize(review);
        if (tokens.length <= TERM_MIN_THRESHOLD) {
          continue;
        }

        String[] sentences = textUtil.sentenceDetect(review);
        long sentenceIndex = 0; // restarts for every review

        for (String sentence : sentences) {
          if (sentence == null || sentence.isEmpty()) {
            continue;
          }

          Document doc = new Document();
          doc.add(new LongField(idKey, id, Field.Store.YES));
          doc.add(new LongField("num", sentenceIndex, Field.Store.NO));
          doc.add(
              new TextField(
                  reviewKey,
                  sentence.replaceAll("[_'.,]", " ").replaceAll("[0-9]", ""),
                  Field.Store.YES));

          indexWriter.addDocument(doc);
          sentenceIndex++;
          totalSentences++;
        }
      }

      indexWriter.commit();
      committed = true;
    } finally {
      if (committed) {
        indexWriter.close();
      } else {
        // Drop the partial index and release the write lock instead of leaking the writer.
        indexWriter.rollback();
      }
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\n\ncreate index time: " + (endTime - startTime) + "ms");
    // Reports the total across all reviews; the per-review counter only held the last review.
    System.out.println("\n sentence num: " + totalSentences + "\n");
  }

  /** Returns true when {@code text} contains at least one char above the 7-bit ASCII range. */
  private static boolean containsNonAscii(String text) {
    for (int i = 0; i < text.length(); i++) {
      if (text.charAt(i) > 0x7F) {
        return true;
      }
    }
    return false;
  }
Ejemplo n.º 11
0
  /**
   * Creates the index at {@code PATH} from {@code LuceneIndexOperator.createDataSource()}.
   *
   * <p>Existing documents are deleted, one document is added per {@code MapBean}, and the writer
   * commits after every 10000 documents. At the end the index is optimized and committed.
   *
   * <p>The writer and directory are released on every path. On success the writer is closed; on
   * failure it is rolled back, which discards work since the last commit and releases the index
   * write lock.
   *
   * @throws IOException if opening, writing or committing the index fails
   */
  private static void createIndex() throws IOException {

    Directory fsDir = FSDirectory.open(new File(PATH));
    NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
    try {
      Analyzer analyzer = new IKAnalyzer();
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
      conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
      IndexWriter writer = new IndexWriter(cachedFSDir, conf);

      boolean finished = false;
      try {
        writer.deleteAll();

        // Data source for the index.
        List<MapBean> ls = LuceneIndexOperator.createDataSource();
        int i = 0;
        for (MapBean mdata : ls) {
          Document doc = toDocument(mdata);
          i++;
          writer.addDocument(doc);
          if (i % 10000 == 0) {
            writer.commit();
            System.out.println("提交" + i);
          }
        }
        writer.optimize(true);
        writer.commit();
        finished = true;
      } finally {
        if (finished) {
          writer.close();
        } else {
          // Release the write lock even when indexing failed part-way through.
          writer.rollback();
        }
      }
    } finally {
      // Closing the caching wrapper also closes the directory it delegates to.
      cachedFSDir.close();
    }
  }

  /** Maps one {@code MapBean} to the Lucene document stored for it. */
  private static Document toDocument(MapBean mdata) {
    Document doc = new Document();
    doc.add(new Field("id", "" + mdata.getId(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("name", mdata.getName(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("address", mdata.getAddress(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("city", mdata.getCity(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("num", mdata.getNum(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(
        new Field(
            "eastNew",
            nullToBlank(mdata.getEastNew()),
            Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            "northNew",
            nullToBlank(mdata.getNorthNew()),
            Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    doc.add(new Field("datatype", mdata.getDatatype(), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("phone", mdata.getPhone(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            "geom",
            mdata.getEastNew() + "," + mdata.getNorthNew(),
            Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(
        new Field(
            "dataType", nullToBlank(mdata.getDatatype()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(
        new Field(
            "comType",
            nullToBlank(mdata.getDataTypeByComType().getDataTypeName()),
            Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(
        new Field(
            "dataTypeKey",
            nullToBlank(mdata.getDataTypeByDataType().getDataTypeKey()),
            Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(
        new Field(
            "comTypeKey",
            nullToBlank(mdata.getDataTypeByComType().getDataTypeKey()),
            Field.Store.YES,
            Field.Index.ANALYZED));
    return doc;
  }