Example No. 1
0
 public void index() { // Build (rebuild) the full-text index from the parallel data arrays.
   IndexWriter writer = null;
   try {
     IndexWriterConfig config =
         new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
     writer = new IndexWriter(directory, config);
     // Wipe any previous contents so the index exactly mirrors the arrays.
     writer.deleteAll();
     for (int idx = 0; idx < ids.length; idx++) {
       Document doc = new Document();
       doc.add(new Field("id", ids[idx], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
       doc.add(new Field("email", emails[idx], Field.Store.YES, Field.Index.NOT_ANALYZED));
       // Second value for the multi-valued "email" field (a synthetic test address).
       doc.add(
           new Field(
               "email", "test" + idx + "@test.com", Field.Store.YES, Field.Index.NOT_ANALYZED));
       doc.add(new Field("content", contents[idx], Field.Store.NO, Field.Index.ANALYZED));
       doc.add(
           new Field("name", names[idx], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
       // Store numeric values (indexed so they support range queries).
       doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[idx]));
       // Store the date as epoch milliseconds.
       doc.add(
           new NumericField("date", Field.Store.YES, true).setLongValue(dates[idx].getTime()));
       // Boost each document by its email domain's score; unknown domains get 0.5
       // (the Lucene default boost is 1).
       String domain = emails[idx].substring(emails[idx].lastIndexOf("@") + 1);
       System.out.println(domain);
       doc.setBoost(scores.containsKey(domain) ? scores.get(domain) : 0.5f);
       writer.addDocument(doc);
     }
   } catch (CorruptIndexException e) {
     e.printStackTrace();
   } catch (LockObtainFailedException e) {
     e.printStackTrace();
   } catch (IOException e) {
     e.printStackTrace();
   } finally {
     try {
       if (writer != null) {
         writer.close();
       }
     } catch (CorruptIndexException e) {
       e.printStackTrace();
     } catch (IOException e) {
       e.printStackTrace();
     }
   }
 }
Example No. 2
0
  /**
   * Builds an in-memory index of the sentences in the text with the appropriate document boosts if
   * specified.
   *
   * @throws Exception if one is thrown.
   */
  /**
   * Builds an in-memory index of the sentences in the text, one Lucene document per sentence,
   * applying the per-document boost computed from the sentence's paragraph and sentence
   * position via {@code computeBoost(pno, sno)}.
   *
   * <p>Fix: the writer is now closed in a {@code finally} block, so the index writer (and its
   * write lock on {@code ramdir}) is released even when {@code addDocument} or {@code commit}
   * throws — previously an exception mid-loop leaked the writer.
   *
   * @throws Exception if tokenization, document addition, or the commit fails.
   */
  protected void buildIndex() throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, Tokenizer.getAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, config);
    try {
      int pno = 0; // paragraph number
      for (ArrayList<ArrayList<TokenInfo>> paragraph : structure.getStructure()) {
        ArrayList<String> strSentences = TokenizerUtils.recombineTokens2d(paragraph);
        int sno = 0; // sentence number within the current paragraph
        for (String sentence : strSentences) {
          Document doc = new Document();
          doc.add(new Field("text", sentence, Store.YES, Index.ANALYZED));
          doc.setBoost(computeBoost(pno, sno));
          writer.addDocument(doc);
          sno++;
        }
        pno++;
      }
      writer.commit();
    } finally {
      // Always release the writer so the RAM directory's write lock is freed.
      writer.close();
    }
  }