/**
   * Splits {@code content} into terms with the IK analyzer.
   *
   * <p>The returned list always starts with the unmodified {@code content}, followed by every
   * non-empty term the analyzer emits, in emission order.
   *
   * <p>If tokenization fails with an {@link IOException}, the error is logged and the terms
   * collected up to that point are returned.
   *
   * @param content the text to tokenize; {@code null} yields an empty list
   * @return a mutable list holding the original text followed by its terms; never {@code null}
   */
public static List<String> analyze(String content) {
    List<String> resultList = new ArrayList<String>();
    if (content == null) {
      // Nothing to tokenize; StringReader would reject a null source with an NPE.
      return resultList;
    }
    resultList.add(content);

    // Create the analyzer.
    IKAnalyzer analyer = new IKAnalyzer(true);
    analyer.setUseSmart(true);
    StringReader reader = new StringReader(content);
    try {
      // Tokenize.
      TokenStream tokenStream = analyer.tokenStream("", reader);
      try {
        CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class);
        // TokenStream consumer contract: reset() before the first incrementToken(),
        // end() after the last one, close() when done.
        tokenStream.reset();
        // Collect every non-empty term.
        while (tokenStream.incrementToken()) {
          String token = term.toString();
          if (!token.isEmpty()) {
            resultList.add(token);
          }
        }
        tokenStream.end();
      } finally {
        // Release the stream even when incrementToken() fails part-way through.
        tokenStream.close();
      }
    } catch (IOException ex) {
      logger.error("分词出错", ex);
    } finally {
      reader.close();
      analyer.close();
    }
    return resultList;
  }
  // Example #2
  // 0
  /**
   * Rebuilds the Lucene index stored under {@code indexDir} from a JSON payload.
   *
   * <p>All existing documents are deleted first. Then every string in the {@code "statusData"}
   * array, followed by every string in the {@code "userData"} array, is added as a document with
   * a single stored, analyzed {@code "text"} field that keeps term vectors with positions and
   * offsets. Text is analyzed with the IK analyzer in smart mode.
   *
   * <p>{@link IOException} and {@link JSONException} are reported via
   * {@code printStackTrace()} and not rethrown. The writer and the directory are closed in every
   * case; closing the writer is what commits the changes.
   *
   * @param indexData JSON object providing the {@code "statusData"} and {@code "userData"}
   *     string arrays
   */
  public void buildIndex(JSONObject indexData) {
    Directory dir = null;
    try {
      dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(dir, iwc);
      indexWriter.deleteAll();

      addTextDocuments(indexData.getJSONArray("statusData"));
      addTextDocuments(indexData.getJSONArray("userData"));
      System.out.println("Index is done");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      closeIndex(dir);
    }
  }

  /** Adds one document per string in {@code texts} to the current {@code indexWriter}. */
  private void addTextDocuments(JSONArray texts) throws IOException, JSONException {
    for (int i = 0; i < texts.length(); i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "text",
              texts.getString(i),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      indexWriter.addDocument(doc);
    }
  }

  /** Closes the index writer and then the directory, tolerating either being absent. */
  private void closeIndex(Directory dir) {
    // indexWriter is still null when opening the directory or creating the writer failed
    // on the first call; guard so the original error is not masked by an NPE.
    if (indexWriter != null) {
      try {
        indexWriter.close();
      } catch (IOException e) {
        // CorruptIndexException is an IOException, so this covers both.
        e.printStackTrace();
      }
    }
    if (dir != null) {
      try {
        dir.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }