public static List<String> analyze(String content) { List<String> resultList = null; try { // 创建分词对象 resultList = new ArrayList<String>(1); resultList.add(content); IKAnalyzer analyer = new IKAnalyzer(true); analyer.setUseSmart(true); StringReader reader = new StringReader(content); // 分词 TokenStream tokenStream = analyer.tokenStream("", reader); CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); // 遍历分词数据 while (tokenStream.incrementToken()) { if (!term.toString().isEmpty()) { resultList.add(term.toString()); } } reader.close(); } catch (IOException ex) { logger.error("分词出错", ex); } return resultList; }
public void buildIndex(JSONObject indexData) { try { Directory dir = FSDirectory.open(new File(indexDir)); IKAnalyzer analyzer = new IKAnalyzer(); analyzer.setUseSmart(true); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer); indexWriter = new IndexWriter(dir, iwc); indexWriter.deleteAll(); JSONArray statusData = indexData.getJSONArray("statusData"); for (int i = 0; i < statusData.length(); i++) { String text = statusData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } JSONArray userData = indexData.getJSONArray("userData"); for (int i = 0; i < userData.length(); i++) { String text = userData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } // indexWriter.commit(); System.out.println("Index is done"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JSONException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { try { indexWriter.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }