public void createSenIndex(JSONArray jsonArray, String indexPath, String stopwordsFile) throws Exception { if (jsonArray == null) { System.out.println("error: jsonArray is null!\n"); return; } Analyzer analyzer = null; if (stopwordsFile == null) { analyzer = new SimpleAnalyzer(); } else { analyzer = new StopAnalyzer(Paths.get(stopwordsFile)); } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(Paths.get(indexPath)), iwc); indexWriter.deleteAll(); TextUtil textUtil = new TextUtil(); long startTime = new Date().getTime(); System.out.println("jsonArray size: " + jsonArray.size()); long num_sentence = 0; for (JSONObject jsonObj : (List<JSONObject>) jsonArray) { long id = (long) jsonObj.get(idKey); String review = (String) jsonObj.get(reviewKey); if (review == null || review.isEmpty()) { continue; } if (review.matches(".*[^\\x00-\\x7F].*")) { continue; } String[] tokens = textUtil.tokenize(review); if (tokens.length <= TERM_MIN_THRESHOLD) { continue; } String[] sentences = textUtil.sentenceDetect(review); // System.out.println(body.toLowerCase() + "\n"); num_sentence = 0; for (int i = 0; i < sentences.length; i++) { if (sentences[i] == null || sentences[i].isEmpty()) { continue; } // System.out.println(sentences[i]); Document doc = new Document(); Field idField = new LongField(idKey, id, Field.Store.YES); Field numField = new LongField("num", num_sentence, Field.Store.NO); Field contentField = new TextField( reviewKey, sentences[i].replaceAll("[_'.,]", " ").replaceAll("[0-9]", ""), Field.Store.YES); doc.add(idField); doc.add(numField); doc.add(contentField); indexWriter.addDocument(doc); num_sentence++; } } indexWriter.commit(); indexWriter.close(); long endTime = new Date().getTime(); System.out.println("\n\ncreate index time: " + (endTime - startTime) + "ms"); System.out.println("\n sentence num: " + num_sentence + "\n"); }