/**
 * Deletes all index documents through the given writer.
 *
 * <p>An {@link IOException} raised by {@code writer.deleteAll()} is not propagated to the caller;
 * only its stack trace is printed. This method neither commits nor closes the writer.
 *
 * @param writer the writer on which {@code deleteAll()} is invoked
 */
public static void deleteAllIndex(IndexWriter writer) {
  try {
    writer.deleteAll();
  } catch (IOException e) {
    // Failure is reported on stderr only; callers get no signal that the delete failed.
    e.printStackTrace();
  }
}
public void delete() { IndexWriter writer = null; try { writer = new IndexWriter( directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); // 参数是一个选项,可以是一个Query,也可以是一个term,term是一个精确查找的值 // 此时删除的文档并不会被完全删除,而是存储在一个回收站中的,可以恢复 writer.deleteAll(); // 删除所有 // writer.deleteDocuments(new Term("id","1")); writer.commit(); // 执行删除 } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if (writer != null) writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
public void buildIndex(JSONObject indexData) { try { Directory dir = FSDirectory.open(new File(indexDir)); IKAnalyzer analyzer = new IKAnalyzer(); analyzer.setUseSmart(true); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer); indexWriter = new IndexWriter(dir, iwc); indexWriter.deleteAll(); JSONArray statusData = indexData.getJSONArray("statusData"); for (int i = 0; i < statusData.length(); i++) { String text = statusData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } JSONArray userData = indexData.getJSONArray("userData"); for (int i = 0; i < userData.length(); i++) { String text = userData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } // indexWriter.commit(); System.out.println("Index is done"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JSONException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { try { indexWriter.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
public NewIndexer(String indexDir2) throws IOException { // create the index if (indexWriter2 == null) { indexWriter2 = new IndexWriter( FSDirectory.open(new File("resultsList")), new IndexWriterConfig( Version.LUCENE_36, new EnglishAnalyzer(Version.LUCENE_36, StandardAnalyzer.STOP_WORDS_SET))); indexWriter2.deleteAll(); } }
/** 删除索引 */ public void deleteIndex() { Directory fsDir; try { fsDir = FSDirectory.open(new File(PATH)); NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); conf.setMergeScheduler(cachedFSDir.getMergeScheduler()); IndexWriter writer = new IndexWriter(cachedFSDir, conf); writer.deleteAll(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
public void index() { // 创建索引 IndexWriter writer = null; try { writer = new IndexWriter( directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); writer.deleteAll(); Document doc = null; for (int i = 0; i < ids.length; i++) { doc = new Document(); doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add( new Field( "email", "test" + i + "@test.com", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // 存储数字 doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[i])); // 存储日期 doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime())); String et = emails[i].substring(emails[i].lastIndexOf("@") + 1); System.out.println(et); if (scores.containsKey(et)) { doc.setBoost(scores.get(et)); // 默认为1 } else { doc.setBoost(0.5f); } writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if (writer != null) writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
/**
 * Empties {@code populateList}, waits for {@code populatingClass} to become {@code null}, and
 * then deletes every document via {@code iwriter.deleteAll()}.
 *
 * <p>{@code stopPopulation} is set to {@code true} for the duration of the wait and reset to
 * {@code false} before the delete. The wait polls once per second and holds this object's
 * monitor throughout, because the method is {@code synchronized}.
 *
 * <p>NOTE(review): presumably another thread observes {@code stopPopulation} and resets
 * {@code populatingClass}; that code is not visible here, so confirm it does not need this
 * object's monitor.
 *
 * @throws IcatException of type {@code INTERNAL}, carrying only the message of the underlying
 *     {@link IOException}, if deleting the documents fails
 */
public synchronized void clear() throws IcatException {
  try {
    populateList.clear();
    stopPopulation = true;
    // Poll until populatingClass has been reset to null.
    while (populatingClass != null) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // Interruption is deliberately ignored: the loop keeps waiting and the thread's
        // interrupt status is not restored.
      }
    }
    stopPopulation = false;
    iwriter.deleteAll();
  } catch (IOException e) {
    throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage());
  }
}
/**
 * Deletes all the {@link Document}s and logs the truncation.
 *
 * <p>Calls {@code indexWriter.deleteAll()} and then reports the operation through
 * {@code Log.info}, including {@code logName} in the message. This method itself issues no
 * commit.
 *
 * @throws IOException If Lucene throws IO errors.
 */
public void truncate() throws IOException {
  indexWriter.deleteAll();
  // Logged only after the delete succeeded; an IOException above skips this line.
  Log.info("%s truncated", logName);
}
/**
 * Delegates to {@code w.deleteAll()}.
 *
 * <p>NOTE(review): {@code w} is presumably a Lucene {@code IndexWriter}, in which case this
 * removes every document from its index; confirm against the field declaration.
 *
 * @throws IOException if the delegated {@code deleteAll()} call fails
 */
public void deleteAll() throws IOException {
  w.deleteAll();
}
public void createSenIndex(JSONArray jsonArray, String indexPath, String stopwordsFile) throws Exception { if (jsonArray == null) { System.out.println("error: jsonArray is null!\n"); return; } Analyzer analyzer = null; if (stopwordsFile == null) { analyzer = new SimpleAnalyzer(); } else { analyzer = new StopAnalyzer(Paths.get(stopwordsFile)); } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(Paths.get(indexPath)), iwc); indexWriter.deleteAll(); TextUtil textUtil = new TextUtil(); long startTime = new Date().getTime(); System.out.println("jsonArray size: " + jsonArray.size()); long num_sentence = 0; for (JSONObject jsonObj : (List<JSONObject>) jsonArray) { long id = (long) jsonObj.get(idKey); String review = (String) jsonObj.get(reviewKey); if (review == null || review.isEmpty()) { continue; } if (review.matches(".*[^\\x00-\\x7F].*")) { continue; } String[] tokens = textUtil.tokenize(review); if (tokens.length <= TERM_MIN_THRESHOLD) { continue; } String[] sentences = textUtil.sentenceDetect(review); // System.out.println(body.toLowerCase() + "\n"); num_sentence = 0; for (int i = 0; i < sentences.length; i++) { if (sentences[i] == null || sentences[i].isEmpty()) { continue; } // System.out.println(sentences[i]); Document doc = new Document(); Field idField = new LongField(idKey, id, Field.Store.YES); Field numField = new LongField("num", num_sentence, Field.Store.NO); Field contentField = new TextField( reviewKey, sentences[i].replaceAll("[_'.,]", " ").replaceAll("[0-9]", ""), Field.Store.YES); doc.add(idField); doc.add(numField); doc.add(contentField); indexWriter.addDocument(doc); num_sentence++; } } indexWriter.commit(); indexWriter.close(); long endTime = new Date().getTime(); System.out.println("\n\ncreate index time: " + (endTime - startTime) + "ms"); System.out.println("\n sentence num: " + num_sentence + "\n"); }
/** 创建索引 */ private static void createIndex() throws IOException { Directory fsDir = FSDirectory.open(new File(PATH)); NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0); Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer); conf.setMergeScheduler(cachedFSDir.getMergeScheduler()); IndexWriter writer = new IndexWriter(cachedFSDir, conf); writer.deleteAll(); // 索引的数据源 List<MapBean> ls = LuceneIndexOperator.createDataSource(); int i = 0; for (MapBean mdata : ls) { // mdata.getClass().getDeclaredMethods(); Document doc = new Document(); doc.add(new Field("id", "" + mdata.getId(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("name", mdata.getName(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("address", mdata.getAddress(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("city", mdata.getCity(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("num", mdata.getNum(), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( "eastNew", nullToBlank(mdata.getEastNew()), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add( new Field( "northNew", nullToBlank(mdata.getNorthNew()), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("datatype", mdata.getDatatype(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("phone", mdata.getPhone(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add( new Field( "geom", mdata.getEastNew() + "," + mdata.getNorthNew(), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( "dataType", nullToBlank(mdata.getDatatype()), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( "comType", nullToBlank(mdata.getDataTypeByComType().getDataTypeName()), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( "dataTypeKey", nullToBlank(mdata.getDataTypeByDataType().getDataTypeKey()), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( "comTypeKey", 
nullToBlank(mdata.getDataTypeByComType().getDataTypeKey()), Field.Store.YES, Field.Index.ANALYZED)); // ReflectFillTheBean(mdata,doc); // System.out.println(nullToBlank(mdata.getDataTypeByDataType().getDataTypeKey())); i++; writer.addDocument(doc); if (i % 10000 == 0) { writer.commit(); System.out.println("提交" + i); } } writer.optimize(true); writer.commit(); writer.close(); }