// Checks that rollback() discards a pending deleteAll()
  public void testDeleteAllRollback() throws IOException {
    final Directory directory = new MockRAMDirectory();
    final IndexWriter writer =
        new IndexWriter(
            directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.setMaxBufferedDocs(2);
    writer.setMaxBufferedDeleteTerms(2);

    final int value = 100;

    // Seven documents with ids 1..7, made durable by the commit below.
    int nextId = 1;
    while (nextId <= 7) {
      addDoc(writer, nextId, value);
      nextId++;
    }
    writer.commit();

    // An eighth document that is added but never committed.
    addDoc(writer, nextId, value);

    // A fresh reader only sees the committed documents.
    IndexReader committedView = IndexReader.open(directory, true);
    assertEquals(7, committedView.numDocs());
    committedView.close();

    // Request deletion of everything, then abandon the uncommitted changes.
    writer.deleteAll();
    writer.rollback();
    writer.close();

    // The seven committed documents must still be present.
    committedView = IndexReader.open(directory, true);
    assertEquals(7, committedView.numDocs());
    committedView.close();

    directory.close();
  }
  /**
   * Rebuilds the index stored under {@code indexDir} from the given JSON payload.
   *
   * <p>All existing documents are deleted first. Then every string in the {@code "statusData"}
   * array, followed by every string in the {@code "userData"} array, is added as a document with a
   * single stored, analyzed {@code "text"} field carrying term vectors with positions and offsets.
   *
   * <p>{@link IOException} and {@link JSONException} are not propagated; their stack traces are
   * printed and the method returns normally. The writer and the directory opened by this call are
   * always closed before returning.
   *
   * @param indexData JSON object providing the {@code "statusData"} and {@code "userData"} arrays
   */
  public void buildIndex(JSONObject indexData) {
    // Track what THIS call opened, so cleanup never touches a null or stale writer.
    Directory dir = null;
    IndexWriter writer = null;
    try {
      dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      writer = new IndexWriter(dir, iwc);
      indexWriter = writer;
      writer.deleteAll();

      addTextDocuments(writer, indexData.getJSONArray("statusData"));
      addTextDocuments(writer, indexData.getJSONArray("userData"));
      // No explicit commit here; pending changes are left to be flushed when the writer closes.
      System.out.println("Index is done");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      // The writer is null when opening the directory or constructing the writer failed.
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          // CorruptIndexException is an IOException, so this covers both failure modes.
          e.printStackTrace();
        }
      }
      if (dir != null) {
        try {
          dir.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }

  /** Adds one document per string in {@code texts}, each holding that string in a "text" field. */
  private static void addTextDocuments(IndexWriter writer, JSONArray texts)
      throws IOException, JSONException {
    for (int i = 0; i < texts.length(); i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "text",
              texts.getString(i),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      writer.addDocument(doc);
    }
  }
 public NewIndexer(String indexDir2) throws IOException {
   // create the index
   if (indexWriter2 == null) {
     indexWriter2 =
         new IndexWriter(
             FSDirectory.open(new File("resultsList")),
             new IndexWriterConfig(
                 Version.LUCENE_36,
                 new EnglishAnalyzer(Version.LUCENE_36, StandardAnalyzer.STOP_WORDS_SET)));
     indexWriter2.deleteAll();
   }
 }
 /**
  * Adds a numeric doc-values field "dv", calls deleteAll(), then adds a sorted doc-values field
  * with the same name; the test passes if none of these calls throws.
  */
 public void testTypeChangeAfterDeleteAll() throws Exception {
   final Directory directory = newDirectory();
   final IndexWriter indexWriter =
       new IndexWriter(
           directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

   // First use of "dv": numeric doc values.
   final Document numericDoc = new Document();
   numericDoc.add(new NumericDocValuesField("dv", 0L));
   indexWriter.addDocument(numericDoc);

   indexWriter.deleteAll();

   // Second use of "dv", after the delete: sorted doc values.
   final Document sortedDoc = new Document();
   sortedDoc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
   indexWriter.addDocument(sortedDoc);

   indexWriter.close();
   directory.close();
 }
 /**
  * Replaces the persisted snapshot state with a single freshly written document and commits it.
  *
  * <p>The document holds an empty {@code SNAPSHOTS_ID} field, one stored field per entry returned
  * by {@code super.getSnapshots()}, and, when {@code id} is not null, one extra stored field
  * mapping {@code id} to {@code segment}.
  *
  * @param id name of the additional field to persist, or null to persist none
  * @param segment value stored under {@code id}; only read when {@code id} is not null
  * @throws IOException if deleting, adding the document, or committing fails
  */
 private void persistSnapshotInfos(String id, String segment) throws IOException {
   writer.deleteAll();

   // Every field of the snapshot document is stored-only.
   final FieldType storedOnly = new FieldType();
   storedOnly.setStored(true);

   final Document snapshotDoc = new Document();
   snapshotDoc.add(new Field(SNAPSHOTS_ID, "", storedOnly));
   for (Entry<String, String> snapshot : super.getSnapshots().entrySet()) {
     final String snapshotId = snapshot.getKey();
     final String snapshotSegment = snapshot.getValue();
     snapshotDoc.add(new Field(snapshotId, snapshotSegment, storedOnly));
   }
   if (id != null) {
     snapshotDoc.add(new Field(id, segment, storedOnly));
   }

   writer.addDocument(snapshotDoc);
   writer.commit();
 }
  // Re-adds field "foo" with a different doc-values type once deleteAll() has been called
  public void testMixedTypesAfterDeleteAll() throws Exception {
    final Directory directory = newDirectory();
    final IndexWriter indexWriter =
        new IndexWriter(
            directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

    // "foo" starts out as a numeric doc-values field.
    final Document numericDoc = new Document();
    numericDoc.add(new NumericDocValuesField("foo", 0));
    indexWriter.addDocument(numericDoc);

    indexWriter.deleteAll();

    // After the delete, "foo" is added as a sorted doc-values field instead.
    final Document sortedDoc = new Document();
    sortedDoc.add(new SortedDocValuesField("foo", new BytesRef("hello")));
    indexWriter.addDocument(sortedDoc);

    indexWriter.close();
    directory.close();
  }
  // Exercises deleteAll() together with buffered adds/updates and commit()
  public void testDeleteAll() throws IOException {
    final Directory directory = new MockRAMDirectory();
    final IndexWriter writer =
        new IndexWriter(
            directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.setMaxBufferedDocs(2);
    writer.setMaxBufferedDeleteTerms(2);

    final int value = 100;

    // Seven documents with ids 1..7, committed.
    for (int docId = 1; docId <= 7; docId++) {
      addDoc(writer, docId, value);
    }
    writer.commit();

    IndexReader committedView = IndexReader.open(directory, true);
    assertEquals(7, committedView.numDocs());
    committedView.close();

    // Buffer one more document so that something is pending when deleteAll() runs.
    addDoc(writer, 99, value);

    writer.deleteAll();

    // Without a commit, a new reader still sees the seven committed documents.
    committedView = IndexReader.open(directory, true);
    assertEquals(7, committedView.numDocs());
    committedView.close();

    // One add and one update issued after deleteAll() but before the commit.
    addDoc(writer, 101, value);
    updateDoc(writer, 102, value);

    // This commit makes the deleteAll() and the two later documents visible.
    writer.commit();

    // Exactly the two documents written after deleteAll() are expected.
    committedView = IndexReader.open(directory, true);
    assertEquals(2, committedView.numDocs());
    committedView.close();

    writer.close();
    directory.close();
  }
// Example #8
// 0
  /**
   * Builds a fresh sentence-level index at {@code indexPath} from the reviews in {@code jsonArray}.
   *
   * <p>For each element, the values under {@code idKey} and {@code reviewKey} are read. A review is
   * skipped when it is null or empty, contains any character outside the 7-bit ASCII range, or
   * tokenizes to {@code TERM_MIN_THRESHOLD} tokens or fewer. Every non-empty sentence of a kept
   * review becomes one document with three fields: the review id (stored), {@code "num"} (the
   * sentence's zero-based position among the indexed sentences of that review, not stored) and
   * the sentence text with the characters {@code _ ' . ,} replaced by spaces and digits removed
   * (stored).
   *
   * <p>The writer and its directory are closed even when indexing fails. NOTE(review): closing the
   * writer on a failure path may persist a partially built index — confirm that is acceptable.
   *
   * @param jsonArray reviews to index; when null an error line is printed and nothing is indexed
   * @param indexPath file-system location of the index, opened in CREATE mode
   * @param stopwordsFile path of a stop-word file for a {@code StopAnalyzer}, or null to use a
   *     {@code SimpleAnalyzer}
   * @throws Exception if opening, writing or committing the index fails
   */
  public void createSenIndex(JSONArray jsonArray, String indexPath, String stopwordsFile)
      throws Exception {

    if (jsonArray == null) {
      System.out.println("error: jsonArray is null!\n");
      return;
    }

    Analyzer analyzer;
    if (stopwordsFile == null) {
      analyzer = new SimpleAnalyzer();
    } else {
      analyzer = new StopAnalyzer(Paths.get(stopwordsFile));
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // Compiled once. find() is used instead of String.matches(".*X.*") because '.' does not match
    // line terminators, which let multi-line non-ASCII reviews slip through the old check.
    java.util.regex.Pattern nonAscii = java.util.regex.Pattern.compile("[^\\x00-\\x7F]");

    long totalSentences = 0;
    long startTime;

    // Resources are closed in reverse order: the writer first, then the directory.
    try (FSDirectory directory = FSDirectory.open(Paths.get(indexPath));
        IndexWriter indexWriter = new IndexWriter(directory, iwc)) {
      indexWriter.deleteAll();

      TextUtil textUtil = new TextUtil();

      startTime = System.currentTimeMillis();

      System.out.println("jsonArray size: " + jsonArray.size());

      for (JSONObject jsonObj : (List<JSONObject>) jsonArray) {
        long id = (long) jsonObj.get(idKey);
        String review = (String) jsonObj.get(reviewKey);

        if (review == null || review.isEmpty()) {
          continue;
        }

        if (nonAscii.matcher(review).find()) {
          continue;
        }

        String[] tokens = textUtil.tokenize(review);
        if (tokens.length <= TERM_MIN_THRESHOLD) {
          continue;
        }

        String[] sentences = textUtil.sentenceDetect(review);

        // Position of the sentence within this review; restarts at zero for every review.
        long sentenceIndex = 0;

        for (String sentence : sentences) {
          if (sentence == null || sentence.isEmpty()) {
            continue;
          }

          Document doc = new Document();
          Field idField = new LongField(idKey, id, Field.Store.YES);
          Field numField = new LongField("num", sentenceIndex, Field.Store.NO);
          Field contentField =
              new TextField(
                  reviewKey,
                  sentence.replaceAll("[_'.,]", " ").replaceAll("[0-9]", ""),
                  Field.Store.YES);

          doc.add(idField);
          doc.add(numField);
          doc.add(contentField);

          indexWriter.addDocument(doc);
          sentenceIndex++;
          totalSentences++;
        }
      }

      indexWriter.commit();
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\n\ncreate index time: " + (endTime - startTime) + "ms");
    // Reports the number of sentences indexed over all reviews, not just the last one.
    System.out.println("\n sentence num: " + totalSentences + "\n");
  }
 /**
  * Delegates to {@link IndexWriter#deleteAll} and then reports the current value of {@code
  * indexingGen}.
  *
  * @return the generation read after the delete call has returned
  * @throws IOException if the underlying writer fails to delete
  */
 public long deleteAll() throws IOException {
   writer.deleteAll();
   // Sample the generation only once the delete has finished.
   final long generationAfterDelete = indexingGen.get();
   return generationAfterDelete;
 }