Example #1
 /**
  * Update an indexed document.
  *
  * @param writer the index writer to apply the update with
  * @param term the term identifying the document(s) to replace
  * @param document the new document content
  */
 public static void updateIndex(IndexWriter writer, Term term, Document document) {
   try {
     writer.updateDocument(term, document);
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
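A minimal caller sketch for the helper above (hypothetical index path and field values, assuming the Lucene 4.x-era APIs used elsewhere on this page):

 Directory dir = FSDirectory.open(new File("/tmp/example-index")); // hypothetical path
 IndexWriter writer =
     new IndexWriter(
         dir, new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
 Document doc = new Document();
 doc.add(new StringField("id", "42", Field.Store.YES));
 updateIndex(writer, new Term("id", "42"), doc); // replaces any existing doc with id "42"
 writer.close();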
Example #2
  private void indexNoCommit(CObj o, boolean onlynew) throws IOException {
    if (o.getDig() == null && o.getId() == null) {
      throw new IOException("Digest or id required!");
    }

    boolean indexit = true;
    Term updateterm = null;

    if (o.getId() != null) {
      updateterm = new Term("id", o.getId());
      Query id0 = new TermQuery(updateterm);

      if (onlynew) {
        CObjList cl = search(id0, 1);
        indexit = (cl.size() == 0);
        cl.close();
      }
    }

    if (o.getDig() != null && o.getId() == null) {
      updateterm = new Term("dig", o.getDig());
      Query id0 = new TermQuery(updateterm);

      if (onlynew) {
        CObjList cl = search(id0, 1);
        indexit = (cl.size() == 0);
        cl.close();
      }
    }

    if (indexit) {
      Document d = o.getDocument();
      writer.updateDocument(updateterm, d);
    }
  }
Example #3
 public void update() {
   IndexWriter writer = null;
   try {
     writer =
         new IndexWriter(
             directory,
             new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
      /*
       * Lucene does not provide an in-place update; the "update" here is really
       * the combination of two operations: delete first, then add.
       */
     Document doc = new Document();
     doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
     doc.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
     doc.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
     doc.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
     writer.updateDocument(new Term("id", "1"), doc);
   } catch (CorruptIndexException e) {
     e.printStackTrace();
   } catch (LockObtainFailedException e) {
     e.printStackTrace();
   } catch (IOException e) {
     e.printStackTrace();
   } finally {
     try {
       if (writer != null) writer.close();
     } catch (CorruptIndexException e) {
       e.printStackTrace();
     } catch (IOException e) {
       e.printStackTrace();
     }
   }
 }
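As the comment in update() notes, Lucene's updateDocument is an atomic delete-then-add. A hedged sketch of the non-atomic equivalent (same writer, term, and doc as in the example), shown only to illustrate what updateDocument does internally:

      // Equivalent in effect, but NOT atomic with respect to index readers:
      writer.deleteDocuments(new Term("id", "1"));
      writer.addDocument(doc);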
Example #4

 @Override
 public AddResponse add(Collection<InputDocument> inputDocuments) {
   try {
     if (logger.isDebugEnabled()) {
       logger.debug("adding documents...");
     }
     for (InputDocument inputDocument : inputDocuments) {
       assertIdExist(inputDocument);
     }
     for (Document document : DocumentTransformUtil.toLuceneDocuments(inputDocuments, schema)) {
       indexWriter.updateDocument(
           new Term(schema.getIdName(), document.getFieldable(schema.getIdName()).stringValue()),
           document,
           schema.getAnalyzer());
     }
     updateCount.addAndGet(inputDocuments.size());
     if (logger.isDebugEnabled()) {
       logger.debug("add documents finish.");
     }
   } catch (Exception e) {
     logger.error("add documents error", e);
     return new AddResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
   }
   return new AddResponse();
 }
Example #5
  /** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#addRevision(String) */
  public void addRevision(String revisionName) throws Exception {
    Date creationDate =
        node.getRevision(revisionName)
            .getCreationDate(); // WARN: Older creation dates might lack milliseconds and hence do
                                // not correspond exactly to the revision name; to build the date
                                // index correctly one therefore needs to use the creation date.
    log.debug(
        "Add revision '"
            + revisionName
            + "' with creation date '"
            + creationDate
            + "' to date index ...");

    Document doc = new Document();
    doc.add(
        new NumericField(CREATION_DATE_FIELD_NAME, Field.Store.YES, true)
            .setLongValue(creationDate.getTime()));
    // doc.add(new Field(CREATION_DATE_FIELD_NAME,
    // org.apache.lucene.document.DateTools.dateToString(creationDate,
    // org.apache.lucene.document.DateTools.Resolution.MILLISECOND), Field.Store.YES,
    // Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            REVISION_NAME_FIELD_NAME, revisionName, Field.Store.YES, Field.Index.NOT_ANALYZED));

    IndexWriter iw = getIndexWriter();
    Term revisionNameTerm = new Term(REVISION_NAME_FIELD_NAME, revisionName);
    iw.updateDocument(revisionNameTerm, doc);
    iw.optimize();
    iw.close();
  }
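A hedged retrieval sketch for the date index built above (Lucene 3.x-era APIs to match NumericField; the directory and searcher setup are hypothetical, field names reused from the example):

    IndexSearcher searcher = new IndexSearcher(IndexReader.open(directory)); // hypothetical dir
    Query q =
        NumericRangeQuery.newLongRange(
            CREATION_DATE_FIELD_NAME, 0L, creationDate.getTime(), true, true);
    TopDocs hits = searcher.search(q, 1);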
Example #6
  public void index(NodeInstanceDesc node) throws IOException {
    Document d = new Document();

    d.add(new StringField("processName", node.getName(), Field.Store.YES));
    d.add(new StringField("nodeId", String.valueOf(node.getNodeId()), Field.Store.YES));
    d.add(
        new StringField(
            "processInstanceId", String.valueOf(node.getProcessInstanceId()), Field.Store.YES));
    d.add(new StringField("sessionId", String.valueOf(node.getDeploymentId()), Field.Store.YES));
    // d.add(new TextField()); if I need to store large texts that require tokenization
    // To support any value search
    d.add(new StringField("all", String.valueOf(node.getDeploymentId()), Field.Store.NO));
    d.add(new StringField("all", String.valueOf(node.getProcessInstanceId()), Field.Store.NO));
    d.add(new StringField("all", String.valueOf(node.getNodeId()), Field.Store.NO));
    d.add(new StringField("all", String.valueOf(node.getName()), Field.Store.NO));
    // for suggestion
    d.add(new StringField("suggest", node.getName(), Field.Store.NO));

    // Key the update on the process name; the one-argument Term("processName") matched
    // nothing (empty term text), so every call silently added a duplicate document.
    iw.updateDocument(new Term("processName", node.getName()), d);

    DirectoryReader ireader = DirectoryReader.open(iw, true);
    is = new IndexSearcher(ireader);

    // iw.commit(); this should be executed every X minutes

  }
Example #7
  /**
   * Indexes the given file using the given writer, or if a directory is given, recurses over files
   * and directories found under the given directory.
   *
   * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput,
   * put multiple documents into your input file(s). An example of this is in the benchmark module,
   * which can create "line doc" files, one document per line, using the <a
   * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
   * >WriteLineDocTask</a>.
   *
   * @param writer Writer to the index where the given file/dir info will be stored
   * @param file The file to index, or the directory to recurse into to find files to index
   * @throws IOException If there is a low-level I/O error
   */
  private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
      if (!file.isDirectory()) {
        FileInputStream fis;
        try {
          fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
          // at least on windows, some temporary files raise this exception with an "access denied"
          // message
          // checking if the file can be read doesn't help
          return;
        }

        try {
          // make a new, empty document
          Document doc = new Document();

          // Add the path of the file as a field named "path".  Use a
          // field that is indexed (i.e. searchable), but don't tokenize
          // the field into separate words and don't index term frequency
          // or positional information:
          Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
          doc.add(pathField);

          // Add the last modified date of the file as a field named "modified".
          // Use a LongField that is indexed (i.e. efficiently filterable with
          // NumericRangeFilter).  This indexes to milli-second resolution, which
          // is often too fine.  You could instead create a number based on
          // year/month/day/hour/minutes/seconds, down the resolution you require.
          // For example the long value 2011021714 would mean
          // February 17, 2011, 2-3 PM.
          doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

          // Add the contents of the file to a field named "contents".  Specify a Reader,
          // so that the text of the file is tokenized and indexed, but not stored.
          // Note that FileReader expects the file to be in UTF-8 encoding.
          // If that's not the case searching for special characters will fail.
          doc.add(
              new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

          if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
          } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
          }

        } finally {
          fis.close();
        }
      }
    }
  }
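A hedged driver sketch for indexDocs above (index and source paths are hypothetical; assumes a Lucene 4.x setup matching the StringField/LongField/TextField calls in the example):

  Directory dir = FSDirectory.open(new File("index")); // hypothetical index location
  IndexWriterConfig iwc =
      new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46));
  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // append mode takes the updateDocument branch
  IndexWriter writer = new IndexWriter(dir, iwc);
  indexDocs(writer, new File("docs")); // hypothetical document tree
  writer.close();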
Example #8

 public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
     throws IOException, ConfigurationException {
   Document doc = reader.document(docId);
   doc.removeFields(fieldName);
   Field uri = doc.getField("URI");
   Term term = new Term("URI", uri.stringValue());
   writer.updateDocument(term, doc, analyzer);
 }
Example #9
 /** Update the index data. */
 public void updateIndexData(UserAllParamsDomain bozhu) {
   try {
     writer.updateDocument(new Term("username", bozhu.getUsername()), getDoc(bozhu));
     // commit the update
     writer.commit();
   } catch (IOException e) {
     logger.info("User: " + bozhu.getUsername() + "'s indexed data failed to update.", e);
   }
 }
Example #10

 public void updateDocument(EntityBaseBean bean) throws IcatException {
   Document doc = buildDoc(bean);
   String id = bean.getClass().getSimpleName() + ":" + bean.getId();
   try {
     iwriter.updateDocument(new Term("id", id), doc);
   } catch (IOException e) {
     throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage());
   }
 }
Example #11
 @Override
 public EngineException[] bulk(Bulk bulk) throws EngineException {
   EngineException[] failures = null;
   rwl.readLock().lock();
   try {
     IndexWriter writer = this.indexWriter;
     if (writer == null) {
       throw new EngineClosedException(shardId);
     }
     for (int i = 0; i < bulk.ops().length; i++) {
       Operation op = bulk.ops()[i];
       if (op == null) {
         continue;
       }
       try {
         switch (op.opType()) {
           case CREATE:
             Create create = (Create) op;
             writer.addDocument(create.doc(), create.analyzer());
             translog.add(new Translog.Create(create));
             break;
           case INDEX:
             Index index = (Index) op;
             writer.updateDocument(index.uid(), index.doc(), index.analyzer());
             translog.add(new Translog.Index(index));
             break;
           case DELETE:
             Delete delete = (Delete) op;
             writer.deleteDocuments(delete.uid());
             translog.add(new Translog.Delete(delete));
             break;
         }
       } catch (Exception e) {
         if (failures == null) {
           failures = new EngineException[bulk.ops().length];
         }
         switch (op.opType()) {
           case CREATE:
             failures[i] = new CreateFailedEngineException(shardId, (Create) op, e);
             break;
           case INDEX:
             failures[i] = new IndexFailedEngineException(shardId, (Index) op, e);
             break;
           case DELETE:
             failures[i] = new DeleteFailedEngineException(shardId, (Delete) op, e);
             break;
         }
       }
     }
     dirty = true;
   } finally {
     rwl.readLock().unlock();
   }
   return failures;
 }
Example #12
 public static boolean addOrUpdateIndex(MapBean mdata) {
   boolean success = false;
   try {
     IndexWriter writer = getIndexWriter();
     //	deleteIndex(mdata);
      Document doc = new Document();
     doc.add(new Field("id", "" + mdata.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
     doc.add(
         new Field("name", nullToBlank(mdata.getName()), Field.Store.YES, Field.Index.ANALYZED));
     doc.add(
         new Field(
             "address", nullToBlank(mdata.getAddress()), Field.Store.YES, Field.Index.ANALYZED));
     doc.add(
         new Field("city", nullToBlank(mdata.getCity()), Field.Store.YES, Field.Index.ANALYZED));
     doc.add(new Field("num", nullToBlank(mdata.getNum()), Field.Store.YES, Field.Index.ANALYZED));
     doc.add(
         new Field(
             "eastNew",
             nullToBlank(mdata.getEastNew()),
             Field.Store.YES,
             Field.Index.NOT_ANALYZED));
     doc.add(
         new Field(
             "northNew",
             nullToBlank(mdata.getNorthNew()),
             Field.Store.YES,
             Field.Index.NOT_ANALYZED));
     doc.add(
         new Field(
             "datatype",
             nullToBlank(mdata.getDatatype()),
             Field.Store.YES,
             Field.Index.NOT_ANALYZED));
     doc.add(
         new Field("phone", nullToBlank(mdata.getPhone()), Field.Store.YES, Field.Index.ANALYZED));
     doc.add(
         new Field(
             "geom",
             nullToBlank(mdata.getEastNew()) + "," + nullToBlank(mdata.getNorthNew()),
             Field.Store.YES,
             Field.Index.ANALYZED));
     writer.updateDocument(new Term("id", "" + mdata.getId()), doc);
     writer.optimize();
     writer.commit();
     writer.close();
     success = true;
     System.out.println("索引建立完毕!!!");
   } catch (Exception e) {
     e.printStackTrace();
   }
   return success;
 }
Example #13

  /**
   * We assume that the initial indexing has been done and a set of reference objects has been
   * found and indexed in a separate directory. However, further documents were added since, and
   * they now need a ranked list of reference objects. So we (i) fetch all new documents missing
   * the field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
Example #14

  protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
      if (field.storedPayloads) {
        mapping.put(
            field.name,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                filter = new TypeAsPayloadTokenFilter(filter);
                return new TokenStreamComponents(tokenizer, filter);
              }
            });
      }
    }
    PerFieldAnalyzerWrapper wrapper =
        new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    for (TestDoc doc : testDocs) {
      Document d = new Document();
      d.add(new Field("id", doc.id, StringField.TYPE_STORED));
      for (int i = 0; i < doc.fieldContent.length; i++) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        TestFieldSetting fieldSetting = doc.fieldSettings[i];

        type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
        type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
        type.setStoreTermVectorPositions(
            fieldSetting.storedPositions
                || fieldSetting.storedPayloads
                || fieldSetting.storedOffset);
        type.setStoreTermVectors(true);
        type.freeze();
        d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
      }
      writer.updateDocument(new Term("id", doc.id), d);
      writer.commit();
    }
    writer.close();

    return DirectoryReader.open(dir);
  }
Example #15

 @Override
 public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
     throws IOException {
   super.updateDocument(term, doc, analyzer);
   try {
     if (waitAfterUpdate) {
       signal.countDown();
       latch.await();
     }
   } catch (InterruptedException e) {
     throw new ThreadInterruptedException(e);
   }
 }
Example #16

 /**
  * Updates a previous suggestion, matching the exact same text as before. Use this to change the
  * weight or payload of an already added suggestion. If you know this text is not already present
  * you can use {@link #add} instead. After adding or updating a batch of suggestions, you must
  * call {@link #refresh} at the end in order to see the suggestions in {@link #lookup}.
  */
 public void update(BytesRef text, long weight, BytesRef payload) throws IOException {
   String textString = text.utf8ToString();
   Document doc = new Document();
   FieldType ft = getTextFieldType();
   doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
   doc.add(new Field("textgrams", textString, ft));
   doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
   doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
   doc.add(new NumericDocValuesField("weight", weight));
   if (payload != null) {
     doc.add(new BinaryDocValuesField("payloads", payload));
   }
   writer.updateDocument(new Term(EXACT_TEXT_FIELD_NAME, textString), doc);
 }
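A hedged usage sketch for the method above (assumes it lives on an AnalyzingInfixSuggester-style class, consistent with the EXACT_TEXT_FIELD_NAME and weight/payload fields it writes; the suggester instance and values are hypothetical):

 suggester.update(new BytesRef("lucene in action"), 20L, new BytesRef("payload-bytes"));
 suggester.refresh(); // required before the change becomes visible to lookup()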
Example #17
  protected void addDocument() throws IOException {
    if (currentDocument != null) {
      if (updating) {

        writer.updateDocument(new Term(IndexedField.KEY_FIELD, keyEncodedString), currentDocument);
        logger.debug("Updated {}", currentDocument);
      } else {
        writer.addDocument(currentDocument);
        logger.debug("Added {}", currentDocument);
      }
      documentCount++;
      currentDocument = null;
    }
  }
Example #18
 private static boolean update(Index index, VersionValue versionValue, IndexWriter indexWriter)
     throws IOException {
   boolean created;
   if (versionValue != null) {
     created = versionValue.delete(); // we have a delete which is not GC'ed...
   } else {
     created = false;
   }
   if (index.docs().size() > 1) {
     indexWriter.updateDocuments(index.uid(), index.docs());
   } else {
     indexWriter.updateDocument(index.uid(), index.docs().get(0));
   }
   return created;
 }
Example #19
  /** Indexes a single document */
  static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
      // make a new, empty document
      Document doc = new Document();

      // Add the path of the file as a field named "path".  Use a
      // field that is indexed (i.e. searchable), but don't tokenize
      // the field into separate words and don't index term frequency
      // or positional information:
      Field pathField = new StringField("path", file.toString(), Field.Store.YES);
      doc.add(pathField);

      String f = file.getFileName().toString();
      f = f.replaceFirst("\\.txt", "");
      doc.add(new StringField("filename", f, Field.Store.YES));

      // Add the last modified date of the file as a field named "modified".
      // Use a LongField that is indexed (i.e. efficiently filterable with
      // NumericRangeFilter).  This indexes to milli-second resolution, which
      // is often too fine.  You could instead create a number based on
      // year/month/day/hour/minutes/seconds, down the resolution you require.
      // For example the long value 2011021714 would mean
      // February 17, 2011, 2-3 PM.
      doc.add(new LongField("modified", lastModified, Field.Store.NO));

      // Add the contents of the file to a field named "contents".  Specify a Reader,
      // so that the text of the file is tokenized and indexed, but not stored.
      // Note that FileReader expects the file to be in UTF-8 encoding.
      // If that's not the case searching for special characters will fail.
      doc.add(
          new TextField(
              "contents",
              new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
      if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        // log("adding " + file);
        writer.addDocument(doc);
      } else {
        // Existing index (an old copy of this document may have been indexed) so
        // we use updateDocument instead to replace the old one matching the exact
        // path, if present:
        // log("updating " + file);
        writer.updateDocument(new Term("path", file.toString()), doc);
      }
    }
  }
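A hedged driver sketch for indexDoc above, mirroring the java.nio style of the method (`writer` is assumed to be in scope; the root directory is hypothetical):

  Files.walkFileTree(
      Paths.get("docs"),
      new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
          indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
          return FileVisitResult.CONTINUE;
        }
      });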
Example #20
  public void updateDocument(Term term, Document document, String collectionName) {

    IndexWriter indexWriter = indexWriterManager.getIndexWriter(collectionName);

    try {

      logger.info("collectionName : {}", collectionName);
      logger.info("update indexing start................{}", term);

      indexWriter.updateDocument(term, document);

      logger.info("end");

    } catch (IOException e) {

      logger.error("error : ", e);
      throw new RuntimeException("An error occurred during indexing. [" + e.getMessage() + "]");
    }
  }
Example #21
  protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
      IndexWriter iw = ctx.getIndexWriter();

      for (int docNum = 0; docNum < ctx.getIndexReader().maxDoc(); docNum++) {
        if (!ctx.getIndexReader().isDeleted(docNum)) {
          Document doc = ctx.getIndexReader().document(docNum);

          String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);

          if (lastModified != null) {
            long lm = Long.parseLong(lastModified);

            lm = lm + (shiftDays * A_DAY_MILLIS);

            doc.removeFields(ArtifactInfo.LAST_MODIFIED);

            doc.add(
                new Field(
                    ArtifactInfo.LAST_MODIFIED,
                    Long.toString(lm),
                    Field.Store.YES,
                    Field.Index.NO));

            iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
          }
        }
      }

      iw.optimize();

      iw.close();

      // shift timestamp too
      if (ctx.getTimestamp() != null) {
        ctx.updateTimestamp(
            true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
      } else {
        ctx.updateTimestamp(
            true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
      }
    }
  }
Example #22

  public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {

    final String field_name = "text";
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    if (random().nextBoolean()) {
      mockAnalyzer.setOffsetGap(random().nextInt(100));
    }
    // index into a random directory
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Document doc = new Document();
    doc.add(new Field(field_name, "la la", type));
    doc.add(new Field(field_name, "foo bar foo bar foo", type));

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.updateDocument(new Term("id", "1"), doc);
    writer.commit();
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);

    // Index document in Memory index
    MemoryIndex memIndex = new MemoryIndex(true);
    memIndex.addField(field_name, "la la", mockAnalyzer);
    memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

    // compare term vectors
    Terms ramTv = reader.getTermVector(0, field_name);
    IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
    TestUtil.checkReader(memIndexReader);
    Terms memTv = memIndexReader.getTermVector(0, field_name);

    compareTermVectors(ramTv, memTv, field_name);
    memIndexReader.close();
    reader.close();
    dir.close();
  }
Example #23
  private void addDoc(IndexWriter writer, IndexingValue indexingValue) throws Exception {
    Document doc = new Document();
    // type
    Field typeField = new IntField(FIELD_LABEL_TYPE, indexingValue.getType(), Field.Store.YES);
    doc.add(typeField);
    // id
    Field idField = new StringField(FIELD_LABEL_ID, indexingValue.getId(), Field.Store.YES);
    doc.add(idField);
    // title
    doc.add(new TextField(FIELD_LABEL_TITLE, indexingValue.getTitle(), Field.Store.YES));
    // contents
    doc.add(new TextField(FIELD_LABEL_CONTENTS, indexingValue.getContents(), Field.Store.YES));

    // tags
    Field tagField = new TextField(FIELD_LABEL_TAGS, indexingValue.getTags(), Field.Store.YES);
    doc.add(tagField);
    // users who can access
    Field userField =
        new TextField(FIELD_LABEL_USERS, indexingValue.getUsers().toString(), Field.Store.YES);
    doc.add(userField);
    // groups that can access
    Field groupField =
        new TextField(FIELD_LABEL_GROUPS, indexingValue.getGroups().toString(), Field.Store.YES);
    doc.add(groupField);

    // creator
    Field creatorField =
        new StringField(FIELD_LABEL_CREATE_USER, indexingValue.getCreator(), Field.Store.YES);
    doc.add(creatorField);

    // time
    Field timeField = new LongField(FIELD_LABEL_TIME, indexingValue.getTime(), Field.Store.YES);
    doc.add(timeField);

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      log.debug("adding " + indexingValue.getId());
      writer.addDocument(doc);
    } else {
      log.debug("updating " + indexingValue.getId());
      writer.updateDocument(new Term(FIELD_LABEL_ID, indexingValue.getId()), doc);
    }
  }
Example #24
 @Override
 public void index(Index index) throws EngineException {
   rwl.readLock().lock();
   try {
     IndexWriter writer = this.indexWriter;
     if (writer == null) {
       throw new EngineClosedException(shardId);
     }
     writer.updateDocument(index.uid(), index.doc(), index.analyzer());
     translog.add(new Translog.Index(index));
     dirty = true;
     if (index.refresh()) {
       refresh(new Refresh(false));
     }
   } catch (IOException e) {
     throw new IndexFailedEngineException(shardId, index, e);
   } finally {
     rwl.readLock().unlock();
   }
 }
Example #25
 @Override
 public AddResponse add(InputDocument inputDocument) {
   try {
     if (logger.isDebugEnabled()) {
       logger.debug("adding document...");
     }
     assertIdExist(inputDocument);
     Document document = DocumentTransformUtil.toLuceneDocument(inputDocument, schema);
     indexWriter.updateDocument(
         new Term(schema.getIdName(), document.getFieldable(schema.getIdName()).stringValue()),
         document,
         schema.getAnalyzer());
     updateCount.incrementAndGet();
     if (logger.isDebugEnabled()) {
       logger.debug("add document finish.");
     }
   } catch (IOException e) {
     return new AddResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
   }
   return new AddResponse();
 }
Example #26
  /** {@inheritDoc} */
  public synchronized void add(final String docId, final Document itdoc) {
    if (null == docId) {
      logger.error("No documentId specified. Ignoring addition.");
      return;
    }

    org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);
    org.apache.lucene.document.Field docidPayloadField =
        new org.apache.lucene.document.Field(
            LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED);
    doc.add(docidPayloadField);

    doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED));
    try {
      if (logger.isDebugEnabled()) {
        logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
      }
      writer.updateDocument(docIdTerm(docId), doc);
    } catch (IOException e) {
      logger.error(e);
    }
  }
Example #27
  /**
   * Updates a document.
   *
   * @see IndexWriter#updateDocument(Term, org.apache.lucene.index.IndexDocument)
   */
  public <T extends IndexableField> void updateDocument(Term t, final IndexDocument doc)
      throws IOException {
    if (r.nextInt(5) == 3) {
      w.updateDocuments(
          t,
          new Iterable<IndexDocument>() {

            @Override
            public Iterator<IndexDocument> iterator() {
              return new Iterator<IndexDocument>() {
                boolean done;

                @Override
                public boolean hasNext() {
                  return !done;
                }

                @Override
                public void remove() {
                  throw new UnsupportedOperationException();
                }

                @Override
                public IndexDocument next() {
                  if (done) {
                    throw new IllegalStateException();
                  }
                  done = true;
                  return doc;
                }
              };
            }
          });
    } else {
      w.updateDocument(t, doc);
    }
    maybeCommit();
  }
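The anonymous single-element Iterable above can be written more compactly with the JDK; a hedged equivalent sketch (behavior should match, since updateDocuments just iterates its argument):

  w.updateDocuments(t, Collections.singletonList(doc));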
Example #28

 /**
  * Updates the specified {@link Document} by first deleting the documents containing {@code Term}
  * and then adding the new document. The delete and then add are atomic as seen by a reader on the
  * same index (flush may happen only after the add).
  *
  * @param term The {@link Term} to identify the document(s) to be deleted.
  * @param document The {@link Document} to be added.
  * @throws IOException If Lucene throws IO errors.
  */
 public void upsert(Term term, Document document) throws IOException {
   Log.debug("%s update document %s with term %s", logName, document, term);
   indexWriter.updateDocument(term, document);
 }
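A hedged usage sketch of the upsert wrapper (the indexer instance, field names, and values are hypothetical):

 Document doc = new Document();
 doc.add(new StringField("id", "42", Field.Store.YES));
 doc.add(new TextField("body", "hello world", Field.Store.NO));
 indexer.upsert(new Term("id", "42"), doc); // atomically replaces the document keyed by id "42"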
Example #29
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
Example #30

  /**
   * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with given
   * feature. Finally a new index (name "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }