/**
 * Updates an indexed document.
 *
 * @param writer   the index writer
 * @param term     the term identifying the document to replace
 * @param document the new document
 */
public static void updateIndex(IndexWriter writer, Term term, Document document) {
  try {
    writer.updateDocument(term, document);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
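// A minimal usage sketch for the helper above (not from the original source).
// The index path and the "id"/"title" fields are hypothetical, and a recent
// Lucene (5.x+) API is assumed for the Directory/IndexWriter setup.
try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
  Document doc = new Document();
  doc.add(new StringField("id", "42", Field.Store.YES));
  doc.add(new TextField("title", "updated title", Field.Store.YES));
  // Replaces any document whose "id" term equals "42", or adds it if none exists.
  updateIndex(writer, new Term("id", "42"), doc);
  writer.commit();
}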
private void indexNoCommit(CObj o, boolean onlynew) throws IOException {
  if (o.getDig() == null && o.getId() == null) {
    throw new IOException("Digest or id required!");
  }

  boolean indexit = true;
  Term updateterm = null;

  if (o.getId() != null) {
    updateterm = new Term("id", o.getId());
    Query id0 = new TermQuery(updateterm);
    if (onlynew) {
      CObjList cl = search(id0, 1);
      indexit = (cl.size() == 0);
      cl.close();
    }
  }

  if (o.getDig() != null && o.getId() == null) {
    updateterm = new Term("dig", o.getDig());
    Query id0 = new TermQuery(updateterm);
    if (onlynew) {
      CObjList cl = search(id0, 1);
      indexit = (cl.size() == 0);
      cl.close();
    }
  }

  if (indexit) {
    Document d = o.getDocument();
    writer.updateDocument(updateterm, d);
  }
}
public void update() {
  IndexWriter writer = null;
  try {
    writer = new IndexWriter(
        directory,
        new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    /*
     * Lucene does not provide an in-place update; the "update" here is really
     * the combination of two operations: delete the old document first, then
     * add the new one.
     */
    Document doc = new Document();
    doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    // Deletes the document whose "id" term is "1" and adds the new document (id "11").
    writer.updateDocument(new Term("id", "1"), doc);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
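// For illustration (not in the original): with the same Lucene 3.5 API,
// updateDocument(term, doc) behaves like the delete-then-add pair below,
// except that updateDocument performs both steps atomically with respect
// to readers of the index.
writer.deleteDocuments(new Term("id", "1"));
writer.addDocument(doc);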
@Override
public AddResponse add(Collection<InputDocument> inputDocuments) {
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("adding documents...");
    }
    for (InputDocument inputDocument : inputDocuments) {
      assertIdExist(inputDocument);
    }
    for (Document document : DocumentTransformUtil.toLuceneDocuments(inputDocuments, schema)) {
      indexWriter.updateDocument(
          new Term(schema.getIdName(), document.getFieldable(schema.getIdName()).stringValue()),
          document,
          schema.getAnalyzer());
    }
    updateCount.addAndGet(inputDocuments.size());
    if (logger.isDebugEnabled()) {
      logger.debug("add documents finish.");
    }
  } catch (Exception e) {
    logger.error("add documents error", e);
    return new AddResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  }
  return new AddResponse();
}
/** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#addRevision(String) */
public void addRevision(String revisionName) throws Exception {
  // WARN: Older creation dates might not have milliseconds and hence do not
  // correspond exactly with the revision name, so in order to build the date
  // index correctly one needs to use the creation date.
  Date creationDate = node.getRevision(revisionName).getCreationDate();
  log.debug("Add revision '" + revisionName + "' with creation date '" + creationDate
      + "' to date index ...");

  Document doc = new Document();
  doc.add(new NumericField(CREATION_DATE_FIELD_NAME, Field.Store.YES, true)
      .setLongValue(creationDate.getTime()));
  // doc.add(new Field(CREATION_DATE_FIELD_NAME,
  //     org.apache.lucene.document.DateTools.dateToString(creationDate,
  //         org.apache.lucene.document.DateTools.Resolution.MILLISECOND),
  //     Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field(REVISION_NAME_FIELD_NAME, revisionName, Field.Store.YES,
      Field.Index.NOT_ANALYZED));

  IndexWriter iw = getIndexWriter();
  Term revisionNameTerm = new Term(REVISION_NAME_FIELD_NAME, revisionName);
  iw.updateDocument(revisionNameTerm, doc);
  iw.optimize();
  iw.close();
}
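// Illustrative query sketch (not part of the original class): since the
// creation date is indexed above as a numeric long, revisions in a date range
// can be found with Lucene 3.x's NumericRangeQuery. 'directory', 'from' and
// 'to' are hypothetical placeholders.
Query byDate = NumericRangeQuery.newLongRange(
    CREATION_DATE_FIELD_NAME, from.getTime(), to.getTime(), true, true);
IndexSearcher searcher = new IndexSearcher(IndexReader.open(directory));
TopDocs hits = searcher.search(byDate, 10);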
public void index(NodeInstanceDesc node) throws IOException {
  Document d = new Document();
  d.add(new StringField("processName", node.getName(), Field.Store.YES));
  d.add(new StringField("nodeId", String.valueOf(node.getNodeId()), Field.Store.YES));
  d.add(new StringField("processInstanceId",
      String.valueOf(node.getProcessInstanceId()), Field.Store.YES));
  d.add(new StringField("sessionId", String.valueOf(node.getDeploymentId()), Field.Store.YES));
  // d.add(new TextField(...)); if I need to store large texts that require tokenization

  // To support any-value search
  d.add(new StringField("all", String.valueOf(node.getDeploymentId()), Field.Store.NO));
  d.add(new StringField("all", String.valueOf(node.getProcessInstanceId()), Field.Store.NO));
  d.add(new StringField("all", String.valueOf(node.getNodeId()), Field.Store.NO));
  d.add(new StringField("all", String.valueOf(node.getName()), Field.Store.NO));

  // for suggestion
  d.add(new StringField("suggest", node.getName(), Field.Store.NO));

  // Update by process name. (The original passed a field-only Term, which never
  // matches an indexed value and therefore only ever added new documents.)
  iw.updateDocument(new Term("processName", node.getName()), d);

  DirectoryReader ireader = DirectoryReader.open(iw, true);
  is = new IndexSearcher(ireader);
  // iw.commit(); this should be executed every X minutes
}
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput,
 * put multiple documents into your input file(s). An example of this is in the benchmark module,
 * which can create "line doc" files, one document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      // Recurse into the directory, as promised by the javadoc above.
      String[] files = file.list();
      if (files != null) { // an IO error could occur
        for (String name : files) {
          indexDocs(writer, new File(file, name));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // At least on Windows, some temporary files raise this exception with an
        // "access denied" message; checking if the file can be read doesn't help.
        return;
      }
      try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
            new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("path", file.getPath()), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
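// Hypothetical driver for indexDocs() above (not part of the original snippet),
// assuming the Lucene 4.x demo API it appears to target; the paths are made up:
Directory dir = FSDirectory.open(new File("/tmp/index"));
IndexWriterConfig iwc =
    new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // enables the updateDocument branch
IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, new File("/data/docs"));
writer.close();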
public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
    throws IOException, ConfigurationException {
  Document doc = reader.document(docId);
  doc.removeFields(fieldName);
  Field uri = doc.getField("URI");
  Term term = new Term("URI", uri.stringValue());
  writer.updateDocument(term, doc, analyzer);
}
/** Updates indexed data for a user. */
public void updateIndexData(UserAllParamsDomain bozhu) {
  try {
    writer.updateDocument(new Term("username", bozhu.getUsername()), getDoc(bozhu));
    // commit the update
    writer.commit();
  } catch (IOException e) {
    // The original log string was redacted; the username argument is assumed.
    logger.info("User: " + bozhu.getUsername() + "'s indexed data update error.");
  }
}
public void updateDocument(EntityBaseBean bean) throws IcatException {
  Document doc = buildDoc(bean);
  String id = bean.getClass().getSimpleName() + ":" + bean.getId();
  try {
    iwriter.updateDocument(new Term("id", id), doc);
  } catch (IOException e) {
    throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage());
  }
}
@Override
public EngineException[] bulk(Bulk bulk) throws EngineException {
  EngineException[] failures = null;
  rwl.readLock().lock();
  try {
    IndexWriter writer = this.indexWriter;
    if (writer == null) {
      throw new EngineClosedException(shardId);
    }
    for (int i = 0; i < bulk.ops().length; i++) {
      Operation op = bulk.ops()[i];
      if (op == null) {
        continue;
      }
      try {
        switch (op.opType()) {
          case CREATE:
            Create create = (Create) op;
            writer.addDocument(create.doc(), create.analyzer());
            translog.add(new Translog.Create(create));
            break;
          case INDEX:
            Index index = (Index) op;
            writer.updateDocument(index.uid(), index.doc(), index.analyzer());
            translog.add(new Translog.Index(index));
            break;
          case DELETE:
            Delete delete = (Delete) op;
            writer.deleteDocuments(delete.uid());
            translog.add(new Translog.Delete(delete));
            break;
        }
      } catch (Exception e) {
        if (failures == null) {
          failures = new EngineException[bulk.ops().length];
        }
        switch (op.opType()) {
          case CREATE:
            failures[i] = new CreateFailedEngineException(shardId, (Create) op, e);
            break;
          case INDEX:
            failures[i] = new IndexFailedEngineException(shardId, (Index) op, e);
            break;
          case DELETE:
            failures[i] = new DeleteFailedEngineException(shardId, (Delete) op, e);
            break;
        }
      }
    }
    dirty = true;
  } finally {
    rwl.readLock().unlock();
  }
  return failures;
}
public static boolean addOrUpdateIndex(MapBean mdata) {
  boolean success = false;
  try {
    IndexWriter writer = getIndexWriter();
    // deleteIndex(mdata);
    Document doc = new Document();
    doc.add(new Field("id", "" + mdata.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("name", nullToBlank(mdata.getName()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("address", nullToBlank(mdata.getAddress()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("city", nullToBlank(mdata.getCity()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("num", nullToBlank(mdata.getNum()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("eastNew", nullToBlank(mdata.getEastNew()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("northNew", nullToBlank(mdata.getNorthNew()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("datatype", nullToBlank(mdata.getDatatype()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("phone", nullToBlank(mdata.getPhone()), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("geom",
        nullToBlank(mdata.getEastNew()) + "," + nullToBlank(mdata.getNorthNew()),
        Field.Store.YES, Field.Index.ANALYZED));
    writer.updateDocument(new Term("id", "" + mdata.getId()), doc);
    writer.optimize();
    writer.commit();
    writer.close();
    success = true;
    System.out.println("Index built successfully!");
  } catch (Exception e) {
    e.printStackTrace();
  }
  return success;
}
/**
 * We assume that the initial indexing has been done and a set of reference objects has been found
 * and indexed in the separate directory. However, further documents were added and they now need
 * to get a ranked list of reference objects. So we (i) get all these new documents missing the
 * field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  int numDocs = reader.numDocs();
  boolean hasDeletions = reader.hasDeletions();
  int countUpdated = 0;

  IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
  perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
  PerFieldAnalyzerWrapper aWrapper =
      new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

  IndexWriter iw = new IndexWriter(
      FSDirectory.open(new File(indexPath)),
      new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  StringBuilder sb = new StringBuilder(256);
  // Needed to check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);
  for (int i = 0; i < numDocs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
    Document document = reader.document(i);
    if (document.getField("ro-order") == null) { // if the field is not here we create it.
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);
      countUpdated++;
    }
    // progress report
    progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    // debug: System.out.println("countUpdated = " + countUpdated);
  }
  iw.commit();
  iw.close();
}
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
  Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
  for (TestFieldSetting field : testDocs[0].fieldSettings) {
    if (field.storedPayloads) {
      mapping.put(field.name, new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
          TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
          filter = new TypeAsPayloadTokenFilter(filter);
          return new TokenStreamComponents(tokenizer, filter);
        }
      });
    }
  }
  PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
      new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

  Directory dir = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
  conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  for (TestDoc doc : testDocs) {
    Document d = new Document();
    d.add(new Field("id", doc.id, StringField.TYPE_STORED));
    for (int i = 0; i < doc.fieldContent.length; i++) {
      FieldType type = new FieldType(TextField.TYPE_STORED);
      TestFieldSetting fieldSetting = doc.fieldSettings[i];
      type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
      type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
      type.setStoreTermVectorPositions(
          fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
      type.setStoreTermVectors(true);
      type.freeze();
      d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
    }
    writer.updateDocument(new Term("id", doc.id), d);
    writer.commit();
  }
  writer.close();
  return DirectoryReader.open(dir);
}
@Override
public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
    throws IOException {
  super.updateDocument(term, doc, analyzer);
  try {
    if (waitAfterUpdate) {
      signal.countDown();
      latch.await();
    }
  } catch (InterruptedException e) {
    throw new ThreadInterruptedException(e);
  }
}
/**
 * Updates a previous suggestion, matching the exact same text as before. Use this to change the
 * weight or payload of an already added suggestion. If you know this text is not already present
 * you can use {@link #add} instead. After adding or updating a batch of new suggestions, you must
 * call {@link #refresh} in the end in order to see the suggestions in {@link #lookup}.
 */
public void update(BytesRef text, long weight, BytesRef payload) throws IOException {
  String textString = text.utf8ToString();
  Document doc = new Document();
  FieldType ft = getTextFieldType();
  doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
  doc.add(new Field("textgrams", textString, ft));
  doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
  doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
  doc.add(new NumericDocValuesField("weight", weight));
  if (payload != null) {
    doc.add(new BinaryDocValuesField("payloads", payload));
  }
  writer.updateDocument(new Term(EXACT_TEXT_FIELD_NAME, textString), doc);
}
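// Hedged usage sketch: assuming the enclosing class is Lucene's
// AnalyzingInfixSuggester (whose update() this matches), changing a weight and
// making it visible to lookup() would look like this; 'suggester' is a
// hypothetical instance:
suggester.update(new BytesRef("lucene in action"), 10L, new BytesRef("payload"));
suggester.refresh(); // required before lookup() reflects the change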
protected void addDocument() throws IOException {
  if (currentDocument != null) {
    if (updating) {
      writer.updateDocument(new Term(IndexedField.KEY_FIELD, keyEncodedString), currentDocument);
      logger.debug("Updated {}", currentDocument);
    } else {
      writer.addDocument(currentDocument);
      logger.debug("Added {}", currentDocument);
    }
    documentCount++;
    currentDocument = null;
  }
}
private static boolean update(Index index, VersionValue versionValue, IndexWriter indexWriter)
    throws IOException {
  boolean created;
  if (versionValue != null) {
    created = versionValue.delete(); // we have a delete which is not GC'ed...
  } else {
    created = false;
  }
  if (index.docs().size() > 1) {
    indexWriter.updateDocuments(index.uid(), index.docs());
  } else {
    indexWriter.updateDocument(index.uid(), index.docs().get(0));
  }
  return created;
}
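// Background sketch (not from the original source): updateDocuments(term, docs)
// deletes all documents matching the term and adds the whole block atomically,
// keeping the documents contiguous in the segment, which is what makes it
// suitable for nested (parent/child) documents. 'childDoc' and 'parentDoc' are
// hypothetical:
List<Document> block = new ArrayList<>();
block.add(childDoc);  // children first
block.add(parentDoc); // by Lucene's block convention, the parent comes last
indexWriter.updateDocuments(new Term("uid", "doc-1"), block);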
/** Indexes a single document. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path". Use a
    // field that is indexed (i.e. searchable), but don't tokenize
    // the field into separate words and don't index term frequency
    // or positional information:
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    String f = file.getFileName().toString();
    f = f.replaceFirst("\\.txt", "");
    doc.add(new StringField("filename", f, Field.Store.YES));

    // Add the last modified date of the file as a field named "modified".
    // Use a LongField that is indexed (i.e. efficiently filterable with
    // NumericRangeFilter). This indexes to milli-second resolution, which
    // is often too fine. You could instead create a number based on
    // year/month/day/hour/minutes/seconds, down to the resolution you require.
    // For example the long value 2011021714 would mean
    // February 17, 2011, 2-3 PM.
    doc.add(new LongField("modified", lastModified, Field.Store.NO));

    // Add the contents of the file to a field named "contents". Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new TextField("contents",
        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      // log("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed) so
      // we use updateDocument instead to replace the old one matching the exact
      // path, if present:
      // log("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
public void updateDocument(Term term, Document document, String collectionName) {
  IndexWriter indexWriter = indexWriterManager.getIndexWriter(collectionName);
  try {
    logger.info("collectionName : {}", collectionName);
    logger.info("update indexing start................{}", term);
    indexWriter.updateDocument(term, document);
    logger.info("end");
  } catch (IOException e) {
    logger.error("error : ", e);
    throw new RuntimeException("An error occurred during indexing. [" + e.getMessage() + "]");
  }
}
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
  if (shiftDays != 0) {
    IndexWriter iw = ctx.getIndexWriter();
    for (int docNum = 0; docNum < ctx.getIndexReader().maxDoc(); docNum++) {
      if (!ctx.getIndexReader().isDeleted(docNum)) {
        Document doc = ctx.getIndexReader().document(docNum);
        String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);
        if (lastModified != null) {
          long lm = Long.parseLong(lastModified);
          lm = lm + (shiftDays * A_DAY_MILLIS);
          doc.removeFields(ArtifactInfo.LAST_MODIFIED);
          doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm),
              Field.Store.YES, Field.Index.NO));
          iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
        }
      }
    }
    iw.optimize();
    iw.close();

    // shift the timestamp too
    if (ctx.getTimestamp() != null) {
      ctx.updateTimestamp(true,
          new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
    } else {
      ctx.updateTimestamp(true,
          new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
    }
  }
}
public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {
  final String field_name = "text";
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  if (random().nextBoolean()) {
    mockAnalyzer.setOffsetGap(random().nextInt(100));
  }
  // index into a random directory
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field(field_name, "la la", type));
  doc.add(new Field(field_name, "foo bar foo bar foo", type));

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.updateDocument(new Term("id", "1"), doc);
  writer.commit();
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);

  // Index document in Memory index
  MemoryIndex memIndex = new MemoryIndex(true);
  memIndex.addField(field_name, "la la", mockAnalyzer);
  memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

  // compare term vectors
  Terms ramTv = reader.getTermVector(0, field_name);
  IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
  TestUtil.checkReader(memIndexReader);
  Terms memTv = memIndexReader.getTermVector(0, field_name);
  compareTermVectors(ramTv, memTv, field_name);
  memIndexReader.close();
  reader.close();
  dir.close();
}
private void addDoc(IndexWriter writer, IndexingValue indexingValue) throws Exception {
  Document doc = new Document();
  // type
  Field typeField = new IntField(FIELD_LABEL_TYPE, indexingValue.getType(), Field.Store.YES);
  doc.add(typeField);
  // id
  Field idField = new StringField(FIELD_LABEL_ID, indexingValue.getId(), Field.Store.YES);
  doc.add(idField);
  // title
  doc.add(new TextField(FIELD_LABEL_TITLE, indexingValue.getTitle(), Field.Store.YES));
  // contents
  doc.add(new TextField(FIELD_LABEL_CONTENTS, indexingValue.getContents(), Field.Store.YES));
  // tags
  Field tagField = new TextField(FIELD_LABEL_TAGS, indexingValue.getTags(), Field.Store.YES);
  doc.add(tagField);
  // users with access
  Field userField =
      new TextField(FIELD_LABEL_USERS, indexingValue.getUsers().toString(), Field.Store.YES);
  doc.add(userField);
  // groups with access
  Field groupField =
      new TextField(FIELD_LABEL_GROUPS, indexingValue.getGroups().toString(), Field.Store.YES);
  doc.add(groupField);
  // creator
  Field creatorField =
      new StringField(FIELD_LABEL_CREATE_USER, indexingValue.getCreator(), Field.Store.YES);
  doc.add(creatorField);
  // timestamp
  Field timeField = new LongField(FIELD_LABEL_TIME, indexingValue.getTime(), Field.Store.YES);
  doc.add(timeField);

  if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    log.debug("adding " + indexingValue.getId());
    writer.addDocument(doc);
  } else {
    log.debug("updating " + indexingValue.getId());
    writer.updateDocument(new Term(FIELD_LABEL_ID, indexingValue.getId()), doc);
  }
}
@Override
public void index(Index index) throws EngineException {
  rwl.readLock().lock();
  try {
    IndexWriter writer = this.indexWriter;
    if (writer == null) {
      throw new EngineClosedException(shardId);
    }
    writer.updateDocument(index.uid(), index.doc(), index.analyzer());
    translog.add(new Translog.Index(index));
    dirty = true;
    if (index.refresh()) {
      refresh(new Refresh(false));
    }
  } catch (IOException e) {
    throw new IndexFailedEngineException(shardId, index, e);
  } finally {
    rwl.readLock().unlock();
  }
}
@Override
public AddResponse add(InputDocument inputDocument) {
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("adding document...");
    }
    assertIdExist(inputDocument);
    Document document = DocumentTransformUtil.toLuceneDocument(inputDocument, schema);
    indexWriter.updateDocument(
        new Term(schema.getIdName(), document.getFieldable(schema.getIdName()).stringValue()),
        document,
        schema.getAnalyzer());
    updateCount.incrementAndGet();
    if (logger.isDebugEnabled()) {
      logger.debug("add document finish.");
    }
  } catch (IOException e) {
    return new AddResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
  }
  return new AddResponse();
}
/** @inheritDoc */
public synchronized void add(final String docId, final Document itdoc) {
  if (null == docId) {
    logger.error("No documentId specified. Ignoring addition.");
    return;
  }
  org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);
  org.apache.lucene.document.Field docidPayloadField = new org.apache.lucene.document.Field(
      LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED);
  doc.add(docidPayloadField);
  doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED));
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
    }
    writer.updateDocument(docIdTerm(docId), doc);
  } catch (IOException e) {
    logger.error(e);
  }
}
/**
 * Updates a document.
 *
 * @see IndexWriter#updateDocument(Term, org.apache.lucene.index.IndexDocument)
 */
public <T extends IndexableField> void updateDocument(Term t, final IndexDocument doc)
    throws IOException {
  if (r.nextInt(5) == 3) {
    w.updateDocuments(t, new Iterable<IndexDocument>() {
      @Override
      public Iterator<IndexDocument> iterator() {
        return new Iterator<IndexDocument>() {
          boolean done;

          @Override
          public boolean hasNext() {
            return !done;
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }

          @Override
          public IndexDocument next() {
            if (done) {
              throw new IllegalStateException();
            }
            done = true;
            return doc;
          }
        };
      }
    });
  } else {
    w.updateDocument(t, doc);
  }
  maybeCommit();
}
/**
 * Updates the specified {@link Document} by first deleting the documents containing {@code term}
 * and then adding the new document. The delete and then add are atomic as seen by a reader on
 * the same index (flush may happen only after the add).
 *
 * @param term The {@link Term} to identify the document(s) to be deleted.
 * @param document The {@link Document} to be added.
 * @throws IOException If Lucene throws IO errors.
 */
public void upsert(Term term, Document document) throws IOException {
  Log.debug("%s update document %s with term %s", logName, document, term);
  indexWriter.updateDocument(term, document);
}
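// Minimal usage sketch for upsert() above; the "key" field name and values are
// hypothetical placeholders, not from the original code.
Document fresh = new Document();
fresh.add(new StringField("key", "row-17", Field.Store.YES));
fresh.add(new TextField("body", "replacement content", Field.Store.NO));
// Atomically replaces any document whose "key" term is "row-17", or inserts it.
upsert(new Term("key", "row-17"), fresh);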
@Test
public void testMixedChecksums() throws IOException {
  final ShardId shardId = new ShardId(new Index("index"), 1);
  DirectoryService directoryService = new LuceneManagedDirectoryService(random());
  Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService,
      randomDistributor(directoryService), new DummyShardLock(shardId));
  // this time random codec....
  IndexWriter writer = new IndexWriter(store.directory(),
      newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec()));
  int docs = 1 + random().nextInt(100);
  for (int i = 0; i < docs; i++) {
    Document doc = new Document();
    doc.add(new TextField("id", "" + i,
        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new SortedDocValuesField("dv",
        new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    writer.addDocument(doc);
  }
  if (random().nextBoolean()) {
    for (int i = 0; i < docs; i++) {
      if (random().nextBoolean()) {
        Document doc = new Document();
        doc.add(new TextField("id", "" + i,
            random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
            random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        writer.updateDocument(new Term("id", "" + i), doc);
      }
    }
  }
  if (random().nextBoolean()) {
    DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
  }
  Store.MetadataSnapshot metadata;
  // check before we committed
  try {
    store.getMetadata();
    fail("no index present - expected exception");
  } catch (IndexNotFoundException ex) {
    // expected
  }
  assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
  writer.commit();
  writer.close();
  Store.LegacyChecksums checksums = new Store.LegacyChecksums();
  metadata = store.getMetadata();
  assertThat(metadata.asMap().isEmpty(), is(false));
  for (StoreFileMetaData meta : metadata) {
    try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
      if (meta.checksum() == null) {
        String checksum = null;
        try {
          CodecUtil.retrieveChecksum(input);
          fail("expected a corrupt index - postings format has no checksums");
        } catch (CorruptIndexException | IndexFormatTooOldException
            | IndexFormatTooNewException ex) {
          try (ChecksumIndexInput checksumIndexInput =
              store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
            checksumIndexInput.seek(meta.length());
            checksum = Store.digestToString(checksumIndexInput.getChecksum());
          }
          // fine - it's a postings format without checksums
          checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
        }
      } else {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat("File: " + meta.name() + " has a different checksum",
            meta.checksum(), equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
      }
    }
  }
  assertConsistent(store, metadata);
  checksums.write(store);
  metadata = store.getMetadata();
  assertThat(metadata.asMap().isEmpty(), is(false));
  for (StoreFileMetaData meta : metadata) {
    assertThat("file: " + meta.name() + " has a null checksum",
        meta.checksum(), not(nullValue()));
    if (meta.hasLegacyChecksum()) {
      try (ChecksumIndexInput checksumIndexInput =
          store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
        checksumIndexInput.seek(meta.length());
        assertThat(meta.checksum(),
            equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
      }
    } else {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat("File: " + meta.name() + " has a different checksum",
            meta.checksum(), equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
      }
    }
  }
  assertConsistent(store, metadata);
  TestUtil.checkIndex(store.directory());
  assertDeleteContent(store, directoryService);
  IOUtils.close(store);
}
/**
 * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
 * creates ordered lists of reference object positions for each data item in the index with the
 * given feature. Finally a new index (name "<indexPath>-ms") is created where all the original
 * documents as well as the new data are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  int numDocs = reader.numDocs();

  if (numDocs < numReferenceObjects) {
    throw new UnsupportedOperationException("Too few documents in index.");
  }

  // progress report
  progress.setNumDocsAll(numDocs);
  progress.setCurrentState(State.RoSelection);

  boolean hasDeletions = reader.hasDeletions();

  // init reference objects:
  IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
  HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

  double numDocsDouble = (double) numDocs;
  while (referenceObjsIds.size() < numReferenceObjects) {
    referenceObjsIds.add((int) (numDocsDouble * Math.random()));
  }
  int count = 0;

  if (hasDeletions) {
    System.err.println("WARNING: There are deleted docs in your index. You should "
        + "optimize your index before using this method.");
  }

  // progress report
  progress.setCurrentState(State.RoIndexing);

  // find them in the index and put them into a separate index:
  for (int i : referenceObjsIds) {
    count++;
    Document document = reader.document(i);
    document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
    iw.addDocument(document);
  }
  iw.commit();
  iw.close();

  // progress report
  progress.setCurrentState(State.Indexing);

  // now find the reference objects for each entry ;)
  IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
  analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
  PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
      new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

  iw = new IndexWriter(
      FSDirectory.open(new File(indexPath)),
      new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  StringBuilder sb = new StringBuilder(256);
  // Needed to check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);
  for (int i = 0; i < numDocs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
    Document document = reader.document(i);
    ImageSearchHits hits = searcher.search(document, readerRo);
    sb.delete(0, sb.length());
    for (int j = 0; j < numReferenceObjectsUsed; j++) {
      sb.append(hits.doc(j).getValues("ro-id")[0]);
      sb.append(' ');
    }
    // System.out.println(sb.toString());
    document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
    iw.updateDocument(
        new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
            document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
        document);
    // progress report
    progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
  }
  iw.commit();
  iw.close();

  // progress report
  progress.setCurrentState(State.Idle);
}