static { // Id IdFieldType = new FieldType(); IdFieldType.setStored(true); IdFieldType.setTokenized(false); IdFieldType.setOmitNorms(true); IdFieldType.setIndexOptions(IndexOptions.DOCS); IdFieldType.freeze(); // content ContentFieldType = new FieldType(); ContentFieldType.setStored(false); ContentFieldType.setTokenized(true); ContentFieldType.setOmitNorms(false); ContentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); ContentFieldType.freeze(); // title TitleFieldType = new FieldType(); TitleFieldType.setStored(true); TitleFieldType.setTokenized(true); TitleFieldType.setOmitNorms(false); TitleFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); TitleFieldType.freeze(); // onlyForStore OnlyStoreFieldType = new FieldType(); OnlyStoreFieldType.setStored(true); OnlyStoreFieldType.setTokenized(false); OnlyStoreFieldType.setOmitNorms(false); OnlyStoreFieldType.setIndexOptions(IndexOptions.NONE); OnlyStoreFieldType.freeze(); }
static { LABEL_FIELD_TYPE.setStored(true); LABEL_FIELD_TYPE.setTokenized(true); LABEL_FIELD_TYPE.freeze(); URI_FIELD_TYPE.setStored(true); URI_FIELD_TYPE.setTokenized(false); URI_FIELD_TYPE.freeze(); FIELD_TYPE.setStored(true); FIELD_TYPE.freeze(); }
/** * Translates the pre-4.0 enums for specifying how a field should be indexed into the 4.0 {@link * FieldType} approach. * * @deprecated This is here only to ease transition from the pre-4.0 APIs. */ @Deprecated public static final FieldType translateFieldType( Store store, Index index, TermVector termVector) { final FieldType ft = new FieldType(); ft.setStored(store == Store.YES); switch (index) { case ANALYZED: ft.setIndexed(true); ft.setTokenized(true); break; case ANALYZED_NO_NORMS: ft.setIndexed(true); ft.setTokenized(true); ft.setOmitNorms(true); break; case NOT_ANALYZED: ft.setIndexed(true); ft.setTokenized(false); break; case NOT_ANALYZED_NO_NORMS: ft.setIndexed(true); ft.setTokenized(false); ft.setOmitNorms(true); break; case NO: break; } switch (termVector) { case NO: break; case YES: ft.setStoreTermVectors(true); break; case WITH_POSITIONS: ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); break; case WITH_OFFSETS: ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); break; case WITH_POSITIONS_OFFSETS: ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); break; } ft.freeze(); return ft; }
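For orientation, here is a hedged usage sketch of the helper above; the particular pre-4.0 combination chosen and the assertions on the resulting FieldType are illustrative assumptions derived from the switch logic, not taken from the original source.

// Sketch: Store.YES + Index.ANALYZED + TermVector.WITH_POSITIONS_OFFSETS should yield a
// stored, indexed, tokenized FieldType with term vectors, positions and offsets enabled.
FieldType ft = translateFieldType(Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
assert ft.stored() && ft.indexed() && ft.tokenized();
assert ft.storeTermVectors() && ft.storeTermVectorPositions() && ft.storeTermVectorOffsets();
assert !ft.omitNorms(); // only the *_NO_NORMS index modes set omitNorms(true)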
static { TYPE_NOT_STORED.setOmitNorms(true); TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS); TYPE_NOT_STORED.setTokenized(false); TYPE_NOT_STORED.freeze(); TYPE_STORED.setOmitNorms(true); TYPE_STORED.setIndexOptions(IndexOptions.DOCS); TYPE_STORED.setStored(true); TYPE_STORED.setTokenized(false); TYPE_STORED.freeze(); }
/* * Test per field codec support - adding fields with random codecs */ @Test public void testStressPerFieldCodec() throws IOException { Directory dir = newDirectory(random()); final int docsPerRound = 97; int numRounds = atLeast(1); for (int i = 0; i < numRounds; i++) { int num = TestUtil.nextInt(random(), 30, 60); IndexWriterConfig config = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, new MockAnalyzer(random())); config.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = newWriter(dir, config); for (int j = 0; j < docsPerRound; j++) { final Document doc = new Document(); for (int k = 0; k < num; k++) { FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setTokenized(random().nextBoolean()); customType.setOmitNorms(random().nextBoolean()); Field field = newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType); doc.add(field); } writer.addDocument(doc); } if (random().nextBoolean()) { writer.forceMerge(1); } writer.commit(); assertEquals((i + 1) * docsPerRound, writer.maxDoc()); writer.close(); } dir.close(); }
public Builder( String index, @Nullable Settings indexSettings, RootObjectMapper.Builder builder) { this.index = index; this.indexSettings = indexSettings; this.builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath(1)); this.rootObjectMapper = builder.build(builderContext); IdFieldMapper idFieldMapper = new IdFieldMapper(); if (indexSettings != null) { String idIndexed = indexSettings.get("index.mapping._id.indexed"); if (idIndexed != null && Booleans.parseBoolean(idIndexed, false)) { FieldType fieldType = new FieldType(IdFieldMapper.Defaults.FIELD_TYPE); fieldType.setTokenized(false); idFieldMapper = new IdFieldMapper(fieldType); } } this.rootMappers.put(IdFieldMapper.class, idFieldMapper); // add default mappers; order is important (for example, the analyzer should come before the rest // to set context.analyzer) this.rootMappers.put(SizeFieldMapper.class, new SizeFieldMapper()); this.rootMappers.put(IndexFieldMapper.class, new IndexFieldMapper()); this.rootMappers.put(SourceFieldMapper.class, new SourceFieldMapper()); this.rootMappers.put(TypeFieldMapper.class, new TypeFieldMapper()); this.rootMappers.put(AnalyzerMapper.class, new AnalyzerMapper()); this.rootMappers.put(AllFieldMapper.class, new AllFieldMapper()); this.rootMappers.put(BoostFieldMapper.class, new BoostFieldMapper()); this.rootMappers.put(RoutingFieldMapper.class, new RoutingFieldMapper()); this.rootMappers.put(TimestampFieldMapper.class, new TimestampFieldMapper()); this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper()); this.rootMappers.put(UidFieldMapper.class, new UidFieldMapper()); // don't add the parent field; by default it's "null" }
/** * Used for adding a document when a field needs to be created from a type and a string. * * <p>By default, the indexed value is the same as the stored value (taken from toInternal()). * Having a different representation for external, internal, and indexed would present quite a few * problems given the current Lucene architecture. An analyzer for adding docs would need to * translate internal->indexed while an analyzer for querying would need to translate * external->indexed. * * <p>The only other alternative to having internal==indexed would be to have internal==external. * In this case, toInternal should convert to the indexed representation, toExternal() should do * nothing, and createField() should *not* call toInternal, but use the external value and set * tokenized=true to get Lucene to convert to the internal(indexed) form. :TODO: clean up and * clarify this explanation. * * @see #toInternal */ public StorableField createField(SchemaField field, Object value, float boost) { if (!field.indexed() && !field.stored()) { if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field); return null; } String val; try { val = toInternal(value.toString()); } catch (RuntimeException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error while creating field '" + field + "' from value '" + value + "'", e); } if (val == null) return null; org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType(); newType.setTokenized(field.isTokenized()); newType.setStored(field.stored()); newType.setOmitNorms(field.omitNorms()); newType.setIndexOptions(field.indexed() ? getIndexOptions(field, val) : IndexOptions.NONE); newType.setStoreTermVectors(field.storeTermVector()); newType.setStoreTermVectorOffsets(field.storeTermOffsets()); newType.setStoreTermVectorPositions(field.storeTermPositions()); newType.setStoreTermVectorPayloads(field.storeTermPayloads()); return createField(field.getName(), val, newType, boost); }
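To make the internal==indexed convention described above concrete, a hedged sketch of a custom field type follows; the class name and the normalization it applies are hypothetical and not part of Solr, but it relies only on the documented rule that whatever toInternal() returns is what createField() stores and indexes.

// Hypothetical example: because createField() runs the value through toInternal(),
// the trimmed, lower-cased form below is what ends up both indexed and stored.
public class NormalizedTextField extends org.apache.solr.schema.TextField {
  @Override
  public String toInternal(String val) {
    return val == null ? null : val.trim().toLowerCase(java.util.Locale.ROOT);
  }
}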
// LUCENE-325: test forceMergeDeletes without waiting, when // many adjacent merges are required public void testForceMergeDeletes3() throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())) .setMaxBufferedDocs(2) .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) .setMergePolicy(newLogMergePolicy(50))); FieldType customType = new FieldType(); customType.setStored(true); FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.setTokenized(false); customType1.setStoreTermVectors(true); customType1.setStoreTermVectorPositions(true); customType1.setStoreTermVectorOffsets(true); Document document = new Document(); Field storedField = newField("stored", "stored", customType); document.add(storedField); Field termVectorField = newField("termVector", "termVector", customType1); document.add(termVectorField); Field idField = newStringField("id", "", Field.Store.NO); document.add(idField); for (int i = 0; i < 98; i++) { idField.setStringValue("" + i); writer.addDocument(document); } writer.close(); IndexReader ir = DirectoryReader.open(dir); assertEquals(98, ir.maxDoc()); assertEquals(98, ir.numDocs()); ir.close(); IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE); writer = new IndexWriter(dir, dontMergeConfig); for (int i = 0; i < 98; i += 2) { writer.deleteDocuments(new Term("id", "" + i)); } writer.close(); ir = DirectoryReader.open(dir); assertEquals(49, ir.numDocs()); ir.close(); writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(3))); writer.forceMergeDeletes(false); writer.close(); ir = DirectoryReader.open(dir); assertEquals(49, ir.maxDoc()); assertEquals(49, ir.numDocs()); ir.close(); dir.close(); }
static { TYPE.setIndexed(true); TYPE.setOmitNorms(true); TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS); TYPE.setTokenized(true); TYPE.setStoreTermVectors(true); TYPE.freeze(); }
static { FIELD_TYPE.setIndexed(true); FIELD_TYPE.setTokenized(false); FIELD_TYPE.setStored(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); FIELD_TYPE.freeze(); }
private static FieldType idFieldType(Settings indexSettings) { FieldType fieldType = new FieldType(Defaults.FIELD_TYPE); boolean pre2x = Version.indexCreated(indexSettings).before(Version.V_2_0_0); if (pre2x && indexSettings.getAsBoolean("index.mapping._id.indexed", true) == false) { fieldType.setTokenized(false); } return fieldType; }
@Test public void testName() throws Exception { FieldType TYPE_STORED = new FieldType(); TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); TYPE_STORED.setTokenized(false); TYPE_STORED.setStored(true); TYPE_STORED.freeze(); System.out.println(TYPE_STORED); }
static { FIELD_TYPE.setIndexed(true); FIELD_TYPE.setTokenized(false); FIELD_TYPE.setStored(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions( FieldInfo.IndexOptions .DOCS_AND_FREQS_AND_POSITIONS); // we store payload (otherwise, we really need just // docs) FIELD_TYPE.freeze(); NESTED_FIELD_TYPE.setIndexed(true); NESTED_FIELD_TYPE.setTokenized(false); NESTED_FIELD_TYPE.setStored(false); NESTED_FIELD_TYPE.setOmitNorms(true); // we can set this to another index option when we move away from storing payload.. // NESTED_FIELD_TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); NESTED_FIELD_TYPE.freeze(); }
static { UID_FIELD_TYPE.setIndexed(true); UID_FIELD_TYPE.setTokenized(false); UID_FIELD_TYPE.setStored(true); UID_FIELD_TYPE.setOmitNorms(true); UID_FIELD_TYPE.setIndexOptions( FieldInfo.IndexOptions .DOCS_AND_FREQS_AND_POSITIONS); // we store payload (otherwise, we really need just // docs) UID_FIELD_TYPE.freeze(); }
public void testChangeGaps() throws Exception { // LUCENE-5324: check that it is possible to change the wrapper's gaps final int positionGap = random().nextInt(1000); final int offsetGap = random().nextInt(1000); final Analyzer delegate = new MockAnalyzer(random()); final Analyzer a = new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) { @Override protected Analyzer getWrappedAnalyzer(String fieldName) { return delegate; } @Override public int getPositionIncrementGap(String fieldName) { return positionGap; } @Override public int getOffsetGap(String fieldName) { return offsetGap; } }; final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a); final Document doc = new Document(); final FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS); ft.setTokenized(true); ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); doc.add(new Field("f", "a", ft)); doc.add(new Field("f", "a", ft)); writer.addDocument(doc); final LeafReader reader = getOnlySegmentReader(writer.getReader()); final Fields fields = reader.getTermVectors(0); final Terms terms = fields.terms("f"); final TermsEnum te = terms.iterator(); assertEquals(new BytesRef("a"), te.next()); final PostingsEnum dpe = te.postings(null, PostingsEnum.ALL); assertEquals(0, dpe.nextDoc()); assertEquals(2, dpe.freq()); assertEquals(0, dpe.nextPosition()); assertEquals(0, dpe.startOffset()); final int endOffset = dpe.endOffset(); assertEquals(1 + positionGap, dpe.nextPosition()); assertEquals(1 + endOffset + offsetGap, dpe.endOffset()); assertEquals(null, te.next()); reader.close(); writer.close(); writer.w.getDirectory().close(); }
/** * Construct Indexer * * @param directory the main BlackLab index directory * @param create if true, creates a new index; otherwise, appends to existing index * @param docIndexerClass how to index the files, or null to autodetect * @param indexTemplateFile JSON file to use as template for index structure / metadata (if * creating new index) * @throws DocumentFormatException if no DocIndexer was specified and autodetection failed * @throws IOException */ public Indexer( File directory, boolean create, Class<? extends DocIndexer> docIndexerClass, File indexTemplateFile) throws DocumentFormatException, IOException { this.docIndexerClass = docIndexerClass; searcher = Searcher.openForWriting(directory, create, indexTemplateFile); if (!create) searcher.getIndexStructure().setModified(); if (this.docIndexerClass == null) { // No DocIndexer supplied; try to detect it from the index // metadata. String formatId = searcher.getIndexStructure().getDocumentFormat(); if (formatId != null && formatId.length() > 0) setDocIndexer(DocumentFormats.getIndexerClass(formatId)); else { throw new DocumentFormatException("Cannot detect document format for index!"); } } metadataFieldTypeTokenized = new FieldType(); metadataFieldTypeTokenized.setStored(true); metadataFieldTypeTokenized.setIndexed(true); metadataFieldTypeTokenized.setTokenized(true); metadataFieldTypeTokenized.setOmitNorms(true); // @@@ <-- depending on setting? metadataFieldTypeTokenized.setStoreTermVectors(true); metadataFieldTypeTokenized.setStoreTermVectorPositions(true); metadataFieldTypeTokenized.setStoreTermVectorOffsets(true); metadataFieldTypeTokenized.freeze(); metadataFieldTypeUntokenized = new FieldType(metadataFieldTypeTokenized); metadataFieldTypeUntokenized.setTokenized(false); metadataFieldTypeUntokenized.freeze(); }
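A minimal construction sketch for the constructor above; the directory path is a placeholder and MyDocIndexer stands in for whatever DocIndexer implementation fits the input format (it is not a real BlackLab class).

// Create a new BlackLab index in indexDir; passing an explicit DocIndexer class skips
// format autodetection, and the template argument may be null when none is needed.
File indexDir = new File("/data/blacklab-index"); // placeholder path
Indexer indexer = new Indexer(indexDir, true, MyDocIndexer.class, null);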
private QueryEngine(File path) throws FileNotFoundException, UnsupportedEncodingException, IOException { dir = new RAMDirectory(); StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, sa); iwc.setSimilarity(new DefaultSimilarity()); // set the similarity before creating the writer so it takes effect IndexWriter iw = new IndexWriter(dir, iwc); File[] files = path.listFiles(); FileInputStream fis; for (File fi : files) { String out = ""; String curline; BufferedReader br = new BufferedReader(new FileReader(fi)); while ((curline = br.readLine()) != null) { out += (curline + "\n"); } br.close(); fis = new FileInputStream(fi); Document doc = new Document(); doc.add(new StringField("path", fi.getAbsolutePath(), Field.Store.YES)); // doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, // "UTF-8")))); FieldType ft = new FieldType(); ft.setIndexed(true); ft.setTokenized(true); ft.setStored(true); ft.setStoreTermVectors(true); doc.add(new Field("contents", out, ft)); // use the FieldType built above instead of the deprecated Store/Index/TermVector constructor iw.addDocument(doc); fis.close(); } iw.close(); IndexReader ir = DirectoryReader.open(dir); ir2 = ir; is = new IndexSearcher(ir); is.setSimilarity(new DefaultSimilarity()); qp = new QueryParser(Version.LUCENE_40, "contents", sa); }
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException { Directory ramDir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer); Document doc = new Document(); FieldType fieldType = new FieldType(); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); fieldType.setTokenized(true); fieldType.setStored(true); Field field = new Field(FIELD, content, fieldType); doc.add(field); writer.addDocument(doc); writer.close(); DirectoryReader ir = DirectoryReader.open(ramDir); IndexSearcher is = new IndexSearcher(ir); int hits = is.search(q, 10).totalHits; ir.close(); ramDir.close(); return hits == 1; }
static { TYPE_STORED_NOT_INDEXED.setIndexed(false); TYPE_STORED_NOT_INDEXED.setStored(true); TYPE_STORED_NOT_INDEXED.setTokenized(true); TYPE_STORED_NOT_INDEXED.freeze(); }
static { FIELD_TYPE.setIndexed(true); FIELD_TYPE.setTokenized(true); FIELD_TYPE.freeze(); }
public void indexDoc() throws IOException { Document d = new Document(); FieldType customType1 = new FieldType(TextField.TYPE_STORED); customType1.setTokenized(false); customType1.setOmitNorms(true); ArrayList<Field> fields = new ArrayList<>(); String idString = getIdString(); Field idField = newField("id", idString, customType1); fields.add(idField); int nFields = nextInt(maxFields); for (int i = 0; i < nFields; i++) { FieldType customType = new FieldType(); switch (nextInt(4)) { case 0: break; case 1: customType.setStoreTermVectors(true); break; case 2: customType.setStoreTermVectors(true); customType.setStoreTermVectorPositions(true); break; case 3: customType.setStoreTermVectors(true); customType.setStoreTermVectorOffsets(true); break; } switch (nextInt(4)) { case 0: customType.setStored(true); customType.setOmitNorms(true); customType.setIndexed(true); fields.add(newField("f" + nextInt(100), getString(1), customType)); break; case 1: customType.setIndexed(true); customType.setTokenized(true); fields.add(newField("f" + nextInt(100), getString(0), customType)); break; case 2: customType.setStored(true); customType.setStoreTermVectors(false); customType.setStoreTermVectorOffsets(false); customType.setStoreTermVectorPositions(false); fields.add(newField("f" + nextInt(100), getString(0), customType)); break; case 3: customType.setStored(true); customType.setIndexed(true); customType.setTokenized(true); fields.add(newField("f" + nextInt(100), getString(bigFieldSize), customType)); break; } } if (sameFieldOrder) { Collections.sort(fields, fieldNameComparator); } else { // random placement of id field also Collections.swap(fields, nextInt(fields.size()), 0); } for (int i = 0; i < fields.size(); i++) { d.add(fields.get(i)); } if (VERBOSE) { System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString); } w.updateDocument(new Term("id", idString), d); // System.out.println(Thread.currentThread().getName() + ": indexing "+d); docs.put(idString, d); }
/** * Method to create the Lucene index. Keep in mind that the text value must always be indexed in * Lucene for calculating cosine similarity: you have to generate tokens, terms and their * frequencies and store them in the Lucene index. * * @throws CorruptIndexException * @throws LockObtainFailedException * @throws IOException */ public void index() throws CorruptIndexException, LockObtainFailedException, IOException { System.out.println(">>> Process source directory : " + sourceDirectory.getAbsolutePath()); System.out.println(">>> Create index in directory : " + indexDirectory.getAbsolutePath()); Directory dir = FSDirectory.open(indexDirectory); Analyzer analyzer = new StandardAnalyzer(StandardAnalyzer.STOP_WORDS_SET); // using stop words // Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET); // no stop words // System.out.println(">>> Overwrite the Analyser: DO NOT USE STOPWORDS FILTER !!!"); /** * Source: http://toolongdidntread.com/lucene/using-lucene-to-generate-n-grams/ * * <p>1) Run a document through an Analyzer which filters out the stuff we don't care about. * SimpleAnalyzer, in this case, applies a lower case filter and a letter tokenizer, which makes * all text lowercase and divides text at non-letters, respectively. * * <p>2) Wrap this analyzer with ShingleAnalyzerWrapper, which constructs shingles (token * n-grams) from a stream. This is the main thing we want to accomplish. * * <p>3) We generate a TokenStream which enumerates (a fancy word for establishes) "fields" from * a "document" (what I mentioned earlier). * * <p>4) Given a token stream, we want to extract certain things from it, like just the * characters and not all the other stuff that comes along with the stream. We'll use * CharTermAttribute, which extracts just the words from the stream. * * <p>5) Finally, we iterate over the stream by incrementing the tokens, extracting each * CharTermAttribute from the tokens. A self-contained sketch of these five steps follows this * method.
*/ if (useNgramsAsTerms) { analyzer = getNGramAnalyser(analyzer, N); } IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer); if (indexDirectory.exists()) { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } else { // Create a new index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } IndexWriter writer = new IndexWriter(dir, iwc); for (File f : sourceDirectory.listFiles()) { System.out.println("> DOC : " + f.getAbsolutePath()); if (f.isDirectory()) { System.out.println(">>> Indexer processes DIRECTORY : " + f.getAbsolutePath()); for (File fileTXT : f.listFiles()) { String at = getAllText(fileTXT); System.out.println("> file : " + fileTXT.getAbsolutePath()); Document doc = new Document(); FieldType fieldType = new FieldType(); fieldType.setIndexed(true); fieldType.setIndexOptions( FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); fieldType.setStored(true); fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); Field contentField = new Field(fieldName, at, fieldType); doc.add(contentField); FieldType fieldType2 = new FieldType(); fieldType2.setIndexed(true); fieldType2.setStored(true); fieldType2.setStoreTermVectors(false); fieldType2.setTokenized(false); Field idField = new Field("id", fileTXT.getAbsolutePath(), fieldType2); doc.add(idField); FieldType fieldType3 = new FieldType(); fieldType3.setIndexed(false); fieldType3.setStored(true); fieldType3.setStoreTermVectors(false); fieldType3.setTokenized(false); Field rawField = new Field("raw", at, fieldType3); doc.add(rawField); writer.addDocument(doc); } } else { if (!f.getName().startsWith(".DS_Store")) { String at = getAllText(f); Document doc = new Document(); FieldType fieldType = new FieldType(); fieldType.setIndexed(true); fieldType.setIndexOptions( FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); fieldType.setStored(true); fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); Field contentField = new Field(fieldName, at, fieldType); doc.add(contentField); FieldType fieldType2 = new FieldType(); fieldType2.setIndexed(true); fieldType2.setStored(true); fieldType2.setStoreTermVectors(false); fieldType2.setTokenized(false); Field idField = new Field("id", f.getAbsolutePath(), fieldType2); doc.add(idField); FieldType fieldType3 = new FieldType(); fieldType3.setIndexed(false); fieldType3.setStored(true); fieldType3.setStoreTermVectors(false); fieldType3.setTokenized(false); Field rawField = new Field("raw", at, fieldType3); doc.add(rawField); writer.addDocument(doc); } } } writer.close(); }
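The five steps described in the comment inside index() are not spelled out in the method itself, so here is a self-contained sketch of them; the shingle sizes, field name, and sample text are assumptions standing in for whatever getNGramAnalyser(analyzer, N) configures.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Steps 1-5: analyze, wrap with ShingleAnalyzerWrapper, open a TokenStream,
// attach a CharTermAttribute, and iterate the tokens/shingles it produces.
static void printShingles(String text) throws IOException {
  Analyzer base = new StandardAnalyzer(StandardAnalyzer.STOP_WORDS_SET);
  Analyzer shingles = new ShingleAnalyzerWrapper(base, 2, 3); // 2- and 3-grams (assumed N)
  try (TokenStream ts = shingles.tokenStream("contents", text)) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // each word or shingle emitted by the analyzer
    }
    ts.end();
  }
}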
@Slow public void testNoWaitClose() throws Throwable { Directory directory = newDirectory(); if (directory instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) directory).setPreventDoubleWrite(false); } final Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setTokenized(false); Field idField = newField("id", "", customType); doc.add(idField); for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { System.out.println("TEST: pass=" + pass); } IndexWriter writer = new IndexWriter( directory, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.CREATE) .setMergePolicy(newLogMergePolicy()) .setCommitOnClose(false)); for (int iter = 0; iter < 10; iter++) { if (VERBOSE) { System.out.println("TEST: iter=" + iter); } for (int j = 0; j < 199; j++) { idField.setStringValue(Integer.toString(iter * 201 + j)); writer.addDocument(doc); } int delID = iter * 199; for (int j = 0; j < 20; j++) { writer.deleteDocuments(new Term("id", Integer.toString(delID))); delID += 5; } writer.commit(); // Force a bunch of merge threads to kick off so we // stress out aborting them on close: ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); final IndexWriter finalWriter = writer; final AtomicReference<Throwable> failure = new AtomicReference<>(); Thread t1 = new Thread() { @Override public void run() { boolean done = false; while (!done) { for (int i = 0; i < 100; i++) { try { finalWriter.addDocument(doc); } catch (AlreadyClosedException e) { done = true; break; } catch (NullPointerException e) { done = true; break; } catch (Throwable e) { e.printStackTrace(System.out); failure.set(e); done = true; break; } } Thread.yield(); } } }; t1.start(); writer.close(); t1.join(); if (failure.get() != null) { throw failure.get(); } // Make sure reader can read IndexReader reader = DirectoryReader.open(directory); reader.close(); // Reopen writer = new IndexWriter( directory, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy()) .setCommitOnClose(false)); } writer.close(); } directory.close(); }
static { customType5 = new FieldType(TextField.TYPE_STORED); customType5.setOmitNorms(true); customType5.setTokenized(false); noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, customType5); }
protected void _add(Topic topic) { if (topic == null) return; // unlikely, but guard against it anyway // not indexing comments for now dao.fetchLinks(topic, "replies"); Document document; document = new Document(); Field field; FieldType fieldType; // add the id first fieldType = new FieldType(); fieldType.setIndexed(true); // indexed fieldType.setStored(true); // stored fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); fieldType.setStoreTermVectorPositions(true); // store positions fieldType.setStoreTermVectorOffsets(true); // store offsets field = new Field("id", topic.getId(), fieldType); document.add(field); // add the title fieldType = new FieldType(); fieldType.setIndexed(true); // indexed fieldType.setStored(true); // stored fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); fieldType.setStoreTermVectorPositions(true); // store positions fieldType.setStoreTermVectorOffsets(true); // store offsets field = new Field("title", topic.getTitle(), fieldType); document.add(field); // add the post content fieldType = new FieldType(); fieldType.setIndexed(true); // indexed fieldType.setStored(false); // not stored fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); fieldType.setStoreTermVectorPositions(true); // store positions fieldType.setStoreTermVectorOffsets(true); // store offsets field = new Field("content", topic.getContent(), fieldType); document.add(field); StringBuilder sb = new StringBuilder(); if (topic.getReplies() != null) { for (TopicReply reply : topic.getReplies()) { if (reply == null) continue; bigContentService.fill(reply); if (reply.getContent() != null) { if (sb.length() + reply.getContent().length() > (IndexWriter.MAX_TERM_LENGTH / 3)) { break; } sb.append(reply.getContent()); } } } fieldType = new FieldType(); fieldType.setIndexed(true); // indexed fieldType.setStored(false); // not stored fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); fieldType.setStoreTermVectorPositions(true); // store positions fieldType.setStoreTermVectorOffsets(true); // store offsets field = new Field("reply", sb.toString(), fieldType); document.add(field); try { luceneIndex.writer.addDocument(document); } catch (IOException e) { log.debug("add to index fail : id=" + topic.getId()); } catch (Error e) { log.debug("add to index fail : id=" + topic.getId()); } }
public void testRandomStoredFields() throws IOException { Directory dir = newDirectory(); Random rand = random(); RandomIndexWriter w = new RandomIndexWriter( rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20))); // w.w.setNoCFSRatio(0.0); final int docCount = atLeast(200); final int fieldCount = _TestUtil.nextInt(rand, 1, 5); final List<Integer> fieldIDs = new ArrayList<Integer>(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setTokenized(false); Field idField = newField("id", "", customType); for (int i = 0; i < fieldCount; i++) { fieldIDs.add(i); } final Map<String, Document> docs = new HashMap<String, Document>(); if (VERBOSE) { System.out.println("TEST: build index docCount=" + docCount); } FieldType customType2 = new FieldType(); customType2.setStored(true); for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.add(idField); final String id = "" + i; idField.setStringValue(id); docs.put(id, doc); if (VERBOSE) { System.out.println("TEST: add doc id=" + id); } for (int field : fieldIDs) { final String s; if (rand.nextInt(4) != 3) { s = _TestUtil.randomUnicodeString(rand, 1000); doc.add(newField("f" + field, s, customType2)); } else { s = null; } } w.addDocument(doc); if (rand.nextInt(50) == 17) { // mixup binding of field name -> Number every so often Collections.shuffle(fieldIDs); } if (rand.nextInt(5) == 3 && i > 0) { final String delID = "" + rand.nextInt(i); if (VERBOSE) { System.out.println("TEST: delete doc id=" + delID); } w.deleteDocuments(new Term("id", delID)); docs.remove(delID); } } if (VERBOSE) { System.out.println("TEST: " + docs.size() + " docs in index; now load fields"); } if (docs.size() > 0) { String[] idsList = docs.keySet().toArray(new String[docs.size()]); for (int x = 0; x < 2; x++) { IndexReader r = w.getReader(); IndexSearcher s = newSearcher(r); if (VERBOSE) { System.out.println("TEST: cycle x=" + x + " r=" + r); } int num = atLeast(1000); for (int iter = 0; iter < num; iter++) { String testID = idsList[rand.nextInt(idsList.length)]; if (VERBOSE) { System.out.println("TEST: test id=" + testID); } TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1); assertEquals(1, hits.totalHits); Document doc = r.document(hits.scoreDocs[0].doc); Document docExp = docs.get(testID); for (int i = 0; i < fieldCount; i++) { assertEquals( "doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f" + i), doc.get("f" + i)); } } r.close(); w.forceMerge(1); } } w.close(); dir.close(); }