@Override
protected Document convertDocument(DemoData data, String param) {
  Document doc = new Document();
  {
    Field fd = new Field(TYPE_FIELD_NAME, TYPE_VALUE, Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(fd);
  }
  {
    Field fd =
        new Field(
            "termId",
            TYPE_VALUE + "_" + data.getId(),
            Field.Store.NO,
            Field.Index.NOT_ANALYZED);
    doc.add(fd);
  }
  {
    Field fd = new Field("id", Integer.toString(data.getId()), Field.Store.YES, Field.Index.NO);
    doc.add(fd);
  }
  {
    Field fd =
        new Field(
            "name",
            data.getName(),
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
  }
  {
    Field fd =
        new Field("refId", Integer.toString(data.getRefId()), Field.Store.YES, Field.Index.NO);
    doc.add(fd);
  }
  return doc;
}
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();
  IndexWriter writer =
      new IndexWriter(
          directory,
          new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    doc.add(
        new Field(
            MULTI_FIELD,
            English.intToEnglish(i) + " " + English.intToEnglish(i),
            Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(
        new Field(
            NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
  reader = IndexReader.open(writer, true);
  writer.close();

  IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
  searcher.setSimilarity(similarity);
  return searcher;
}
public void testMoreThan32ProhibitedClauses() throws Exception {
  final Directory d = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  doc.add(
      new TextField(
          "field",
          "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33",
          Field.Store.NO));
  w.addDocument(doc);
  doc = new Document();
  doc.add(new TextField("field", "33", Field.Store.NO));
  w.addDocument(doc);
  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);

  final BooleanQuery q = new BooleanQuery();
  for (int term = 0; term < 33; term++) {
    q.add(
        new BooleanClause(
            new TermQuery(new Term("field", "" + term)), BooleanClause.Occur.MUST_NOT));
  }
  q.add(new BooleanClause(new TermQuery(new Term("field", "33")), BooleanClause.Occur.SHOULD));

  final int[] count = new int[1];
  s.search(
      q,
      new Collector() {
        private Scorer scorer;

        @Override
        public void setScorer(Scorer scorer) {
          // Make sure we got BooleanScorer:
          this.scorer = scorer;
          assertEquals(
              "Scorer is implemented by wrong class",
              BooleanScorer.class.getName() + "$BucketScorer",
              scorer.getClass().getName());
        }

        @Override
        public void collect(int doc) {
          count[0]++;
        }

        @Override
        public void setNextReader(AtomicReaderContext context) {}

        @Override
        public boolean acceptsDocsOutOfOrder() {
          return true;
        }
      });

  assertEquals(1, count[0]);

  r.close();
  d.close();
}
public void update() {
  IndexWriter writer = null;
  try {
    writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    /*
     * Lucene has no in-place update; updateDocument() is really the combination
     * of two operations: delete the old document first, then add the new one.
     */
    Document doc = new Document();
    doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    writer.updateDocument(new Term("id", "1"), doc);
  } catch (CorruptIndexException e) {
    e.printStackTrace();
  } catch (LockObtainFailedException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    try {
      if (writer != null) writer.close();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
@Test
public void testFuzzyQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
  Document document = new Document();
  document.add(new SuggestField("suggest_field", "suggestion", 2));
  document.add(new SuggestField("suggest_field", "suaggestion", 4));
  document.add(new SuggestField("suggest_field", "ssuggestion", 1));
  iw.addDocument(document);
  document = new Document();
  document.add(new SuggestField("suggest_field", "sugfoo", 1));
  iw.addDocument(document);

  if (rarely()) {
    iw.commit();
  }

  DirectoryReader reader = iw.getReader();
  SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
  CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
  TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
  assertSuggestions(
      suggest,
      new Entry("suaggestion", 4 * 2),
      new Entry("suggestion", 2 * 3),
      new Entry("sugfoo", 1 * 3),
      new Entry("ssuggestion", 1 * 1));

  reader.close();
  iw.close();
}
public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new TextField("dv", "some text", Field.Store.NO));
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);

  DirectoryReader r = writer.getReader();
  writer.close();

  AtomicReader subR = r.leaves().get(0).reader();
  assertEquals(2, subR.numDocs());

  Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
  assertTrue(bits.get(0));
  assertTrue(bits.get(1));
  r.close();
  dir.close();
}
public Document getDocument() {
  Document doc = new Document();
  doc.add(new StringField("id", "3", Store.YES));
  doc.add(new TextField("title", "lucene", Store.YES));
  // Chinese sample content: "the secret of Lucene is creating and querying indexes"
  doc.add(new TextField("content", "lucene的秘诀是创建索引和查询索引", Store.YES));
  return doc;
}
/**
 * Adds a marker document recording when the index was created.
 *
 * @param worker the index builder worker driving this operation
 * @param connection the database connection
 * @throws Exception if the document cannot be added to the index
 */
private void createIndex(SearchIndexBuilderWorker worker, Connection connection)
    throws Exception {
  IndexWriter indexWrite = null;
  try {
    if (worker.isRunning()) {
      indexWrite = indexStorage.getIndexWriter(false);
    }
    if (indexWrite != null) {
      Document doc = new Document();
      // The date of indexing
      String timeStamp = String.valueOf(System.currentTimeMillis());
      doc.add(
          new Field(
              SearchService.DATE_STAMP, timeStamp, Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field(
              SearchService.DATE_STAMP,
              CompressionTools.compressString(timeStamp),
              Field.Store.YES));
      String ref = "---INDEX-CREATED---";
      doc.add(
          new Field(
              SearchService.FIELD_REFERENCE,
              CompressionTools.compressString(ref),
              Field.Store.YES));
      doc.add(
          new Field(
              SearchService.FIELD_REFERENCE, ref, Field.Store.NO, Field.Index.NOT_ANALYZED));
      indexWrite.addDocument(doc);
    } else {
      log.error("Couldn't get indexWriter to add document!");
    }
  } catch (Exception ex) {
    log.error("Failed to Add Documents ", ex);
    throw new Exception(ex);
  } finally {
    if (indexWrite != null) {
      if (log.isDebugEnabled()) {
        log.debug("Closing Index Writer With " + indexWrite.maxDoc() + " documents");
        Directory d = indexWrite.getDirectory();
        String[] s = d.listAll();
        log.debug("Directory Contains ");
        for (int i = 0; i < s.length; i++) {
          File f = new File(s[i]);
          log.debug(
              "\t" + String.valueOf(f.length()) + "\t" + new Date(f.lastModified()) + "\t" + s[i]);
        }
      }
      indexStorage.closeIndexWriter(indexWrite);
    }
  }
}
/** @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#addRevision(String) */
public void addRevision(String revisionName) throws Exception {
  // WARN: Older creation dates might not have milliseconds and hence are not
  // corresponding exactly with the revision name, hence in order to build the
  // date index correctly one needs to use the creation date.
  Date creationDate = node.getRevision(revisionName).getCreationDate();
  log.debug(
      "Add revision '" + revisionName + "' with creation date '" + creationDate
          + "' to date index ...");

  Document doc = new Document();
  doc.add(
      new NumericField(CREATION_DATE_FIELD_NAME, Field.Store.YES, true)
          .setLongValue(creationDate.getTime()));
  // doc.add(new Field(CREATION_DATE_FIELD_NAME,
  //     org.apache.lucene.document.DateTools.dateToString(creationDate,
  //     org.apache.lucene.document.DateTools.Resolution.MILLISECOND), Field.Store.YES,
  //     Field.Index.NOT_ANALYZED));
  doc.add(
      new Field(
          REVISION_NAME_FIELD_NAME, revisionName, Field.Store.YES, Field.Index.NOT_ANALYZED));

  IndexWriter iw = getIndexWriter();
  Term revisionNameTerm = new Term(REVISION_NAME_FIELD_NAME, revisionName);
  iw.updateDocument(revisionNameTerm, doc);
  iw.optimize();
  iw.close();
}
/**
 * Builds a plain-text Lucene document from the given values.
 *
 * @param title document title
 * @param key document key
 * @param content plain-text content
 * @param urlpath URL path of the document
 * @param custom1 first custom field value
 * @param custom2 second custom field value
 * @param custom3 third custom field value
 * @param custom4 fourth custom field value
 * @return Document
 */
public static Document getDocument(
    String title,
    String key,
    String content,
    String urlpath,
    String custom1,
    String custom2,
    String custom3,
    String custom4) {
  // make a new, empty document
  Document doc = new Document();
  doc.add(FieldUtil.UnIndexed("size", Caster.toString(content.length())));
  doc.add(FieldUtil.Text("key", key));
  FieldUtil.setMimeType(doc, "text/plain");
  FieldUtil.setRaw(doc, content);
  FieldUtil.setContent(doc, content);
  FieldUtil.setSummary(doc, StringUtil.max(content, SUMMERY_SIZE), false);
  FieldUtil.setTitle(doc, title);
  FieldUtil.setURL(doc, urlpath);
  FieldUtil.setCustom(doc, custom1, 1);
  FieldUtil.setCustom(doc, custom2, 2);
  FieldUtil.setCustom(doc, custom3, 3);
  FieldUtil.setCustom(doc, custom4, 4);
  return doc;
}
@Test
public void testVectorHighlighter() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter indexWriter =
      new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));

  Document document = new Document();
  document.add(new TextField("_id", "1", Field.Store.YES));
  document.add(
      new Field(
          "content",
          "the big bad dog",
          Field.Store.YES,
          Field.Index.ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS));
  indexWriter.addDocument(document);

  IndexReader reader = IndexReader.open(indexWriter, true);
  IndexSearcher searcher = new IndexSearcher(reader);

  TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
  assertThat(topDocs.totalHits, equalTo(1));

  XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
  String fragment =
      highlighter.getBestFragment(
          highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
          reader,
          topDocs.scoreDocs[0].doc,
          "content",
          30);
  assertThat(fragment, notNullValue());
  assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
public void testSetAllGroups() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w =
      new RandomIndexWriter(
          random(),
          dir,
          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  Document doc = new Document();
  doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED));
  doc.add(new SortedDocValuesField("group", new BytesRef("foo")));
  w.addDocument(doc);

  IndexSearcher indexSearcher = newSearcher(w.getReader());
  w.close();

  GroupingSearch gs = new GroupingSearch("group");
  gs.setAllGroups(true);
  TopGroups<?> groups = gs.search(indexSearcher, new TermQuery(new Term("group", "foo")), 0, 10);
  assertEquals(1, groups.totalHitCount);
  // assertEquals(1, groups.totalGroupCount.intValue());
  assertEquals(1, groups.totalGroupedHitCount);
  assertEquals(1, gs.getAllMatchingGroups().size());
  indexSearcher.getIndexReader().close();
  dir.close();
}
// LUCENE-1727: make sure doc fields are stored in order
public void testStoredFieldsOrder() throws Throwable {
  Directory d = newDirectory();
  IndexWriter w =
      new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);
  doc.add(newField("zzz", "a b c", customType));
  doc.add(newField("aaa", "a b c", customType));
  doc.add(newField("zzz", "1 2 3", customType));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  Document doc2 = r.document(0);
  Iterator<IndexableField> it = doc2.getFields().iterator();
  assertTrue(it.hasNext());
  Field f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "aaa");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "1 2 3");
  assertFalse(it.hasNext());
  r.close();
  w.close();
  d.close();
}
@Override
public Document createDocument(MediaFile mediaFile) {
  Document doc = new Document();
  doc.add(new NumericField(FIELD_ID, Field.Store.YES, false).setIntValue(mediaFile.getId()));

  if (mediaFile.getArtist() != null) {
    doc.add(
        new Field(FIELD_ARTIST, mediaFile.getArtist(), Field.Store.YES, Field.Index.ANALYZED));
  }
  if (mediaFile.getAlbumName() != null) {
    doc.add(
        new Field(FIELD_ALBUM, mediaFile.getAlbumName(), Field.Store.YES, Field.Index.ANALYZED));
  }
  if (mediaFile.getFolder() != null) {
    doc.add(
        new Field(
            FIELD_FOLDER,
            mediaFile.getFolder(),
            Field.Store.NO,
            Field.Index.NOT_ANALYZED_NO_NORMS));
  }
  return doc;
}
public void testDocValuesUnstored() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwconfig);
  for (int i = 0; i < 50; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv", i));
    doc.add(new TextField("docId", "" + i, Field.Store.YES));
    writer.addDocument(doc);
  }
  DirectoryReader r = writer.getReader();
  SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
  FieldInfos fi = slow.getFieldInfos();
  FieldInfo dvInfo = fi.fieldInfo("dv");
  assertTrue(dvInfo.hasDocValues());
  NumericDocValues dv = slow.getNumericDocValues("dv");
  for (int i = 0; i < 50; i++) {
    assertEquals(i, dv.get(i));
    StoredDocument d = slow.document(i);
    // cannot use d.get("dv") due to another bug!
    assertNull(d.getField("dv"));
    assertEquals(Integer.toString(i), d.get("docId"));
  }
  slow.close();
  writer.close();
  dir.close();
}
@Test
public void testContextQueryOnSuggestField() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
  Document document = new Document();
  document.add(new SuggestField("suggest_field", "abc", 3));
  document.add(new SuggestField("suggest_field", "abd", 4));
  document.add(new SuggestField("suggest_field", "The Foo Fighters", 2));
  iw.addDocument(document);
  document = new Document();
  document.add(new SuggestField("suggest_field", "abcdd", 5));
  iw.addDocument(document);

  if (rarely()) {
    iw.commit();
  }

  DirectoryReader reader = iw.getReader();
  SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
  ContextQuery query =
      new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")));
  try {
    suggestIndexSearcher.suggest(query, 4);
    fail("expected IllegalStateException: ContextQuery requires ContextSuggestField");
  } catch (IllegalStateException expected) {
    assertTrue(expected.getMessage().contains("SuggestField"));
  }
  reader.close();
  iw.close();
}
public void testTypeChangeViaAddIndexesIR2() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  writer.close();

  Directory dir2 = newDirectory();
  writer = new IndexWriter(dir2, conf);
  IndexReader[] readers = new IndexReader[] {DirectoryReader.open(dir)};
  writer.addIndexes(readers);
  readers[0].close();
  doc = new Document();
  doc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
  try {
    writer.addDocument(doc);
    fail("did not hit exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }
  writer.close();
  dir2.close();
  dir.close();
}
@Test
public void testAllContextQuery() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
  Document document = new Document();
  document.add(new ContextSuggestField("suggest_field", "suggestion1", 4, "type1"));
  document.add(new ContextSuggestField("suggest_field", "suggestion2", 3, "type2"));
  document.add(new ContextSuggestField("suggest_field", "suggestion3", 2, "type3"));
  iw.addDocument(document);
  document = new Document();
  document.add(new ContextSuggestField("suggest_field", "suggestion4", 1, "type4"));
  iw.addDocument(document);

  if (rarely()) {
    iw.commit();
  }

  DirectoryReader reader = iw.getReader();
  SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
  ContextQuery query =
      new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
  TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
  assertSuggestions(
      suggest,
      new Entry("suggestion1", "type1", 4),
      new Entry("suggestion2", "type2", 3),
      new Entry("suggestion3", "type3", 2),
      new Entry("suggestion4", "type4", 1));

  reader.close();
  iw.close();
}
public void testMultiValuedDocValuesField() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  Field f = new NumericDocValuesField("field", 17);
  // Index doc values are single-valued so we should not
  // be able to add same field more than once:
  doc.add(f);
  doc.add(f);
  try {
    w.addDocument(doc);
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }

  doc = new Document();
  doc.add(f);
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = w.getReader();
  w.close();
  assertEquals(17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(r), "field", false).get(0));
  r.close();
  d.close();
}
private ParsedDocument testParsedDocument(
    String uid,
    String id,
    String type,
    String routing,
    long timestamp,
    long ttl,
    Document document,
    Analyzer analyzer,
    BytesReference source,
    boolean mappingsModified) {
  Field uidField = new Field("_uid", uid, UidFieldMapper.Defaults.FIELD_TYPE);
  Field versionField = new NumericDocValuesField("_version", 0);
  document.add(uidField);
  document.add(versionField);
  return new ParsedDocument(
      uidField,
      versionField,
      id,
      type,
      routing,
      timestamp,
      ttl,
      Arrays.asList(document),
      analyzer,
      source,
      mappingsModified);
}
private Document getDocument(JournalEntry entry) {
  Document doc = new Document();
  doc.add(
      new Field(
          EntryIndexFields.ID.name(),
          entry.getUid().toString(),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED));
  doc.add(
      new Field(
          EntryIndexFields.NAME.name(),
          entry.getName(),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED));
  doc.add(
      new Field(
          EntryIndexFields.FULLTEXT.name(),
          entry.getText(),
          Field.Store.YES,
          Field.Index.ANALYZED));
  doc.add(
      new Field(
          EntryIndexFields.DATE.name(),
          Long.toString(entry.getDateTime().getMillis()),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED));
  doc.add(
      new Field(
          EntryIndexFields.TYPE.name(),
          entry.getItemType().name(),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED));
  return doc;
}
@Override
public void addFields(Document bannerDoc, AdDefinition bannerDefinition) {
  KeywordConditionDefinition kdef = null;
  if (bannerDefinition.hasConditionDefinition(ConditionDefinitions.KEYWORD)) {
    kdef =
        (KeywordConditionDefinition)
            bannerDefinition.getConditionDefinition(ConditionDefinitions.KEYWORD);
  }

  if (kdef != null && kdef.getKeywords().size() > 0) {
    // store the keywords in the document
    List<Keyword> kws = kdef.getKeywords();
    for (Keyword k : kws) {
      bannerDoc.add(new StringField(AdDBConstants.ADDB_AD_KEYWORD, k.word, Field.Store.NO));
    }
  } else {
    /*
     * every banner without an explicit keyword gets the default ALL keyword
     */
    bannerDoc.add(
        new StringField(
            AdDBConstants.ADDB_AD_KEYWORD, AdDBConstants.ADDB_AD_KEYWORD_ALL, Field.Store.NO));
  }
}
private boolean indexField(Document document, Object state, String fieldName, Type type) {
  if (Arrays.binarySearch(FIELDS, fieldName) >= 0) {
    // index all fields we know about
    document.add(new Field(fieldName, (String) state, Field.Store.YES, Field.Index.TOKENIZED));
    document.add(
        new Field(Indexer.DEFAULT_FIELD, (String) state, Field.Store.NO, Field.Index.TOKENIZED));
    return true;
  } else if (type instanceof StringType) {
    // index all strings with the exception of the fields explicitly listed as sensitive
    if (Arrays.binarySearch(SENSITIVE_FIELDS, fieldName) < 0) {
      document.add(
          new Field(
              Indexer.DEFAULT_FIELD, (String) state, Field.Store.NO, Field.Index.TOKENIZED));
    }
    return true;
  } else if (fieldName.equals("aliases")) {
    Set aliases = (Set) state;
    for (Iterator a = aliases.iterator(); a.hasNext(); ) {
      String alias = (String) a.next();
      document.add(new Field("alias", alias, Field.Store.NO, Field.Index.TOKENIZED));
      document.add(
          new Field(Indexer.DEFAULT_FIELD, alias, Field.Store.NO, Field.Index.TOKENIZED));
    }
    return true;
  }
  return false;
}
/**
 * Create a basic Lucene document to add to the index. This document is suitable to be parsed
 * with the StandardAnalyzer.
 */
private Document createStandardDocument(Topic topic) {
  String topicContent = topic.getTopicContent();
  if (topicContent == null) {
    topicContent = "";
  }
  Document doc = new Document();
  // store the (not analyzed) topic name to use when deleting records from the index.
  doc.add(
      new Field(
          FIELD_TOPIC_NAME,
          topic.getName(),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED_NO_NORMS));
  // add the (not analyzed) topic namespace to allow retrieval by namespace.
  // this field is used internally in searches.
  doc.add(
      new Field(
          FIELD_TOPIC_NAMESPACE,
          topic.getNamespace().getId().toString(),
          Field.Store.NO,
          Field.Index.NOT_ANALYZED_NO_NORMS));
  // analyze the topic name so that (for example) a search for "New York" will match
  // "New York City"
  Field nameField =
      new Field(FIELD_TOPIC_NAME_ANALYZED, topic.getName(), Field.Store.NO, Field.Index.ANALYZED);
  // make the topic name worth 3x as much as topic content in searches
  nameField.setBoost(3.0f);
  doc.add(nameField);
  // analyze & store the topic content so that it is searchable and also usable for display
  // in search result summaries
  doc.add(new Field(FIELD_TOPIC_CONTENT, topicContent, Field.Store.YES, Field.Index.ANALYZED));
  return doc;
}
private static void index_h(String prefix, File file, IndexWriter indexWriter)
    throws IOException {
  Document doc = null;
  if (file.isDirectory()) {
    File[] files = file.listFiles();
    for (File file1 : files) {
      index_h(prefix + FILE_SEPARATOR + file.getName(), file1, indexWriter);
    }
  } else {
    String content = FileUtils.readFileToString(file, "utf-8");
    System.out.println("==============================================================");
    System.out.println("index_h " + content);
    System.out.println("==============================================================");
    String filename = prefix + FILE_SEPARATOR + file.getName();
    String path = file.getAbsolutePath();
    doc = new Document();
    doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("relative_path", filename, Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.addDocument(doc);
  }
}
private void indexDocument(IndexWriter iw, ProcessedDocument parsedDoc) throws IOException {
  org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(false);
  doc.add(new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), customType));
  doc.add(new StringField(INDEX_FIELD_URL, parsedDoc.getDocumentURL(), Field.Store.YES));
  doc.add(new StringField(INDEX_FIELD_DOC_ID, parsedDoc.getDocumentId(), Field.Store.YES));
  doc.add(new TextField(INDEX_FIELD_TITLE, parsedDoc.getDocumentTitle(), Field.Store.YES));
  doc.add(new StringField(INDEX_FIELD_DOC_TYPE, parsedDoc.getDocumentType(), Field.Store.YES));

  /*
   * TODO: 2.2 -- The effect of boosting (Book Section 2.1.2)
   *
   * Uncomment the lines below to demonstrate the effect of boosting
   */
  // if (parsedDoc.getDocumentId().equals("g1-d13")) {
  //   doc.setBoost(2);
  // }

  iw.addDocument(doc);
}
public void testFarsiRangeFilterCollating(
    Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
    throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
  Document doc = new Document();
  doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);
  writer.close();
  IndexReader reader = IndexReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  Query query = new TermQuery(new Term("body", "body"));

  // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
  // orders the U+0698 character before the U+0633 character, so the single
  // index Term below should NOT be returned by a TermRangeFilter with a Farsi
  // Collator (or an Arabic one for the case where Farsi is not supported).
  ScoreDoc[] result =
      searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should not be included.", 0, result.length);

  result =
      searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
          .scoreDocs;
  assertEquals("The index Term should be included.", 1, result.length);

  searcher.close();
  reader.close();
  dir.close();
}
public void testDifferentTypedDocValuesField2() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  // Index doc values are single-valued so we should not
  // be able to add same field more than once:
  Field f = new NumericDocValuesField("field", 17);
  doc.add(f);
  doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
  try {
    w.addDocument(doc);
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }
  doc = new Document();
  doc.add(f);
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = w.getReader();
  assertEquals(17, getOnlySegmentReader(r).getNumericDocValues("field").get(0));
  r.close();
  w.close();
  d.close();
}
public Document createDocument(BufferedImage image, String identifier) {
  assert (image != null);
  BufferedImage bimg = image;
  // Scaling the image is very important, especially for the correlogram features!
  // All images are scaled to guarantee a certain upper limit for indexing.
  if (Math.max(image.getHeight(), image.getWidth()) > MAX_IMAGE_DIMENSION) {
    bimg = ImageUtils.scaleImage(image, MAX_IMAGE_DIMENSION);
  }
  Document doc = null;
  logger.finer("Starting extraction from image [CEDD - fast].");
  CEDD vd = new CEDD();
  vd.extract(bimg);
  logger.fine("Extraction finished [CEDD - fast].");
  doc = new Document();
  doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, vd.getByteArrayRepresentation()));
  if (identifier != null) {
    doc.add(
        new Field(
            DocumentBuilder.FIELD_NAME_IDENTIFIER,
            identifier,
            Field.Store.YES,
            Field.Index.NOT_ANALYZED));
  }
  return doc;
}
/**
 * Overwrite this method if you want to filter the input, apply hashing, etc.
 *
 * @param feature the current feature.
 * @param document the current document.
 * @param featureFieldName the field name of the feature.
 */
protected void addToDocument(LireFeature feature, Document document, String featureFieldName) {
  if (run == 0) {
    // run 0: just count documents
  } else if (run == 1) {
    // run 1: select the representatives ...
    if (representativesID.contains(docCount)
        && feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
      // it's a representative: put it into a temporary data structure ...
      representatives.add(feature);
    }
  } else if (run == 2) {
    // run 2: actual hashing; find the nearest representatives and put those
    // as a hash into the document.
    if (feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
      // it's a feature to be hashed
      int[] hashes = getHashes(feature);
      document.add(
          new TextField(
              featureFieldName + "_hash",
              createDocumentString(hashes, hashes.length),
              Field.Store.YES));
      document.add(
          new TextField(
              featureFieldName + "_hash_q", createDocumentString(hashes, 10), Field.Store.YES));
    }
    document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation()));
  }
}