/** Build the example index. */ private void index() throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexDir, iwc); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); // 3 occurrences for tag 'lucene' doc.add(new IntAssociationFacetField(3, "tags", "lucene")); // 87% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); // 1 occurrence for tag 'lucene' doc.add(new IntAssociationFacetField(1, "tags", "lucene")); // 2 occurrence for tag 'solr' doc.add(new IntAssociationFacetField(2, "tags", "solr")); // 75% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing")); // 34% confidence level of genre 'software' doc.add(new FloatAssociationFacetField(0.34f, "genre", "software")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close(); taxoWriter.close(); }
/** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter( indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter // Add documents with latitude/longitude location: // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields // (for scoring) Document doc = new Document(); doc.add(new DoublePoint("latitude", 40.759011)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); doc.add(new DoublePoint("longitude", -73.9844722)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); writer.addDocument(doc); doc = new Document(); doc.add(new DoublePoint("latitude", 40.718266)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); doc.add(new DoublePoint("longitude", -74.007819)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); writer.addDocument(doc); doc = new Document(); doc.add(new DoublePoint("latitude", 40.7051157)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); doc.add(new DoublePoint("longitude", -74.0088305)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); writer.addDocument(doc); // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(writer)); writer.close(); }
public static void main(String[] args) throws Exception { // setup Lucene to use an in-memory index Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter writer = new IndexWriter(directory, iwc); // index a few documents writer.addDocument(createDocument("1", "foo bar baz")); writer.addDocument(createDocument("2", "red green blue")); writer.addDocument(createDocument("3", "The Lucene was made by Doug Cutting")); writer.close(); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); Query query = new PrefixQuery(new Term(FIELD, "cut")); // 自动在结尾添加 * // display search results TopDocs topDocs = searcher.search(query, 10); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document doc = searcher.doc(scoreDoc.doc); System.out.println(doc); } }
/** Test that core cache key (needed for NRT) is working */ public void testCoreCacheKey() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setMaxBufferedDocs(100); iwc.setMergePolicy(NoMergePolicy.INSTANCE); IndexWriter iw = new IndexWriter(dir, iwc); // add two docs, id:0 and id:1 Document doc = new Document(); Field idField = new StringField("id", "", Field.Store.NO); doc.add(idField); idField.setStringValue("0"); iw.addDocument(doc); idField.setStringValue("1"); iw.addDocument(doc); // open reader ShardId shardId = new ShardId("fake", "_na_", 1); DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId); assertEquals(2, ir.numDocs()); assertEquals(1, ir.leaves().size()); // delete id:0 and reopen iw.deleteDocuments(new Term("id", "0")); DirectoryReader ir2 = DirectoryReader.openIfChanged(ir); // we should have the same cache key as before assertEquals(1, ir2.numDocs()); assertEquals(1, ir2.leaves().size()); assertSame( ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey()); IOUtils.close(ir, ir2, iw, dir); }
public void buildIndex(JSONObject indexData) { try { Directory dir = FSDirectory.open(new File(indexDir)); IKAnalyzer analyzer = new IKAnalyzer(); analyzer.setUseSmart(true); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer); indexWriter = new IndexWriter(dir, iwc); indexWriter.deleteAll(); JSONArray statusData = indexData.getJSONArray("statusData"); for (int i = 0; i < statusData.length(); i++) { String text = statusData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } JSONArray userData = indexData.getJSONArray("userData"); for (int i = 0; i < userData.length(); i++) { String text = userData.getString(i); Document doc = new Document(); doc.add( new Field( "text", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(doc); } // indexWriter.commit(); System.out.println("Index is done"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JSONException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { try { indexWriter.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
/**
 * Verifies NumericRangeQuery endpoint semantics at the extremes: open-ended (null)
 * bounds match everything, while exclusive MIN/MAX bounds drop the boundary docs.
 */
@Test
public void testInfiniteValues() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(
              TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
  // doc 0: the smallest representable values
  Document doc = new Document();
  doc.add(new NumericField("double").setDoubleValue(Double.NEGATIVE_INFINITY));
  doc.add(new NumericField("long").setLongValue(Long.MIN_VALUE));
  writer.addDocument(doc);
  // doc 1: the largest representable values
  doc = new Document();
  doc.add(new NumericField("double").setDoubleValue(Double.POSITIVE_INFINITY));
  doc.add(new NumericField("long").setLongValue(Long.MAX_VALUE));
  writer.addDocument(doc);
  // doc 2: zero, strictly inside every range
  doc = new Document();
  doc.add(new NumericField("double").setDoubleValue(0.0));
  doc.add(new NumericField("long").setLongValue(0L));
  writer.addDocument(doc);
  writer.close();

  IndexSearcher s = new IndexSearcher(dir);
  // null bounds mean "unbounded": all 3 docs match regardless of inclusivity flags
  Query q = NumericRangeQuery.newLongRange("long", null, null, true, true);
  TopDocs topDocs = s.search(q, 10);
  assertEquals("Score doc count", 3, topDocs.scoreDocs.length);
  q = NumericRangeQuery.newLongRange("long", null, null, false, false);
  topDocs = s.search(q, 10);
  assertEquals("Score doc count", 3, topDocs.scoreDocs.length);
  // explicit MIN/MAX bounds, inclusive: still all 3 docs
  q = NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, true, true);
  topDocs = s.search(q, 10);
  assertEquals("Score doc count", 3, topDocs.scoreDocs.length);
  // exclusive MIN/MAX bounds drop the two boundary docs, leaving only the zero doc
  q = NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, false, false);
  topDocs = s.search(q, 10);
  assertEquals("Score doc count", 1, topDocs.scoreDocs.length);
  // unbounded double ranges likewise match all 3 docs, including the infinities
  q = NumericRangeQuery.newDoubleRange("double", null, null, true, true);
  topDocs = s.search(q, 10);
  assertEquals("Score doc count", 3, topDocs.scoreDocs.length);
  q = NumericRangeQuery.newDoubleRange("double", null, null, false, false);
  topDocs = s.search(q, 10);
  assertEquals("Score doc count", 3, topDocs.scoreDocs.length);
  s.close();
  dir.close();
}
public static void main(String[] args) throws IOException { Version version = Version.LUCENE_43; // 创建一个Document Document document = new Document(); Field field = new TextField("fieldName", "Hello man can you see this in index!", Field.Store.YES); field.setBoost(2.0f); Field fieldStore = new StringField("fieldName2", "fieldValueOnlyStore", Field.Store.YES); // // FieldType fieldAllType = new FieldType(); // fieldAllType.setIndexed(true); // fieldAllType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // fieldAllType.setOmitNorms(false); // fieldAllType.setStored(true); // fieldAllType.setStoreTermVectorOffsets(true); // fieldAllType.setStoreTermVectorPayloads(true); // fieldAllType.setStoreTermVectorPositions(true); // fieldAllType.setStoreTermVectors(true); // fieldAllType.setTokenized(true); // Field fieldAll = new Field("name", "all things need to store", // fieldAllType); document.add(field); // document.add(new BinaryDocValuesField("name", new // BytesRef("hello"))); document.add(fieldStore); // document.add(fieldAll); Document doc2 = new Document(); doc2.add(field); // 创建一个目录, 用于存放索引 Directory directory = FSDirectory.open(new File("/home/waf/tmp/index")); // Directory directory = new RAMDirectory(); // 定义索引写入器的一些参数 Analyzer analyzer = new StandardAnalyzer(version); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(version, analyzer); // indexWriterConfig.setCodec(new Lucene40Codec()); // 初始化索引写入器, 并把文档写入到索引中去 IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); indexWriter.addDocument(document); indexWriter.addDocument(doc2); indexWriter.commit(); indexWriter.close(); // 对索引进行查询 IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocs result = indexSearcher.search(new TermQuery(new Term("name", "value")), 10); System.out.println(result.totalHits); reader.close(); }
/**
 * End-to-end check of the UIMA-based analyzer: index a document, search it back,
 * add a second document, reopen the reader, and verify both are retrievable.
 */
@Test
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
  // add the first doc
  Document doc = new Document();
  String dummyTitle = "this is a dummy title ";
  doc.add(new TextField("title", dummyTitle, Field.Store.YES));
  String dummyContent = "there is some content written here";
  doc.add(new TextField("contents", dummyContent, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();

  // try the search over the first doc
  DirectoryReader directoryReader = DirectoryReader.open(dir);
  IndexSearcher indexSearcher = newSearcher(directoryReader);
  TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
  assertTrue(result.totalHits > 0);
  // the stored fields must round-trip unchanged
  Document d = indexSearcher.doc(result.scoreDocs[0].doc);
  assertNotNull(d);
  assertNotNull(d.getField("title"));
  assertEquals(dummyTitle, d.getField("title").stringValue());
  assertNotNull(d.getField("contents"));
  assertEquals(dummyContent, d.getField("contents").stringValue());

  // add a second doc
  doc = new Document();
  String dogmasTitle = "dogmas";
  doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
  String dogmasContents = "white men can't jump";
  doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();

  // reopen the reader so the second commit becomes visible
  directoryReader.close();
  directoryReader = DirectoryReader.open(dir);
  indexSearcher = newSearcher(directoryReader);
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  // NOTE(review): scoreDocs[1] assumes the second-added doc ranks second here — verify
  Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
  assertNotNull(d1);
  assertNotNull(d1.getField("title"));
  assertEquals(dogmasTitle, d1.getField("title").stringValue());
  assertNotNull(d1.getField("contents"));
  assertEquals(dogmasContents, d1.getField("contents").stringValue());

  // do a matchalldocs query to retrieve both docs
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  assertEquals(2, result.totalHits);
  writer.close();
  indexSearcher.getIndexReader().close();
  dir.close();
}
/**
 * Indexes one media file, routing it to the song, album, or artist index by type.
 * Failures are logged and never propagated to the caller.
 */
public void index(MediaFile mediaFile) {
  try {
    if (mediaFile.isFile()) {
      songWriter.addDocument(SONG.createDocument(mediaFile));
      return;
    }
    if (mediaFile.isAlbum()) {
      albumWriter.addDocument(ALBUM.createDocument(mediaFile));
      return;
    }
    // Neither a file nor an album: treat as an artist entry.
    artistWriter.addDocument(ARTIST.createDocument(mediaFile));
  } catch (Exception x) {
    LOG.error("Failed to create search index for " + mediaFile, x);
  }
}
/**
 * Builds an in-memory index of three sample "person" documents used by the tests.
 *
 * @param analyzer analyzer applied to the ANALYZED fields by the IndexWriter
 * @return the populated in-memory directory
 * @throws CorruptIndexException if the index is corrupt
 * @throws LockObtainFailedException if the index write lock cannot be obtained
 * @throws IOException on other I/O failures
 */
public static Directory createRAMDirectory(Analyzer analyzer)
    throws CorruptIndexException, LockObtainFailedException, IOException {
  Directory directory = new RAMDirectory();
  // 'true' -> create a fresh index, overwriting any existing content
  IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);
  Document doc1 = new Document();
  Document doc2 = new Document();
  Document doc3 = new Document();
  // --1
  doc1.add(new Field(FIELD_ID, "1", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc1.add(new Field(FIELD_INDEX, "a", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc1.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1986));
  doc1.add(new Field(FIELD_CATEGORY, "/usa/black", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
  doc1.add(new Field(FIELD_CITY, "beijing", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc1.add(new Field(FIELD_NAME, "Lebron James", Store.YES, Index.ANALYZED_NO_NORMS));
  doc1.add(
      new Field(
          FIELD_SIGNATURE,
          "The quick brown fox jumped over the lazy dog!",
          Store.YES,
          Index.ANALYZED));
  doc1.add(new Field(FIELD_INTRODUCTION, "中欣", Store.YES, Index.ANALYZED));
  // --2
  doc2.add(new Field(FIELD_ID, "2", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc2.add(new Field(FIELD_INDEX, "b", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc2.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1988));
  doc2.add(new Field(FIELD_CATEGORY, "/usa/white", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
  doc2.add(new Field(FIELD_CITY, "beijing", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc2.add(new Field(FIELD_NAME, "&KOBE**BYANT\n", Store.YES, Index.ANALYZED_NO_NORMS));
  doc2.add(new Field(FIELD_SIGNATURE, "I am the hero!", Store.YES, Index.ANALYZED));
  doc2.add(new Field(FIELD_INTRODUCTION, "中的欣", Store.YES, Index.ANALYZED));
  // --3
  doc3.add(new Field(FIELD_ID, "3", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc3.add(new Field(FIELD_INDEX, "c", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc3.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1990));
  doc3.add(new Field(FIELD_CATEGORY, "/china", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
  doc3.add(new Field(FIELD_CITY, "jilin", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
  doc3.add(new Field(FIELD_NAME, "石欣愛北的的京城", Store.YES, Index.ANALYZED_NO_NORMS));
  doc3.add(new Field(FIELD_SIGNATURE, "关山口职业技术学院", Store.YES, Index.ANALYZED));
  doc3.add(new Field(FIELD_INTRODUCTION, "我爱中的天欣气", Store.YES, Index.ANALYZED));
  indexWriter.addDocument(doc1);
  indexWriter.addDocument(doc2);
  indexWriter.addDocument(doc3);
  indexWriter.close();
  return directory;
}
@Test public void testHugeLabel() throws Exception { Directory indexDir = newDirectory(), taxoDir = newDirectory(); IndexWriter indexWriter = new IndexWriter( indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter( taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetFields facetFields = new FacetFields(taxoWriter); // Add one huge label: String bigs = null; int ordinal = -1; CategoryPath cp = null; while (true) { int len = CategoryPath.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = _TestUtil.randomSimpleString(random(), len, len); cp = new CategoryPath("dim", bigs); ordinal = taxoWriter.addCategory(cp); Document doc = new Document(); facetFields.addFields(doc, Collections.singletonList(cp)); indexWriter.addDocument(doc); break; } // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { String s = _TestUtil.randomSimpleString(random(), 1, 10); taxoWriter.addCategory(new CategoryPath("dim", s)); Document doc = new Document(); facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dim", s))); indexWriter.addDocument(doc); } // when too large components were allowed to be added, this resulted in a new added category assertEquals(ordinal, taxoWriter.addCategory(cp)); IOUtils.close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.open(indexDir); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT); ddq.add(cp); assertEquals(1, searcher.search(ddq, 10).totalHits); IOUtils.close(indexReader, taxoReader); IOUtils.close(indexDir, taxoDir); }
/**
 * Builds two in-memory indexes of descending numeric keyword fields, splitting the
 * documents between them, then opens readerA/readerB plus a MultiReader over both.
 */
@Override
protected void setUp() throws Exception {
  super.setUp();
  RAMDirectory dirA = new RAMDirectory();
  RAMDirectory dirB = new RAMDirectory();
  IndexWriter wA =
      new IndexWriter(dirA, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
  IndexWriter wB =
      new IndexWriter(dirB, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

  // Counters start at each primitive type's maximum and decrease once per document.
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  byte theByte = Byte.MAX_VALUE;
  short theShort = Short.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;

  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    addKeywordField(doc, "theLong", String.valueOf(theLong--));
    addKeywordField(doc, "theDouble", String.valueOf(theDouble--));
    addKeywordField(doc, "theByte", String.valueOf(theByte--));
    addKeywordField(doc, "theShort", String.valueOf(theShort--));
    addKeywordField(doc, "theInt", String.valueOf(theInt--));
    addKeywordField(doc, "theFloat", String.valueOf(theFloat--));
    // Every third document goes to index A, the rest to index B.
    if (i % 3 == 0) {
      wA.addDocument(doc);
    } else {
      wB.addDocument(doc);
    }
  }
  wA.close();
  wB.close();

  readerA = IndexReader.open(dirA, true);
  readerB = IndexReader.open(dirB, true);
  // A composite reader spanning both indexes.
  readerX = new MultiReader(new IndexReader[] {readerA, readerB});
}

/** Adds an unstored, non-analyzed (keyword) field to the document. */
private static void addKeywordField(Document doc, String name, String value) {
  doc.add(new Field(name, value, Field.Store.NO, Field.Index.NOT_ANALYZED));
}
/**
 * Populates an in-memory index with three point documents and one rectangle
 * document, then opens a near-real-time reader and searcher over it.
 */
@BeforeTest
public void setUp() throws IOException {
  directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new KeywordAnalyzer());
  IndexWriter writer = new IndexWriter(directory, conf);
  writer.addDocument(newDocument("1", newPoint(-30, -30)));
  writer.addDocument(newDocument("2", newPoint(-45, -45)));
  writer.addDocument(newDocument("3", newPoint(-45, 50)));
  writer.addDocument(
      newDocument("4", newRectangle().topLeft(-50, 50).bottomRight(-38, 38).build()));
  // NRT reader: sees the documents above without an explicit commit.
  indexReader = IndexReader.open(writer, true);
  indexSearcher = new IndexSearcher(indexReader);
}
/**
 * Converts a GeoEntry into a Lucene document and writes it to the given writer.
 *
 * @param indexWriter destination writer
 * @param geoEntry the gazetteer entry to index
 * @throws IOException if the document cannot be added
 */
private void addDocument(final IndexWriter indexWriter, final GeoEntry geoEntry)
    throws IOException {
  final Document doc = new Document();
  // Searchable, stored place name.
  doc.add(new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
  // Coordinates: stored and indexed for numeric range queries.
  doc.add(
      new DoubleField(
          GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude(), Field.Store.YES));
  doc.add(
      new DoubleField(
          GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude(), Field.Store.YES));
  // Stored-only metadata.
  doc.add(
      new StoredField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode()));
  doc.add(
      new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
  // Searchable alternate names, not stored.
  doc.add(
      new TextField(
          GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD,
          geoEntry.getAlternateNames(),
          Field.Store.NO));
  // Per-document relevance boost kept in doc values.
  doc.add(
      new FloatDocValuesField(GeoNamesLuceneConstants.BOOST_FIELD, calculateBoost(geoEntry)));
  indexWriter.addDocument(doc);
}
public void testFarsiRangeFilterCollating( Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd) throws Exception { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("body", "body")); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi searcher not // supported). ScoreDoc[] result = searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1) .scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); result = searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1) .scoreDocs; assertEquals("The index Term should be included.", 1, result.length); searcher.close(); reader.close(); dir.close(); }
@Override public void index(List<AgeObject> aol) { try { IndexWriter iWriter = new IndexWriter( index, analyzer, objectList == null, IndexWriter.MaxFieldLength.UNLIMITED); if (objectList == null) objectList = aol; else objectList.addAll(aol); for (AgeObject ao : aol) { Document doc = new Document(); for (TextFieldExtractor tfe : extractors) doc.add( new Field( tfe.getName(), tfe.getExtractor().getValue(ao), Field.Store.NO, Field.Index.ANALYZED)); iWriter.addDocument(doc); } iWriter.close(); defaultFieldName = extractors.iterator().next().getName(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
// TODO: randomize public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException { Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter( directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity)); // writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.add( new Field( NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } reader = IndexReader.open(writer, true); writer.close(); IndexSearcher searcher = LuceneTestCase.newSearcher(reader); searcher.setSimilarity(similarity); return searcher; }
/** Creates the typed query paths and a one-document in-memory index with a searcher over it. */
@Before
public void setUp() throws Exception {
  serializer = new LuceneSerializer(true, true);
  entityPath = new PathBuilder<Object>(Object.class, "obj");
  // Typed paths for each property of the test entity.
  title = entityPath.getString("title");
  author = entityPath.getString("author");
  text = entityPath.getString("text");
  publisher = entityPath.getString("publisher");
  year = entityPath.getNumber("year", Integer.class);
  rating = entityPath.getString("rating");
  gross = entityPath.getNumber("gross", Double.class);
  titles = entityPath.getCollection("title", String.class, StringPath.class);
  longField = entityPath.getNumber("longField", Long.class);
  shortField = entityPath.getNumber("shortField", Short.class);
  byteField = entityPath.getNumber("byteField", Byte.class);
  floatField = entityPath.getNumber("floatField", Float.class);

  // Fresh in-memory index holding exactly one document.
  idx = new RAMDirectory();
  config =
      new IndexWriterConfig(new StandardAnalyzer())
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  writer = new IndexWriter(idx, config);
  writer.addDocument(createDocument());
  writer.close();
  searcher = new IndexSearcher(DirectoryReader.open(idx));
}
/**
 * Recursively indexes a file tree: each regular file becomes one document holding its
 * UTF-8 content (analyzed, stored) and its path relative to the walk root.
 *
 * @param prefix path accumulated from the walk root so far
 * @param file file or directory to index
 * @param indexWriter destination writer
 * @throws IOException if a file cannot be read or indexed
 */
private static void index_h(String prefix, File file, IndexWriter indexWriter)
    throws IOException {
  if (file.isDirectory()) {
    File[] children = file.listFiles();
    if (children == null) {
      // Fixed: listFiles() returns null on I/O error or permission problems; the
      // original would have thrown NullPointerException here.
      return;
    }
    for (File child : children) {
      index_h(prefix + FILE_SEPARATOR + file.getName(), child, indexWriter);
    }
  } else {
    String content = FileUtils.readFileToString(file, "utf-8");
    System.out.println("==============================================================");
    System.out.println("index_h " + content);
    System.out.println("==============================================================");
    String filename = prefix + FILE_SEPARATOR + file.getName();
    Document doc = new Document();
    doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("relative_path", filename, Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.addDocument(doc);
  }
}
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception { int[] freq = new int[nTerms]; terms = new Term[nTerms]; for (int i = 0; i < nTerms; i++) { int f = (nTerms + 1) - i; // make first terms less frequent freq[i] = (int) Math.ceil(Math.pow(f, power)); terms[i] = new Term("f", Character.toString((char) ('A' + i))); } IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < nDocs; i++) { Document d = new Document(); for (int j = 0; j < nTerms; j++) { if (r.nextInt(freq[j]) == 0) { d.add(new Field("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // System.out.println(d); } } iw.addDocument(d); } iw.optimize(); iw.close(); }
/**
 * Loads the sample tweets and indexes each tweet's text into a fresh RAMDirectory
 * as a stored, analyzed field with term vectors. Errors are logged to stderr.
 */
public void createIndex() {
  loadTweets("datasets/sentiment-short.csv", 100);
  directory = new RAMDirectory();
  IndexWriter writer = null;
  try {
    writer = getWriter();
    for (int i = 0; i < tweets.size(); i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "tweet",
              tweets.get(i).getText(),
              Field.Store.YES,
              Field.Index.ANALYZED,
              TermVector.YES));
      writer.addDocument(doc);
    }
    System.out.println("Docs: " + writer.numDocs());
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    // Fixed: the writer was previously leaked when indexing threw; close it on
    // every path so the directory is usable afterwards.
    if (writer != null) {
      try {
        writer.close();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
}
/**
 * Writes the operation's documents to the writer. A multi-document operation is
 * written via addDocuments; a single document via addDocument.
 */
private static void index(Index index, IndexWriter indexWriter) throws IOException {
  boolean multiDoc = index.docs().size() > 1;
  if (multiDoc) {
    indexWriter.addDocuments(index.docs());
  } else {
    indexWriter.addDocument(index.docs().get(0));
  }
}
/**
 * Adds every document to the collection's writer, logging progress every 50,000
 * documents. An IOException is logged and rethrown wrapped in a RuntimeException.
 *
 * @param documentList documents to index
 * @param collectionName collection whose writer is used
 */
public void addDocument(List<Document> documentList, String collectionName) {
  IndexWriter indexWriter = indexWriterManager.getIndexWriter(collectionName);
  try {
    logger.info("collectionName : {}", collectionName);
    logger.info("add indexing start................");
    int indexedSoFar = 0;
    for (Document document : documentList) {
      indexedSoFar++;
      // Periodic progress log.
      if (indexedSoFar % 50000 == 0) {
        logger.info("{} indexed...", indexedSoFar);
      }
      indexWriter.addDocument(document);
    }
    logger.info("total indexed document count {}", indexedSoFar);
    logger.info("end");
  } catch (IOException e) {
    logger.error("error : ", e);
    throw new RuntimeException("색인 중 에러가 발생하였습니다. [" + e.getMessage() + "]");
  }
}
private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex) throws IOException { ArrayList<String> images = FileUtils.getAllImages(new File(dir), true); IndexWriter iw = LuceneUtils.createIndexWriter( index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) { count++; if (count > 100 && count % 5000 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); } } iw.close(); }
/** clean() must empty the index directory without needing a searcher or a writer. */
@Test
public void testDirectoryCleaned() throws Exception {
  final RAMDirectory directory = new RAMDirectory();
  final StandardAnalyzer analyzer = new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION);
  // Seed the directory with a single empty document.
  {
    IndexWriterConfig conf = new IndexWriterConfig(DefaultIndexManager.LUCENE_VERSION, analyzer);
    final IndexWriter writer = new IndexWriter(directory, conf);
    writer.addDocument(new Document());
    writer.close();
  }
  final DefaultConfiguration configuration = new DefaultConfiguration(directory, analyzer);
  // Both suppliers fail the test if invoked: clean() must not require either resource.
  final DefaultIndexEngine engine =
      new DefaultIndexEngine(
          new Supplier<IndexSearcher>() {
            public IndexSearcher get() {
              throw new AssertionFailedError("no searcher required");
            }
          },
          new Function<Index.UpdateMode, Writer>() {
            public Writer get(final Index.UpdateMode mode) {
              throw new AssertionFailedError("no writer required");
            }
          },
          configuration,
          FlushPolicy.NONE);
  // One document before cleaning ...
  assertEquals(1, new IndexSearcher(directory).getIndexReader().numDocs());
  engine.clean();
  // ... and an empty index afterwards.
  assertEquals(0, new IndexSearcher(directory).getIndexReader().numDocs());
}
/**
 * Indexes a single document and commits it.
 * (NOTE(review): the method name's "cretae" typo is kept — renaming a public test
 * method could break suite configuration referencing it by name.)
 */
@Test
public void cretaeIndex() throws Exception {
  IndexWriter writer = LuceneUtil.getIndexWriter();
  writer.addDocument(getDocument());
  writer.commit();
}
private static void search(IndexSearcher searcher, Query query, IndexWriter out, String field) throws IOException { /* Carlos's hack */ int hitsPerPage = 1; System.out.println("Consulta: " + query); TopDocs results = searcher.search(query, hitsPerPage); int numTotalHits = results.totalHits; if (numTotalHits > 0) results = searcher.search(query, numTotalHits); ScoreDoc[] hits = results.scoreDocs; /* End hack */ for (int i = 0; i < numTotalHits; i++) { Document doc = searcher.doc(hits[i].doc); // System.out.println("Title: " + doc.get("title")); if (field != null) { System.out.println(hits[i].doc + "\t" + hits[i].score + "\t" + doc.get(field)); } if (out != null) { out.addDocument(doc); } } System.out.println("Resultados: " + numTotalHits); }
public void testDemo() throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: // Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); String text = "This is the text to be indexed."; doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer); Query query = parser.parse("text"); ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; assertEquals(1, hits.length); // Iterate through the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = isearcher.doc(hits[i].doc); assertEquals("This is the text to be indexed.", hitDoc.get("fieldname")); } isearcher.close(); directory.close(); }
/**
 * Indexes one document with positions+offsets term vectors and checks that the
 * FastVectorHighlighter produces a non-null fragment for a matching term.
 */
@Test
public void testVectorHighlighter() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter indexWriter =
      new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
  // Positions+offsets term vectors are what the fast vector highlighter reads.
  indexWriter.addDocument(
      doc()
          .add(field("_id", "1"))
          .add(
              field(
                  "content",
                  "the big bad dog",
                  Field.Store.YES,
                  Field.Index.ANALYZED,
                  Field.TermVector.WITH_POSITIONS_OFFSETS))
          .build());
  // NRT reader: sees the uncommitted document above.
  IndexReader reader = indexWriter.getReader();
  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
  assertThat(topDocs.totalHits, equalTo(1));
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  // Highlight the term "bad" within "content", fragment size 30 chars.
  String fragment =
      highlighter.getBestFragment(
          highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
          reader,
          topDocs.scoreDocs[0].doc,
          "content",
          30);
  assertThat(fragment, notNullValue());
  System.out.println(fragment);
}
/** The RefreshListener must fire only when a refresh actually swaps in a new searcher. */
public void testListenerCalled() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
  final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
  SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory());
  manager.addListener(
      new ReferenceManager.RefreshListener() {
        @Override
        public void beforeRefresh() {}

        @Override
        public void afterRefresh(boolean didRefresh) {
          // Record only refreshes that actually happened.
          if (didRefresh) {
            afterRefreshCalled.set(true);
          }
        }
      });
  writer.addDocument(new Document());
  writer.commit();
  // No refresh has been requested yet, so the listener must not have fired.
  assertFalse(afterRefreshCalled.get());
  manager.maybeRefreshBlocking();
  assertTrue(afterRefreshCalled.get());
  manager.close();
  writer.close();
  dir.close();
}