/**
 * Indexes one dated document through the given writer.
 *
 * <p>The instance counter {@code i} is advanced first and its new value becomes the document's
 * {@code "id"} field. Every document also gets the constant {@code "metafile"} value
 * {@code "doc"}.
 *
 * @param writer destination writer; it is cast to {@link LgteIndexWriter}, so any other
 *     implementation fails with a {@link ClassCastException}
 * @param name value stored in the {@code "name"} field
 * @param year year passed to {@code addTimeField}
 * @param month month passed to {@code addTimeField}
 * @param day day passed to {@code addTimeField}
 * @throws IOException declared for failures while building or adding the document
 */
private void addPoint(IndexWriter writer, String name, int year, int month, int day)
    throws IOException {
  i++;
  final String pointId = String.valueOf(i);

  LgteDocumentWrapper point = new LgteDocumentWrapper();
  point.indexText("id", pointId);
  point.indexText("name", name);
  point.addTimeField(year, month, day);
  point.indexText("metafile", "doc");

  // The cast stays last so the counter is already advanced if the writer type is wrong.
  LgteIndexWriter lgteWriter = (LgteIndexWriter) writer;
  lgteWriter.addDocument(point);
}
/**
 * Checks that filters on the "has timexes" flag restrict a sentence-level query when documents
 * and sentences live in two separate indexes joined through {@code "doc_id"}.
 *
 * <p>Fixture: in the "Contents" index only document "1" carries {@code Config.S_HAS_TIMEXES};
 * in the "Sentences" index only sentence "2_1" carries {@code Config.S_HAS_TIMEXES +
 * "_sentences"}. The query {@code sentences:word2} is then run with three filters, and each run
 * must return exactly one hit.
 *
 * <p>The searcher and both index directories are released in a {@code finally} block so that a
 * failing assertion does not leave them behind.
 *
 * @throws IOException declared for index or search I/O failures
 * @throws DocumentException declared by the original signature
 * @throws ParseException declared for query-parsing failures
 */
public void testTimeFilter() throws IOException, DocumentException, ParseException {
  // --- Document-level index: only doc1 is flagged as having timexes.
  LgteIndexWriter writer = new LgteIndexWriter(path + "Contents", true);
  LgteDocumentWrapper doc1 = new LgteDocumentWrapper();
  doc1.indexText(Globals.DOCUMENT_ID_FIELD, "1");
  doc1.indexText("contents", "word1 word2 word3");
  doc1.indexStringNoStore(Config.S_HAS_TIMEXES, "true");
  LgteDocumentWrapper doc2 = new LgteDocumentWrapper();
  doc2.indexText(Globals.DOCUMENT_ID_FIELD, "2");
  doc2.indexText("contents", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");
  writer.addDocument(doc1);
  writer.addDocument(doc2);
  writer.close();

  // --- Sentence-level index: only sentence2 (of document "2") is flagged.
  LgteIndexWriter writer2 = new LgteIndexWriter(path + "Sentences", true);
  LgteDocumentWrapper sentence0 = new LgteDocumentWrapper();
  sentence0.indexText(Globals.DOCUMENT_ID_FIELD, "1_0");
  sentence0.indexText("doc_id", "1");
  sentence0.indexText("sentences", "word1 word3");
  LgteDocumentWrapper sentence1 = new LgteDocumentWrapper();
  sentence1.indexText(Globals.DOCUMENT_ID_FIELD, "1_1");
  sentence1.indexText("doc_id", "1");
  sentence1.indexText("sentences", "word1 word2 word3");
  LgteDocumentWrapper sentence2 = new LgteDocumentWrapper();
  sentence2.indexStringNoStore(Config.S_HAS_TIMEXES + "_sentences", "true");
  sentence2.indexText(Globals.DOCUMENT_ID_FIELD, "2_1");
  sentence2.indexText("doc_id", "2");
  sentence2.indexText("sentences", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");
  writer2.addDocument(sentence0);
  writer2.addDocument(sentence1);
  writer2.addDocument(sentence2);
  writer2.close();

  // Both index directories exist from here on, so cleanup in the finally block is safe.
  LgteIndexSearcherWrapper searcher = null;
  try {
    IndexReader readerContents =
        LgteIndexManager.openReader(path + "Contents", Model.OkapiBM25Model);
    IndexReader readerSentences =
        LgteIndexManager.openReader(path + "Sentences", Model.OkapiBM25Model);

    // Route each field name to the reader of the index that holds it.
    Map<String, IndexReader> readers = new HashMap<String, IndexReader>();
    readers.put("contents", readerContents);
    readers.put("sentences", readerSentences);
    readers.put(Config.S_HAS_TIMEXES, readerContents);
    readers.put(Config.S_HAS_TIMEXES + "_sentences", readerSentences);
    readers.put("doc_id", readerSentences);
    readers.put("id", readerSentences);

    LgteIsolatedIndexReader lgteIsolatedIndexReader = new LgteIsolatedIndexReader(readers);
    lgteIsolatedIndexReader.addTreeMapping(readerContents, readerSentences, "doc_id");
    searcher = new LgteIndexSearcherWrapper(Model.OkapiBM25Model, lgteIsolatedIndexReader);

    QueryConfiguration queryConfiguration = new QueryConfiguration();
    queryConfiguration.setProperty("bm25.idf.policy", "floor_epslon");
    queryConfiguration.setProperty("bm25.idf.epslon", "0.01");
    queryConfiguration.setProperty("bm25.k1", "2.0");
    queryConfiguration.setProperty("bm25.b", "0.75");
    queryConfiguration.setProperty("index.tree", "true");

    LgteQuery lgteQuery =
        LgteQueryParser.parseQuery(
            "sentences:word2", new LgteNothingAnalyzer(), searcher, queryConfiguration);

    // 1) Document-level flag expressed as a QueryFilter.
    QueryFilter queryFilter =
        new QueryFilter(
            org.apache.lucene.queryParser.QueryParser.parse(
                "true", Config.S_HAS_TIMEXES, new LgteNothingAnalyzer()));
    LgteHits lgteHits = searcher.search(lgteQuery, queryFilter);
    assertEquals(1, lgteHits.length());
    assertEquals(1, lgteHits.id(0));

    // 2) Same document-level flag expressed as a TermsFilter.
    TermsFilter termsFilter = new TermsFilter();
    termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES, "true"));
    lgteHits = searcher.search(lgteQuery, termsFilter);
    assertEquals(1, lgteHits.length());
    assertEquals(1, lgteHits.id(0));

    // 3) Sentence-level flag: only the sentence of document "2" carries it.
    termsFilter = new TermsFilter();
    termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES + "_sentences", "true"));
    lgteHits = searcher.search(lgteQuery, termsFilter);
    assertEquals(1, lgteHits.length());
    assertEquals(2, lgteHits.id(0));
  } finally {
    try {
      if (searcher != null) {
        searcher.close();
      }
    } finally {
      // Remove the on-disk indexes even if closing the searcher failed.
      Files.delDirsE(path + "Contents");
      Files.delDirsE(path + "Sentences");
    }
  }
}
/**
 * Builds the three indexes used by the tests from the same three documents (ids "1".."3").
 *
 * <ul>
 *   <li>{@code pathUnique}: one index holding both {@code "contents1"} and {@code "contents2"},
 *       created with {@code Model.OkapiBM25Model};
 *   <li>{@code pathMulti1}: an index holding only {@code "contents1"};
 *   <li>{@code pathMulti2}: an index holding only {@code "contents2"}.
 * </ul>
 *
 * @throws IOException declared for failures while writing any of the indexes
 */
protected void setUp() throws IOException {
  final String[] ids = {"1", "2", "3"};
  final String[] firstField = {
    "word1 word2 word3 word32 word1 word45 word56 word67 word67 word2 word67 word88 word99 word99 word33",
    "word2 word3 word4 word55 word96 word2 word54 word33 wordss",
    "word1 word100 word400 word555 word966 word544 word333 wordss"
  };
  final String[] secondField = {
    "word1 word2 word3 word32 word1 word45 word56 word67",
    "word2 word3 word4 word55",
    "word1 word100"
  };
  final int count = ids.length;

  // Single index carrying both content fields.
  LgteIndexWriter writer = new LgteIndexWriter(pathUnique, true, Model.OkapiBM25Model);
  LgteDocumentWrapper[] docs = new LgteDocumentWrapper[count];
  for (int d = 0; d < count; d++) {
    docs[d] = new LgteDocumentWrapper();
    docs[d].indexText(Globals.DOCUMENT_ID_FIELD, ids[d]);
    docs[d].indexText("contents1", firstField[d]);
    docs[d].indexText("contents2", secondField[d]);
  }
  for (LgteDocumentWrapper doc : docs) {
    writer.addDocument(doc);
  }
  writer.close();

  // The next two indexes split the same data by field: first "contents1" only...
  writer = new LgteIndexWriter(pathMulti1, true);
  docs = new LgteDocumentWrapper[count];
  for (int d = 0; d < count; d++) {
    docs[d] = new LgteDocumentWrapper();
    docs[d].indexText(Globals.DOCUMENT_ID_FIELD, ids[d]);
    docs[d].indexText("contents1", firstField[d]);
  }
  for (LgteDocumentWrapper doc : docs) {
    writer.addDocument(doc);
  }
  writer.close();

  // ...then "contents2" only.
  writer = new LgteIndexWriter(pathMulti2, true);
  docs = new LgteDocumentWrapper[count];
  for (int d = 0; d < count; d++) {
    docs[d] = new LgteDocumentWrapper();
    docs[d].indexText(Globals.DOCUMENT_ID_FIELD, ids[d]);
    docs[d].indexText("contents2", secondField[d]);
  }
  for (LgteDocumentWrapper doc : docs) {
    writer.addDocument(doc);
  }
  writer.close();
}
/**
 * Runs a time-radius query (centre {@code "1990-6-8"}, radius 14 years) over documents whose
 * {@code "metafile"} field is {@code "doc"}, sorted by time distance, and verifies the result.
 *
 * <p>Expectations checked:
 *
 * <ul>
 *   <li>exactly 5 time distances are recorded and exactly 5 hits are returned;
 *   <li>each hit's distance in years is within the radius;
 *   <li>distances (years and milliseconds) never decrease from one hit to the next;
 *   <li>hit {@code i} has the {@code "name"} value {@code i + 1}, i.e. "1" to "5" in order.
 * </ul>
 *
 * <p>The searcher is opened with the language model when the {@code lm} field is set, otherwise
 * with the vector space model, and is always closed in a {@code finally} block.
 *
 * @throws IOException declared for search or document-loading I/O failures
 * @throws InvalidGeoException declared by the original signature
 */
public void testRange() throws IOException, InvalidGeoException {
  LgteIndexSearcherWrapper searcher;
  if (lm) {
    searcher = new LgteIndexSearcherWrapper(Model.LanguageModel, path);
  } else {
    searcher = new LgteIndexSearcherWrapper(Model.VectorSpaceModel, path);
  }

  try {
    int years = 14;

    // Build the query: a time point plus a radius in years.
    QueryParams queryParams = new QueryParams();
    queryParams.setTime("1990-6-8");
    queryParams.setRadiumYears(years);

    // Term query matching documents whose "metafile" field is "doc".
    Query tq = new TermQuery(new Term("metafile", "doc"));
    LgteQuery lgteQuery = new LgteQuery(tq, queryParams);

    TimeDistanceSortSource dsort = new TimeDistanceSortSource();
    LgteSort sort = new LgteSort(new SortField("foo", dsort));

    LgteHits hits = searcher.search(lgteQuery, sort);
    int results = hits.length();

    // The sort source also exposes the computed distances; LgteHits gives the same
    // information per hit, this is only used here for the size check and the log line.
    ITimeDistancesWrapper timeDistancesWrapper = dsort.getTimeDistancesWrapper();

    System.out.println(
        "Distance Filter filtered: " + timeDistancesWrapper.getTimeDistances().size());
    System.out.println("Results: " + results);
    System.out.println("=============================");

    // Both the recorded distances and the returned hits are expected to number 5.
    assertEquals(5, timeDistancesWrapper.getTimeDistances().size());
    assertEquals(5, results);

    int lastYears = 0;
    long lastMili = 0;
    for (int i = 0; i < results; i++) {
      LgteDocumentWrapper d = hits.doc(i);
      String name = d.get("name");
      int distanceYears = hits.timeDistanceYears(i);
      long distanceMili = hits.timeDistanceMiliseconds(i);

      // Inside the radius, and sorted by non-decreasing distance.
      assertTrue(distanceYears <= years);
      assertTrue(distanceYears >= lastYears);
      assertTrue(distanceMili >= lastMili);
      lastYears = distanceYears;
      lastMili = distanceMili;

      System.out.println(
          "Name: " + name + ", Distance (years, mili):" + distanceYears + " |" + distanceMili);

      // Hits must come back named "1".."5" in rank order; assertEquals reports the
      // offending name on failure instead of a bare assertion error.
      assertEquals(String.valueOf(i + 1), name);
    }
  } finally {
    // Release the searcher even when an assertion above fails.
    searcher.close();
  }
}