Пример #1
0
  /**
   * Builds the three index fixtures used by the tests.
   *
   * <ul>
   *   <li>{@code pathUnique}: one index holding both the "contents1" and "contents2" fields of
   *       every document, created with {@code Model.OkapiBM25Model}.
   *   <li>{@code pathMulti1}: the same three documents, but only their "contents1" field.
   *   <li>{@code pathMulti2}: the same three documents, but only their "contents2" field.
   * </ul>
   *
   * <p>Every writer is closed in a {@code finally} block so that a failure while adding a
   * document does not leave an open writer behind.
   *
   * @throws IOException if an index cannot be created, written or closed
   */
  protected void setUp() throws IOException {
    String[] ids = {"1", "2", "3"};
    String[] contents1 = {
      "word1 word2 word3 word32 word1 word45 word56 word67 word67 word2 word67 word88 word99 word99 word33",
      "word2 word3 word4 word55 word96 word2 word54 word33 wordss",
      "word1 word100 word400 word555 word966 word544 word333 wordss"
    };
    String[] contents2 = {
      "word1 word2 word3 word32 word1 word45 word56 word67",
      "word2 word3 word4 word55",
      "word1 word100"
    };

    // Single index with both fields on every document.
    LgteIndexWriter writer = new LgteIndexWriter(pathUnique, true, Model.OkapiBM25Model);
    try {
      for (int d = 0; d < ids.length; d++) {
        writer.addDocument(newFixtureDocument(ids[d], contents1[d], contents2[d]));
      }
    } finally {
      writer.close();
    }

    // The next two indexes split the same data by field:
    // the first one indexes only contents1 and the second one only contents2.
    writer = new LgteIndexWriter(pathMulti1, true);
    try {
      for (int d = 0; d < ids.length; d++) {
        writer.addDocument(newFixtureDocument(ids[d], contents1[d], null));
      }
    } finally {
      writer.close();
    }

    writer = new LgteIndexWriter(pathMulti2, true);
    try {
      for (int d = 0; d < ids.length; d++) {
        writer.addDocument(newFixtureDocument(ids[d], null, contents2[d]));
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Creates one fixture document: the id field first, then whichever content fields are given.
   *
   * @param id value indexed under {@code Globals.DOCUMENT_ID_FIELD}
   * @param contents1 text for the "contents1" field, or {@code null} to leave the field out
   * @param contents2 text for the "contents2" field, or {@code null} to leave the field out
   * @return the populated document wrapper
   * @throws IOException declared so that anything {@code indexText} may raise propagates unchanged
   */
  private LgteDocumentWrapper newFixtureDocument(String id, String contents1, String contents2)
      throws IOException {
    LgteDocumentWrapper doc = new LgteDocumentWrapper();
    doc.indexText(Globals.DOCUMENT_ID_FIELD, id);
    if (contents1 != null) {
      doc.indexText("contents1", contents1);
    }
    if (contents2 != null) {
      doc.indexText("contents2", contents2);
    }
    return doc;
  }
Пример #2
0
 /**
  * Indexes one named point carrying the given date.
  *
  * <p>Increments the {@code i} counter and stores its new value in the document's "id" field,
  * stores {@code name} in the "name" field, adds a time field from the date parts and tags the
  * document with "metafile" = "doc".
  *
  * @param writer target writer; it is cast to {@code LgteIndexWriter} before the document is added
  * @param name text indexed in the "name" field
  * @param year year passed to {@code addTimeField}
  * @param month month passed to {@code addTimeField}
  * @param day day passed to {@code addTimeField}
  * @throws IOException if the document cannot be added to the index
  */
 private void addPoint(IndexWriter writer, String name, int year, int month, int day)
     throws IOException {
   LgteDocumentWrapper point = new LgteDocumentWrapper();
   String pointId = String.valueOf(++i);

   point.indexText("id", pointId);
   point.indexText("name", name);
   point.addTimeField(year, month, day);
   point.indexText("metafile", "doc");

   LgteIndexWriter lgteWriter = (LgteIndexWriter) writer;
   lgteWriter.addDocument(point);
 }
  /**
   * Checks that filters on the "has timexes" flags restrict a sentence-level query.
   *
   * <p>Two indexes are built: a "Contents" index with two documents, where only document "1"
   * carries {@code Config.S_HAS_TIMEXES} = "true", and a "Sentences" index with three sentence
   * documents linked to their parent through "doc_id", where only sentence "2_1" carries the
   * sentence-level flag ({@code Config.S_HAS_TIMEXES + "_sentences"}). Both are exposed through
   * one {@code LgteIsolatedIndexReader} with a tree mapping on "doc_id".
   *
   * <p>The query {@code sentences:word2} is then run three times:
   *
   * <ol>
   *   <li>with a {@code QueryFilter} on the document-level flag, expecting one hit with id 1;
   *   <li>with a {@code TermsFilter} on the document-level flag, expecting one hit with id 1;
   *   <li>with a {@code TermsFilter} on the sentence-level flag, expecting one hit with id 2.
   * </ol>
   *
   * <p>The searcher is closed and both index directories are deleted in {@code finally} blocks,
   * so a failing assertion no longer leaves index files behind.
   */
  public void testTimeFilter() throws IOException, DocumentException, ParseException {
    String contentsPath = path + "Contents";
    String sentencesPath = path + "Sentences";

    // Document-level index: only document "1" is flagged as having timexes.
    LgteIndexWriter writer = new LgteIndexWriter(contentsPath, true);
    try {
      LgteDocumentWrapper doc1 = new LgteDocumentWrapper();
      doc1.indexText(Globals.DOCUMENT_ID_FIELD, "1");
      doc1.indexText("contents", "word1 word2 word3");
      doc1.indexStringNoStore(Config.S_HAS_TIMEXES, "true");
      LgteDocumentWrapper doc2 = new LgteDocumentWrapper();
      doc2.indexText(Globals.DOCUMENT_ID_FIELD, "2");
      doc2.indexText("contents", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");
      writer.addDocument(doc1);
      writer.addDocument(doc2);
    } finally {
      writer.close();
    }

    // Sentence-level index: only sentence "2_1" is flagged as having timexes.
    LgteIndexWriter writer2 = new LgteIndexWriter(sentencesPath, true);
    try {
      LgteDocumentWrapper sentence0 = new LgteDocumentWrapper();
      sentence0.indexText(Globals.DOCUMENT_ID_FIELD, "1_0");
      sentence0.indexText("doc_id", "1");
      sentence0.indexText("sentences", "word1 word3");
      LgteDocumentWrapper sentence1 = new LgteDocumentWrapper();
      sentence1.indexText(Globals.DOCUMENT_ID_FIELD, "1_1");
      sentence1.indexText("doc_id", "1");
      sentence1.indexText("sentences", "word1 word2 word3");
      LgteDocumentWrapper sentence2 = new LgteDocumentWrapper();
      sentence2.indexStringNoStore(Config.S_HAS_TIMEXES + "_sentences", "true");
      sentence2.indexText(Globals.DOCUMENT_ID_FIELD, "2_1");
      sentence2.indexText("doc_id", "2");
      sentence2.indexText("sentences", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");
      writer2.addDocument(sentence0);
      writer2.addDocument(sentence1);
      writer2.addDocument(sentence2);
    } finally {
      writer2.close();
    }

    // Both indexes exist from here on, so their removal is guaranteed by the outer finally.
    try {
      IndexReader readerContents =
          LgteIndexManager.openReader(contentsPath, Model.OkapiBM25Model);
      IndexReader readerSentences =
          LgteIndexManager.openReader(sentencesPath, Model.OkapiBM25Model);

      // Route each field name to the reader of the index that holds it.
      Map<String, IndexReader> readers = new HashMap<String, IndexReader>();
      readers.put("contents", readerContents);
      readers.put("sentences", readerSentences);
      readers.put(Config.S_HAS_TIMEXES, readerContents);
      readers.put(Config.S_HAS_TIMEXES + "_sentences", readerSentences);
      readers.put("doc_id", readerSentences);
      readers.put("id", readerSentences);
      LgteIsolatedIndexReader lgteIsolatedIndexReader = new LgteIsolatedIndexReader(readers);
      lgteIsolatedIndexReader.addTreeMapping(readerContents, readerSentences, "doc_id");

      LgteIndexSearcherWrapper searcher =
          new LgteIndexSearcherWrapper(Model.OkapiBM25Model, lgteIsolatedIndexReader);
      try {
        QueryConfiguration queryConfiguration = new QueryConfiguration();
        queryConfiguration.setProperty("bm25.idf.policy", "floor_epslon");
        queryConfiguration.setProperty("bm25.idf.epslon", "0.01");
        queryConfiguration.setProperty("bm25.k1", "2.0");
        queryConfiguration.setProperty("bm25.b", "0.75");
        queryConfiguration.setProperty("index.tree", "true");
        QueryFilter queryFilter =
            new QueryFilter(
                org.apache.lucene.queryParser.QueryParser.parse(
                    "true", Config.S_HAS_TIMEXES, new LgteNothingAnalyzer()));
        LgteQuery lgteQuery =
            LgteQueryParser.parseQuery(
                "sentences:word2", new LgteNothingAnalyzer(), searcher, queryConfiguration);

        // Document-level flag through a QueryFilter.
        LgteHits lgteHits = searcher.search(lgteQuery, queryFilter);
        assertEquals(1, lgteHits.length());
        // NOTE(review): id(0) is presumably the hit's document number in the sentences index
        // (sentence "1_1" was added second) — confirm against LgteHits.
        assertEquals(1, lgteHits.id(0));

        // Document-level flag through a TermsFilter: same expectation.
        TermsFilter termsFilter = new TermsFilter();
        termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES, "true"));
        lgteHits = searcher.search(lgteQuery, termsFilter);
        assertEquals(1, lgteHits.length());
        assertEquals(1, lgteHits.id(0));

        // Sentence-level flag: only sentence "2_1" carries it.
        termsFilter = new TermsFilter();
        termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES + "_sentences", "true"));
        lgteHits = searcher.search(lgteQuery, termsFilter);
        assertEquals(1, lgteHits.length());
        assertEquals(2, lgteHits.id(0));
      } finally {
        searcher.close();
      }
    } finally {
      try {
        Files.delDirsE(contentsPath);
      } finally {
        Files.delDirsE(sentencesPath);
      }
    }
  }