public void testCachingWorks() throws Exception {
    // No documents are added: the test only observes how often the wrapped filter is consulted.
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    indexWriter.close();

    IndexReader slowReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(directory));
    AtomicReaderContext leafContext = (AtomicReaderContext) slowReader.getContext();
    MockFilter nested = new MockFilter();
    CachingWrapperFilter cachingFilter = new CachingWrapperFilter(nested);

    // Initial lookup: the wrapped filter has to be asked.
    // The result is held in a local that is never read; judging by its name this is meant to
    // keep the returned set strongly reachable for the rest of the test (TODO confirm).
    DocIdSet strongRef =
        cachingFilter.getDocIdSet(leafContext, leafContext.reader().getLiveDocs());
    assertTrue("first time", nested.wasCalled());

    // A repeated lookup must not throw, whatever docIdSet the cache is currently holding.
    cachingFilter.getDocIdSet(leafContext, leafContext.reader().getLiveDocs());

    // After resetting the mock, a further lookup must not reach the wrapped filter.
    nested.clear();
    cachingFilter.getDocIdSet(leafContext, leafContext.reader().getLiveDocs());
    assertFalse("second time", nested.wasCalled());

    slowReader.close();
    directory.close();
  }
  public void testNullDocIdSetIterator() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    indexWriter.close();

    IndexReader slowReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(directory));
    AtomicReaderContext leafContext = (AtomicReaderContext) slowReader.getContext();

    // A filter whose DocIdSet is non-null but hands out a null iterator.
    final Filter nullIteratorFilter =
        new Filter() {
          @Override
          public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
            return new DocIdSet() {
              @Override
              public DocIdSetIterator iterator() {
                return null;
              }
            };
          }
        };
    CachingWrapperFilter cachingFilter = new CachingWrapperFilter(nullIteratorFilter);

    // For such a set the caching wrapper is expected to answer with null.
    DocIdSet cached = cachingFilter.getDocIdSet(leafContext, leafContext.reader().getLiveDocs());
    assertNull(cached);

    slowReader.close();
    directory.close();
  }
Exemplo n.º 3
0
 /**
  * Collects up to 100 distinct terms of the "contents" field from the index stored in
  * {@code dir}.
  *
  * <p>I/O failures are printed to stderr and otherwise ignored, so the result may be partial or
  * empty in that case. An index without a "contents" field yields an empty array.
  *
  * @return the collected terms, in no particular order (they pass through a HashSet); never null
  */
 public String[] getTerms() {
   IndexReader reader = null;
   int maxSize = 100;
   Set<String> searchResults = new HashSet<String>();
   try {
     reader = DirectoryReader.open(dir);
     Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms("contents");
     // terms is null when the field is absent from the index; treat that as "no terms"
     // instead of failing with a NullPointerException.
     if (terms != null) {
       TermsEnum termsEnum = terms.iterator(TermsEnum.EMPTY);
       BytesRef byteRef = null;
       while ((byteRef = termsEnum.next()) != null) {
         // Term bytes are UTF-8; decode them as such rather than with the platform charset.
         searchResults.add(byteRef.utf8ToString());
         if (searchResults.size() >= maxSize) {
           break;
         }
       }
     }
   } catch (IOException e) {
     // Best effort: report the failure and return whatever was collected so far.
     e.printStackTrace();
   } finally {
     try {
       if (reader != null) {
         reader.close();
       }
     } catch (IOException e) {
       // A failure while closing does not invalidate the terms already collected.
       e.printStackTrace();
     }
   }
   return searchResults.toArray(new String[searchResults.size()]);
 }
Exemplo n.º 4
0
 /**
  * Closes the reader behind the current {@code readerContext} (if there is one), opens a new
  * reader on {@code writer}, remembers it in {@code topLevelReader} and stores the context of
  * its single-leaf wrapper in {@code readerContext}.
  *
  * @return the freshly stored {@code readerContext}
  */
 protected LeafReaderContext refreshReader() throws Exception {
   if (readerContext != null) {
     readerContext.reader().close();
   }
   topLevelReader = DirectoryReader.open(writer, true);
   LeafReader slowReader = SlowCompositeReaderWrapper.wrap(topLevelReader);
   readerContext = slowReader.getContext();
   return readerContext;
 }
Exemplo n.º 5
0
  /**
   * Builds two indexes (A and B) over the same kind of documents and opens the readers used by
   * the tests: {@code readerA}, {@code readerAclone}, {@code readerB} and the combined
   * {@code readerX}.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    dirA = newDirectory();
    dirB = newDirectory();

    IndexWriter writerA =
        new IndexWriter(
            dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    IndexWriter writerB =
        new IndexWriter(
            dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

    // Each field starts at the maximum of its type and is post-decremented once per document.
    long longValue = Long.MAX_VALUE;
    double doubleValue = Double.MAX_VALUE;
    byte byteValue = Byte.MAX_VALUE;
    short shortValue = Short.MAX_VALUE;
    int intValue = Integer.MAX_VALUE;
    float floatValue = Float.MAX_VALUE;
    for (int docNum = 0; docNum < NUM_DOCS; docNum++) {
      Document document = new Document();
      document.add(newStringField("theLong", String.valueOf(longValue--), Field.Store.NO));
      document.add(newStringField("theDouble", String.valueOf(doubleValue--), Field.Store.NO));
      document.add(newStringField("theByte", String.valueOf(byteValue--), Field.Store.NO));
      document.add(newStringField("theShort", String.valueOf(shortValue--), Field.Store.NO));
      document.add(newStringField("theInt", String.valueOf(intValue--), Field.Store.NO));
      document.add(newStringField("theFloat", String.valueOf(floatValue--), Field.Store.NO));
      // Every third document goes to index A, all others to index B.
      IndexWriter target = (docNum % 3 == 0) ? writerA : writerB;
      target.addDocument(document);
    }
    writerA.close();
    writerB.close();

    DirectoryReader sharedA = DirectoryReader.open(dirA);
    // NOTE(review): this first readerA value is overwritten two statements below; kept as-is
    // because wrapping may have side effects on sharedA — confirm whether it is still needed.
    readerA = SlowCompositeReaderWrapper.wrap(sharedA);
    readerAclone = SlowCompositeReaderWrapper.wrap(sharedA);
    readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
    readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
    readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
  }
Exemplo n.º 6
0
  /**
   * Returns per-document int values equal to {@code valueCount - ord - 1}, where {@code ord} is
   * the document's ordinal in a top-level sorted view of {@code field} (minimum value selected
   * when the field has several values).
   *
   * @param context request context; a "searcher" entry holding a SolrIndexSearcher is used when
   *     present
   * @param readerContext the leaf being evaluated; its docBase maps leaf doc ids to top-level ids
   */
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final int docBase = readerContext.docBase;
    final LeafReader topLevel;
    Object searcherObj = context.get("searcher");
    if (!(searcherObj instanceof SolrIndexSearcher)) {
      // No Solr searcher available: wrap the top-level reader of this leaf directly.
      IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
      topLevel = SlowCompositeReaderWrapper.wrap(topReader);
    } else {
      SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcherObj;
      SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(field);
      boolean singleValuedNumericWithoutDocValues =
          schemaField != null
              && !schemaField.hasDocValues()
              && !schemaField.multiValued()
              && schemaField.getType().getNumericType() != null;
      if (singleValuedNumericWithoutDocValues) {
        // it's a single-valued numeric field: we must currently create insanity :(
        List<LeafReaderContext> leaves = solrSearcher.getIndexReader().leaves();
        LeafReader[] wrappedLeaves = new LeafReader[leaves.size()];
        for (int i = 0; i < wrappedLeaves.length; i++) {
          wrappedLeaves[i] = Insanity.wrapInsanity(leaves.get(i).reader(), field);
        }
        topLevel = SlowCompositeReaderWrapper.wrap(new MultiReader(wrappedLeaves));
      } else {
        // reuse ordinalmap
        topLevel = solrSearcher.getLeafReader();
      }
    }
    // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
    final SortedDocValues ordinals =
        SortedSetSelector.wrap(DocValues.getSortedSet(topLevel, field), SortedSetSelector.Type.MIN);
    final int valueCount = ordinals.getValueCount();

    return new IntDocValues(this) {
      @Override
      public int intVal(int doc) {
        // doc is leaf-relative; shift by docBase to address the top-level ordinals.
        return (valueCount - ordinals.getOrd(doc + docBase) - 1);
      }
    };
  }
Exemplo n.º 7
0
  /** Indexes five small documents with filterable fields and opens {@code reader} on them. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(
            random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));

    // One row per document: access rights, price, date and "in-stock" flag.
    String[][] rows = {
      {"admin guest", "010", "20040101", "Y"},
      {"guest", "020", "20040101", "Y"},
      {"guest", "020", "20050101", "Y"},
      {"admin", "020", "20050101", "Maybe"},
      {"admin guest", "030", "20050101", "N"},
    };
    for (String[] row : rows) {
      addDoc(indexWriter, row[0], row[1], row[2], row[3]);
    }

    // The reader is obtained from the writer before the writer is closed.
    reader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
    indexWriter.close();
  }
Exemplo n.º 8
0
  public void testBasics() throws Exception {
    // Norms sanity check: "foo" and "bar" must have different norm values for every document.
    // TODO: generalize
    LeafReader slowReader = SlowCompositeReaderWrapper.wrap(reader);
    NumericDocValues normsOfFoo = slowReader.getNormValues("foo");
    NumericDocValues normsOfBar = slowReader.getNormValues("bar");
    int docId = 0;
    while (docId < slowReader.maxDoc()) {
      assertTrue(normsOfFoo.get(docId) != normsOfBar.get(docId));
      docId++;
    }

    // Search sanity check: both fields match "brown", and the top "bar" hit outscores the top
    // "foo" hit.
    TermQuery fooQuery = new TermQuery(new Term("foo", "brown"));
    TopDocs fooHits = searcher.search(fooQuery, 10);
    assertTrue(fooHits.totalHits > 0);
    TermQuery barQuery = new TermQuery(new Term("bar", "brown"));
    TopDocs barHits = searcher.search(barQuery, 10);
    assertTrue(barHits.totalHits > 0);
    assertTrue(fooHits.scoreDocs[0].score < barHits.scoreDocs[0].score);
  }
  /**
   * Creates the string/long/double field mappers, the field data service, and an in-memory
   * index of ten documents carrying one value per mapper; leaves {@code writer} open and
   * {@code reader} pointing at the indexed documents.
   */
  @Before
  public void setup() throws Exception {
    super.setUp();

    // Field mappers, each built with its own builder context.
    strMapper =
        new StringFieldMapper.Builder("str_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));
    lngMapper =
        new LongFieldMapper.Builder("lng_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));
    dblMapper =
        new DoubleFieldMapper.Builder("dbl_value")
            .build(new Mapper.BuilderContext(null, new ContentPath(1)));

    // Field data service wired to a stub index service.
    ifdService = new IndexFieldDataService(new Index("test"), new DummyCircuitBreakerService());
    MapperService mapperService =
        MapperTestUtils.newMapperService(
            ifdService.index(), ImmutableSettings.Builder.EMPTY_SETTINGS);
    ifdService.setIndexService(new StubIndexService(mapperService));

    // In-memory index.
    writer =
        new IndexWriter(
            new RAMDirectory(),
            new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));

    // Document n carries "str" + n, n as a long and n as a double.
    for (int docNum = 0; docNum < 10; docNum++) {
      Document doc = new Document();
      doc.add(new StringField(strMapper.names().indexName(), "str" + docNum, Field.Store.NO));
      doc.add(new LongField(lngMapper.names().indexName(), docNum, Field.Store.NO));
      doc.add(new DoubleField(dblMapper.names().indexName(), (double) docNum, Field.Store.NO));
      writer.addDocument(doc);
    }

    reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
  }
  public void testIsCacheAble() throws Exception {
    // Single empty document, read through a single-leaf wrapper.
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    indexWriter.addDocument(new Document());
    indexWriter.close();

    IndexReader slowReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(directory));

    // A query-backed filter: expected to be reported as not cacheable.
    Filter queryFilter = new QueryWrapperFilter(new TermQuery(new Term("test", "value")));
    assertDocIdSetCacheable(slowReader, queryFilter, false);

    // Lower bound above upper bound: per the original note this returns the default empty
    // docidset, which is always cacheable.
    Filter emptyRange =
        NumericRangeFilter.newIntRange(
            "test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true);
    assertDocIdSetCacheable(slowReader, emptyRange, true);

    // A field-cache range filter: expected to be cacheable.
    Filter fieldCacheRange =
        FieldCacheRangeFilter.newIntRange(
            "test", Integer.valueOf(10), Integer.valueOf(20), true, true);
    assertDocIdSetCacheable(slowReader, fieldCacheRange, true);

    // A filter that answers with a FixedBitSet is always cacheable.
    Filter bitSetFilter =
        new Filter() {
          @Override
          public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
            return new FixedBitSet(context.reader().maxDoc());
          }
        };
    assertDocIdSetCacheable(slowReader, bitSetFilter, true);

    slowReader.close();
    directory.close();
  }
  /**
   * Randomized check of {@code ChildrenQuery}: indexes parent documents, each followed by a
   * random number of child documents, deletes everything flagged for deletion, and then, for
   * every possible child value, compares the query's hits and top docs with expectations
   * derived from bookkeeping done at index time.
   */
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    // child "field1" value -> (parent id -> one score entry per live child with that value).
    // Only parents that are not marked for deletion get an entry in the inner map.
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        // Advance the counter so every child document gets its own uid; without the increment
        // all children would share the uid built from 0.
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            // presumably lget() returns the value for the key just probed by containsKey
            // (HPPC "last get" idiom) — confirm against the HPPC version in use
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(
                childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>());
          }
          if (!markParentAsDeleted) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            // The child query below is wrapped in a ConstantScoreQuery, so each child
            // contributes a score of 1.
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    for (String childValue : childValues) {
      Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      Query query =
          new ChildrenQuery(
              "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
      // Actual result: matching doc ids plus the top hits, collected in one search.
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      // Expected result: look up each recorded parent uid and feed its doc id, together with
      // the recorded child scores, into a second top-docs collector via the mock scorer.
      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      MockScorer mockScorer = new MockScorer(scoreType);
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      expectedTopDocsCollector.setScorer(mockScorer);
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
              mockScorer.scores = entry.getValue();
              expectedTopDocsCollector.collect(docsEnum.docID());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              // Keys are visited in sorted order, so nothing further can be found.
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexReader.close();
    directory.close();
  }
  /**
   * Randomized check of the has_child query/filter built via {@code hasChildQuery} /
   * {@code hasChildFilter}: indexes parent documents, each followed by a random number of child
   * documents, then repeatedly (optionally after re-indexing some parents) runs a query for a
   * random child value and compares the matching doc ids with a bitset derived from bookkeeping
   * done at index time.
   */
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    // Ids of parents flagged "delete" and/or "filter"; these are never picked for an update.
    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    // child "field1" value -> ids of parents that are neither deleted nor filtered and have at
    // least one non-deleted child with that value. An (possibly empty) entry is created for a
    // value as soon as any non-deleted child carries it.
    ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      final int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        // childDocId++ gives every child document its own uid.
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          if (childValueToParentIds.containsKey(childValue)) {
            // presumably lget() returns the value for the key just probed by containsKey
            // (HPPC "last get" idiom) — confirm against the HPPC version in use
            parentIds = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates = scaledRandomIntBetween(0, numberOfUpdatableParents);
        for (int j = 0; j < numberOfUpdates; j++) {
          // Draw until a parent that is neither deleted nor filtered comes up.
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          // An update is a delete by uid followed by re-adding a document that carries only
          // the uid and type fields.
          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        // Reopen the reader from the writer and install a searcher on it in the search context.
        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      // Randomly exercise either the query form or the filter form wrapped in a constant score
      // query.
      QueryBuilder queryBuilder;
      if (random().nextBoolean()) {
        queryBuilder =
            hasChildQuery("child", termQuery("field1", childValue))
                .setShortCircuitCutoff(shortCircuitParentDocSet);
      } else {
        queryBuilder =
            constantScoreQuery(
                hasChildFilter("child", termQuery("field1", childValue))
                    .setShortCircuitCutoff(shortCircuitParentDocSet));
      }
      // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not
      // get live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);

      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      // Expected result: for every recorded parent id, seek its uid term and mark the first
      // live document of that term.
      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          PostingsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.postings(slowLeafReader.getLiveDocs(), docsEnum, PostingsEnum.NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              // Ids are visited in sorted order, so nothing further can be found.
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
  /**
   * Builds a SolrInputDocument with a document boost and per-value field boosts, converts it to
   * a Lucene Document and checks how the boosts land on the individual field values (including
   * copyField targets); then indexes the document and checks the resulting norms.
   */
  public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
    SolrCore solrCore = h.getCore();
    IndexSchema latestSchema = solrCore.getLatestSchema();
    SolrInputDocument inputDoc = new SolrInputDocument();

    final float docBoost = 3.0F;
    inputDoc.setDocumentBoost(docBoost);
    inputDoc.addField("id", "42");

    // "title": two values; the field boost is expected to be the product of the value boosts.
    SolrInputField titleIn = new SolrInputField("title");
    titleIn.addValue("titleA", 2.0F);
    titleIn.addValue("titleB", 7.0F);
    final float titleBoost = 2.0F * 7.0F;
    assertEquals(titleBoost, titleIn.getBoost(), 0.0F);
    inputDoc.put(titleIn.getName(), titleIn);

    // "foo_t": three values, same product rule.
    SolrInputField fooIn = new SolrInputField("foo_t");
    fooIn.addValue("summer time", 1.0F);
    fooIn.addValue("in the city", 5.0F);
    fooIn.addValue("living is easy", 11.0F);
    final float fooBoost = 1.0F * 5.0F * 11.0F;
    assertEquals(fooBoost, fooIn.getBoost(), 0.0F);
    inputDoc.put(fooIn.getName(), fooIn);

    Document luceneDoc = DocumentBuilder.toDocument(inputDoc, latestSchema);

    IndexableField[] titleOut = luceneDoc.getFields(titleIn.getName());
    assertEquals("wrong number of title values", 2, titleOut.length);

    IndexableField[] noNormsOut = luceneDoc.getFields("title_stringNoNorms");
    assertEquals("wrong number of nonorms values", 2, noNormsOut.length);

    IndexableField[] fooOut = luceneDoc.getFields(fooIn.getName());
    assertEquals("wrong number of foo values", 3, fooOut.length);

    IndexableField[] textOut = luceneDoc.getFields("text");
    assertEquals("wrong number of text values", 5, textOut.length);

    // since Lucene no longer has native document boosts, we should find
    // the doc boost multiplied into the boost on the first field value
    // of each field.  All other field values should be 1.0f
    // (lucene will multiply all of the field value boosts later)
    assertEquals(titleBoost * docBoost, titleOut[0].boost(), 0.0F);
    assertEquals(1.0F, titleOut[1].boost(), 0.0F);

    assertEquals(fooBoost * docBoost, fooOut[0].boost(), 0.0F);
    assertEquals(1.0F, fooOut[1].boost(), 0.0F);
    assertEquals(1.0F, fooOut[2].boost(), 0.0F);

    assertEquals(titleBoost * docBoost, textOut[0].boost(), 0.0F);
    assertEquals(1.0F, textOut[1].boost(), 0.0F);
    assertEquals(fooBoost, textOut[2].boost(), 0.0F);
    assertEquals(1.0F, textOut[3].boost(), 0.0F);
    assertEquals(1.0F, textOut[4].boost(), 0.0F);

    // copyField dest with no norms should not have received any boost
    assertEquals(1.0F, noNormsOut[0].boost(), 0.0F);
    assertEquals(1.0F, noNormsOut[1].boost(), 0.0F);

    // now index that SolrInputDocument to check the computed norms
    assertU(adoc(inputDoc));
    assertU(commit());

    SolrQueryRequest request = req("q", "id:42");
    try {
      // very hack-ish: run the request by hand to get at the internal doc id
      SolrQueryResponse response = new SolrQueryResponse();
      solrCore.execute(
          solrCore.getRequestHandler(request.getParams().get(CommonParams.QT)), request, response);

      DocList hits = ((ResultContext) response.getValues().get("response")).docs;
      assertTrue("can't find the doc we just added", hits.size() == 1);
      int docid = hits.iterator().nextDoc();

      SolrIndexSearcher indexSearcher = request.getSearcher();
      AtomicReader slowReader =
          SlowCompositeReaderWrapper.wrap(indexSearcher.getTopReaderContext().reader());

      assertTrue(
          "similarity doesn't extend DefaultSimilarity, "
              + "config or defaults have changed since test was written",
          indexSearcher.getSimilarity() instanceof DefaultSimilarity);

      DefaultSimilarity similarity = (DefaultSimilarity) indexSearcher.getSimilarity();

      NumericDocValues normsOfTitle = slowReader.getNormValues("title");
      NumericDocValues normsOfFoo = slowReader.getNormValues("foo_t");
      NumericDocValues normsOfText = slowReader.getNormValues("text");

      assertEquals(
          expectedNorm(similarity, 2, titleBoost * docBoost), normsOfTitle.get(docid));

      assertEquals(
          expectedNorm(similarity, 8 - 3, fooBoost * docBoost), normsOfFoo.get(docid));

      assertEquals(
          expectedNorm(similarity, 2 + 8 - 3, titleBoost * fooBoost * docBoost),
          normsOfText.get(docid));

    } finally {
      request.close();
    }
  }
Exemplo n.º 14
0
  /**
   * Indexes one document through {@code MockPayloadAnalyzer} and checks (1) the positions
   * reported for term "a", (2) the payloads collected for a span-near query over "a" and "k",
   * and (3) the number of spans for the same query, including that some span starts at
   * position 0.
   */
  public void testPayloadsPos0() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), directory, new MockPayloadAnalyzer());
    Document document = new Document();
    document.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
    indexWriter.addDocument(document);

    final IndexReader readerFromWriter = indexWriter.getReader();
    LeafReader slowReader = SlowCompositeReaderWrapper.wrap(readerFromWriter);

    PostingsEnum postings = slowReader.postings(new Term("content", "a"), PostingsEnum.ALL);

    assertTrue(postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // "a" occurs 4 times
    assertEquals(4, postings.freq());
    assertEquals(0, postings.nextPosition());
    assertEquals(1, postings.nextPosition());
    assertEquals(3, postings.nextPosition());
    assertEquals(6, postings.nextPosition());

    // only one doc has "a"
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());

    IndexSearcher indexSearcher = newSearcher(readerFromWriter);

    SpanTermQuery spanA = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery spanK = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] clauses = {spanA, spanK};
    SpanNearQuery nearQuery = new SpanNearQuery(clauses, 30, false);

    // Phase 1: walk the spans with payload access and count every collected payload.
    int payloadCount = 0;
    boolean payloadSpanAtZero = false;
    if (VERBOSE) {
      System.out.println("\ngetPayloadSpans test");
    }
    PayloadSpanCollector payloadCollector = new PayloadSpanCollector();
    Spans payloadSpans =
        MultiSpansWrapper.wrap(
            indexSearcher.getIndexReader(), nearQuery, SpanWeight.Postings.PAYLOADS);
    while (payloadSpans.nextDoc() != Spans.NO_MORE_DOCS) {
      while (payloadSpans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        if (VERBOSE) {
          System.out.println(
              "doc "
                  + payloadSpans.docID()
                  + ": span "
                  + payloadSpans.startPosition()
                  + " to "
                  + payloadSpans.endPosition());
        }
        payloadCollector.reset();
        payloadSpans.collect(payloadCollector);
        if (payloadSpans.startPosition() == 0) {
          payloadSpanAtZero = true;
        }
        for (BytesRef payload : payloadCollector.payloads) {
          payloadCount++;
          if (VERBOSE) {
            System.out.println("  payload: " + Term.toString(payload));
          }
        }
      }
    }
    assertTrue(payloadSpanAtZero);
    assertEquals(8, payloadCount);

    // Phase 2: walk the plain spans and count them.
    Spans plainSpans = MultiSpansWrapper.wrap(indexSearcher.getIndexReader(), nearQuery);
    int spanCount = 0;
    boolean plainSpanAtZero = false;
    while (plainSpans.nextDoc() != Spans.NO_MORE_DOCS) {
      while (plainSpans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        spanCount++;
        if (plainSpans.startPosition() == 0) {
          plainSpanAtZero = true;
        }
      }
    }
    assertEquals(4, spanCount);
    assertTrue(plainSpanAtZero);

    indexWriter.close();
    indexSearcher.getIndexReader().close();
    directory.close();
  }
Exemplo n.º 15
0
  /**
   * Randomized duel for the scoring {@code has_child} query.
   *
   * <p>The test indexes a random number of parent documents, each followed by a random number of
   * child documents carrying a random {@code field1} value. While indexing it records, per child
   * value and per parent id, one score entry ({@code 1f}) for every live child whose parent is
   * neither deleted nor filtered. It then runs {@code numUniqueChildValues / 4} rounds; each round
   * optionally re-indexes a few parents (simulating updates), builds a {@code has_child} query with
   * a random score type, min/max children and short-circuit cutoff, and compares both the matching
   * doc set and the top docs against an expected result computed by direct term lookups.
   *
   * <p>Statement order matters here: the sequence of {@code random()} calls determines
   * reproducibility for a given seed.
   */
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    // Ids of parents that carry the "delete" and/or "filter" marker; these must never be picked
    // for the simulated updates further down.
    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();

    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    // child value -> (parent id -> scores of the live children with that value)
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          // containsKey() positions the map on the key so that lget() can return its value
          // without a second lookup.
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
          }
          // The child value is registered even when the parent is excluded; only the score entry
          // is skipped in that case.
          if (!markParentAsDeleted && !filterMe) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));
    indexWriter.commit();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        // Bounding by the number of updatable parents yields 0 updates when every parent is
        // filtered/deleted, which keeps the do/while below from spinning forever.
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates =
            RandomInts.randomIntBetween(
                random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5));
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        // Reopen a reader on top of the writer so the updates become visible to the searcher.
        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        // Label the reopened searcher like the initial one (it previously named the sibling
        // ChildrenConstantScoreQueryTests class, a copy/paste leftover).
        engineSearcher =
            new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      // leave min/max set to 0 half the time
      int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110);
      int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110);

      QueryBuilder queryBuilder =
          hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue)))
              .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
              .minChildren(minChildren)
              .maxChildren(maxChildren)
              .setShortCircuitCutoff(shortCircuitParentDocSet);
      // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not
      // get live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      // Build the expected result by looking up each recorded parent uid directly in the index.
      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits);
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()];
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            int count = entry.getValue().elementsCount;
            // maxChildren == 0 means "no upper bound".
            if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) {
              TermsEnum.SeekStatus seekStatus =
                  termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
              if (seekStatus == TermsEnum.SeekStatus.FOUND) {
                docsEnum =
                    termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                expectedResult.set(docsEnum.nextDoc());
                scores[docsEnum.docID()] = new FloatArrayList(entry.getValue());
              } else if (seekStatus == TermsEnum.SeekStatus.END) {
                break;
              }
            }
          }
        }
        // Feed the expected docs, with their recorded child scores, through a top-docs collector
        // so the ranking can be compared with the actual one.
        MockScorer mockScorer = new MockScorer(scoreType);
        final LeafCollector leafCollector =
            expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
        leafCollector.setScorer(mockScorer);
        for (int doc = expectedResult.nextSetBit(0);
            doc < slowLeafReader.maxDoc();
            doc =
                doc + 1 >= expectedResult.length()
                    ? DocIdSetIterator.NO_MORE_DOCS
                    : expectedResult.nextSetBit(doc + 1)) {
          mockScorer.scores = scores[doc];
          leafCollector.collect(doc);
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
  /**
   * Duels a regular index against a memory index and asserts that both expose the same postings.
   *
   * <p>For every field of {@code other} this compares norms (for document 0), the term statistics
   * ({@code docCount}, {@code sumDocFreq}, {@code sumTotalTermFreq}) and then walks both term
   * dictionaries in lock step, comparing terms, doc ids and frequencies. When the reference field
   * has positions, positions are compared as well, plus offsets (if both sides have them) and
   * payloads (if the reference side has them).
   *
   * @param other the reference reader; it is wrapped into a single {@link LeafReader} view
   * @param memIndexReader reader over the memory index under test
   * @throws IOException if reading either index fails
   */
  private void duellReaders(CompositeReader other, LeafReader memIndexReader) throws IOException {
    LeafReader competitor = SlowCompositeReaderWrapper.wrap(other);
    Fields memFields = memIndexReader.fields();
    for (String field : competitor.fields()) {
      Terms memTerms = memFields.terms(field);
      // The reference terms must come from the competitor. Reading them from memIndexReader (as
      // this method used to) compares the memory index with itself and can never fail.
      Terms iwTerms = competitor.terms(field);
      if (iwTerms == null) {
        assertNull(memTerms);
      } else {
        NumericDocValues normValues = competitor.getNormValues(field);
        NumericDocValues memNormValues = memIndexReader.getNormValues(field);
        if (normValues != null) {
          // mem idx always computes norms on the fly
          assertNotNull(memNormValues);
          assertEquals(normValues.get(0), memNormValues.get(0));
        }

        assertNotNull(memTerms);
        assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
        assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
        assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
        TermsEnum iwTermsIter = iwTerms.iterator();
        TermsEnum memTermsIter = memTerms.iterator();
        if (iwTerms.hasPositions()) {
          final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();

          while (iwTermsIter.next() != null) {
            assertNotNull(memTermsIter.next());
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            PostingsEnum iwDocsAndPos = iwTermsIter.postings(null, PostingsEnum.ALL);
            PostingsEnum memDocsAndPos = memTermsIter.postings(null, PostingsEnum.ALL);
            while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
              for (int i = 0; i < iwDocsAndPos.freq(); i++) {
                assertEquals(
                    "term: " + iwTermsIter.term().utf8ToString(),
                    iwDocsAndPos.nextPosition(),
                    memDocsAndPos.nextPosition());
                if (offsets) {
                  assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                  assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
                }

                if (iwTerms.hasPayloads()) {
                  assertEquals(iwDocsAndPos.getPayload(), memDocsAndPos.getPayload());
                }
              }
            }
          }
        } else {
          while (iwTermsIter.next() != null) {
            // Advance the memory-index enum in lock step; without this call it stays unpositioned
            // and term()/postings() below are not valid.
            assertNotNull(memTermsIter.next());
            assertEquals(iwTermsIter.term(), memTermsIter.term());
            PostingsEnum iwDocsAndPos = iwTermsIter.postings(null);
            PostingsEnum memDocsAndPos = memTermsIter.postings(null);
            while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
            }
          }
        }
      }
    }
  }
Exemplo n.º 17
0
  /**
   * Builds a sparse term-frequency data set from the Lucene index stored at {@code indexPath}.
   *
   * <p>First pass: every term of the {@code reviewKey} field whose total term frequency is at least
   * {@code threshold} becomes one attribute, and the term is remembered together with its attribute
   * index. Second pass: for each document that has a term vector on {@code reviewKey}, one
   * {@code ESparseInstance} is created holding the in-document frequency of every retained term.
   *
   * <p>Fixes over the previous version: terms are decoded with {@code utf8ToString()} (
   * {@code BytesRef.toString()} yields a hex dump, not the text), the term-to-attribute dictionary
   * is actually filled, the per-document write cursor advances, the arrays are trimmed to the
   * number of retained terms, the reader and directory are closed, and the populated data set is
   * returned instead of {@code null}.
   *
   * @param indexPath file-system path of the index; must not be {@code null}
   * @param destFile only checked for {@code null} here; this method does not write to it
   * @param threshold minimum total term frequency for a term to become an attribute
   * @return the populated data set, or {@code null} if an argument is {@code null} or the index has
   *     no terms for the {@code reviewKey} field
   * @throws Exception if the index cannot be opened or read
   */
  public SparseInstances readIndex(String indexPath, String destFile, int threshold)
      throws Exception {

    if (indexPath == null || destFile == null) {
      System.out.println("error: indexPath or destFile is null\n");
      return null;
    }

    try (FSDirectory directory = FSDirectory.open(Paths.get(indexPath));
        DirectoryReader reader = DirectoryReader.open(directory)) {
      Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(reviewKey);
      if (terms == null) {
        // The field does not exist in this index (or has no indexed terms).
        System.out.println("error: no terms found for field " + reviewKey + "\n");
        return null;
      }

      // Terms.size() may report -1 when the codec does not know the count; treat that as "no hint".
      int capacity = (int) Math.max(0L, Math.min(terms.size(), (long) Integer.MAX_VALUE));
      HashMap<String, Integer> wordDict = new HashMap<>(capacity);
      SparseInstances instData = new SparseInstances(Math.min(capacity, 65535), reader.numDocs());

      // Pass 1: register one attribute per sufficiently frequent term.
      // NOTE(review): assumes addAttribute() appends, so the n-th added attribute has index n
      // (starting at 0) - confirm against SparseInstances.
      TermsEnum termsEnum = terms.iterator();
      int index = 0;
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        if (termsEnum.totalTermFreq() < threshold) {
          continue;
        }
        String strTerm = term.utf8ToString();
        if (strTerm.isEmpty() || wordDict.containsKey(strTerm)) {
          continue;
        }
        instData.addAttribute(strTerm);
        wordDict.put(strTerm, index++);
      }

      // Pass 2: one sparse instance per document that has a term vector.
      // NOTE(review): the loop bound is numInstances() as before; if the index contains deletions,
      // doc ids run up to maxDoc() and this bound may need revisiting.
      int numAtt = instData.numAttributes();
      int numInst = instData.numInstances();
      for (int docIndex = 0; docIndex < numInst; docIndex++) {
        String id = reader.document(docIndex).getField(idKey).stringValue();
        Terms docTerms = reader.getTermVector(docIndex, reviewKey);
        if (docTerms == null) {
          continue;
        }
        int[] indices = new int[(int) docTerms.size()];
        double[] attValues = new double[(int) docTerms.size()];
        TermsEnum docTermsEnum = docTerms.iterator();
        int termIndex = 0;
        while ((term = docTermsEnum.next()) != null) {
          Integer attIndex = wordDict.get(term.utf8ToString());
          if (attIndex == null) {
            // Term was dropped in pass 1 (below threshold or empty).
            continue;
          }
          indices[termIndex] = attIndex.intValue();
          // For a term vector this is the frequency of the term inside this document.
          attValues[termIndex] = docTermsEnum.totalTermFreq();
          termIndex++;
        }
        // Drop the unused tail so skipped terms do not show up as bogus (index 0, value 0) entries.
        if (termIndex < indices.length) {
          indices = java.util.Arrays.copyOf(indices, termIndex);
          attValues = java.util.Arrays.copyOf(attValues, termIndex);
        }
        ESparseInstance instance = new ESparseInstance(id, 1.0, attValues, indices, numAtt);
        instData.addInstance(instance);
      }

      return instData;
    }
  }
  /**
   * Randomized duel for {@code ChildrenConstantScoreQuery}.
   *
   * <p>The test indexes random parent documents, each followed by random child documents with a
   * random {@code field1} value, and records per child value the ids of the parents that have at
   * least one live child with that value and are themselves neither deleted nor filtered. It then
   * runs {@code numUniqueChildValues / 4} rounds; each round randomly picks cached or raw filters,
   * optionally re-indexes a few parents (simulating updates), runs the query (directly or wrapped
   * the way the filter parser does) inside a filtered query that excludes {@code filter:me}, and
   * compares the hits with an expected bit set computed by direct term lookups.
   *
   * <p>Statement order matters here: the sequence of {@code random()} calls determines
   * reproducibility for a given seed.
   */
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    // Ids of parents that carry the "delete" and/or "filter" marker; these must never be picked
    // for the simulated updates further down.
    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    // child value -> ids of the visible parents having a live child with that value
    ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableSet<String>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs;
      if (rarely()) {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      } else {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10);
      }
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        // Post-increment so every child gets its own uid; the counter was previously never
        // advanced, which gave all children the uid "child#0".
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          // containsKey() positions the map on the key so that lget() can return its value
          // without a second lookup.
          if (childValueToParentIds.containsKey(childValue)) {
            parentIds = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<String>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me")));
    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals
      // with the fact that deletes are applied at the top level when filters are cached.
      Filter parentFilter;
      if (random().nextBoolean()) {
        parentFilter = SearchContext.current().filterCache().cache(rawParentFilter);
      } else {
        parentFilter = rawParentFilter;
      }

      // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer
      // not get live docs as acceptedDocs
      Filter filterMe;
      if (random().nextBoolean()) {
        filterMe = SearchContext.current().filterCache().cache(rawFilterMe);
      } else {
        filterMe = rawFilterMe;
      }

      // Simulate a parent update
      if (random().nextBoolean()) {
        // When every parent is filtered or deleted there is nothing to update; asking for at
        // least one update in that case would make the do/while below loop forever.
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates =
            numberOfUpdatableParents > 0 ? 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5) : 0;
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        // Reopen a reader on top of the writer so the updates become visible to the searcher.
        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.SimpleSearcher(
                ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      TermQuery childQuery = new TermQuery(new Term("field1", childValue));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null;
      Query query;
      if (random().nextBoolean()) {
        // Usage in HasChildQueryParser
        query =
            new ChildrenConstantScoreQuery(
                childQuery,
                "parent",
                "child",
                parentFilter,
                shortCircuitParentDocSet,
                nonNestedDocsFilter);
      } else {
        // Usage in HasChildFilterParser
        query =
            new XConstantScoreQuery(
                new CustomQueryWrappingFilter(
                    new ChildrenConstantScoreQuery(
                        childQuery,
                        "parent",
                        "child",
                        parentFilter,
                        shortCircuitParentDocSet,
                        nonNestedDocsFilter)));
      }
      query = new XFilteredQuery(query, filterMe);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      // Build the expected result by looking up each recorded parent uid directly in the index.
      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
Exemplo n.º 19
0
  /**
   * Indexes {@code nDocs} random documents and pre-computes the expected join results for them.
   *
   * <p>{@code nDocs / 2} unique, non-empty random strings are generated, each randomly flagged as
   * belonging to the "from" or the "to" side. Every document gets an {@code id}, one of those
   * strings as {@code value}, and one or more link values stored in the {@code from} or {@code to}
   * field; it is added to {@code fromWriter} or {@code toWriter} according to the flag of its
   * value. Afterwards, for every unique value, the method collects the join values (and their
   * scores) of the documents matching that value and maps them onto the doc ids that carry the same
   * join value in the opposite field. The result is stored in the returned context.
   *
   * @param nDocs number of documents to index
   * @param fromWriter writer receiving the "from" documents
   * @param toWriter writer receiving the "to" documents
   * @param multipleValuesPerDocument whether a document gets several link values (2 to 11) or one
   * @param scoreDocsInOrder in the multi-valued case, selects the term-dictionary walk instead of
   *     the collector-based lookup when computing the expected joined documents
   * @return the context holding the indexed random documents and the expected hits/scores
   * @throws IOException if indexing or searching fails
   */
  private IndexIterationContext createContext(
      int nDocs,
      RandomIndexWriter fromWriter,
      RandomIndexWriter toWriter,
      boolean multipleValuesPerDocument,
      boolean scoreDocsInOrder)
      throws IOException {
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.randomUniqueValues = new String[numRandomValues];
    Set<String> trackSet = new HashSet<String>();
    context.randomFrom = new boolean[numRandomValues];
    for (int i = 0; i < numRandomValues; i++) {
      String uniqueRandomValue;
      do {
        uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random());
        //        uniqueRandomValue = _TestUtil.randomSimpleString(random);
      } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
      // Generate unique values and empty strings aren't allowed.
      trackSet.add(uniqueRandomValue);
      context.randomFrom[i] = random().nextBoolean();
      context.randomUniqueValues[i] = uniqueRandomValue;
    }

    // Index the documents and record, per link value and per value, which documents carry them.
    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++) {
      String id = Integer.toString(i);
      int randomI = random().nextInt(context.randomUniqueValues.length);
      String value = context.randomUniqueValues[randomI];
      Document document = new Document();
      document.add(newTextField(random(), "id", id, Field.Store.NO));
      document.add(newTextField(random(), "value", value, Field.Store.NO));

      // The side (from/to) is a property of the chosen value, not of the individual document.
      boolean from = context.randomFrom[randomI];
      int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
      docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
      for (int j = 0; j < numberOfLinkValues; j++) {
        String linkValue =
            context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
        docs[i].linkValues.add(linkValue);
        if (from) {
          if (!context.fromDocuments.containsKey(linkValue)) {
            context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
          }
          if (!context.randomValueFromDocs.containsKey(value)) {
            context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
          }

          context.fromDocuments.get(linkValue).add(docs[i]);
          context.randomValueFromDocs.get(value).add(docs[i]);
          document.add(newTextField(random(), "from", linkValue, Field.Store.NO));
        } else {
          if (!context.toDocuments.containsKey(linkValue)) {
            context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
          }
          if (!context.randomValueToDocs.containsKey(value)) {
            context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
          }

          context.toDocuments.get(linkValue).add(docs[i]);
          context.randomValueToDocs.get(value).add(docs[i]);
          document.add(newTextField(random(), "to", linkValue, Field.Store.NO));
        }
      }

      final RandomIndexWriter w;
      if (from) {
        w = fromWriter;
      } else {
        w = toWriter;
      }

      w.addDocument(document);
      // Commit roughly every tenth document.
      if (random().nextInt(10) == 4) {
        w.commit();
      }
      if (VERBOSE) {
        System.out.println("Added document[" + docs[i].id + "]: " + document);
      }
    }

    // Pre-compute all possible hits for all unique random values. On top of this also compute all
    // possible scores for any ScoreMode.
    IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());
    IndexSearcher toSearcher = newSearcher(toWriter.getReader());
    for (int i = 0; i < context.randomUniqueValues.length; i++) {
      String uniqueRandomValue = context.randomUniqueValues[i];
      final String fromField;
      final String toField;
      final Map<String, Map<Integer, JoinScore>> queryVals;
      if (context.randomFrom[i]) {
        fromField = "from";
        toField = "to";
        queryVals = context.fromHitsToJoinScore;
      } else {
        fromField = "to";
        toField = "from";
        queryVals = context.toHitsToJoinScore;
      }
      // Step 1: search fromSearcher for the value and accumulate a JoinScore per join value found
      // in fromField of the matching documents.
      final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>();
      if (multipleValuesPerDocument) {
        fromSearcher.search(
            new TermQuery(new Term("value", uniqueRandomValue)),
            new Collector() {

              private Scorer scorer;
              private SortedSetDocValues docTermOrds;
              final BytesRef joinValue = new BytesRef();

              @Override
              public void collect(int doc) throws IOException {
                docTermOrds.setDocument(doc);
                long ord;
                while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                  docTermOrds.lookupOrd(ord, joinValue);
                  JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                  if (joinScore == null) {
                    // joinValue is a reused scratch buffer, so the map key must be a copy.
                    joinValueToJoinScores.put(
                        BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                  }
                  joinScore.addScore(scorer.score());
                }
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
              }

              @Override
              public void setScorer(Scorer scorer) {
                this.scorer = scorer;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }
            });
      } else {
        fromSearcher.search(
            new TermQuery(new Term("value", uniqueRandomValue)),
            new Collector() {

              private Scorer scorer;
              private BinaryDocValues terms;
              private Bits docsWithField;
              private final BytesRef spare = new BytesRef();

              @Override
              public void collect(int doc) throws IOException {
                terms.get(doc, spare);
                BytesRef joinValue = spare;
                // An empty value on a document not flagged in docsWithField means the document
                // has no value for the field; skip it.
                if (joinValue.length == 0 && !docsWithField.get(doc)) {
                  return;
                }

                JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                if (joinScore == null) {
                  // spare is a reused scratch buffer, so the map key must be a copy.
                  joinValueToJoinScores.put(
                      BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                }
                joinScore.addScore(scorer.score());
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
                docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
              }

              @Override
              public void setScorer(Scorer scorer) {
                this.scorer = scorer;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }
            });
      }

      // Step 2: map every document on the "to" side that carries one of the collected join values
      // in toField onto the JoinScore of that join value.
      final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          // Walk the join values in sorted order and look each one up in the term dictionary of
          // a single-leaf view over the "to" index.
          AtomicReader slowCompositeReader =
              SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues =
                new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum =
                    termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc();
                    doc != DocIdSetIterator.NO_MORE_DOCS;
                    doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(doc)) {
                    docToJoinScore.put(doc, joinScore);
                  }
                }
              }
            }
          }
        } else {
          // Visit every "to" document and check each of its toField values against the collected
          // join values.
          toSearcher.search(
              new MatchAllDocsQuery(),
              new Collector() {

                private SortedSetDocValues docTermOrds;
                private final BytesRef scratch = new BytesRef();
                private int docBase;

                @Override
                public void collect(int doc) throws IOException {
                  docTermOrds.setDocument(doc);
                  long ord;
                  while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                    docTermOrds.lookupOrd(ord, scratch);
                    JoinScore joinScore = joinValueToJoinScores.get(scratch);
                    if (joinScore == null) {
                      continue;
                    }
                    // doc is relative to the current segment; add docBase to get the id used as
                    // map key.
                    Integer basedDoc = docBase + doc;
                    // First encountered join value determines the score.
                    // Something to keep in mind for many-to-many relations.
                    if (!docToJoinScore.containsKey(basedDoc)) {
                      docToJoinScore.put(basedDoc, joinScore);
                    }
                  }
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                  docBase = context.docBase;
                  docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                  return false;
                }

                @Override
                public void setScorer(Scorer scorer) {}
              });
        }
      } else {
        // Single-valued case: each "to" document has at most one toField value to check.
        toSearcher.search(
            new MatchAllDocsQuery(),
            new Collector() {

              private BinaryDocValues terms;
              private int docBase;
              private final BytesRef spare = new BytesRef();

              @Override
              public void collect(int doc) {
                terms.get(doc, spare);
                JoinScore joinScore = joinValueToJoinScores.get(spare);
                if (joinScore == null) {
                  return;
                }
                docToJoinScore.put(docBase + doc, joinScore);
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
                docBase = context.docBase;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }

              @Override
              public void setScorer(Scorer scorer) {}
            });
      }
      queryVals.put(uniqueRandomValue, docToJoinScore);
    }

    fromSearcher.getIndexReader().close();
    toSearcher.getIndexReader().close();

    return context;
  }