Esempio n. 1
0
  @Deprecated
  private IntOpenHashSet getNonProjectiveMapAux(IntObjectOpenHashMap<IntOpenHashSet> map) {
    IntIntPair max = new IntIntPair(-1, -1);
    IntOpenHashSet set, remove;
    boolean removed;
    int[] keys;

    do {
      max.set(-1, -1);
      keys = map.keys().toArray();
      Arrays.sort(keys);

      for (int key : keys) {
        set = map.get(key);

        if (set.size() > max.i2) max.set(key, set.size());
      }

      removed = false;

      if (max.i2 > 0) {
        remove = new IntOpenHashSet();

        for (IntCursor cur : map.get(max.i1)) {
          if (map.containsKey(cur.value)) {
            set = map.get(cur.value);

            if (set.contains(max.i1)) {
              removed = true;
              set.remove(max.i1);
              if (set.isEmpty()) remove.add(cur.value);
            }
          }
        }

        for (IntCursor cur : remove) map.remove(cur.value);
      }
    } while (removed);

    return new IntOpenHashSet(map.keys());
  }
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();

    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));
    indexWriter.commit();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates =
            RandomInts.randomIntBetween(
                random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5));
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      // leave min/max set to 0 half the time
      int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110);
      int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110);

      QueryBuilder queryBuilder =
          hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue)))
              .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
              .minChildren(minChildren)
              .maxChildren(maxChildren)
              .setShortCircuitCutoff(shortCircuitParentDocSet);
      // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not
      // get live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits);
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()];
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            int count = entry.getValue().elementsCount;
            if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) {
              TermsEnum.SeekStatus seekStatus =
                  termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
              if (seekStatus == TermsEnum.SeekStatus.FOUND) {
                docsEnum =
                    termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                expectedResult.set(docsEnum.nextDoc());
                scores[docsEnum.docID()] = new FloatArrayList(entry.getValue());
              } else if (seekStatus == TermsEnum.SeekStatus.END) {
                break;
              }
            }
          }
        }
        MockScorer mockScorer = new MockScorer(scoreType);
        final LeafCollector leafCollector =
            expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
        leafCollector.setScorer(mockScorer);
        for (int doc = expectedResult.nextSetBit(0);
            doc < slowLeafReader.maxDoc();
            doc =
                doc + 1 >= expectedResult.length()
                    ? DocIdSetIterator.NO_MORE_DOCS
                    : expectedResult.nextSetBit(doc + 1)) {
          mockScorer.scores = scores[doc];
          leafCollector.collect(doc);
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
Esempio n. 3
0
  // test long/double/string terms aggs with high number of buckets that require array growth
  public void testDuelTerms() throws Exception {
    final int numDocs = scaledRandomIntBetween(1000, 2000);
    final int maxNumTerms = randomIntBetween(10, 5000);

    final IntOpenHashSet valuesSet = new IntOpenHashSet();
    cluster().wipeIndices("idx");
    prepareCreate("idx")
        .addMapping(
            "type",
            jsonBuilder()
                .startObject()
                .startObject("type")
                .startObject("properties")
                .startObject("string_values")
                .field("type", "string")
                .field("index", "not_analyzed")
                .startObject("fields")
                .startObject("doc_values")
                .field("type", "string")
                .field("index", "no")
                .startObject("fielddata")
                .field("format", "doc_values")
                .endObject()
                .endObject()
                .endObject()
                .endObject()
                .startObject("long_values")
                .field("type", "long")
                .endObject()
                .startObject("double_values")
                .field("type", "double")
                .endObject()
                .endObject()
                .endObject())
        .execute()
        .actionGet();

    List<IndexRequestBuilder> indexingRequests = Lists.newArrayList();
    for (int i = 0; i < numDocs; ++i) {
      final int[] values = new int[randomInt(4)];
      for (int j = 0; j < values.length; ++j) {
        values[j] = randomInt(maxNumTerms - 1) - 1000;
        valuesSet.add(values[j]);
      }
      XContentBuilder source =
          jsonBuilder().startObject().field("num", randomDouble()).startArray("long_values");
      for (int j = 0; j < values.length; ++j) {
        source = source.value(values[j]);
      }
      source = source.endArray().startArray("double_values");
      for (int j = 0; j < values.length; ++j) {
        source = source.value((double) values[j]);
      }
      source = source.endArray().startArray("string_values");
      for (int j = 0; j < values.length; ++j) {
        source = source.value(Integer.toString(values[j]));
      }
      source = source.endArray().endObject();
      indexingRequests.add(client().prepareIndex("idx", "type").setSource(source));
    }
    indexRandom(true, indexingRequests);

    assertNoFailures(
        client()
            .admin()
            .indices()
            .prepareRefresh("idx")
            .setIndicesOptions(IndicesOptions.lenientExpandOpen())
            .execute()
            .get());

    TermsAggregatorFactory.ExecutionMode[] globalOrdinalModes =
        new TermsAggregatorFactory.ExecutionMode[] {
          TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS_HASH,
          TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS
        };

    SearchResponse resp =
        client()
            .prepareSearch("idx")
            .addAggregation(
                terms("long")
                    .field("long_values")
                    .size(maxNumTerms)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .subAggregation(min("min").field("num")))
            .addAggregation(
                terms("double")
                    .field("double_values")
                    .size(maxNumTerms)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .subAggregation(max("max").field("num")))
            .addAggregation(
                terms("string_map")
                    .field("string_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(TermsAggregatorFactory.ExecutionMode.MAP.toString())
                    .size(maxNumTerms)
                    .subAggregation(stats("stats").field("num")))
            .addAggregation(
                terms("string_global_ordinals")
                    .field("string_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(
                        globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
                    .size(maxNumTerms)
                    .subAggregation(extendedStats("stats").field("num")))
            .addAggregation(
                terms("string_global_ordinals_doc_values")
                    .field("string_values.doc_values")
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .executionHint(
                        globalOrdinalModes[randomInt(globalOrdinalModes.length - 1)].toString())
                    .size(maxNumTerms)
                    .subAggregation(extendedStats("stats").field("num")))
            .execute()
            .actionGet();
    assertAllSuccessful(resp);
    assertEquals(numDocs, resp.getHits().getTotalHits());

    final Terms longTerms = resp.getAggregations().get("long");
    final Terms doubleTerms = resp.getAggregations().get("double");
    final Terms stringMapTerms = resp.getAggregations().get("string_map");
    final Terms stringGlobalOrdinalsTerms = resp.getAggregations().get("string_global_ordinals");
    final Terms stringGlobalOrdinalsDVTerms =
        resp.getAggregations().get("string_global_ordinals_doc_values");

    assertEquals(valuesSet.size(), longTerms.getBuckets().size());
    assertEquals(valuesSet.size(), doubleTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringMapTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringGlobalOrdinalsTerms.getBuckets().size());
    assertEquals(valuesSet.size(), stringGlobalOrdinalsDVTerms.getBuckets().size());
    for (Terms.Bucket bucket : longTerms.getBuckets()) {
      final Terms.Bucket doubleBucket =
          doubleTerms.getBucketByKey(
              Double.toString(Long.parseLong(bucket.getKeyAsText().string())));
      final Terms.Bucket stringMapBucket =
          stringMapTerms.getBucketByKey(bucket.getKeyAsText().string());
      final Terms.Bucket stringGlobalOrdinalsBucket =
          stringGlobalOrdinalsTerms.getBucketByKey(bucket.getKeyAsText().string());
      final Terms.Bucket stringGlobalOrdinalsDVBucket =
          stringGlobalOrdinalsDVTerms.getBucketByKey(bucket.getKeyAsText().string());
      assertNotNull(doubleBucket);
      assertNotNull(stringMapBucket);
      assertNotNull(stringGlobalOrdinalsBucket);
      assertNotNull(stringGlobalOrdinalsDVBucket);
      assertEquals(bucket.getDocCount(), doubleBucket.getDocCount());
      assertEquals(bucket.getDocCount(), stringMapBucket.getDocCount());
      assertEquals(bucket.getDocCount(), stringGlobalOrdinalsBucket.getDocCount());
      assertEquals(bucket.getDocCount(), stringGlobalOrdinalsDVBucket.getDocCount());
    }
  }
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      final int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIds = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates = scaledRandomIntBetween(0, numberOfUpdatableParents);
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      QueryBuilder queryBuilder;
      if (random().nextBoolean()) {
        queryBuilder =
            hasChildQuery("child", termQuery("field1", childValue))
                .setShortCircuitCutoff(shortCircuitParentDocSet);
      } else {
        queryBuilder =
            constantScoreQuery(
                hasChildFilter("child", termQuery("field1", childValue))
                    .setShortCircuitCutoff(shortCircuitParentDocSet));
      }
      // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not
      // get live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);

      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          PostingsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.postings(slowLeafReader.getLiveDocs(), docsEnum, PostingsEnum.NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }