private int[] getRandomPorts(int numberOfPorts) {
    IntHashSet ports = new IntHashSet();

    int nextPort = randomIntBetween(49152, 65535);
    for (int i = 0; i < numberOfPorts; i++) {
      boolean foundPortInRange = false;
      while (!foundPortInRange) {
        if (!ports.contains(nextPort)) {
          logger.debug("looking to see if port [{}]is available", nextPort);
          try (ServerSocket serverSocket = new ServerSocket()) {
            // Set SO_REUSEADDR as we may bind here and not be able
            // to reuse the address immediately without it.
            serverSocket.setReuseAddress(NetworkUtils.defaultReuseAddress());
            serverSocket.bind(new InetSocketAddress(nextPort));

            // bind was a success
            logger.debug("port [{}] available.", nextPort);
            foundPortInRange = true;
            ports.add(nextPort);
          } catch (IOException e) {
            // port is taken; log and try another candidate
            logger.debug("port [{}] not available.", e, nextPort);
          }
        }
        nextPort = randomIntBetween(49152, 65535);
      }
    }
    return ports.toArray();
  }
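A minimal, self-contained sketch of the same pattern, assuming the HPPC IntHashSet (com.carrotsearch.hppc) and using java.util.Random in place of the test framework's randomIntBetween; the class and method names are illustrative only.

import com.carrotsearch.hppc.IntHashSet;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.util.Random;

class FreePortPicker {
  // Sketch: pick distinct free ports in the ephemeral range, de-duplicated with IntHashSet.
  static int[] pickFreePorts(int numberOfPorts) {
    IntHashSet ports = new IntHashSet();
    Random random = new Random();
    while (ports.size() < numberOfPorts) {
      int candidate = 49152 + random.nextInt(65535 - 49152 + 1);
      if (ports.contains(candidate)) {
        continue; // already picked this port
      }
      try (ServerSocket socket = new ServerSocket()) {
        socket.setReuseAddress(true); // allow immediate rebinding after the probe
        socket.bind(new InetSocketAddress(candidate));
        ports.add(candidate); // bind succeeded, so the port is currently free
      } catch (IOException e) {
        // port is in use; try another candidate
      }
    }
    return ports.toArray();
  }
}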
 private void applyDeletedShards(final ClusterChangedEvent event) {
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     return;
   }
   IntHashSet newShardIds = new IntHashSet();
   for (IndexService indexService : indicesService) {
     String index = indexService.index().name();
     IndexMetaData indexMetaData = event.state().metaData().index(index);
     if (indexMetaData == null) {
       continue;
     }
     // now, go over and delete shards that need to be deleted
     newShardIds.clear();
     for (ShardRouting shard : routingNode) {
       if (shard.index().equals(index)) {
         newShardIds.add(shard.id());
       }
     }
     for (Integer existingShardId : indexService.shardIds()) {
       if (!newShardIds.contains(existingShardId)) {
         if (indexMetaData.state() == IndexMetaData.State.CLOSE) {
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (index is closed)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (index is closed)");
         } else {
           // we can just remove the shard, without cleaning it locally, since we will clean it
           // when all shards are allocated in the IndicesStore
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (not allocated)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (not allocated)");
         }
       }
     }
   }
 }
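The shard-removal loop above reduces to a set difference over int IDs. A hedged, standalone sketch of just that step, with hypothetical names standing in for the Elasticsearch types:

import com.carrotsearch.hppc.IntHashSet;
import java.util.ArrayList;
import java.util.List;

class StaleIdFilter {
  // Sketch: return the existing IDs that are absent from the desired set and should be removed.
  static List<Integer> staleIds(Iterable<Integer> existingIds, int[] desiredIds) {
    IntHashSet desired = IntHashSet.from(desiredIds);
    List<Integer> stale = new ArrayList<>();
    for (int existing : existingIds) {
      if (!desired.contains(existing)) {
        stale.add(existing);
      }
    }
    return stale;
  }
}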
Example No. 3
  public void collectDocumentStatistics() {

    topicCodocumentMatrices = new int[numTopics][numTopWords][numTopWords];
    wordTypeCounts = new int[alphabet.size()];
    numTokens = 0;

    // This is an array of hash sets containing the words-of-interest for each topic,
    //  used for checking if the word at some position is one of those words.
    IntHashSet[] topicTopWordIndices = new IntHashSet[numTopics];

    // The same as the topic top words, but with int indices instead of strings,
    //  used for iterating over positions.
    int[][] topicWordIndicesInOrder = new int[numTopics][numTopWords];

    // This is an array of hash sets that will hold the words-of-interest present in a document,
    //  which will be cleared after every document.
    IntHashSet[] docTopicWordIndices = new IntHashSet[numTopics];

    int numDocs = model.getData().size();

    // The count of each topic, again cleared after every document.
    int[] topicCounts = new int[numTopics];

    for (int topic = 0; topic < numTopics; topic++) {
      IntHashSet wordIndices = new IntHashSet();

      for (int i = 0; i < numTopWords; i++) {
        if (topicTopWords[topic][i] != null) {
          int type = alphabet.lookupIndex(topicTopWords[topic][i]);
          topicWordIndicesInOrder[topic][i] = type;
          wordIndices.add(type);
        }
      }

      topicTopWordIndices[topic] = wordIndices;
      docTopicWordIndices[topic] = new IntHashSet();
    }

    int doc = 0;

    for (TopicAssignment document : model.getData()) {

      FeatureSequence tokens = (FeatureSequence) document.instance.getData();
      FeatureSequence topics = (FeatureSequence) document.topicSequence;

      for (int position = 0; position < tokens.size(); position++) {
        int type = tokens.getIndexAtPosition(position);
        int topic = topics.getIndexAtPosition(position);

        numTokens++;
        wordTypeCounts[type]++;

        topicCounts[topic]++;

        if (topicTopWordIndices[topic].contains(type)) {
          docTopicWordIndices[topic].add(type);
        }
      }

      int docLength = tokens.size();

      if (docLength > 0) {
        int maxTopic = -1;
        int maxCount = -1;

        for (int topic = 0; topic < numTopics; topic++) {

          if (topicCounts[topic] > 0) {
            numNonZeroDocuments[topic]++;

            if (topicCounts[topic] > maxCount) {
              maxTopic = topic;
              maxCount = topicCounts[topic];
            }

            sumCountTimesLogCount[topic] += topicCounts[topic] * Math.log(topicCounts[topic]);

            double proportion =
                (model.alpha[topic] + topicCounts[topic]) / (model.alphaSum + docLength);
            for (int i = 0; i < DEFAULT_DOC_PROPORTIONS.length; i++) {
              if (proportion < DEFAULT_DOC_PROPORTIONS[i]) {
                break;
              }
              numDocumentsAtProportions[topic][i]++;
            }

            IntHashSet supportedWords = docTopicWordIndices[topic];
            int[] indices = topicWordIndicesInOrder[topic];

            for (int i = 0; i < numTopWords; i++) {
              if (supportedWords.contains(indices[i])) {
                for (int j = i; j < numTopWords; j++) {
                  if (i == j) {
                    // Diagonals are total number of documents with word W in topic T
                    topicCodocumentMatrices[topic][i][i]++;
                  } else if (supportedWords.contains(indices[j])) {
                    topicCodocumentMatrices[topic][i][j]++;
                    topicCodocumentMatrices[topic][j][i]++;
                  }
                }
              }
            }

            docTopicWordIndices[topic].clear();
            topicCounts[topic] = 0;
          }
        }

        if (maxTopic > -1) {
          numRank1Documents[maxTopic]++;
        }
      }

      doc++;
    }
  }
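Stripped of the MALLET bookkeeping, each topic's IntHashSet acts as a constant-time membership filter over token type IDs. A simplified, hypothetical helper showing that single step:

import com.carrotsearch.hppc.IntHashSet;

class TopWordFilter {
  // Sketch: count how many distinct token types of a document fall inside a topic's top-word set.
  static int countTopWordTypes(int[] docTokenTypes, IntHashSet topWordIndices) {
    IntHashSet seen = new IntHashSet(); // each matching type is counted once per document
    for (int type : docTokenTypes) {
      if (topWordIndices.contains(type)) {
        seen.add(type);
      }
    }
    return seen.size();
  }
}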
Example No. 4
  @Test
  public void testRelocationWhileIndexingRandom() throws Exception {
    int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
    int numberOfReplicas = randomBoolean() ? 0 : 1;
    int numberOfNodes = numberOfReplicas == 0 ? 2 : 3;

    logger.info(
        "testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})",
        numberOfRelocations,
        numberOfReplicas,
        numberOfNodes);

    String[] nodes = new String[numberOfNodes];
    logger.info("--> starting [node1] ...");
    nodes[0] = internalCluster().startNode();

    logger.info("--> creating test index ...");
    client()
        .admin()
        .indices()
        .prepareCreate("test")
        .setSettings(
            settingsBuilder()
                .put("index.number_of_shards", 1)
                .put("index.number_of_replicas", numberOfReplicas))
        .execute()
        .actionGet();

    for (int i = 1; i < numberOfNodes; i++) {
      logger.info("--> starting [node{}] ...", i + 1);
      nodes[i] = internalCluster().startNode();
      if (i != numberOfNodes - 1) {
        ClusterHealthResponse healthResponse =
            client()
                .admin()
                .cluster()
                .prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForNodes(Integer.toString(i + 1))
                .setWaitForGreenStatus()
                .execute()
                .actionGet();
        assertThat(healthResponse.isTimedOut(), equalTo(false));
      }
    }

    int numDocs = scaledRandomIntBetween(200, 2500);
    try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type1", client(), numDocs)) {
      logger.info("--> waiting for {} docs to be indexed ...", numDocs);
      waitForDocs(numDocs, indexer);
      logger.info("--> {} docs indexed", numDocs);

      logger.info("--> starting relocations...");
      int nodeShiftBased = numberOfReplicas; // if we have replicas, shift the relocations onto the later nodes
      for (int i = 0; i < numberOfRelocations; i++) {
        int fromNode = (i % 2);
        int toNode = fromNode == 0 ? 1 : 0;
        fromNode += nodeShiftBased;
        toNode += nodeShiftBased;
        numDocs = scaledRandomIntBetween(200, 1000);
        logger.debug("--> Allow indexer to index [{}] documents", numDocs);
        indexer.continueIndexing(numDocs);
        logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]);
        client()
            .admin()
            .cluster()
            .prepareReroute()
            .add(new MoveAllocationCommand(new ShardId("test", 0), nodes[fromNode], nodes[toNode]))
            .get();
        if (rarely()) {
          logger.debug("--> flushing");
          client().admin().indices().prepareFlush().get();
        }
        ClusterHealthResponse clusterHealthResponse =
            client()
                .admin()
                .cluster()
                .prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForRelocatingShards(0)
                .setTimeout(ACCEPTABLE_RELOCATION_TIME)
                .execute()
                .actionGet();
        assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
        clusterHealthResponse =
            client()
                .admin()
                .cluster()
                .prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForRelocatingShards(0)
                .setTimeout(ACCEPTABLE_RELOCATION_TIME)
                .execute()
                .actionGet();
        assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
        indexer.pauseIndexing();
        logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode);
      }
      logger.info("--> done relocations");
      logger.info("--> waiting for indexing threads to stop ...");
      indexer.stop();
      logger.info("--> indexing threads stopped");

      logger.info("--> refreshing the index");
      client().admin().indices().prepareRefresh("test").execute().actionGet();
      logger.info("--> searching the index");
      boolean ranOnce = false;
      for (int i = 0; i < 10; i++) {
        try {
          logger.info("--> START search test round {}", i + 1);
          SearchHits hits =
              client()
                  .prepareSearch("test")
                  .setQuery(matchAllQuery())
                  .setSize((int) indexer.totalIndexedDocs())
                  .setNoFields()
                  .execute()
                  .actionGet()
                  .getHits();
          ranOnce = true;
          if (hits.totalHits() != indexer.totalIndexedDocs()) {
            int[] hitIds = new int[(int) indexer.totalIndexedDocs()];
            for (int hit = 0; hit < indexer.totalIndexedDocs(); hit++) {
              hitIds[hit] = hit + 1;
            }
            IntHashSet set = IntHashSet.from(hitIds);
            for (SearchHit hit : hits.hits()) {
              int id = Integer.parseInt(hit.id());
              if (!set.remove(id)) {
                logger.error("Extra id [{}]", id);
              }
            }
            set.forEach(
                new IntProcedure() {

                  @Override
                  public void apply(int value) {
                    logger.error("Missing id [{}]", value);
                  }
                });
          }
          assertThat(hits.totalHits(), equalTo(indexer.totalIndexedDocs()));
          logger.info("--> DONE search test round {}", i + 1);
        } catch (SearchPhaseExecutionException ex) {
          // TODO: the first run fails with this failure, waiting for relocating nodes set to 0 is
          // not enough?
          logger.warn("Got exception while searching.", ex);
        }
      }
      if (!ranOnce) {
        fail();
      }
    }
  }
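The missing/extra-id check at the end of the search loop is a reusable pattern on its own. A minimal sketch, assuming documents are indexed with ids 1..expectedCount as in the test above; the class and method names are illustrative:

import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.procedures.IntProcedure;

class IdReconciler {
  // Sketch: log IDs that were returned but not expected, and IDs that were expected but never returned.
  static void reportMismatches(int expectedCount, int[] returnedIds) {
    int[] expected = new int[expectedCount];
    for (int i = 0; i < expectedCount; i++) {
      expected[i] = i + 1; // ids 1..expectedCount, matching the indexer above
    }
    IntHashSet remaining = IntHashSet.from(expected);
    for (int id : returnedIds) {
      if (!remaining.remove(id)) {
        System.err.println("Extra id [" + id + "]"); // returned but never expected, or returned twice
      }
    }
    remaining.forEach(
        new IntProcedure() {
          @Override
          public void apply(int value) {
            System.err.println("Missing id [" + value + "]"); // expected but never returned
          }
        });
  }
}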
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntHashSet filteredOrDeletedDocs = new IntHashSet();
    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    ObjectObjectHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      final int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIds = childValueToParentIds.get(childValue);
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates = scaledRandomIntBetween(0, numberOfUpdatableParents);
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      QueryBuilder queryBuilder;
      if (random().nextBoolean()) {
        queryBuilder =
            hasChildQuery("child", termQuery("field1", childValue))
                .setShortCircuitCutoff(shortCircuitParentDocSet);
      } else {
        queryBuilder =
            constantScoreQuery(
                hasChildQuery("child", termQuery("field1", childValue))
                    .setShortCircuitCutoff(shortCircuitParentDocSet));
      }
      // Wrap in a filtered query: this exercises Scorer#advance(..) and also keeps the
      // Weight#scorer from receiving live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notQuery(termQuery("filter", "me")));
      Query query = parseQuery(queryBuilder);

      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.get(childValue);
          TermsEnum termsEnum = terms.iterator();
          PostingsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.postings(slowLeafReader.getLiveDocs(), docsEnum, PostingsEnum.NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }