@Override
 public byte[] encode() {
   byte[] bytes = new byte[encodedSize()];
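   // Header: set a marker in the high bit of the first byte; the BitWriter
   // below starts at bit offset 5, which (assuming most-significant-bit-first
   // ordering) leaves the five header bits untouched.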
   bytes[0] = (byte) 0x80;
   BitWriter bitWriter = new BitWriter(bytes, 0, bytes.length, 5, true);
   int lastSetBit = -1;
   for (int setPos = bitSet.nextSetBit(0); setPos >= 0; setPos = bitSet.nextSetBit(setPos + 1)) {
     // Emit one skipped (0) bit for each absent field between lastSetBit and setPos
     bitWriter.skip(setPos - lastSetBit - 1);
     /*
      * Because this field is present, we emit a presence bit (1) followed by
      * the 2 bits of type information necessary to parse the value. The type
      * bits are
      *
      * Untyped: 00
      * Double: 01
      * Float: 10
      * Scalar: 11
      *
      * (A worked example of the resulting bit stream follows this method.)
      */
     if (scalarFields != null && scalarFields.get(setPos)) {
       bitWriter.set(3);
     } else if (floatFields != null && floatFields.get(setPos)) {
       bitWriter.set(2);
       bitWriter.skipNext();
     } else if (doubleFields != null && doubleFields.get(setPos)) {
       bitWriter.setNext();
       bitWriter.skipNext();
       bitWriter.setNext();
     } else {
       bitWriter.setNext();
       bitWriter.skip(2);
     }
     lastSetBit = setPos;
   }
   return bytes;
 }
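
 /*
  * Worked example (illustrative; assumes each set()/skip() call consumes one
  * bit, in order, after the writer's starting offset of 5): for a bitSet with
  * fields 0 and 2 present, where field 0 is a float and field 2 is untyped,
  * the loop emits
  *
  *   1 1 0   field 0: presence bit, then type bits 10 (Float)
  *   0       field 1 absent: one skipped bit
  *   1 0 0   field 2: presence bit, then type bits 00 (Untyped)
  *
  * so each present field costs 3 bits and each absent field costs 1 bit.
  */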
 @Override
 public int cardinality(int position) {
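   // Count the set bits strictly below 'position'; for non-negative positions
   // this is equivalent to bitSet.get(0, position).cardinality().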
   int count = 0;
   for (int i = bitSet.nextSetBit(0); i >= 0 && i < position; i = bitSet.nextSetBit(i + 1)) {
     count++;
   }
   return count;
 }

 @Override
 public int nextSetBit(int currentPosition) {
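   // Delegates to java.util.BitSet#nextSetBit: returns the index of the first
   // set bit at or after currentPosition, or -1 if no such bit exists.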
   return bitSet.nextSetBit(currentPosition);
 }

  /**
   * Performs the actual clustering under the assumption that all documents are
   * written in the same <code>language</code>.
   */
  private void cluster(LanguageCode language) {
    // Preprocessing of documents
    final PreprocessingContext context =
        preprocessingPipeline.preprocess(documents, query, language);

     // Further processing only if preprocessing produced any cluster label candidates
    clusters = Lists.newArrayList();
    if (context.hasLabels()) {
      // Term-document matrix building and reduction
      final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
      final ReducedVectorSpaceModelContext reducedVsmContext =
          new ReducedVectorSpaceModelContext(vsmContext);
       final LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

      matrixBuilder.buildTermDocumentMatrix(vsmContext);
      matrixBuilder.buildTermPhraseMatrix(vsmContext);

      matrixReducer.reduce(
          reducedVsmContext, computeClusterCount(desiredClusterCountBase, documents.size()));

      // Cluster label building
      clusterBuilder.buildLabels(lingoContext, matrixBuilder.termWeighting);

      // Document assignment
      clusterBuilder.assignDocuments(lingoContext);

      // Cluster merging
      clusterBuilder.merge(lingoContext);

      // Format final clusters
      final int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
      final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
      final double[] clusterLabelScore = lingoContext.clusterLabelScore;
      for (int i = 0; i < clusterLabelIndex.length; i++) {
        final Cluster cluster = new Cluster();

        final int labelFeature = clusterLabelIndex[i];
        if (labelFeature < 0) {
          // Cluster removed during merging
          continue;
        }

        // Add label and score
        cluster.addPhrases(labelFormatter.format(context, labelFeature));
        cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

        // Add documents
        final BitSet bs = clusterDocuments[i];
        for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
          cluster.addDocuments(documents.get(bit));
        }

        // Add cluster
        clusters.add(cluster);
      }

      Collections.sort(clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight));
    }

    Cluster.appendOtherTopics(documents, clusters);
  }
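
   /*
    * Minimal usage sketch (an assumption: this appears to be the Lingo
    * algorithm from the Carrot2 3.x core API; the surrounding class and the
    * Controller wiring are not shown in this excerpt). Given a
    * List<Document> documents, clustering might be driven as:
    *
    *   final Controller controller = ControllerFactory.createSimple();
    *   final ProcessingResult result =
    *       controller.process(documents, "data mining", LingoClusteringAlgorithm.class);
    *   for (Cluster c : result.getClusters()) {
    *     System.out.println(c.getLabel() + " (" + c.size() + " documents)");
    *   }
    */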