Esempio n. 1
0
 /**
  * Attempts "single token insertion" recovery on behalf of both the mismatched-token and the
  * mismatched-set handlers. Nothing is consumed from the input while doing so.
  *
  * @param input stream whose lookahead symbol {@code LA(1)} is tested against the follow set
  * @param e the exception that gets reported when recovery succeeds
  * @param follow symbols that may follow the element being matched; may be {@code null}
  * @return {@code true} when the error was reported and the missing element can be treated as
  *     inserted, {@code false} when this kind of recovery does not apply
  */
 protected boolean recoverFromMismatchedElement(
     IntStream input, RecognitionException e, BitSet follow) {
   if (follow == null) {
     // Without a follow set there is nothing to compare the lookahead against, so insertion
     // recovery cannot be attempted here.
     return false;
   }

   BitSet viable = follow;
   if (viable.member(Token.EOR_TOKEN_TYPE)) {
     // The end-of-rule marker is in the set: widen it with whatever can follow the current
     // rule in this context, then drop the marker itself.
     viable = viable.or(computeContextSensitiveRuleFOLLOW());
     viable.remove(Token.EOR_TOKEN_TYPE);
   }

   // If the current lookahead is something that may legally come next, the expected element
   // is simply missing: report the error and let the caller carry on as if it were inserted.
   final boolean lookaheadFits = viable.member(input.LA(1));
   if (lookaheadFits) {
     reportError(e);
   }
   return lookaheadFits;
 }
Esempio n. 2
0
  /**
   * Adds every document produced by the given {@link DocIdSetIterator} to this builder. NOTE: when
   * a {@link DocIdSet} has to be built from one single {@link DocIdSetIterator}, {@link
   * RoaringDocIdSet.Builder} is the better choice.
   *
   * @param iter the iterator to drain into this builder
   * @throws IOException if advancing {@code iter} fails
   */
  public void add(DocIdSetIterator iter) throws IOException {
    final long cost = iter.cost();
    grow((int) Math.min(Integer.MAX_VALUE, cost));

    if (bitSet != null) {
      // Already backed by a bit set: union the iterator straight into it.
      bitSet.or(iter);
      return;
    }

    // Still buffering doc ids in the int[] buffer.
    for (;;) {
      assert buffer.length <= threshold;
      final int capacity = buffer.length;
      // bufferSize doubles as the write cursor; fill the buffer up to its current capacity.
      while (bufferSize < capacity) {
        final int docId = iter.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
          // Iterator exhausted before the buffer filled up; nothing more to do.
          return;
        }
        buffer[bufferSize++] = docId;
      }

      // The buffer is full. Stop buffering once one more entry would reach the threshold.
      if (bufferSize + 1 >= threshold) {
        break;
      }

      growBuffer(bufferSize + 1);
    }

    // Too many doc ids for the buffer: switch to the bit set and drain the rest into it.
    upgradeToBitSet();
    int docId;
    while ((docId = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      bitSet.set(docId);
    }
  }
Esempio n. 3
0
  /**
   * Decides whether a mismatch at the current input position can be explained by a missing
   * token, in which case error recovery is free to "insert" that token.
   *
   * @param input stream whose lookahead symbol {@code LA(1)} is tested against the follow set
   * @param follow symbols that may follow the element being matched; may be {@code null}
   * @return {@code true} if the lookahead is in the (possibly widened) follow set or the
   *     end-of-rule marker is still in that set, {@code false} otherwise
   */
  public boolean mismatchIsMissingToken(IntStream input, BitSet follow) {
    if (follow == null) {
      // Without a follow set there is nothing to compare the lookahead against.
      return false;
    }

    BitSet viable = follow;
    if (viable.member(Token.EOR_TOKEN_TYPE)) {
      // The end-of-rule marker is in the set: widen it with whatever can follow the current
      // rule in this context.
      viable = viable.or(computeContextSensitiveRuleFOLLOW());
      if (state._fsp >= 0) {
        // Not the start symbol, so the marker is removed again.
        viable.remove(Token.EOR_TOKEN_TYPE);
      }
    }

    // BitSet cannot hold negative numbers such as -1 (EOF), so for the start symbol the
    // end-of-rule marker is deliberately left in the set to signal that the FOLLOW of the start
    // symbol applies (EOF can follow). Either condition means a token is missing.
    return viable.member(input.LA(1)) || viable.member(Token.EOR_TOKEN_TYPE);
  }
Esempio n. 4
0
  /**
   * Converts the given cluster candidates into Carrot2 {@link Cluster} instances, appends them to
   * {@code this.clusters}, and then delegates creation of the junk (unassigned documents) cluster
   * to {@code Cluster.appendOtherTopics}.
   *
   * @param clusters the cluster candidates to convert, in the order they should be appended
   */
  private void postProcessing(ArrayList<ClusterCandidate> clusters) {
    // Scratch lists handed to the collect* helpers and cleared after every candidate.
    final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
    final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);
    for (ClusterCandidate c : clusters) {
      final Cluster c2 = new Cluster();
      c2.addPhrases(collectPhrases(phrases, c));
      c2.addDocuments(collectDocuments(docs, c.documents));
      c2.setScore((double) c.score);
      this.clusters.add(c2);

      docs.clear();
      phrases.clear();
    }

    // No bookkeeping of assigned documents is needed here: the junk cluster is derived from the
    // full document list and the clusters built above.
    Cluster.appendOtherTopics(this.documents, this.clusters);
  }
  /**
   * Unions the documents of the given iterator into this set, choosing between three
   * strategies: a direct set-to-set union when the iterator is backed by another {@code
   * SparseFixedBitSet}, the inherited implementation for low-cost iterators, and {@code orDense}
   * for everything else.
   *
   * @param it the iterator whose documents are added to this set
   * @throws IOException if the iterator fails while being consumed
   */
  @Override
  public void or(DocIdSetIterator it) throws IOException {
    // Specialized union with another SparseFixedBitSet, when the iterator exposes one.
    final SparseFixedBitSet sparse = BitSetIterator.getSparseFixedBitSetOrNull(it);
    if (sparse != null) {
      assertUnpositioned(it);
      or(sparse);
      return;
    }

    // The union with a FixedBitSet is intentionally not specialized: FixedBitSets are meant
    // for dense data and sparse fixed bit sets for sparse data, so a sparse set would likely
    // have been upgraded by DocIdSetBuilder before being or'ed with a FixedBitSet.

    if (it.cost() >= indices.length) {
      orDense(it);
      return;
    }

    // The default implementation is good for sparse iterators.
    super.or(it);
  }