/** * This code is factored out from mismatched token and mismatched set recovery. It handles "single * token insertion" error recovery for both. No tokens are consumed to recover from insertions. * Return true if recovery was possible else return false. */ protected boolean recoverFromMismatchedElement( IntStream input, RecognitionException e, BitSet follow) { if (follow == null) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // System.out.println("recoverFromMismatchedElement"); // compute what can follow this grammar element reference if (follow.member(Token.EOR_TOKEN_TYPE)) { BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW(); follow = follow.or(viableTokensFollowingThisRule); follow.remove(Token.EOR_TOKEN_TYPE); } // if current token is consistent with what could come after set // then it is ok to "insert" the missing token, else throw exception // System.out.println("viable tokens="+follow.toString(getTokenNames())+")"); if (follow.member(input.LA(1))) { // System.out.println("LT(1)=="+input.LT(1)+" is consistent with what follows; inserting..."); reportError(e); return true; } // System.err.println("nothing to do; throw exception"); return false; }
/**
 * Add the content of the provided {@link DocIdSetIterator} to this builder. NOTE: if you need to
 * build a {@link DocIdSet} out of a single {@link DocIdSetIterator}, you should rather use {@link
 * RoaringDocIdSet.Builder}.
 */
public void add(DocIdSetIterator iter) throws IOException {
  // Reserve room for the iterator's cost estimate, clamped to the int range;
  // presumably grow() may upgrade the builder to a bit set — TODO confirm.
  grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));
  if (bitSet != null) {
    // Already in dense mode: delegate to the bit set's bulk union.
    bitSet.or(iter);
  } else {
    // Sparse mode: append doc ids into the buffer, growing it until the
    // upgrade threshold would be crossed.
    while (true) {
      assert buffer.length <= threshold;
      final int end = buffer.length;
      for (int i = bufferSize; i < end; ++i) {
        final int doc = iter.nextDoc();
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
          // Iterator exhausted before the buffer filled — stay sparse.
          bufferSize = i;
          return;
        }
        buffer[bufferSize++] = doc;
      }
      bufferSize = end;
      if (bufferSize + 1 >= threshold) {
        // Any further growth would reach the threshold: upgrade below.
        break;
      }
      growBuffer(bufferSize + 1);
    }
    // Too many docs for the sparse buffer: switch to a bit set and drain the
    // rest of the iterator directly into it.
    upgradeToBitSet();
    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
      bitSet.set(doc);
    }
  }
}
public boolean mismatchIsMissingToken(IntStream input, BitSet follow) { if (follow == null) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // compute what can follow this grammar element reference if (follow.member(Token.EOR_TOKEN_TYPE)) { BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW(); follow = follow.or(viableTokensFollowingThisRule); if (state._fsp >= 0) { // remove EOR if we're not the start symbol follow.remove(Token.EOR_TOKEN_TYPE); } } // if current token is consistent with what could come after set // then we know we're missing a token; error recovery is free to // "insert" the missing token // System.out.println("viable tokens="+follow.toString(getTokenNames())); // System.out.println("LT(1)="+((TokenStream)input).LT(1)); // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR // in follow set to indicate that the fall of the start symbol is // in the set (EOF can follow). if (follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE)) { // System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; // inserting..."); return true; } return false; }
/** * Create the junk (unassigned documents) cluster and create the final set of clusters in Carrot2 * format. */ private void postProcessing(ArrayList<ClusterCandidate> clusters) { // Adapt to Carrot2 classes, counting used documents on the way. final BitSet all = new BitSet(documents.size()); final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size()); final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3); for (ClusterCandidate c : clusters) { final Cluster c2 = new Cluster(); c2.addPhrases(collectPhrases(phrases, c)); c2.addDocuments(collectDocuments(docs, c.documents)); c2.setScore((double) c.score); this.clusters.add(c2); all.or(c.documents); docs.clear(); phrases.clear(); } Cluster.appendOtherTopics(this.documents, this.clusters); }
@Override public void or(DocIdSetIterator it) throws IOException { { // specialize union with another SparseFixedBitSet final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it); if (other != null) { assertUnpositioned(it); or(other); return; } } // We do not specialize the union with a FixedBitSet since FixedBitSets are // supposed to be used for dense data and sparse fixed bit sets for sparse // data, so a sparse set would likely get upgraded by DocIdSetBuilder before // being or'ed with a FixedBitSet if (it.cost() < indices.length) { // the default impl is good for sparse iterators super.or(it); } else { orDense(it); } }