public boolean mismatchIsMissingToken(IntStream input, BitSet follow) { if (follow == null) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // compute what can follow this grammar element reference if (follow.member(Token.EOR_TOKEN_TYPE)) { BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW(); follow = follow.or(viableTokensFollowingThisRule); if (state._fsp >= 0) { // remove EOR if we're not the start symbol follow.remove(Token.EOR_TOKEN_TYPE); } } // if current token is consistent with what could come after set // then we know we're missing a token; error recovery is free to // "insert" the missing token // System.out.println("viable tokens="+follow.toString(getTokenNames())); // System.out.println("LT(1)="+((TokenStream)input).LT(1)); // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR // in follow set to indicate that the fall of the start symbol is // in the set (EOF can follow). if (follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE)) { // System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; // inserting..."); return true; } return false; }
/** Verifies that a bit set in the original is still set in its clone. */
@Test
public void testClone() {
  final int highestBit = NUM_BITS - 1;
  BitSet bitSet = new BitSet(NUM_BITS);
  bitSet.set(highestBit);

  // Replace the reference with the clone so only the copy is inspected below.
  bitSet = bitSet.clone();

  assertTrue(bitSet.get(highestBit));
}
/**
 * Adds every document produced by the given {@link DocIdSetIterator} to this builder. NOTE: if
 * you need to build a {@link DocIdSet} out of a single {@link DocIdSetIterator}, you should
 * rather use {@link RoaringDocIdSet.Builder}.
 *
 * @param iter the iterator to drain; it is advanced with {@code nextDoc()} until exhausted
 * @throws IOException if the iterator fails while advancing
 */
public void add(DocIdSetIterator iter) throws IOException {
  grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

  if (bitSet != null) {
    // Already backed by a bit set: delegate the whole union to it.
    bitSet.or(iter);
    return;
  }

  // Still buffering: fill the buffer, growing it until the threshold is reached.
  for (;;) {
    assert buffer.length <= threshold;
    final int capacity = buffer.length;
    while (bufferSize < capacity) {
      final int docId = iter.nextDoc();
      if (docId == DocIdSetIterator.NO_MORE_DOCS) {
        // Iterator exhausted while everything still fits in the buffer.
        return;
      }
      buffer[bufferSize] = docId;
      bufferSize++;
    }
    if (bufferSize + 1 >= threshold) {
      break;
    }
    growBuffer(bufferSize + 1);
  }

  // The buffer may not grow any further: switch to a bit set and drain the remainder into it.
  upgradeToBitSet();
  int docId;
  while ((docId = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    bitSet.set(docId);
  }
}
// what is exact? it seems to only add sets from above on stack // if EOR is in set i. When it sees a set w/o EOR, it stops adding. // Why would we ever want them all? Maybe no viable alt instead of // mismatched token? protected BitSet combineFollows(boolean exact) { int top = state._fsp; BitSet followSet = new BitSet(); for (int i = top; i >= 0; i--) { BitSet localFollowSet = state.following[i]; /* System.out.println("local follow depth "+i+"="+ localFollowSet.toString(getTokenNames())+")"); */ followSet.orInPlace(localFollowSet); if (exact) { // can we see end of rule? if (localFollowSet.member(Token.EOR_TOKEN_TYPE)) { // Only leave EOR in set if at top (start rule); this lets // us know if have to include follow(start rule); i.e., EOF if (i > 0) { followSet.remove(Token.EOR_TOKEN_TYPE); } } else { // can't see end of rule, quit break; } } } return followSet; }
/** * This code is factored out from mismatched token and mismatched set recovery. It handles "single * token insertion" error recovery for both. No tokens are consumed to recover from insertions. * Return true if recovery was possible else return false. */ protected boolean recoverFromMismatchedElement( IntStream input, RecognitionException e, BitSet follow) { if (follow == null) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // System.out.println("recoverFromMismatchedElement"); // compute what can follow this grammar element reference if (follow.member(Token.EOR_TOKEN_TYPE)) { BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW(); follow = follow.or(viableTokensFollowingThisRule); follow.remove(Token.EOR_TOKEN_TYPE); } // if current token is consistent with what could come after set // then it is ok to "insert" the missing token, else throw exception // System.out.println("viable tokens="+follow.toString(getTokenNames())+")"); if (follow.member(input.LA(1))) { // System.out.println("LT(1)=="+input.LT(1)+" is consistent with what follows; inserting..."); reportError(e); return true; } // System.err.println("nothing to do; throw exception"); return false; }
/**
 * Applies {@code reset(r)} to {@code target} when it exists, then applies {@code set(s)} when
 * {@code s} is non-null, creating the target on demand.
 *
 * @param target the set to modify; may be {@code null}
 * @param r the argument passed to {@code target.reset}; only used when {@code target} is non-null
 * @param s the argument passed to {@code set}; may be {@code null}, in which case nothing is set
 * @return {@code target}, a newly created set if {@code target} was {@code null} and {@code s}
 *     was not, or {@code null} if both were {@code null}
 */
public static BitSet reset_set(BitSet target, BitSet r, BitSet s) {
  if (target != null) {
    target.reset(r);
  }
  if (s == null) {
    return target;
  }
  BitSet result = (target != null) ? target : new BitSet(size(s));
  result.set(s);
  return result;
}
/**
 * Clears in this set every bit that is set in {@code other}: this = this AND_NOT other. Only the
 * words both sets have in common are touched.
 *
 * @param other the set whose bits are removed from this one
 */
public void remove(BitSet other) {
  final int sharedWords = Math.min(myNumWords, other.getNumWords());
  final long[] mine = myBits;
  final long[] theirs = other.getBits();
  for (int w = sharedWords - 1; w >= 0; w--) {
    mine[w] &= ~theirs[w];
  }
}
/**
 * Returns the popcount or cardinality of "a and not b", i.e. "intersection(a, not(b))". Neither
 * set is modified.
 *
 * @param a the set whose bits are counted
 * @param b the set whose bits are excluded
 * @return the count over the words both sets share, plus the count over any words of {@code a}
 *     that lie beyond the end of {@code b}
 */
public static long andNotCount(BitSet a, BitSet b) {
  final int aWords = a.getNumWords();
  final int bWords = b.getNumWords();

  long count = BitUtil.pop_andnot(a.getBits(), b.getBits(), 0, Math.min(aWords, bWords));
  if (aWords > bWords) {
    // Words of a past the end of b have nothing to be masked by.
    count += BitUtil.pop_array(a.getBits(), bWords, aWords - bWords);
  }
  return count;
}
/** returns true if both sets have the same bits set */ @Override public boolean equals(Object o) { if (this == o) { return true; } if (!(o instanceof BitSet)) { return false; } BitSet a; BitSet b = (BitSet) o; // make a the larger set. if (b.getNumWords() > myNumWords) { a = b; b = this; } else { a = this; } // check for any set bits out of the range of b for (int i = a.getNumWords() - 1; i >= b.getNumWords(); i--) { if (a.getBits()[i] != 0) { return false; } } for (int i = b.getNumWords() - 1; i >= 0; i--) { if (a.getBits()[i] != b.getBits()[i]) { return false; } } return true; }
/**
 * Appends to a list the documents whose indices are set in the given bit set.
 *
 * @param l the list to append to, or {@code null} to have a new list created, sized to the bit
 *     set's cardinality
 * @param bitset the set of indices into {@code documents}
 * @return the list that received the documents ({@code l} itself when it was non-null)
 */
private List<Document> collectDocuments(List<Document> l, BitSet bitset) {
  final List<Document> target;
  if (l != null) {
    target = l;
  } else {
    target = Lists.newArrayListWithCapacity((int) bitset.cardinality());
  }

  final BitSetIterator it = bitset.iterator();
  int index;
  // A negative value from the iterator ends the walk.
  while ((index = it.nextSetBit()) >= 0) {
    target.add(documents.get(index));
  }
  return target;
}
/**
 * Returns true if the sets have any elements in common. Only the words both sets share are
 * compared.
 *
 * @param other the set to test against
 * @return {@code true} if at least one bit is set in both sets
 */
public boolean intersects(BitSet other) {
  final int sharedWords = Math.min(myNumWords, other.getNumWords());
  final long[] mine = myBits;
  final long[] theirs = other.getBits();
  for (int w = sharedWords - 1; w >= 0; w--) {
    if ((mine[w] & theirs[w]) != 0) {
      return true;
    }
  }
  return false;
}
/** Verifies that {@code clear()} turns off every bit of a fully populated set. */
@Test
public void testClear() {
  BitSet bitSet = new BitSet(NUM_BITS);

  // Turn every bit on, then confirm each one reads back as set.
  for (int bit = 0; bit < NUM_BITS; bit++) {
    bitSet.set(bit);
  }
  for (int bit = 0; bit < NUM_BITS; bit++) {
    assertTrue(bitSet.get(bit));
  }

  bitSet.clear();

  // After clearing, no bit may remain set.
  for (int bit = 0; bit < NUM_BITS; bit++) {
    assertFalse(bitSet.get(bit));
  }
}
/** this = this AND other */ public void intersect(BitSet other) { int newLen = Math.min(myNumWords, other.getNumWords()); long[] thisArr = myBits; long[] otherArr = other.getBits(); // testing against zero can be more efficient int pos = newLen; while (--pos >= 0) { thisArr[pos] &= otherArr[pos]; } if (myNumWords > newLen) { // fill zeros from the new shorter length to the old length Arrays.fill(myBits, newLen, myNumWords, 0); } myNumWords = newLen; }
/**
 * Applies an exclusive-or with {@code other} in place: this = this XOR other. Afterwards this
 * set's word count is the larger of the two word counts.
 *
 * @param other the set to xor into this one
 */
public void xor(BitSet other) {
  final int resultWords = Math.max(myNumWords, other.getNumWords());
  ensureCapacityWords(resultWords);

  // Read the backing array only after capacity has been ensured.
  final long[] mine = myBits;
  final long[] theirs = other.getBits();

  final int sharedWords = Math.min(myNumWords, other.getNumWords());
  for (int w = sharedWords - 1; w >= 0; w--) {
    mine[w] ^= theirs[w];
  }

  if (myNumWords < resultWords) {
    // Words only the other set has are copied over verbatim.
    System.arraycopy(theirs, myNumWords, mine, myNumWords, resultWords - myNumWords);
  }
  myNumWords = resultWords;
}
/**
 * Adds a single document to this builder. NOTE: doc IDs do not need to be provided in order.
 * NOTE: if you plan on adding several docs at once, look into using {@link #grow(int)} to
 * reserve space.
 *
 * @param doc the document ID to add
 */
public void add(int doc) {
  if (bitSet != null) {
    bitSet.set(doc);
    return;
  }

  final int needed = bufferSize + 1;
  if (needed > buffer.length) {
    if (needed >= threshold) {
      // The buffer may not grow any further: switch to a bit set and record the doc there.
      upgradeToBitSet();
      bitSet.set(doc);
      return;
    }
    growBuffer(needed);
  }
  buffer[bufferSize++] = doc;
}
/**
 * Applies {@code set(s)} to {@code target} when {@code size(s)} is positive, creating the target
 * on demand.
 *
 * @param target the set to modify; may be {@code null}
 * @param s the argument passed to {@code set}
 * @return {@code target} unchanged when {@code size(s)} is not positive; otherwise the modified
 *     target, newly created if {@code target} was {@code null}
 */
public static BitSet set(BitSet target, BitSet s) {
  if (size(s) <= 0) {
    return target;
  }
  BitSet result = (target == null) ? new BitSet(size(s)) : target;
  result.set(s);
  return result;
}
/**
 * Unions the follow sets on the follow stack, walking from the top of the stack down to 0.
 *
 * <p>When {@code exact} is set, the walk stops after the first set that does not contain
 * {@code Token.EOR_TOKEN_TYPE}. The EOR marker is always removed from the result.
 *
 * @param exact whether to stop at the first set that cannot see the end of its rule
 * @return a newly created set holding the combined follow, without EOR
 */
protected BitSet combineFollows(boolean exact) {
  BitSet combined = new BitSet();
  int depth = _fsp;
  while (depth >= 0) {
    BitSet local = following[depth];
    combined.orInPlace(local);
    if (exact && !local.member(Token.EOR_TOKEN_TYPE)) {
      break;
    }
    depth--;
  }
  combined.remove(Token.EOR_TOKEN_TYPE);
  return combined;
}
/** * Make sure current lookahead symbol matches the given set Throw an exception upon mismatch, * which is catch by either the error handler or by the syntactic predicate. */ public void match(BitSet b) throws MismatchedTokenException, TokenStreamException { if (!b.member(LA(1))) { throw new MismatchedTokenException(tokenNames, LT(1), b, false, getFilename()); } else { // mark token as consumed -- fetch next token deferred until LA/LT consume(); } }
/** Consume tokens until one matches the given token set */ public void consumeUntil(IntStream input, BitSet set) { // System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); int ttype = input.LA(1); while (ttype != Token.EOF && !set.member(ttype)) { // System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); input.consume(); ttype = input.LA(1); } }
/**
 * Switches this builder from buffering to a {@link FixedBitSet} of {@code maxDoc} bits: the
 * buffered doc IDs are set in the new bit set, then the buffer is released. Must only be called
 * while no bit set exists yet.
 */
private void upgradeToBitSet() {
  assert bitSet == null;
  bitSet = new FixedBitSet(maxDoc);

  int next = 0;
  while (next < bufferSize) {
    bitSet.set(buffer[next]);
    next++;
  }

  // The buffer is no longer needed once its contents live in the bit set.
  buffer = null;
  bufferSize = 0;
}
/** * Create the junk (unassigned documents) cluster and create the final set of clusters in Carrot2 * format. */ private void postProcessing(ArrayList<ClusterCandidate> clusters) { // Adapt to Carrot2 classes, counting used documents on the way. final BitSet all = new BitSet(documents.size()); final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size()); final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3); for (ClusterCandidate c : clusters) { final Cluster c2 = new Cluster(); c2.addPhrases(collectPhrases(phrases, c)); c2.addDocuments(collectDocuments(docs, c.documents)); c2.setScore((double) c.score); this.clusters.add(c2); all.or(c.documents); docs.clear(); phrases.clear(); } Cluster.appendOtherTopics(this.documents, this.clusters); }
@Override @SuppressWarnings("unchecked") public synchronized boolean removeIf(Predicate<? super E> filter) { Objects.requireNonNull(filter); // figure out which elements are to be removed // any exception thrown from the filter predicate at this stage // will leave the collection unmodified int removeCount = 0; final int size = elementCount; final BitSet removeSet = new BitSet(size); final int expectedModCount = modCount; for (int i = 0; modCount == expectedModCount && i < size; i++) { @SuppressWarnings("unchecked") final E element = (E) elementData[i]; if (filter.test(element)) { removeSet.set(i); removeCount++; } } if (modCount != expectedModCount) { throw new ConcurrentModificationException(); } // shift surviving elements left over the spaces left by removed elements final boolean anyToRemove = removeCount > 0; if (anyToRemove) { final int newSize = size - removeCount; for (int i = 0, j = 0; (i < size) && (j < newSize); i++, j++) { i = removeSet.nextClearBit(i); elementData[j] = elementData[i]; } for (int k = newSize; k < size; k++) { elementData[k] = null; // Let gc do its work } elementCount = newSize; if (modCount != expectedModCount) { throw new ConcurrentModificationException(); } modCount++; } return anyToRemove; }
private State( final BitSet itemsToBuy, final Store store, final boolean hasPerishable, final int lastBought) { this.itemsToBuy = itemsToBuy; this.store = store; this.hasPerishable = hasPerishable; this.lastBought = lastBought; hash = (itemsToBuy.hashCode() * stores.size() + store.index) * 2 + (hasPerishable ? 0 : 1); }
/**
 * Sets or clears a single bit, creating the target and/or enlarging its word array when
 * {@code index} is not below {@code size(target)}.
 *
 * @param target the set to modify; may be {@code null}
 * @param index the bit index; asserted to be non-negative
 * @param value {@code true} to set the bit, {@code false} to clear it
 * @return the modified set, newly created if {@code target} was {@code null}
 */
public static BitSet set(BitSet target, int index, boolean value) {
  assert index >= 0;
  final int word = index >> 6;

  BitSet result = target;
  if (index >= size(result)) {
    if (result == null) {
      result = new BitSet(index + 1);
    }
    // Replace the word array with one just long enough to hold the word for this index.
    final long[] grown = new long[word + 1];
    if (result.bits != null) {
      System.arraycopy(result.bits, 0, grown, 0, result.bits.length);
    }
    result.bits = grown;
  }

  final long mask = 1L << (index & 63);
  if (value) {
    result.bits[word] |= mask;
  } else {
    result.bits[word] &= ~mask;
  }
  return result;
}
/** Verifies that a new set starts empty and that the first and last bits can be set and read. */
@Test
public void testGetSet() {
  BitSet bitSet = new BitSet(NUM_BITS);

  // A freshly created set must report every bit as clear.
  for (int bit = 0; bit < NUM_BITS; bit++) {
    assertFalse(bitSet.get(bit));
  }

  final int lastBit = NUM_BITS - 1;
  bitSet.set(0);
  bitSet.set(lastBit);

  assertTrue(bitSet.get(0));
  assertTrue(bitSet.get(lastBit));
}
/** * Given a start and stop index, return a List of all tokens in the token type BitSet. Return null * if no tokens were found. This method looks at both on and off channel tokens. */ public List getTokens(int start, int stop, BitSet types) { if (p == -1) setup(); if (stop >= tokens.size()) stop = tokens.size() - 1; if (start < 0) start = 0; if (start > stop) return null; // list = tokens[start:stop]:{Token t, t.getType() in types} List<Token> filteredTokens = new ArrayList<Token>(); for (int i = start; i <= stop; i++) { Token t = tokens.get(i); if (types == null || types.member(t.getType())) { filteredTokens.add(t); } } if (filteredTokens.size() == 0) { filteredTokens = null; } return filteredTokens; }
@Override public void and(DocIdSetIterator it) throws IOException { final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it); if (other != null) { // if we are merging with another SparseFixedBitSet, a quick win is // to clear up some blocks by only looking at their index. Then the set // is sparser and the leap-frog approach of the parent class is more // efficient. Since SparseFixedBitSet is supposed to be used for sparse // sets, the intersection of two SparseFixedBitSet is likely very sparse final int numCommonBlocks = Math.min(indices.length, other.indices.length); for (int i = 0; i < numCommonBlocks; ++i) { if ((indices[i] & other.indices[i]) == 0) { this.nonZeroLongCount -= Long.bitCount(this.indices[i]); this.indices[i] = 0; this.bits[i] = null; } } } super.and(it); }
@Override public void or(DocIdSetIterator it) throws IOException { { // specialize union with another SparseFixedBitSet final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it); if (other != null) { assertUnpositioned(it); or(other); return; } } // We do not specialize the union with a FixedBitSet since FixedBitSets are // supposed to be used for dense data and sparse fixed bit sets for sparse // data, so a sparse set would likely get upgraded by DocIdSetBuilder before // being or'ed with a FixedBitSet if (it.cost() < indices.length) { // the default impl is good for sparse iterators super.or(it); } else { orDense(it); } }
/**
 * Checks {@code nextSetBit(from)} against {@code get} on 100 randomly populated sets: every bit
 * between {@code from} and the reported position must be clear, and the reported bit must be
 * set. When -1 is reported, every bit from {@code from} to the end must be clear.
 */
@Test
public void testNextBitSetRandom() {
  RandomGenerator random = RandomManager.getRandom();
  for (int i = 0; i < 100; i++) {
    BitSet bitSet = new BitSet(NUM_BITS);

    // Draw the number of set() calls once per trial. Leaving the random call in the loop
    // condition would re-draw the bound on every iteration and skew the count low.
    int numToSet = 20 + random.nextInt(50);
    for (int j = 0; j < numToSet; j++) {
      bitSet.set(random.nextInt(NUM_BITS));
    }

    int from = random.nextInt(NUM_BITS);
    int nextSet = bitSet.nextSetBit(from);
    if (nextSet == -1) {
      // Nothing reported: the whole tail starting at 'from' must be clear.
      for (int j = from; j < NUM_BITS; j++) {
        assertFalse(bitSet.get(j));
      }
    } else {
      // Everything before the reported bit must be clear, and the reported bit set.
      for (int j = from; j < nextSet; j++) {
        assertFalse(bitSet.get(j));
      }
      assertTrue(bitSet.get(nextSet));
    }
  }
}
/**
 * Copies a set via the {@code BitSet(BitSet)} constructor.
 *
 * @param s the set to copy; may be {@code null}
 * @return a new set built from {@code s}, or {@code null} if {@code s} is {@code null} or empty
 */
public static BitSet copy(BitSet s) {
  if (s == null || s.isEmpty()) {
    return null;
  }
  return new BitSet(s);
}