Esempio n. 1
0
  /**
   * Decides whether a mismatch at the current lookahead symbol is consistent with a single missing
   * token, in which case error recovery is free to "insert" that token.
   *
   * @param input stream whose next symbol ({@code LA(1)}) is tested against the follow set
   * @param follow token types that may follow the mismatched element; may be {@code null}
   * @return {@code true} if {@code LA(1)} is a member of the (possibly widened) follow set, or if
   *     EOR is still a member of it; {@code false} otherwise, including when {@code follow} is
   *     {@code null}
   */
  public boolean mismatchIsMissingToken(IntStream input, BitSet follow) {
    if (follow == null) {
      // No follow information at all: the caller can only consume a token and hope.
      return false;
    }

    BitSet viable = follow;
    if (viable.member(Token.EOR_TOKEN_TYPE)) {
      // The end of the rule is visible, so widen the set with what can follow the rule itself.
      viable = viable.or(computeContextSensitiveRuleFOLLOW());
      if (state._fsp >= 0) {
        // Remove EOR if we're not the start symbol.
        viable.remove(Token.EOR_TOKEN_TYPE);
      }
    }

    // BitSet cannot hold negative numbers such as -1 (EOF), so EOR is left in the set to signal
    // that the follow of the start symbol is included (EOF can follow).
    return viable.member(input.LA(1)) || viable.member(Token.EOR_TOKEN_TYPE);
  }
 /** Checks that a bit set in the original is still reported as set by its clone. */
 @Test
 public void testClone() {
   final int lastBit = NUM_BITS - 1;
   BitSet original = new BitSet(NUM_BITS);
   original.set(lastBit);

   BitSet copy = original.clone();

   assertTrue(copy.get(lastBit));
 }
Esempio n. 3
0
  /**
   * Add the content of the provided {@link DocIdSetIterator} to this builder. NOTE: if you need to
   * build a {@link DocIdSet} out of a single {@link DocIdSetIterator}, you should rather use {@link
   * RoaringDocIdSet.Builder}.
   *
   * @param iter iterator whose remaining doc IDs are consumed and added
   * @throws IOException if the iterator fails while advancing
   */
  public void add(DocIdSetIterator iter) throws IOException {
    grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

    if (bitSet != null) {
      // Already backed by a bit set: let it absorb the iterator directly.
      bitSet.or(iter);
      return;
    }

    for (;;) {
      assert buffer.length <= threshold;
      final int capacity = buffer.length;

      // Fill the free tail of the buffer; stop as soon as the iterator is exhausted.
      while (bufferSize < capacity) {
        final int doc = iter.nextDoc();
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
          return;
        }
        buffer[bufferSize++] = doc;
      }

      if (bufferSize + 1 >= threshold) {
        // One more entry would reach the threshold: switch representation instead of growing.
        break;
      }
      growBuffer(bufferSize + 1);
    }

    upgradeToBitSet();
    int doc;
    while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      bitSet.set(doc);
    }
  }
Esempio n. 4
0
 /**
  * Unions the follow sets on the rule-invocation stack, walking from the top entry ({@code
  * state._fsp}) down to index 0.
  *
  * <p>When {@code exact} is set, the walk stops at the first local set that does not contain EOR
  * (end of rule cannot be seen from there), and EOR is removed from the result at every level
  * except index 0, so that a remaining EOR means the follow of the start rule (i.e. EOF) applies.
  *
  * @param exact whether to stop at the first level whose follow set lacks EOR
  * @return a new set holding the combined follow
  */
 protected BitSet combineFollows(boolean exact) {
   BitSet combined = new BitSet();
   for (int depth = state._fsp; depth >= 0; depth--) {
     BitSet local = state.following[depth];
     combined.orInPlace(local);
     if (!exact) {
       continue;
     }
     if (!local.member(Token.EOR_TOKEN_TYPE)) {
       // Can't see the end of the rule from this level, so outer levels are irrelevant.
       break;
     }
     if (depth > 0) {
       // Only leave EOR in the set at the bottom of the stack (start rule).
       combined.remove(Token.EOR_TOKEN_TYPE);
     }
   }
   return combined;
 }
Esempio n. 5
0
 /**
  * Shared "single token insertion" recovery used by both mismatched-token and mismatched-set
  * handling. No tokens are consumed when recovering from an insertion.
  *
  * @param input stream whose next symbol ({@code LA(1)}) is tested against the follow set
  * @param e the recognition error, reported only when recovery succeeds
  * @param follow token types that may follow the mismatched element; may be {@code null}
  * @return {@code true} if recovery was possible (the error has then been reported), else {@code
  *     false}
  */
 protected boolean recoverFromMismatchedElement(
     IntStream input, RecognitionException e, BitSet follow) {
   if (follow == null) {
     // No follow information: nothing can be decided here.
     return false;
   }

   BitSet viable = follow;
   if (viable.member(Token.EOR_TOKEN_TYPE)) {
     // End of rule is visible: widen with what can follow the rule in this context, drop EOR.
     viable = viable.or(computeContextSensitiveRuleFOLLOW());
     viable.remove(Token.EOR_TOKEN_TYPE);
   }

   if (!viable.member(input.LA(1))) {
     // The current token is not something that could legally come next.
     return false;
   }

   // The current token fits what may follow, so it is ok to "insert" the missing one.
   reportError(e);
   return true;
 }
Esempio n. 6
0
 /**
  * Applies {@code target.reset(r)} when {@code target} is non-null, then {@code target.set(s)}
  * when {@code s} is non-null, allocating {@code target} with {@code size(s)} if it was null.
  *
  * @param target set to modify; may be {@code null}
  * @param r argument handed to {@code reset}
  * @param s argument handed to {@code set}; may be {@code null}, in which case that step is skipped
  * @return {@code target}, or the newly allocated set; {@code null} if both {@code target} and
  *     {@code s} were {@code null}
  */
 public static BitSet reset_set(BitSet target, BitSet r, BitSet s) {
   BitSet result = target;
   if (result != null) {
     result.reset(r);
   }
   if (s == null) {
     return result;
   }
   if (result == null) {
     result = new BitSet(size(s));
   }
   result.set(s);
   return result;
 }
 /**
  * Clears every bit of this set that is also set in {@code other} (this = this AND_NOT other).
  * Only the words both sets have in common are touched.
  *
  * @param other set whose bits are removed from this one
  */
 public void remove(BitSet other) {
   final int commonWords = Math.min(myNumWords, other.getNumWords());
   final long[] mine = myBits;
   final long[] theirs = other.getBits();
   for (int word = commonWords - 1; word >= 0; word--) {
     mine[word] &= ~theirs[word];
   }
 }
 /**
  * Returns the popcount (cardinality) of "a and not b", i.e. of {@code intersection(a, not(b))}.
  * Neither set is modified.
  *
  * @param a set whose bits are counted
  * @param b set whose bits are excluded from the count
  * @return number of bits set in {@code a} but not in {@code b}
  */
 public static long andNotCount(BitSet a, BitSet b) {
   final int sharedWords = Math.min(a.getNumWords(), b.getNumWords());
   long count = BitUtil.pop_andnot(a.getBits(), b.getBits(), 0, sharedWords);

   // Words of a beyond the end of b have nothing to subtract: count them whole.
   final int extraWords = a.getNumWords() - b.getNumWords();
   if (extraWords > 0) {
     count += BitUtil.pop_array(a.getBits(), b.getNumWords(), extraWords);
   }
   return count;
 }
  /**
   * Returns true if both sets have the same bits set. Sets with a different number of words are
   * still equal as long as the extra words of the longer one are all zero.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof BitSet)) {
      return false;
    }

    final BitSet that = (BitSet) o;
    final boolean thatIsLarger = that.getNumWords() > myNumWords;
    final BitSet larger = thatIsLarger ? that : this;
    final BitSet smaller = thatIsLarger ? this : that;

    final int smallerWords = smaller.getNumWords();

    // Any bit set beyond the range of the smaller set makes the two differ.
    for (int word = larger.getNumWords() - 1; word >= smallerWords; word--) {
      if (larger.getBits()[word] != 0) {
        return false;
      }
    }

    // Compare the words both sets have in common.
    for (int word = smallerWords - 1; word >= 0; word--) {
      if (larger.getBits()[word] != smaller.getBits()[word]) {
        return false;
      }
    }

    return true;
  }
Esempio n. 10
0
  /**
   * Collects the documents whose indices are set in {@code bitset}.
   *
   * @param l list to append to; when {@code null} a new list sized to the bitset's cardinality is
   *     created
   * @param bitset indices into {@code documents} to collect
   * @return {@code l} (or the newly created list) with the selected documents appended
   */
  private List<Document> collectDocuments(List<Document> l, BitSet bitset) {
    List<Document> target = l;
    if (target == null) {
      target = Lists.newArrayListWithCapacity((int) bitset.cardinality());
    }

    final BitSetIterator setBits = bitset.iterator();
    int index;
    // The loop ends on the first negative value returned by nextSetBit().
    while ((index = setBits.nextSetBit()) >= 0) {
      target.add(documents.get(index));
    }
    return target;
  }
 /**
  * Returns true if the sets have any elements in common. Only the words present in both sets are
  * examined.
  *
  * @param other set to test against
  * @return whether at least one bit is set in both sets
  */
 public boolean intersects(BitSet other) {
   final int commonWords = Math.min(myNumWords, other.getNumWords());
   final long[] mine = myBits;
   final long[] theirs = other.getBits();
   for (int word = commonWords - 1; word >= 0; word--) {
     final long shared = mine[word] & theirs[word];
     if (shared != 0) {
       return true;
     }
   }
   return false;
 }
Esempio n. 12
0
 /** Sets every bit, verifies they all read back as set, then checks {@code clear()} resets them. */
 @Test
 public void testClear() {
   BitSet bits = new BitSet(NUM_BITS);

   // Turn every bit on.
   int bit = 0;
   while (bit < NUM_BITS) {
     bits.set(bit);
     bit++;
   }
   for (bit = 0; bit < NUM_BITS; bit++) {
     assertTrue(bits.get(bit));
   }

   bits.clear();

   // After clearing, no bit may remain set.
   for (bit = 0; bit < NUM_BITS; bit++) {
     assertFalse(bits.get(bit));
   }
 }
 /**
  * Intersects this set with {@code other} in place (this = this AND other). Afterwards the word
  * count of this set is the smaller of the two word counts.
  *
  * @param other set to intersect with
  */
 public void intersect(BitSet other) {
   final int commonWords = Math.min(myNumWords, other.getNumWords());
   final long[] mine = myBits;
   final long[] theirs = other.getBits();

   // Counting down so the loop condition is a comparison against zero.
   for (int word = commonWords - 1; word >= 0; word--) {
     mine[word] &= theirs[word];
   }

   if (commonWords < myNumWords) {
     // Words beyond the other set's length cannot survive the AND: zero them out.
     Arrays.fill(myBits, commonWords, myNumWords, 0);
   }
   myNumWords = commonWords;
 }
  /**
   * XORs {@code other} into this set in place (this = this XOR other). Afterwards the word count of
   * this set is the larger of the two word counts.
   *
   * @param other set to XOR with
   */
  public void xor(BitSet other) {
    final int resultWords = Math.max(myNumWords, other.getNumWords());
    ensureCapacityWords(resultWords);

    final long[] mine = myBits;
    final long[] theirs = other.getBits();

    // XOR the words present in both sets.
    final int commonWords = Math.min(myNumWords, other.getNumWords());
    for (int word = commonWords - 1; word >= 0; word--) {
      mine[word] ^= theirs[word];
    }

    // Words only the other set has are taken over unchanged.
    final int missingWords = resultWords - myNumWords;
    if (missingWords > 0) {
      System.arraycopy(theirs, myNumWords, mine, myNumWords, missingWords);
    }
    myNumWords = resultWords;
  }
Esempio n. 15
0
 /**
  * Add a document to this builder. NOTE: doc IDs do not need to be provided in order. NOTE: if you
  * plan on adding several docs at once, look into using {@link #grow(int)} to reserve space.
  *
  * @param doc the doc ID to add
  */
 public void add(int doc) {
   if (bitSet != null) {
     // Already upgraded: record the doc directly in the bit set.
     bitSet.set(doc);
     return;
   }

   final int needed = bufferSize + 1;
   if (needed > buffer.length) {
     if (needed >= threshold) {
       // Growing would reach the threshold, so switch to the bit set representation instead.
       upgradeToBitSet();
       bitSet.set(doc);
       return;
     }
     growBuffer(needed);
   }
   buffer[bufferSize++] = doc;
 }
Esempio n. 16
0
 /**
  * Applies {@code target.set(s)} when {@code size(s)} is positive, allocating {@code target} with
  * that size first if it is {@code null}.
  *
  * @param target set to modify; may be {@code null}
  * @param s argument handed to {@code set}
  * @return {@code target}, or the newly allocated set; unchanged when {@code size(s)} is not
  *     positive
  */
 public static BitSet set(BitSet target, BitSet s) {
   if (!(size(s) > 0)) {
     return target;
   }
   BitSet result = target;
   if (result == null) {
     result = new BitSet(size(s));
   }
   result.set(s);
   return result;
 }
Esempio n. 17
0
 /**
  * Unions the follow sets on the stack, walking from the top entry ({@code _fsp}) down to index 0,
  * and removes EOR from the result.
  *
  * @param exact when set, stop after the first level whose follow set does not contain EOR
  * @return a new set holding the combined follow, never containing EOR
  */
 protected BitSet combineFollows(boolean exact) {
   BitSet combined = new BitSet();
   int depth = _fsp;
   while (depth >= 0) {
     BitSet local = following[depth];
     combined.orInPlace(local);
     final boolean endOfRuleVisible = local.member(Token.EOR_TOKEN_TYPE);
     if (exact && !endOfRuleVisible) {
       // This level cannot see the end of its rule; outer levels do not matter.
       break;
     }
     depth--;
   }
   combined.remove(Token.EOR_TOKEN_TYPE);
   return combined;
 }
Esempio n. 18
0
 /**
  * Makes sure the current lookahead symbol is a member of the given set and consumes it.
  *
  * @param b set of acceptable token types
  * @throws MismatchedTokenException if {@code LA(1)} is not in {@code b}; it is caught by either
  *     the error handler or by the syntactic predicate
  * @throws TokenStreamException declared by the signature; not thrown explicitly in this method
  */
 public void match(BitSet b) throws MismatchedTokenException, TokenStreamException {
   if (b.member(LA(1))) {
     // Mark token as consumed -- fetching the next token is deferred until LA/LT.
     consume();
     return;
   }
   throw new MismatchedTokenException(tokenNames, LT(1), b, false, getFilename());
 }
Esempio n. 19
0
 /**
  * Consumes tokens until the lookahead is a member of the given token set or EOF is reached. The
  * matching token itself is not consumed.
  *
  * @param input stream to advance
  * @param set token types at which to stop
  */
 public void consumeUntil(IntStream input, BitSet set) {
   for (int ttype = input.LA(1);
       ttype != Token.EOF && !set.member(ttype);
       ttype = input.LA(1)) {
     input.consume();
   }
 }
Esempio n. 20
0
 /**
  * Switches from the buffer representation to a {@link FixedBitSet} of size {@code maxDoc},
  * copying over the first {@code bufferSize} buffered doc IDs and then dropping the buffer.
  */
 private void upgradeToBitSet() {
   assert bitSet == null;
   bitSet = new FixedBitSet(maxDoc);

   int next = 0;
   while (next < bufferSize) {
     bitSet.set(buffer[next]);
     next++;
   }

   // The buffer is no longer needed once its content lives in the bit set.
   buffer = null;
   bufferSize = 0;
 }
Esempio n. 21
0
  /**
   * Converts the cluster candidates into the final set of clusters in Carrot2 format and then
   * delegates to {@code Cluster.appendOtherTopics}, which presumably creates the junk (unassigned
   * documents) cluster -- verify against that method.
   *
   * @param clusters candidates to convert; each contributes one {@code Cluster} appended to {@code
   *     this.clusters}
   */
  private void postProcessing(ArrayList<ClusterCandidate> clusters) {
    // Scratch lists reused for every candidate. They are cleared after each iteration, so
    // addPhrases/addDocuments are assumed to copy their argument -- TODO confirm.
    final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
    final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);

    for (ClusterCandidate c : clusters) {
      final Cluster c2 = new Cluster();
      c2.addPhrases(collectPhrases(phrases, c));
      c2.addDocuments(collectDocuments(docs, c.documents));
      c2.setScore((double) c.score);
      this.clusters.add(c2);

      docs.clear();
      phrases.clear();
    }

    Cluster.appendOtherTopics(this.documents, this.clusters);
  }
Esempio n. 22
0
  /**
   * Removes every element for which {@code filter} returns {@code true}.
   *
   * <p>The work is done in two passes: first all matching indices are recorded, then the surviving
   * elements are shifted left. An exception thrown by the predicate during the first pass therefore
   * leaves the collection unmodified.
   *
   * @param filter predicate selecting the elements to remove
   * @return {@code true} if at least one element was removed
   * @throws NullPointerException if {@code filter} is {@code null}
   * @throws ConcurrentModificationException if {@code modCount} changes while this method runs
   */
  @Override
  @SuppressWarnings("unchecked")
  public synchronized boolean removeIf(Predicate<? super E> filter) {
    Objects.requireNonNull(filter);

    // Pass 1: mark the indices of the elements to drop.
    final int originalSize = elementCount;
    final BitSet doomed = new BitSet(originalSize);
    final int startModCount = modCount;
    int doomedCount = 0;
    int index = 0;
    while (modCount == startModCount && index < originalSize) {
      if (filter.test((E) elementData[index])) {
        doomed.set(index);
        doomedCount++;
      }
      index++;
    }
    if (modCount != startModCount) {
      throw new ConcurrentModificationException();
    }
    if (doomedCount == 0) {
      return false;
    }

    // Pass 2: shift surviving elements left over the gaps left by removed elements.
    final int survivors = originalSize - doomedCount;
    for (int src = 0, dst = 0; (src < originalSize) && (dst < survivors); src++, dst++) {
      src = doomed.nextClearBit(src);
      elementData[dst] = elementData[src];
    }
    for (int stale = survivors; stale < originalSize; stale++) {
      elementData[stale] = null; // Let gc do its work
    }
    elementCount = survivors;
    if (modCount != startModCount) {
      throw new ConcurrentModificationException();
    }
    modCount++;
    return true;
  }
 private State(
     final BitSet itemsToBuy,
     final Store store,
     final boolean hasPerishable,
     final int lastBought) {
   this.itemsToBuy = itemsToBuy;
   this.store = store;
   this.hasPerishable = hasPerishable;
   this.lastBought = lastBought;
   hash = (itemsToBuy.hashCode() * stores.size() + store.index) * 2 + (hasPerishable ? 0 : 1);
 }
Esempio n. 24
0
 /**
  * Sets or clears a single bit, allocating or growing {@code target} when {@code index} is at or
  * beyond {@code size(target)}.
  *
  * @param target set to modify; may be {@code null}, in which case a new set is created
  * @param index zero-based bit position; asserted to be non-negative
  * @param value {@code true} to set the bit, {@code false} to clear it
  * @return {@code target}, or the newly allocated set
  */
 public static BitSet set(BitSet target, int index, boolean value) {
   assert index >= 0;
   final int word = index >> 6; // 64 bits per long
   final long mask = 1L << (index & 63);

   if (index >= size(target)) {
     if (target == null) {
       target = new BitSet(index + 1);
     }
     // Replace the backing array with one just long enough to hold the word of this index.
     final long[] grown = new long[word + 1];
     if (target.bits != null) {
       System.arraycopy(target.bits, 0, grown, 0, target.bits.length);
     }
     target.bits = grown;
   }

   if (value) {
     target.bits[word] |= mask;
   } else {
     target.bits[word] &= ~mask;
   }
   return target;
 }
Esempio n. 25
0
 /** Checks that a fresh set reports no bits set, and that the first and last bits can be set. */
 @Test
 public void testGetSet() {
   final int firstBit = 0;
   final int lastBit = NUM_BITS - 1;
   BitSet bits = new BitSet(NUM_BITS);

   // A new set starts out empty.
   int bit = 0;
   while (bit < NUM_BITS) {
     assertFalse(bits.get(bit));
     bit++;
   }

   bits.set(firstBit);
   bits.set(lastBit);

   assertTrue(bits.get(firstBit));
   assertTrue(bits.get(lastBit));
 }
Esempio n. 26
0
  /**
   * Given a start and stop index, returns a List of all tokens in that range whose type is in the
   * given BitSet. This method looks at both on and off channel tokens.
   *
   * @param start first index to inspect; values below 0 are treated as 0
   * @param stop last index to inspect (inclusive); values past the end are clamped to the last token
   * @param types token types to keep, or {@code null} to keep every token in the range
   * @return the matching tokens, or {@code null} if the range is empty or no tokens were found
   */
  public List getTokens(int start, int stop, BitSet types) {
    if (p == -1) {
      setup();
    }

    // Clamp the requested range to the tokens actually available.
    final int last = Math.min(stop, tokens.size() - 1);
    final int first = Math.max(start, 0);
    if (first > last) {
      return null;
    }

    // matches = tokens[first:last]:{Token t, t.getType() in types}
    final List<Token> matches = new ArrayList<Token>();
    for (int i = first; i <= last; i++) {
      final Token candidate = tokens.get(i);
      final boolean wanted = types == null || types.member(candidate.getType());
      if (wanted) {
        matches.add(candidate);
      }
    }
    return matches.isEmpty() ? null : matches;
  }
Esempio n. 27
0
 /**
  * Intersects this set with the documents of {@code it}.
  *
  * <p>When the iterator is backed by another {@link SparseFixedBitSet}, blocks whose index words
  * share no bit are cleared up front by looking only at the index. The set is then sparser and the
  * leap-frog approach of the parent class is more efficient. The parent implementation always runs
  * afterwards.
  *
  * @param it iterator to intersect with
  * @throws IOException if the iterator fails
  */
 @Override
 public void and(DocIdSetIterator it) throws IOException {
   final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
   if (other != null) {
     final int sharedBlocks = Math.min(indices.length, other.indices.length);
     for (int block = 0; block < sharedBlocks; ++block) {
       if ((indices[block] & other.indices[block]) != 0) {
         // Some long of this block may survive; leave it to the generic code.
         continue;
       }
       // Nothing in common: drop the whole block and keep the non-zero long count in sync.
       nonZeroLongCount -= Long.bitCount(indices[block]);
       indices[block] = 0;
       bits[block] = null;
     }
   }
   super.and(it);
 }
Esempio n. 28
0
  /**
   * Unions this set with the documents of {@code it}, picking one of three strategies: a
   * specialised union when the iterator is backed by another {@link SparseFixedBitSet}, the parent
   * implementation when the iterator's cost is below the number of blocks, and {@code orDense}
   * otherwise.
   *
   * @param it iterator to union with
   * @throws IOException if the iterator fails
   */
  @Override
  public void or(DocIdSetIterator it) throws IOException {
    // Specialize union with another SparseFixedBitSet.
    final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
    if (other != null) {
      assertUnpositioned(it);
      or(other);
      return;
    }

    // There is no specialization for FixedBitSet: FixedBitSets are supposed to be used for dense
    // data and sparse fixed bit sets for sparse data, so a sparse set would likely get upgraded by
    // DocIdSetBuilder before being or'ed with a FixedBitSet.

    final boolean sparseIterator = it.cost() < indices.length;
    if (sparseIterator) {
      // The default impl is good for sparse iterators.
      super.or(it);
      return;
    }
    orDense(it);
  }
Esempio n. 29
0
 /**
  * Randomized check of {@code nextSetBit}: for 100 random sets and random start positions, no bit
  * between the start and the reported position may be set, and the reported position itself must
  * be set. When -1 is reported, no bit from the start to the end may be set.
  */
 @Test
 public void testNextBitSetRandom() {
   RandomGenerator random = RandomManager.getRandom();
   for (int i = 0; i < 100; i++) {
     BitSet bitSet = new BitSet(NUM_BITS);
     // Draw the number of bits once per trial; re-drawing it in the loop condition would change
     // the bound on every iteration.
     int bitsToSet = 20 + random.nextInt(50);
     for (int j = 0; j < bitsToSet; j++) {
       bitSet.set(random.nextInt(NUM_BITS));
     }

     int from = random.nextInt(NUM_BITS);
     int nextSet = bitSet.nextSetBit(from);
     if (nextSet == -1) {
       // Nothing found: everything from the start position onwards must be clear.
       for (int j = from; j < NUM_BITS; j++) {
         assertFalse(bitSet.get(j));
       }
     } else {
       // Everything before the reported bit must be clear, and the reported bit must be set.
       for (int j = from; j < nextSet; j++) {
         assertFalse(bitSet.get(j));
       }
       assertTrue(bitSet.get(nextSet));
     }
   }
 }
Esempio n. 30
0
 /**
  * Copies a set via the {@code BitSet(BitSet)} constructor.
  *
  * @param s set to copy; may be {@code null}
  * @return a new set built from {@code s}, or {@code null} when {@code s} is {@code null} or empty
  */
 public static BitSet copy(BitSet s) {
   if (s == null || s.isEmpty()) {
     return null;
   }
   return new BitSet(s);
 }