private void appendLiteral() { if (vec.size() == 0) { vec.add(active.val); } else { int back = vec.getQuick(vec.size() - 1); if (active.val == 0) { // incoming word is zero if (back == 0) { setBack(HEADER0 + 2); } else if (isZeroFill(back)) { setBack(++back); } else { vec.add(active.val); } } else if (active.val == ALLONES) { // incoming word is allones if (back == ALLONES) { setBack((HEADER1 | 2)); } else if (isOneFill(back)) { setBack(++back); } else { vec.add(active.val); } } else { // incoming word contains a mixture of bits vec.add(active.val); } } nbits += MAXBITS; active.reset(); nset = 0; }
/** * Checks if the bit is set in the compressed bitset. * * <p>This operation is considerably slower than the uncompressed version. Use it with care. * * @param i the index where bit is checked. * @return true if the index is set, false otherwise. */ public boolean get(int i) { if (i > numBits()) { // no-way return false; } else { // we have to do hard way for (int j = 0; j < vec.size() && i >= 0; j++) { int v = vec.getQuick(j); // for a fill, we know easily. if (isAFill(v)) { int size = (v & MAXCNT) * MAXBITS; if (i < size) return isOneFill(v); i -= size; } else { // plain value. if (i < MAXBITS) { // do we need to check? return (1 << (MAXBITS - i - 1) & v) != 0; } else { // yet to get there. i -= MAXBITS; } } } // we need to check the active word. if (i >= 0) { return (moreThanInUnsigned(active.val << (MAXBITS + 1 - active.nbits + i), ALLONES)); } } return false; }
private WAHBitSet genericOp(OpType op, WAHBitSet other) { WAHBitSet ret = new WAHBitSet(); // ensure that they have the same bit length. if (this.numBits() < other.numBits()) { this.setBit(other.numBits() - 1, 0); } else if (this.numBits() > other.numBits()) { other.setBit(numBits() - 1, 0); } if (vec.size() > 0) { run xrun = new run(vec), yrun = new run(other.vec); xrun.decode(); yrun.decode(); do { if (xrun.nWords == 0) { xrun.inc(); xrun.decode(); } if (yrun.nWords == 0) { yrun.inc(); yrun.decode(); } if (xrun.isFill()) { if (yrun.isFill()) { int nWords = Math.min(xrun.nWords, yrun.nWords); ret.appendFill(nWords, getOpResult(op, xrun.fillWord, yrun.fillWord)); xrun.nWords -= nWords; yrun.nWords -= nWords; } else { ret.active.val = getOpResult(op, xrun.fillWord, yrun.get()); ret.appendLiteral(); --xrun.nWords; yrun.nWords = 0; } } else if (yrun.isFill()) { ret.active.val = getOpResult(op, yrun.fillWord, xrun.get()); ret.appendLiteral(); yrun.nWords--; xrun.nWords = 0; } else { ret.active.val = getOpResult(op, xrun.get(), yrun.get()); ret.appendLiteral(); yrun.nWords = 0; xrun.nWords = 0; } } while (!(xrun.end() && yrun.end())); } ret.active.val = getOpResult(op, this.active.val, other.active.val); ret.active.nbits = this.active.nbits; ret.doCount(); return ret; }
/** * This is an optimization over the and function. This does not create new bitset. This just * counts the number of 1 bits common between the two bitsets. * * @param other the bitset to and with. * @return the number of 1s common between two bitsets. */ public int andSize(WAHBitSet other) { int size = 0; // ensure that they have the same bit length. if (this.numBits() < other.numBits()) { this.setBit(other.numBits() - 1, 0); } else if (this.numBits() > other.numBits()) { other.setBit(numBits() - 1, 0); } if (vec.size() > 0) { run xrun = new run(vec), yrun = new run(other.vec); xrun.decode(); yrun.decode(); do { if (xrun.nWords == 0) { xrun.inc(); xrun.decode(); } if (yrun.nWords == 0) { yrun.inc(); yrun.decode(); } if (xrun.isFill()) { if (yrun.isFill()) { int nWords = Math.min(xrun.nWords, yrun.nWords); if ((xrun.fillWord & yrun.fillWord) == 1) { size += nWords * MAXBITS; } xrun.nWords -= nWords; yrun.nWords -= nWords; } else { size += countInRun(xrun, yrun); } } else if (yrun.isFill()) { size += countInRun(yrun, xrun); } else { int val = xrun.get() & yrun.get(); if (val > 0) size += Integer.bitCount(val); yrun.nWords = 0; xrun.nWords = 0; } } while (!(xrun.end() && yrun.end())); } size += Integer.bitCount(this.active.val & other.active.val); return size; }
/**
 * Recomputes {@code nbits} and {@code nset} from the words stored in {@code vec}.
 *
 * <p>A literal word contributes {@code MAXBITS} bits and its population count; a fill
 * contributes {@code count * MAXBITS} bits, all of which are counted as set when it is a
 * one-fill. The active word is not looked at.
 *
 * @return the recomputed {@code nbits}
 */
private int doCount() {
  nset = 0;
  nbits = 0;
  final int words = vec.size();
  for (int pos = 0; pos < words; pos++) {
    final int word = vec.getQuick(pos);
    if (isAFill(word)) {
      final int span = (word & MAXCNT) * MAXBITS;
      nbits += span;
      if (isOneFill(word)) {
        nset += span;
      }
    } else {
      nbits += MAXBITS;
      nset += Integer.bitCount(word);
    }
  }
  return nbits;
}
/**
 * Reports whether {@code idx} has reached (or passed) the last position of {@code vec}.
 *
 * @return true when {@code idx} is at least {@code vec.size() - 1}
 */
public boolean end() {
  final int lastIndex = vec.size() - 1;
  return !(idx < lastIndex);
}
/**
 * Overwrites the last word of {@code vec} with {@code val}. No bounds check is performed here,
 * so the vector is expected to be non-empty.
 *
 * @param val the replacement for the last stored word
 */
private void setBack(int val) {
  final int lastIndex = vec.size() - 1;
  vec.setQuick(lastIndex, val);
}
/**
 * Reads the last word of {@code vec}. No bounds check is performed here, so the vector is
 * expected to be non-empty.
 *
 * @return the last stored word
 */
private int getBack() {
  final int lastIndex = vec.size() - 1;
  return vec.getQuick(lastIndex);
}
private void setBit(int ind, int val) { assert val == 0 || val == 1; if (ind >= numBits()) { int diff = ind - numBits() + 1; if (active.nbits > 0) { if (ind + 1 >= nbits + MAXBITS) { diff -= MAXBITS - active.nbits; active.val <<= (MAXBITS - active.nbits); if (diff == 0) active.val += (val != 0 ? 1 : 0); appendLiteral(); } else { active.nbits += diff; active.val <<= diff; active.val += (val != 0 ? 1 : 0); diff = 0; } } if (diff != 0) { int w = diff / MAXBITS; diff -= w * MAXBITS; if (diff != 0) { if (w > 1) { appendCounter(0, w); } else if (w != 0) { appendLiteral(); } active.nbits = diff; active.val += (val != 0 ? 1 : 0); } else if (val != 0) { if (w > 2) { appendCounter(0, w - 1); } else if (w == 2) { appendLiteral(); } active.val = 1; appendLiteral(); } else { if (w > 1) { appendCounter(0, w); } else if (w != 0) { appendLiteral(); } } } if (numBits() != ind + 1) logger.warning("Warning"); if (nset != 0) nset += (val != 0 ? 1 : 0); return; } else if (ind >= nbits) { // modify an active bit int u = active.val; if (val != 0) { active.val |= (1 << (active.nbits - (ind - nbits) - 1)); } else { active.val &= ~(1 << (active.nbits - (ind - nbits) - 1)); } if (nset != 0 && (u != active.val)) nset += (val != 0 ? 1 : -1); return; } else if (vec.size() * MAXBITS == nbits) { // uncompressed int i = ind / MAXBITS; int u = vec.get(i); int w = (1 << (SECONDBIT - (ind % MAXBITS))); if (val != 0) vec.setQuick(i, u |= w); else vec.setQuick(i, u &= ~w); if (nset != 0 && (vec.getQuick(i) != u)) nset += (val != 0 ? 1 : -1); return; } // the code after this has not been verified at all... // should proceed with caution. 
// compressed bit vector -- // the bit to be modified is in vec if (RUN_UNTESTED_CODE) { int idx = 0; int compressed = 0, cnt = 0, ind1 = 0, ind0 = ind; int current = 0; // current bit value while ((ind0 > 0) && (idx < vec.size())) { int v = vec.getQuick(idx); if (isAFill(v)) { // a fill cnt = ((v) & MAXCNT) * MAXBITS; if (cnt > ind0) { // found the location current = (isOneFill(v) ? 1 : 0); compressed = 1; ind1 = ind0; ind0 = 0; } else { ind0 -= cnt; ind1 = ind0; ++idx; } } else { // a literal word cnt = MAXBITS; if (MAXBITS > ind0) { // found the location current = (1 & ((v) >>> (SECONDBIT - ind0))); compressed = 0; ind1 = ind0; ind0 = 0; } else { ind0 -= MAXBITS; ind1 = ind0; ++idx; } } } // while (ind... if (ind1 == 0) { // set current and compressed int v = vec.getQuick(idx); if (isAFill(v)) { cnt = (v & MAXCNT) * MAXBITS; current = (isOneFill(v) ? 1 : 0); compressed = 1; } else { cnt = MAXBITS; current = (v >>> SECONDBIT); compressed = 0; } } if (ind0 > 0) // has not found the right location yet. { if (ind0 < active.nbits) { // in the active word ind1 = (1 << (active.nbits - ind0 - 1)); if (val != 0) { active.val |= ind1; } else { active.val &= ~ind1; } } else { // extends the current bit vector ind1 = ind0 - active.nbits - 1; appendWord(HEADER0 | (ind1 / MAXBITS)); for (ind1 %= MAXBITS; ind1 > 0; --ind1) addOneBit(0); addOneBit(val != 0 ? 1 : 0); } if (nset != 0) nset += val != 0 ? 1 : -1; return; } // locate the bit to be changed, lots of work hidden here if (current == val) return; // nothing to do int v = vec.getQuick(idx); // need to actually modify the bit if (compressed == 0) { // toggle a single bit of a literal word v ^= (1 << (SECONDBIT - ind1)); vec.setQuick(idx, v); } else if (ind1 < MAXBITS) { // bit to be modified is in the first word, two pieces --v; vec.set(idx, v); if ((v & MAXCNT) == 1) { v = (current != 0) ? 
ALLONES : 0; vec.setQuick(idx, v); } int w = 1 << (SECONDBIT - ind1); if (val == 0) w ^= ALLONES; vec.beforeInsert(idx, w); idx++; } else if (cnt - ind1 <= MAXBITS) { // bit to be modified is in the last word, two pieces --(v); vec.setQuick(idx, v); if ((v & MAXCNT) == 1) { v = (current != 0) ? ALLONES : 0; vec.setQuick(idx, v); } int w = 1 << (cnt - ind1 - 1); if (val == 0) w ^= ALLONES; ++idx; vec.beforeInsert(idx, w); } else { // the counter breaks into three pieces int u[] = new int[2], w; u[0] = ind1 / MAXBITS; w = (v & MAXCNT) - u[0] - 1; u[1] = 1 << (SECONDBIT - ind1 + u[0] * MAXBITS); if (val == 0) { u[0] = (u[0] > 1) ? (HEADER1 | u[0]) : (ALLONES); u[1] ^= ALLONES; w = (w > 1) ? (HEADER1 | w) : (ALLONES); } else { u[0] = (u[0] > 1) ? (HEADER0 | u[0]) : 0; w = (w > 1) ? (HEADER0 | w) : 0; } vec.setQuick(idx, w); vec.beforeInsertAllOf(idx, Arrays.asList(u)); } if (nset != 0) nset += val != 0 ? 1 : -1; } else { throw new AssertionError("Untested code detected, would rather die than run this"); } }
/** * Returns a new WAH compressed bitset after anding the current bitset with the <i>other</i> * bitset. * * @param other the bitset to and with * @return The resulting bitset */ public WAHBitSet and(WAHBitSet other) { WAHBitSet ret = new WAHBitSet(); // ensure that they have the same bit length. if (this.numBits() < other.numBits()) { this.setBit(other.numBits() - 1, 0); } else if (this.numBits() > other.numBits()) { other.setBit(numBits() - 1, 0); } // if there is something in the vector. if (vec.size() > 0) { // create new run objects and decode them. run xrun = new run(vec), yrun = new run(other.vec); xrun.decode(); yrun.decode(); do { // if you finished a run, then get the next one. if (xrun.nWords == 0) { xrun.inc(); xrun.decode(); } if (yrun.nWords == 0) { yrun.inc(); yrun.decode(); } if (xrun.isFill()) { if (yrun.isFill()) { // both are fills... this is the best. int nWords = Math.min(xrun.nWords, yrun.nWords); ret.appendFill(nWords, xrun.fillWord & yrun.fillWord); xrun.nWords -= nWords; yrun.nWords -= nWords; } else { // just cut through the other run chewUpRun(xrun, ret, yrun); } } else if (yrun.isFill()) { // again do the same, with different order. chewUpRun(yrun, ret, xrun); } else { // both are literals, so get the new literal and // append it to the return value. ret.active.val = xrun.get() & yrun.get(); ret.appendLiteral(); yrun.nWords = 0; xrun.nWords = 0; } // till they are not at the end. } while (!(xrun.end() && yrun.end())); } // set the active word. ret.active.val = this.active.val & other.active.val; ret.active.nbits = this.active.nbits; // ensure that the counts are set properly. ret.doCount(); return ret; }
/**
 * Returns the amount of memory used by the compressed bit set, computed as the number of
 * entries in {@code vec} plus two.
 *
 * <p>NOTE(review): the result looks like a count of words rather than bytes, and the extra two
 * presumably stand for data kept outside {@code vec} -- confirm.
 *
 * @return the amount of memory used by the compressed bit set
 */
public long memSize() {
  final int words = vec.size() + 2;
  return words;
}