/**
 * Allocates a fresh context buffer sized for {@code globalCount + localCount + remoteCount}
 * shards and returns a {@link ContextState} wrapping it.
 *
 * <p>The header holds one entry per global and per local shard (remote shards get no header
 * entry); the number of header entries is written as a short at the buffer's current position.
 *
 * @param globalCount number of global shards to make room for.
 * @param localCount number of local shards to make room for.
 * @param remoteCount number of remote shards to make room for.
 * @return the state wrapping the newly allocated buffer.
 */
public static ContextState allocate(int globalCount, int localCount, int remoteCount) {
  int headerEntries = globalCount + localCount;
  int totalShards = headerEntries + remoteCount;

  int headerBytes = HEADER_SIZE_LENGTH + headerEntries * HEADER_ELT_LENGTH;
  int bodyBytes = totalShards * STEP_LENGTH;

  ByteBuffer context = ByteBuffer.allocate(headerBytes + bodyBytes);
  // Absolute put: records the header entry count without moving the buffer's position.
  context.putShort(context.position(), (short) headerEntries);
  return ContextState.wrap(context);
}
/**
 * Renders a counter context as text.
 *
 * <p>The output is a bracketed, comma-separated list with one <code>{id, clock, count}</code>
 * group per shard. A global shard is followed by {@code $} and a local shard by {@code *}; any
 * other shard gets no suffix.
 *
 * @param context counter context.
 * @return a human-readable String of the context.
 */
public String toString(ByteBuffer context) {
  StringBuilder text = new StringBuilder();
  text.append("[");
  for (ContextState cursor = ContextState.wrap(context);
      cursor.hasRemaining();
      cursor.moveToNext()) {
    // Separator goes before every shard except the first one.
    if (cursor.getElementIndex() > 0) {
      text.append(",");
    }
    text.append("{")
        .append(cursor.getCounterId())
        .append(", ")
        .append(cursor.getClock())
        .append(", ")
        .append(cursor.getCount())
        .append("}");

    String marker = cursor.isGlobal() ? "$" : (cursor.isLocal() ? "*" : "");
    text.append(marker);
  }
  text.append("]");
  return text.toString();
}
@Test public void testDiff() throws UnknownHostException { ContextState left; ContextState right; CounterColumn leftCol; CounterColumn rightCol; // timestamp leftCol = new CounterColumn(ByteBufferUtil.bytes("x"), 0, 1L); rightCol = new CounterColumn(ByteBufferUtil.bytes("x"), 0, 2L); assert rightCol == leftCol.diff(rightCol); assert null == rightCol.diff(leftCol); // timestampOfLastDelete leftCol = new CounterColumn(ByteBufferUtil.bytes("x"), 0, 1L, 1L); rightCol = new CounterColumn(ByteBufferUtil.bytes("x"), 0, 1L, 2L); assert rightCol == leftCol.diff(rightCol); assert null == rightCol.diff(leftCol); // equality: equal nodes, all counts same left = ContextState.allocate(3, 0); left.writeElement(NodeId.fromInt(3), 3L, 0L); left.writeElement(NodeId.fromInt(6), 2L, 0L); left.writeElement(NodeId.fromInt(9), 1L, 0L); right = new ContextState(ByteBufferUtil.clone(left.context), 2); leftCol = new CounterColumn(ByteBufferUtil.bytes("x"), left.context, 1L); rightCol = new CounterColumn(ByteBufferUtil.bytes("x"), right.context, 1L); assert null == leftCol.diff(rightCol); // greater than: left has superset of nodes (counts equal) left = ContextState.allocate(4, 0); left.writeElement(NodeId.fromInt(3), 3L, 0L); left.writeElement(NodeId.fromInt(6), 2L, 0L); left.writeElement(NodeId.fromInt(9), 1L, 0L); left.writeElement(NodeId.fromInt(12), 0L, 0L); right = ContextState.allocate(3, 0); right.writeElement(NodeId.fromInt(3), 3L, 0L); right.writeElement(NodeId.fromInt(6), 2L, 0L); right.writeElement(NodeId.fromInt(9), 1L, 0L); leftCol = new CounterColumn(ByteBufferUtil.bytes("x"), left.context, 1L); rightCol = new CounterColumn(ByteBufferUtil.bytes("x"), right.context, 1L); assert null == leftCol.diff(rightCol); // less than: right has subset of nodes (counts equal) assert leftCol == rightCol.diff(leftCol); // disjoint: right and left have disjoint node sets left = ContextState.allocate(3, 0); left.writeElement(NodeId.fromInt(3), 1L, 0L); left.writeElement(NodeId.fromInt(4), 1L, 
0L); left.writeElement(NodeId.fromInt(9), 1L, 0L); right = ContextState.allocate(3, 0); right.writeElement(NodeId.fromInt(3), 1L, 0L); right.writeElement(NodeId.fromInt(6), 1L, 0L); right.writeElement(NodeId.fromInt(9), 1L, 0L); leftCol = new CounterColumn(ByteBufferUtil.bytes("x"), left.context, 1L); rightCol = new CounterColumn(ByteBufferUtil.bytes("x"), right.context, 1L); assert rightCol == leftCol.diff(rightCol); assert leftCol == rightCol.diff(leftCol); }
/**
 * Builds a counter context holding exactly one global, 2.1+ shard (a result of increment).
 *
 * @param id counter id of the shard.
 * @param clock logical clock of the shard.
 * @param count count carried by the shard.
 * @return the backing buffer of the newly written context.
 */
public ByteBuffer createGlobal(CounterId id, long clock, long count) {
  // Room for one global shard, no local and no remote shards.
  ContextState single = ContextState.allocate(1, 0, 0);
  single.writeGlobal(id, clock, count);
  return single.context;
}
/**
 * Writes the shard this state currently points at (id, clock, count and its global/local flags)
 * into {@code other}.
 *
 * @param other the state receiving the copied shard.
 */
public void copyTo(ContextState other) {
  other.writeElement(
      this.getCounterId(),
      this.getClock(),
      this.getCount(),
      this.currentIsGlobal,
      this.currentIsLocal);
}
/* * Compares two shards, returns: * - GREATER_THAN if leftState overrides rightState * - LESS_THAN if rightState overrides leftState * - EQUAL for two equal, non-local, shards * - DISJOINT for any two local shards */ private Relationship compare(ContextState leftState, ContextState rightState) { long leftClock = leftState.getClock(); long leftCount = leftState.getCount(); long rightClock = rightState.getClock(); long rightCount = rightState.getCount(); if (leftState.isGlobal() || rightState.isGlobal()) { if (leftState.isGlobal() && rightState.isGlobal()) { if (leftClock == rightClock) { // Can happen if an sstable gets lost and disk failure policy is set to 'best effort' if (leftCount != rightCount && CompactionManager.isCompactionManager.get()) { logger.warn( "invalid global counter shard detected; ({}, {}, {}) and ({}, {}, {}) differ only in " + "count; will pick highest to self-heal on compaction", leftState.getCounterId(), leftClock, leftCount, rightState.getCounterId(), rightClock, rightCount); } if (leftCount > rightCount) return Relationship.GREATER_THAN; else if (leftCount == rightCount) return Relationship.EQUAL; else return Relationship.LESS_THAN; } else { return leftClock > rightClock ? Relationship.GREATER_THAN : Relationship.LESS_THAN; } } else // only one is global - keep that one { return leftState.isGlobal() ? Relationship.GREATER_THAN : Relationship.LESS_THAN; } } if (leftState.isLocal() || rightState.isLocal()) { // Local id and at least one is a local shard. if (leftState.isLocal() && rightState.isLocal()) return Relationship.DISJOINT; else // only one is local - keep that one return leftState.isLocal() ? Relationship.GREATER_THAN : Relationship.LESS_THAN; } // both are remote shards if (leftClock == rightClock) { // We should never see non-local shards w/ same id+clock but different counts. 
However, if we // do // we should "heal" the problem by being deterministic in our selection of shard - and // log the occurrence so that the operator will know something is wrong. if (leftCount != rightCount && CompactionManager.isCompactionManager.get()) { logger.warn( "invalid remote counter shard detected; ({}, {}, {}) and ({}, {}, {}) differ only in " + "count; will pick highest to self-heal on compaction", leftState.getCounterId(), leftClock, leftCount, rightState.getCounterId(), rightClock, rightCount); } if (leftCount > rightCount) return Relationship.GREATER_THAN; else if (leftCount == rightCount) return Relationship.EQUAL; else return Relationship.LESS_THAN; } else { if ((leftClock >= 0 && rightClock > 0 && leftClock >= rightClock) || (leftClock < 0 && (rightClock > 0 || leftClock < rightClock))) return Relationship.GREATER_THAN; else return Relationship.LESS_THAN; } }
private ByteBuffer merge( ContextState mergedState, ContextState leftState, ContextState rightState) { while (leftState.hasRemaining() && rightState.hasRemaining()) { int cmp = leftState.compareIdTo(rightState); if (cmp == 0) { Relationship rel = compare(leftState, rightState); if (rel == Relationship.DISJOINT) // two local shards mergedState.writeLocal( leftState.getCounterId(), leftState.getClock() + rightState.getClock(), leftState.getCount() + rightState.getCount()); else if (rel == Relationship.GREATER_THAN) leftState.copyTo(mergedState); else // EQUAL or LESS_THAN rightState.copyTo(mergedState); rightState.moveToNext(); leftState.moveToNext(); } else if (cmp > 0) { rightState.copyTo(mergedState); rightState.moveToNext(); } else // cmp < 0 { leftState.copyTo(mergedState); leftState.moveToNext(); } } while (leftState.hasRemaining()) { leftState.copyTo(mergedState); leftState.moveToNext(); } while (rightState.hasRemaining()) { rightState.copyTo(mergedState); rightState.moveToNext(); } return mergedState.context; }
/** * Return a context w/ an aggregated count for each counter id. * * @param left counter context. * @param right counter context. */ public ByteBuffer merge(ByteBuffer left, ByteBuffer right) { boolean leftIsSuperSet = true; boolean rightIsSuperSet = true; int globalCount = 0; int localCount = 0; int remoteCount = 0; ContextState leftState = ContextState.wrap(left); ContextState rightState = ContextState.wrap(right); while (leftState.hasRemaining() && rightState.hasRemaining()) { int cmp = leftState.compareIdTo(rightState); if (cmp == 0) { Relationship rel = compare(leftState, rightState); if (rel == Relationship.GREATER_THAN) rightIsSuperSet = false; else if (rel == Relationship.LESS_THAN) leftIsSuperSet = false; else if (rel == Relationship.DISJOINT) leftIsSuperSet = rightIsSuperSet = false; if (leftState.isGlobal() || rightState.isGlobal()) globalCount += 1; else if (leftState.isLocal() || rightState.isLocal()) localCount += 1; else remoteCount += 1; leftState.moveToNext(); rightState.moveToNext(); } else if (cmp > 0) { leftIsSuperSet = false; if (rightState.isGlobal()) globalCount += 1; else if (rightState.isLocal()) localCount += 1; else remoteCount += 1; rightState.moveToNext(); } else // cmp < 0 { rightIsSuperSet = false; if (leftState.isGlobal()) globalCount += 1; else if (leftState.isLocal()) localCount += 1; else remoteCount += 1; leftState.moveToNext(); } } if (leftState.hasRemaining()) rightIsSuperSet = false; else if (rightState.hasRemaining()) leftIsSuperSet = false; // if one of the contexts is a superset, return it early. 
if (leftIsSuperSet) return left; else if (rightIsSuperSet) return right; while (leftState.hasRemaining()) { if (leftState.isGlobal()) globalCount += 1; else if (leftState.isLocal()) localCount += 1; else remoteCount += 1; leftState.moveToNext(); } while (rightState.hasRemaining()) { if (rightState.isGlobal()) globalCount += 1; else if (rightState.isLocal()) localCount += 1; else remoteCount += 1; rightState.moveToNext(); } leftState.reset(); rightState.reset(); return merge( ContextState.allocate(globalCount, localCount, remoteCount), leftState, rightState); }
/** * Determine the count relationship between two contexts. * * <p>EQUAL: Equal set of nodes and every count is equal. GREATER_THAN: Superset of nodes and * every count is equal or greater than its corollary. LESS_THAN: Subset of nodes and every count * is equal or less than its corollary. DISJOINT: Node sets are not equal and/or counts are not * all greater or less than. * * <p>Strategy: compare node logical clocks (like a version vector). * * @param left counter context. * @param right counter context. * @return the Relationship between the contexts. */ public Relationship diff(ByteBuffer left, ByteBuffer right) { Relationship relationship = Relationship.EQUAL; ContextState leftState = ContextState.wrap(left); ContextState rightState = ContextState.wrap(right); while (leftState.hasRemaining() && rightState.hasRemaining()) { // compare id bytes int compareId = leftState.compareIdTo(rightState); if (compareId == 0) { long leftClock = leftState.getClock(); long rightClock = rightState.getClock(); long leftCount = leftState.getCount(); long rightCount = rightState.getCount(); // advance leftState.moveToNext(); rightState.moveToNext(); // process clock comparisons if (leftClock == rightClock) { if (leftCount != rightCount) { // Inconsistent shard (see the corresponding code in merge()). We return DISJOINT in // this // case so that it will be treated as a difference, allowing read-repair to work. 
return Relationship.DISJOINT; } } else if ((leftClock >= 0 && rightClock > 0 && leftClock > rightClock) || (leftClock < 0 && (rightClock > 0 || leftClock < rightClock))) { if (relationship == Relationship.EQUAL) relationship = Relationship.GREATER_THAN; else if (relationship == Relationship.LESS_THAN) return Relationship.DISJOINT; // relationship == Relationship.GREATER_THAN } else { if (relationship == Relationship.EQUAL) relationship = Relationship.LESS_THAN; else if (relationship == Relationship.GREATER_THAN) return Relationship.DISJOINT; // relationship == Relationship.LESS_THAN } } else if (compareId > 0) { // only advance the right context rightState.moveToNext(); if (relationship == Relationship.EQUAL) relationship = Relationship.LESS_THAN; else if (relationship == Relationship.GREATER_THAN) return Relationship.DISJOINT; // relationship == Relationship.LESS_THAN } else // compareId < 0 { // only advance the left context leftState.moveToNext(); if (relationship == Relationship.EQUAL) relationship = Relationship.GREATER_THAN; else if (relationship == Relationship.LESS_THAN) return Relationship.DISJOINT; // relationship == Relationship.GREATER_THAN } } // check final lengths if (leftState.hasRemaining()) { if (relationship == Relationship.EQUAL) return Relationship.GREATER_THAN; else if (relationship == Relationship.LESS_THAN) return Relationship.DISJOINT; } if (rightState.hasRemaining()) { if (relationship == Relationship.EQUAL) return Relationship.LESS_THAN; else if (relationship == Relationship.GREATER_THAN) return Relationship.DISJOINT; } return relationship; }
/**
 * Builds a counter context holding exactly one remote shard. For use by tests of compatibility
 * with pre-2.1 counters only.
 *
 * @param id counter id of the shard.
 * @param clock logical clock of the shard.
 * @param count count carried by the shard.
 * @return the backing buffer of the newly written context.
 */
public ByteBuffer createRemote(CounterId id, long clock, long count) {
  // Room for one remote shard, no global and no local shards.
  ContextState single = ContextState.allocate(0, 0, 1);
  single.writeRemote(id, clock, count);
  return single.context;
}
/**
 * Builds a counter context holding exactly one local shard, written under the id returned by
 * {@code CounterId.getLocalId()} with a clock of 1. For use by tests of compatibility with
 * pre-2.1 counters only.
 *
 * @param count count carried by the shard.
 * @return the backing buffer of the newly written context.
 */
public ByteBuffer createLocal(long count) {
  // Room for one local shard, no global and no remote shards.
  ContextState single = ContextState.allocate(0, 1, 0);
  single.writeLocal(CounterId.getLocalId(), 1L, count);
  return single.context;
}