/** Compares our trees, logs the outcome, and triggers repairs for any ranges that mismatch. */
      public void run() {
        InetAddress local = FBUtilities.getLocalAddress();

        // restore partitioners (in case we were serialized)
        if (ltree.partitioner() == null) ltree.partitioner(StorageService.getPartitioner());
        if (rtree.partitioner() == null) rtree.partitioner(StorageService.getPartitioner());

        // compare trees, and collect differences
        differences.addAll(MerkleTree.difference(ltree, rtree));

        // Assemble the log line by plain concatenation. The endpoint, column family and range text
        // must never be used as a format pattern: a '%' inside any of them (for example the zone
        // suffix of a scoped IPv6 address) would make String.format throw.
        String prefix = "Endpoints " + local + " and " + remote + " ";
        String suffix = " for " + cfname + " on " + range;

        // choose a repair method based on the significance of the difference
        if (differences.isEmpty()) {
          logger.info(prefix + "are consistent" + suffix);
          completed(remote, cfname);
          return;
        }

        // non-0 difference: perform streaming repair
        logger.info(prefix + "have " + differences.size() + " range(s) out of sync" + suffix);
        try {
          performStreamingRepair();
        } catch (IOException e) {
          // rethrow unchecked, keeping the original failure as the cause
          throw new RuntimeException(e);
        }
      }
    /**
     * Shapes the tree before validation: an even split when the partitioner is a RandomPartitioner
     * or when the store yields no key samples for the requested range, otherwise splits at randomly
     * chosen sampled keys until the tree refuses a further split. Afterwards the tree's invalid
     * ranges are stored in {@code ranges}.
     *
     * @param cfs store whose key samples for the requested range guide the splitting
     */
    public void prepare(ColumnFamilyStore cfs) {
      // stays null when sampling is skipped entirely
      List<DecoratedKey> samples = null;
      if (!(tree.partitioner() instanceof RandomPartitioner)) {
        samples = new ArrayList<DecoratedKey>();
        for (DecoratedKey candidate : cfs.keySamples(request.range)) {
          assert request.range.contains(candidate.token)
              : "Token " + candidate.token + " is not within range " + request.range;
          samples.add(candidate);
        }
      }

      if (samples == null || samples.isEmpty()) {
        // You can't beat an even tree distribution for md5, and without samples there is
        // nothing better to go on either
        tree.init();
      } else {
        // sample the column family using random keys from the index
        Random rng = new Random();
        int sampleCount = samples.size();
        DecoratedKey picked;
        do {
          picked = samples.get(rng.nextInt(sampleCount));
        } while (tree.split(picked.token));
      }

      logger.debug("Prepared AEService tree of size " + tree.size() + " for " + request);
      ranges = tree.invalids();
    }
  /** Splits the tree at tokens 4, 6 and 7 and checks the resulting size, leaf ranges and depths. */
  @Test
  public void testSplit() {
    // carve (zero, zero] into (zero,four], (four,six], (six,seven] and (seven, zero]
    for (int at : new int[] {4, 6, 7}) {
      mt.split(tok(at));
    }
    assertEquals(4, mt.size());

    // each row: {probe token, expected left bound, expected right bound}
    int[][] lookups = {{-1, 7, -1}, {3, -1, 4}, {4, -1, 4}, {6, 4, 6}, {7, 6, 7}};
    for (int[] lookup : lookups) {
      assertEquals(new Range(tok(lookup[1]), tok(lookup[2])), mt.get(tok(lookup[0])));
    }

    // check depths of the leaves found at tokens 4, 6, 7 and -1
    int[] probes = {4, 6, 7, -1};
    byte[] depths = {1, 2, 3, 3};
    for (int i = 0; i < probes.length; i++) {
      assertEquals(depths[i], mt.get(tok(probes[i])).depth);
    }

    // NOTE(review): if fail() is JUnit's, it reports failure by throwing AssertionError, which the
    // catch below would also swallow -- so this final check may be unable to fail. Confirm and
    // consider restructuring.
    try {
      mt.split(tok(-1));
      fail("Shouldn't be able to split outside the initial range.");
    } catch (AssertionError expected) {
      // pass
    }
  }
Beispiel #4
0
  /**
   * Compares two trees, starting from the range that runs from the minimum token back to itself.
   *
   * @param ltree First tree.
   * @param rtree Second tree.
   * @return A list of the largest contiguous ranges where the given trees disagree.
   */
  public static List<TreeRange> difference(MerkleTree ltree, MerkleTree rtree) {
    Token min = ltree.partitioner.getMinimumToken();
    TreeRange whole = new TreeRange(null, min, min, (byte) 0, null);
    List<TreeRange> mismatches = new ArrayList<TreeRange>();

    byte[] leftHash = ltree.hash(whole);
    byte[] rightHash = rtree.hash(whole);

    if (leftHash == null || rightHash == null) {
      // at least one tree has no hash for the top-level range: report all of it
      mismatches.add(whole);
      return mismatches;
    }
    if (Arrays.equals(leftHash, rightHash)) {
      // identical top-level hashes: nothing to report
      return mismatches;
    }

    // hashes differ: descend; the helper records partial mismatches itself and only asks us to
    // record the whole range when both halves are fully inconsistent
    if (differenceHelper(ltree, rtree, mismatches, whole) == FULLY_INCONSISTENT) {
      mismatches.add(whole);
    }
    return mismatches;
  }
  /** A single-leaf tree has no hash until its only leaf is hashed, then yields that hash. */
  @Test
  public void testHashFull() {
    Range everything = new Range(tok(-1), tok(-1));
    byte[] expected = DUMMY;

    // (zero, zero] has not been hashed yet
    assertNull(mt.hash(everything));

    // validate the range by hashing the one leaf
    mt.get(tok(-1)).hash(expected);

    assertHashEquals(expected, mt.hash(everything));
  }
  /** Compacting two hashed sibling leaves must leave a leaf carrying their combined hash. */
  @Test
  public void testCompactHash() {
    byte[] leafHash = DUMMY;
    byte[] combined = hashed(leafHash, 1, 1);

    // (zero, four], (four,zero]
    mt.split(tok(4));

    // validate both ranges
    for (int at : new int[] {4, -1}) {
      mt.get(tok(at)).hash(leafHash);
    }

    // merge the two leaves back together
    mt.compact(tok(4));
    assertHashEquals(combined, mt.get(tok(-1)).hash());
  }
  /**
   * Splits a size-limited tree at random tokens until a split is refused, hashes every invalid
   * range, and checks that the full range can then be hashed.
   */
  @Test
  public void testHashRandom() {
    int max = 1000000;
    TOKEN_SCALE = new BigInteger("" + max);

    mt = new MerkleTree(partitioner, RECOMMENDED_DEPTH, 32);
    Random random = new Random();
    while (true) {
      if (!mt.split(tok(random.nextInt(max)))) break;
    }

    // validate the tree
    TreeRangeIterator ranges = mt.invalids(new Range(tok(-1), tok(-1)));
    for (TreeRange range : ranges) range.addHash(new RowHash(range.right, new byte[0]));

    // Use a test-framework assertion rather than the `assert` keyword: the keyword is skipped
    // entirely unless the JVM runs with -ea, which would make this check a silent no-op.
    assertTrue("Could not hash tree " + mt, null != mt.hash(new Range(tok(-1), tok(-1))));
  }
Beispiel #8
0
    TreeRangeIterator(MerkleTree tree, Range range) {
      Token mintoken = tree.partitioner().getMinimumToken();
      tovisit = new ArrayDeque<TreeRange>();
      tovisit.add(new TreeRange(tree, mintoken, mintoken, (byte) 0, tree.root));

      this.tree = tree;
      this.range = range;
    }
Beispiel #9
0
  /**
   * Narrows down where two trees disagree inside a range for which both have hashes, but with
   * different values. The range is halved at the partitioner's midpoint and each half is examined
   * in turn, lower half first.
   *
   * <p>TODO: This function could be optimized into a depth first traversal of the two trees in
   * parallel.
   *
   * @param ltree first tree
   * @param rtree second tree
   * @param diff receives the mismatching subranges found below {@code active}
   * @param active range whose hashes are present in both trees but differ
   * @return FULLY_INCONSISTENT if active is inconsistent, PARTIALLY_INCONSISTENT if only a subrange
   *     is inconsistent.
   */
  static int differenceHelper(
      MerkleTree ltree, MerkleTree rtree, List<TreeRange> diff, TreeRange active) {
    Token mid = ltree.partitioner().midpoint(active.left, active.right);
    TreeRange lower = new TreeRange(null, active.left, mid, inc(active.depth), null);
    TreeRange upper = new TreeRange(null, mid, active.right, inc(active.depth), null);

    // the lower half is resolved completely (including any recursion) before the upper half
    boolean lowerFull = compareSubrange(ltree, rtree, diff, lower) == FULLY_INCONSISTENT;
    boolean upperFull = compareSubrange(ltree, rtree, diff, upper) == FULLY_INCONSISTENT;

    if (lowerFull && upperFull) {
      // both children are fully inconsistent: let the caller record the enclosing range instead
      return FULLY_INCONSISTENT;
    }
    if (lowerFull) {
      diff.add(lower);
    } else if (upperFull) {
      diff.add(upper);
    }
    return PARTIALLY_INCONSISTENT;
  }

  /**
   * Classifies one half of a split range: FULLY_INCONSISTENT when either tree has no hash for it,
   * CONSISTENT when both hashes match, otherwise whatever the recursive comparison reports.
   */
  private static int compareSubrange(
      MerkleTree ltree, MerkleTree rtree, List<TreeRange> diff, TreeRange sub) {
    byte[] leftHash = ltree.hash(sub);
    byte[] rightHash = rtree.hash(sub);
    if (leftHash == null || rightHash == null) {
      return FULLY_INCONSISTENT;
    }
    if (Arrays.equals(leftHash, rightHash)) {
      return CONSISTENT;
    }
    return differenceHelper(ltree, rtree, diff, sub);
  }
  /** With a hash depth of 2, a third split into an already depth-2 leaf must be refused. */
  @Test
  public void testSplitLimitDepth() {
    mt = new MerkleTree(partitioner, (byte) 2, Integer.MAX_VALUE);

    for (int at : new int[] {4, 2}) {
      assertTrue(mt.split(tok(at)));
    }
    assertEquals(3, mt.size());

    // should fail to split below hashdepth
    boolean splitAgain = mt.split(tok(1));
    assertFalse(splitAgain);
    assertEquals(3, mt.size());

    // each row: {probe token, expected left bound, expected right bound}
    int[][] lookups = {{-1, 4, -1}, {2, -1, 2}, {4, 2, 4}};
    for (int[] lookup : lookups) {
      assertEquals(new Range(tok(lookup[1]), tok(lookup[2])), mt.get(tok(lookup[0])));
    }
  }
  /** With a maximum size of 2, a second split must be refused and leave the tree untouched. */
  @Test
  public void testSplitLimitSize() {
    mt = new MerkleTree(partitioner, RECOMMENDED_DEPTH, 2);

    boolean firstSplit = mt.split(tok(4));
    assertTrue(firstSplit);
    assertEquals(2, mt.size());

    // should fail to split above maxsize
    boolean secondSplit = mt.split(tok(2));
    assertFalse(secondSplit);
    assertEquals(2, mt.size());

    Range upperHalf = new Range(tok(4), tok(-1));
    assertEquals(upperHalf, mt.get(tok(-1)));
    Range lowerHalf = new Range(tok(-1), tok(4));
    assertEquals(lowerHalf, mt.get(tok(4)));
  }
      /**
       * Compares our trees, and triggers repairs for any ranges that mismatch.
       *
       * <p>Logs one line describing the outcome. When the trees agree the task is reported as
       * completed; otherwise a streaming repair is started.
       */
      public void run() {
        // restore partitioners (in case we were serialized)
        if (r1.tree.partitioner() == null) r1.tree.partitioner(StorageService.getPartitioner());
        if (r2.tree.partitioner() == null) r2.tree.partitioner(StorageService.getPartitioner());

        // compare trees, and collect differences
        differences.addAll(MerkleTree.difference(r1.tree, r2.tree));

        // choose a repair method based on the significance of the difference
        boolean consistent = differences.isEmpty();
        String status =
            consistent ? "are consistent" : "have " + differences.size() + " range(s) out of sync";

        // Format in a single pass. Feeding already-substituted endpoint / column family text back
        // into String.format as a pattern would throw if that text contained a '%'.
        logger.info(
            String.format(
                "[repair #%s] Endpoints %s and %s %s for %s",
                getName(), r1.endpoint, r2.endpoint, status, cfname));

        if (consistent) {
          completed(this);
          return;
        }

        // non-0 difference: perform streaming repair
        performStreamingRepair();
      }
  /**
   * Round-trips a fully hashed tree through Java object serialization and checks that the hash of
   * the full range is unchanged once the partitioner has been set on the restored copy.
   */
  @Test
  public void testSerialization() throws Exception {
    Range everything = new Range(tok(-1), tok(-1));

    // populate and validate the tree
    mt.maxsize(256);
    mt.init();
    for (TreeRange leaf : mt.invalids(everything)) {
      leaf.addAll(new HIterator(leaf.right));
    }
    byte[] hashBefore = mt.hash(everything);

    // write the tree into an in-memory buffer
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    ObjectOutputStream writer = new ObjectOutputStream(sink);
    writer.writeObject(mt);
    writer.close();

    // read it back
    byte[] wire = sink.toByteArray();
    ObjectInputStream reader = new ObjectInputStream(new ByteArrayInputStream(wire));
    MerkleTree copy = (MerkleTree) reader.readObject();

    // restore partitioner after serialization
    copy.partitioner(partitioner);

    assertHashEquals(hashBefore, copy.hash(everything));
  }
  /**
   * Checks which leaf ranges {@code invalids} yields: for a fresh tree, after several splits, and
   * after some of the leaves have been given a hash, for the full range and for two subranges.
   */
  @Test
  public void testInvalids() {
    Iterator<TreeRange> it;

    // (zero, zero]
    it = mt.invalids(new Range(tok(-1), tok(-1)));
    assertEquals(new Range(tok(-1), tok(-1)), it.next());
    assertFalse(it.hasNext());

    // all invalid
    for (int at : new int[] {4, 2, 6, 3, 5}) {
      mt.split(tok(at));
    }
    int[][] everyLeaf = {{-1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, -1}};
    it = mt.invalids(new Range(tok(-1), tok(-1)));
    for (int[] bounds : everyLeaf) {
      assertEquals(new Range(tok(bounds[0]), tok(bounds[1])), it.next());
    }
    assertFalse(it.hasNext());

    // some invalid: hash four of the six leaves
    for (int at : new int[] {2, 4, 5, -1}) {
      mt.get(tok(at)).hash("non-null!".getBytes());
    }

    // the same two leaves must be reported for the full range, a left subrange and a right
    // subrange, in that order
    int[][] stillInvalid = {{2, 3}, {5, 6}};
    int[][] queries = {{-1, -1}, {-1, 6}, {2, -1}};
    for (int[] query : queries) {
      it = mt.invalids(new Range(tok(query[0]), tok(query[1])));
      for (int[] bounds : stillInvalid) {
        assertEquals(new Range(tok(bounds[0]), tok(bounds[1])), it.next());
      }
      assertFalse(it.hasNext());
    }
  }
  /**
   * Builds two initialised trees of the same maximum size, reshapes the first one (compacts its
   * leftmost leaf, splits its rightmost leaf, and gives the rightmost leaf's left neighbour an
   * arbitrary hash), fills in the remaining hashes of both, and checks that exactly two differing
   * ranges are reported.
   */
  @Test
  public void testDifference() {
    Range full = new Range(tok(-1), tok(-1));
    int maxsize = 16;
    mt.maxsize(maxsize);
    MerkleTree mt2 = new MerkleTree(partitioner, RECOMMENDED_DEPTH, maxsize);
    mt.init();
    mt2.init();

    // compact the leftmost, and split the rightmost
    Iterator<TreeRange> ranges = mt.invalids(full);
    TreeRange leftmost = ranges.next();
    TreeRange rightmost = null;
    while (ranges.hasNext()) rightmost = ranges.next();
    mt.compact(leftmost.right);
    leftmost = mt.get(leftmost.right); // leftmost is now a larger range
    mt.split(rightmost.right);

    // set the hash for the left neighbor of rightmost
    TreeRange middle = mt.get(rightmost.left);
    middle.hash("arbitrary!".getBytes());

    // add dummy hashes to the rest of both trees
    for (TreeRange range : mt.invalids(full)) range.addAll(new HIterator(range.right));
    for (TreeRange range : mt2.invalids(full)) range.addAll(new HIterator(range.right));

    // trees should disagree for leftmost, (middle.left, rightmost.right]
    List<TreeRange> diffs = MerkleTree.difference(mt, mt2);
    assertEquals(diffs + " contains wrong number of differences:", 2, diffs.size());
    assertTrue(diffs.contains(leftmost));
    assertTrue(diffs.contains(new Range(middle.left, rightmost.right)));
  }
  /**
   * Generate two trees with different splits, but containing the same keys, and check that they
   * compare equally.
   *
   * <p>The set of keys used in this test is: #{2,4,6,8,12,14,0}
   */
  @Test
  public void testValidateTree() {
    TOKEN_SCALE = new BigInteger("16"); // this test needs slightly more resolution

    Range full = new Range(tok(-1), tok(-1));
    Iterator<TreeRange> leaves;
    MerkleTree mt2 = new MerkleTree(partitioner, RECOMMENDED_DEPTH, Integer.MAX_VALUE);

    // first tree: leaves (-1,4] (4,6] (6,8] (8,10] (10,12] (12,-1]
    for (int at : new int[] {8, 4, 12, 6, 10}) {
      mt.split(tok(at));
    }
    // keys fed to each leaf, in iteration order; an empty row means the leaf holds no keys
    int[][] firstTreeKeys = {{2, 4}, {6}, {8}, {}, {12}, {14, -1}};
    leaves = mt.invalids(full);
    for (int[] keys : firstTreeKeys) {
      leaves.next().addAll(new HIterator(keys));
    }

    // second tree: leaves (-1,2] (2,4] (4,8] (8,9] (9,10] (10,11] (11,12] (12,-1]
    for (int at : new int[] {8, 4, 12, 2, 10, 9, 11}) {
      mt2.split(tok(at));
    }
    int[][] secondTreeKeys = {{2}, {4}, {6, 8}, {}, {}, {}, {12}, {14, -1}};
    leaves = mt2.invalids(full);
    for (int[] keys : secondTreeKeys) {
      leaves.next().addAll(new HIterator(keys));
    }

    byte[] firstHash = mt.hash(full);
    byte[] secondHash = mt2.hash(full);
    assertHashEquals("Tree hashes did not match: " + mt + " && " + mt2, firstHash, secondHash);
  }
  /**
   * Builds a tree by splitting at 16, 8, 4, 2 and 1, and checks the hashes of the full range, of
   * (-1,4] and of (4,-1] before and after every leaf has been hashed.
   */
  @Test
  public void testHashDegenerate() {
    TOKEN_SCALE = new BigInteger("32");

    byte[] leafHash = DUMMY;
    byte[] expectedChild = hashed(leafHash, 5, 5, 4);
    byte[] expectedFull = hashed(leafHash, 5, 5, 4, 3, 2, 1);
    Range childfull = new Range(tok(-1), tok(4));
    Range full = new Range(tok(-1), tok(-1));
    Range invalid = new Range(tok(4), tok(-1));

    mt = new MerkleTree(partitioner, RECOMMENDED_DEPTH, Integer.MAX_VALUE);
    for (int at : new int[] {16, 8, 4, 2, 1}) {
      mt.split(tok(at));
    }

    // nothing has been hashed yet, so no range resolves
    for (Range unresolved : new Range[] {full, childfull, invalid}) {
      assertNull(mt.hash(unresolved));
    }

    // validate the range
    for (int at : new int[] {1, 2, 4, 8, 16, -1}) {
      mt.get(tok(at)).hash(leafHash);
    }

    assertHashEquals(expectedFull, mt.hash(full));
    assertHashEquals(expectedChild, mt.hash(childfull));
    // (4,-1] is still expected to have no hash
    assertNull(mt.hash(invalid));
  }
  /**
   * Hashes a five-leaf tree and checks the hash of the full range and of both top-level halves,
   * plus a range, (1,-1], that the test expects to have no hash throughout.
   */
  @Test
  public void testHashInner() {
    byte[] leafHash = DUMMY;
    byte[] expectedLeft = hashed(leafHash, 3, 3, 2);
    byte[] expectedRight = hashed(leafHash, 2, 2);
    byte[] expectedFull = hashed(leafHash, 3, 3, 2, 2, 2);
    Range full = new Range(tok(-1), tok(-1));
    Range lchild = new Range(tok(-1), tok(4));
    Range rchild = new Range(tok(4), tok(-1));
    Range invalid = new Range(tok(1), tok(-1));

    // (zero,one] (one, two] (two,four] (four, six] (six, zero]
    for (int at : new int[] {4, 2, 6, 1}) {
      mt.split(tok(at));
    }
    for (Range unresolved : new Range[] {full, lchild, rchild, invalid}) {
      assertNull(mt.hash(unresolved));
    }

    // validate the range
    for (int at : new int[] {1, 2, 4, 6, -1}) {
      mt.get(tok(at)).hash(leafHash);
    }

    assertHashEquals(expectedFull, mt.hash(full));
    assertHashEquals(expectedLeft, mt.hash(lchild));
    assertHashEquals(expectedRight, mt.hash(rchild));
    assertNull(mt.hash(invalid));
  }
  /**
   * Hashes a three-leaf tree and checks the hashes of ranges that line up with its leaves, plus two
   * ranges, (1,4] and (4,6], that the test expects to have no hash throughout.
   */
  @Test
  public void testHashPartial() {
    byte[] leafHash = DUMMY;
    byte[] expectedLeft = hashed(leafHash, 1, 1);
    byte[] expectedPartial = hashed(leafHash, 1);
    Range left = new Range(tok(-1), tok(4));
    Range partial = new Range(tok(2), tok(4));
    Range right = new Range(tok(4), tok(-1));
    Range linvalid = new Range(tok(1), tok(4));
    Range rinvalid = new Range(tok(4), tok(6));

    // (zero,two] (two,four] (four, zero]
    for (int at : new int[] {4, 2}) {
      mt.split(tok(at));
    }
    for (Range unresolved : new Range[] {left, partial, right, linvalid, rinvalid}) {
      assertNull(mt.hash(unresolved));
    }

    // validate the range
    for (int at : new int[] {2, 4, -1}) {
      mt.get(tok(at)).hash(leafHash);
    }

    assertHashEquals(expectedLeft, mt.hash(left));
    assertHashEquals(expectedPartial, mt.hash(partial));
    assertHashEquals(leafHash, mt.hash(right));
    for (Range unresolved : new Range[] {linvalid, rinvalid}) {
      assertNull(mt.hash(unresolved));
    }
  }
  /**
   * Splits the tree into eight leaves and then compacts it step by step back to a single leaf,
   * checking size, leaf ranges and leaf depths after each stage.
   *
   * <p>Range tables below use rows of {probe token, expected left bound, expected right bound}.
   */
  @Test
  public void testCompact() {
    // (zero, one], (one,two], ... (seven, zero]
    for (int at : new int[] {4, 2, 6, 1, 3, 5, 7}) {
      mt.split(tok(at));
    }

    // compact (zero,two] and then (four,six]
    mt.compact(tok(1));
    mt.compact(tok(5));
    assertEquals(6, mt.size());
    int[][] afterFirstPass = {
      {2, -1, 2}, {3, 2, 3}, {4, 3, 4}, {5, 4, 6}, {7, 6, 7}, {-1, 7, -1}
    };
    for (int[] row : afterFirstPass) {
      assertEquals(new Range(tok(row[1]), tok(row[2])), mt.get(tok(row[0])));
    }
    // compacted ranges should be at depth 2, and the rest at 3
    for (int probe : new int[] {2, 6}) {
      assertEquals((byte) 2, mt.get(tok(probe)).depth);
    }
    for (int probe : new int[] {3, 4, 7, -1}) {
      assertEquals((byte) 3, mt.get(tok(probe)).depth);
    }

    // compact (two,four] and then (six,zero]
    mt.compact(tok(3));
    mt.compact(tok(7));
    assertEquals(4, mt.size());
    int[][] afterSecondPass = {{2, -1, 2}, {4, 2, 4}, {5, 4, 6}, {-1, 6, -1}};
    for (int[] row : afterSecondPass) {
      assertEquals(new Range(tok(row[1]), tok(row[2])), mt.get(tok(row[0])));
    }
    for (int probe : new int[] {2, 4, 5, -1}) {
      assertEquals((byte) 2, mt.get(tok(probe)).depth);
    }

    // compact (zero,four]
    mt.compact(tok(2));
    assertEquals(3, mt.size());
    int[][] afterThirdPass = {{2, -1, 4}, {6, 4, 6}, {-1, 6, -1}};
    for (int[] row : afterThirdPass) {
      assertEquals(new Range(tok(row[1]), tok(row[2])), mt.get(tok(row[0])));
    }

    // compact (four, zero]
    mt.compact(tok(6));
    assertEquals(2, mt.size());
    int[][] afterFourthPass = {{2, -1, 4}, {6, 4, -1}};
    for (int[] row : afterFourthPass) {
      assertEquals(new Range(tok(row[1]), tok(row[2])), mt.get(tok(row[0])));
    }
    for (int probe : new int[] {2, 6}) {
      assertEquals((byte) 1, mt.get(tok(probe)).depth);
    }

    // compact (zero, zero] (the root)
    mt.compact(tok(4));
    assertEquals(1, mt.size());
    TreeRange root = mt.get(tok(-1));
    assertEquals(new Range(tok(-1), tok(-1)), root);
    assertEquals((byte) 0, mt.get(tok(-1)).depth);
  }