/**
 * Orders two cells by key, then family, then qualifier, then timestamp
 * (larger timestamps sort first), and finally by value.
 *
 * @param o1 The first cell.
 * @param o2 The second cell.
 * @return A negative integer, zero, or a positive integer as {@code o1}
 * sorts before, equal to, or after {@code o2}.
 */
@Override
public int compare(KeyValue o1, KeyValue o2) {
  int d;
  if ((d = Bytes.memcmp(o1.key(), o2.key())) != 0) {
    return d;
  }
  if ((d = Bytes.memcmp(o1.family(), o2.family())) != 0) {
    return d;
  }
  if ((d = Bytes.memcmp(o1.qualifier(), o2.qualifier())) != 0) {
    return d;
  }
  // Compare the timestamps directly instead of taking the sign of their
  // difference: the subtraction can overflow for operands that are far
  // apart, which would flip the sign and break the ordering.
  final long ts1 = o1.timestamp();
  final long ts2 = o2.timestamp();
  if (ts1 != ts2) {
    // Reverse order: the cell with the larger timestamp comes first.
    return ts2 < ts1 ? -1 : 1;
  }
  return Bytes.memcmp(o1.value(), o2.value());
}
/**
 * Reads the qualifier of a data point cell and returns it as a short.
 *
 * @param kv The cell to read the qualifier from.
 * @return The qualifier decoded as a {@code short}.
 * @throws AssertionError if the cell's family differs from
 * {@code TSDB.FAMILY} or if the qualifier isn't exactly 2 bytes long.
 */
private short extractQualifier(final KeyValue kv) {
  final byte[] family = kv.family();
  final boolean known_family = Bytes.equals(TSDB.FAMILY, family);
  if (!known_family) {
    throw new AssertionError("unexpected KeyValue family: "
                             + Bytes.pretty(family));
  }

  final byte[] qualifier = kv.qualifier();
  if (qualifier.length == 2) {
    return Bytes.getShort(qualifier);
  }
  throw new AssertionError("Invalid qualifier length: "
                           + Bytes.pretty(qualifier));
}
/**
 * Emits one set of values per {@code KeyValue} found in the list stored at
 * index 0 of the input tuple.
 * <p>
 * For each cell, the enabled parts (row key, family, qualifier, value,
 * timestamp) are appended in that order. Each byte-array part goes through
 * its configured deserializer when one is set, and is otherwise turned
 * into a {@code String}.
 *
 * @param tuple The input tuple; its value at index 0 is cast to a
 * {@code List<KeyValue>}.
 * @param collector The collector the resulting values are emitted to.
 */
@Override
@SuppressWarnings({"rawtypes", "unchecked"})
public void execute(TridentTuple tuple, TridentCollector collector) {
  List<KeyValue> keyValueList = (List<KeyValue>) tuple.getValue(0);
  for (KeyValue keyValue : keyValueList) {
    Values values = new Values();

    // NOTE(review): the new String(byte[]) fallbacks below decode with the
    // platform default charset; confirm this is what callers expect.
    if (this.rowKey) {
      if (this.rowKeyDeserializer != null) {
        values.add(this.rowKeyDeserializer.deserialize(keyValue.key()));
      } else {
        values.add(new String(keyValue.key()));
      }
    }

    if (this.family) {
      if (this.columnFamilyDeserializer != null) {
        values.add(this.columnFamilyDeserializer.deserialize(keyValue.family()));
      } else {
        values.add(new String(keyValue.family()));
      }
    }

    if (this.qualifier) {
      if (this.columnQualifierDeserializer != null) {
        // Use the qualifier's own deserializer. The family deserializer was
        // used here before, which decoded the qualifier with the wrong
        // deserializer (or failed when no family deserializer was set).
        values.add(this.columnQualifierDeserializer.deserialize(keyValue.qualifier()));
      } else {
        values.add(new String(keyValue.qualifier()));
      }
    }

    if (this.value) {
      if (this.valueDeserializer != null) {
        values.add(this.valueDeserializer.deserialize(keyValue.value()));
      } else {
        values.add(new String(keyValue.value()));
      }
    }

    if (this.timestamp) {
      values.add(keyValue.timestamp());
    }

    collector.emit(values);
  }
}
/**
 * Builds and sends a delete request for every column of the given row
 * whose qualifier is longer than {@code RULE_PREFIX} and starts with it.
 *
 * @param row The cells of the row, may be {@code null} or empty.
 * @return A deferred holding {@code null} when there is no row to process,
 * otherwise the deferred returned by the client's delete call.
 */
@Override
public Deferred<Object> call(final ArrayList<KeyValue> row)
  throws Exception {
  if (row == null || row.isEmpty()) {
    return Deferred.fromResult(null);
  }

  final int prefix_length = RULE_PREFIX.length;
  final ArrayList<byte[]> matches = new ArrayList<byte[]>(row.size());
  for (final KeyValue cell : row) {
    final byte[] qualifier = cell.qualifier();
    if (qualifier.length <= prefix_length) {
      continue;  // Too short to be a prefix followed by something.
    }
    if (Bytes.memcmp(RULE_PREFIX, qualifier, 0, prefix_length) != 0) {
      continue;  // Doesn't start with the prefix.
    }
    matches.add(qualifier);
  }

  final DeleteRequest delete = new DeleteRequest(
      tsdb.treeTable(),
      Tree.idToBytes(tree_id),
      Tree.TREE_FAMILY(),
      matches.toArray(new byte[matches.size()][]));
  return tsdb.getClient().delete(delete);
}
int indexOf(final byte[] family, final byte[] qualifier) { KeyValue searchTerm = new KeyValue(getRowKey(), family, qualifier, HBaseClient.EMPTY_ARRAY); int pos = Collections.binarySearch(kvList, searchTerm, KV_COMPARATOR); // never will exact match if (pos < 0) { pos = (pos + 1) * -1; // pos is now insertion point } if (pos == kvList.size()) { return -1; // doesn't exist } KeyValue kv = kvList.get(pos); return (Bytes.equals(family, kv.family()) && Bytes.equals(qualifier, kv.qualifier())) ? pos : -1; }
/**
 * Asks the client to delete the column identified by the key, family and
 * qualifier of {@code kv}.
 *
 * @param arg Ignored.
 * @return The deferred returned by the client's delete call.
 */
public Deferred<Object> call(final Object arg) {
  final DeleteRequest request =
    new DeleteRequest(table, kv.key(), kv.family(), kv.qualifier());
  return client.delete(request);
}
private static int fsck( final TSDB tsdb, final HBaseClient client, final byte[] table, final boolean fix, final String[] args) throws Exception { /** Callback to asynchronously delete a specific {@link KeyValue}. */ final class DeleteOutOfOrder implements Callback<Deferred<Object>, Object> { private final KeyValue kv; public DeleteOutOfOrder(final KeyValue kv) { this.kv = kv; } public Deferred<Object> call(final Object arg) { return client.delete(new DeleteRequest(table, kv.key(), kv.family(), kv.qualifier())); } public String toString() { return "delete out-of-order data"; } } int errors = 0; int correctable = 0; final short metric_width = width(tsdb, "metrics"); final short name_width = width(tsdb, "tag_names"); final short value_width = width(tsdb, "tag_values"); final ArrayList<Query> queries = new ArrayList<Query>(); CliQuery.parseCommandLineQuery(args, tsdb, queries, null, null); final StringBuilder buf = new StringBuilder(); for (final Query query : queries) { final long start_time = System.nanoTime(); long ping_start_time = start_time; LOG.info("Starting to fsck data covered by " + query); long kvcount = 0; long rowcount = 0; final Bytes.ByteMap<Seen> seen = new Bytes.ByteMap<Seen>(); final Scanner scanner = Core.getScanner(query); ArrayList<ArrayList<KeyValue>> rows; while ((rows = scanner.nextRows().joinUninterruptibly()) != null) { for (final ArrayList<KeyValue> row : rows) { rowcount++; // Take a copy of the row-key because we're going to zero-out the // timestamp and use that as a key in our `seen' map. 
final byte[] key = row.get(0).key().clone(); final long base_time = Bytes.getUnsignedInt(key, metric_width); for (int i = metric_width; i < metric_width + Const.TIMESTAMP_BYTES; i++) { key[i] = 0; } Seen prev = seen.get(key); if (prev == null) { prev = new Seen(base_time - 1, row.get(0)); seen.put(key, prev); } for (final KeyValue kv : row) { kvcount++; if (kvcount % 100000 == 0) { final long now = System.nanoTime(); ping_start_time = (now - ping_start_time) / 1000000; LOG.info( "... " + kvcount + " KV analyzed in " + ping_start_time + "ms (" + (100000 * 1000 / ping_start_time) + " KVs/s)"); ping_start_time = now; } if (kv.qualifier().length != 2) { LOG.warn( "Ignoring unsupported KV with a qualifier of " + kv.qualifier().length + " bytes:" + kv); continue; } final short qualifier = Bytes.getShort(kv.qualifier()); final short delta = (short) ((qualifier & 0xFFFF) >>> FLAG_BITS); final long timestamp = base_time + delta; byte[] value = kv.value(); if (value.length > 8) { errors++; LOG.error("Value more than 8 byte long with a 2-byte" + " qualifier.\n\t" + kv); } // TODO(tsuna): Don't hardcode 0x8 / 0x3 here. if ((qualifier & (0x8 | 0x3)) == (0x8 | 0x3)) { // float | 4 bytes // The qualifier says the value is on 4 bytes, and the value is // on 8 bytes, then the 4 MSBs must be 0s. Old versions of the // code were doing this. It's kinda sad. Some versions had a // bug whereby the value would be sign-extended, so we can // detect these values and fix them here. if (value.length == 8) { if (value[0] == -1 && value[1] == -1 && value[2] == -1 && value[3] == -1) { errors++; correctable++; if (fix) { value = value.clone(); // We're going to change it. 
value[0] = value[1] = value[2] = value[3] = 0; client.put(new PutRequest(table, kv.key(), kv.family(), kv.qualifier(), value)); } else { LOG.error( "Floating point value with 0xFF most significant" + " bytes, probably caused by sign extension bug" + " present in revisions [96908436..607256fc].\n" + "\t" + kv); } } else if (value[0] != 0 || value[1] != 0 || value[2] != 0 || value[3] != 0) { errors++; } } else if (value.length != 4) { errors++; LOG.error( "This floating point value must be encoded either on" + " 4 or 8 bytes, but it's on " + value.length + " bytes.\n\t" + kv); } } if (timestamp <= prev.timestamp()) { errors++; correctable++; if (fix) { final byte[] newkey = kv.key().clone(); // Fix the timestamp in the row key. final long new_base_time = (timestamp - (timestamp % Const.MAX_TIMESPAN)); Bytes.setInt(newkey, (int) new_base_time, metric_width); final short newqual = (short) ((timestamp - new_base_time) << FLAG_BITS | (qualifier & FLAGS_MASK)); final DeleteOutOfOrder delooo = new DeleteOutOfOrder(kv); if (timestamp < prev.timestamp()) { client .put( new PutRequest( table, newkey, kv.family(), Bytes.fromShort(newqual), value)) // Only delete the offending KV once we're sure that the new // KV has been persisted in HBase. .addCallbackDeferring(delooo); } else { // We have two data points at exactly the same timestamp. // This can happen when only the flags differ. This is // typically caused by one data point being an integer and // the other being a floating point value. In this case // we just delete the duplicate data point and keep the // first one we saw. delooo.call(null); } } else { buf.setLength(0); buf.append( timestamp < prev.timestamp() ? 
"Out of order data.\n\t" : "Duplicate data point with different flags.\n\t") .append(timestamp) .append(" (") .append(DumpSeries.date(timestamp)) .append(") @ ") .append(kv) .append("\n\t"); DumpSeries.formatKeyValue(buf, tsdb, kv, base_time); buf.append("\n\t was found after\n\t") .append(prev.timestamp) .append(" (") .append(DumpSeries.date(prev.timestamp)) .append(") @ ") .append(prev.kv) .append("\n\t"); DumpSeries.formatKeyValue( buf, tsdb, prev.kv, Bytes.getUnsignedInt(prev.kv.key(), metric_width)); LOG.error(buf.toString()); } } else { prev.setTimestamp(timestamp); prev.kv = kv; } } } } final long timing = (System.nanoTime() - start_time) / 1000000; System.out.println( kvcount + " KVs (in " + rowcount + " rows) analyzed in " + timing + "ms (~" + (kvcount * 1000 / timing) + " KV/s)"); } System.out.println(errors != 0 ? "Found " + errors + " errors." : "No error found."); if (!fix && correctable > 0) { System.out.println( correctable + " of these errors are automatically" + " correctable, re-run with --fix.\n" + "Make sure you understand the errors above and you" + " know what you're doing before using --fix."); } return errors; }