/**
 * Reads the 2-byte qualifier of a data point cell and returns it as a short.
 *
 * @param kv The cell to inspect.
 * @return The qualifier decoded as a short.
 * @throws AssertionError if the cell's family differs from {@code TSDB.FAMILY}
 *     or if its qualifier is not exactly 2 bytes long.
 */
private short extractQualifier(final KeyValue kv) {
  final byte[] family = kv.family();
  if (!Bytes.equals(TSDB.FAMILY, family)) {
    throw new AssertionError("unexpected KeyValue family: " + Bytes.pretty(family));
  }

  final byte[] qualifier = kv.qualifier();
  if (qualifier.length == 2) {
    return Bytes.getShort(qualifier);
  }
  throw new AssertionError("Invalid qualifier length: " + Bytes.pretty(qualifier));
}
/**
 * Adds the value -257 (too small for a single byte) and checks that the
 * stored column decodes back to -257 when read as a short.
 */
@Test
public void addPointLong2BytesNegative() throws Exception {
  setupAddPointStorage();

  final HashMap<String, String> hostTag = new HashMap<String, String>(1);
  hostTag.put("host", "web01");
  tsdb.addPoint("sys.cpu.user", 1356998400, -257, hostTag).joinUninterruptibly();

  // Row key and qualifier under which the data point is expected to be stored.
  final byte[] expectedRowKey =
      new byte[] {0, 0, 1, 0x50, (byte) 0xE2, 0x27, 0, 0, 0, 1, 0, 0, 1};
  final byte[] expectedQualifier = new byte[] {0, 1};

  final byte[] stored = storage.getColumn(expectedRowKey, expectedQualifier);
  assertNotNull(stored);

  final short decoded = Bytes.getShort(stored);
  assertEquals(-257, decoded);
}
private static int fsck( final TSDB tsdb, final HBaseClient client, final byte[] table, final boolean fix, final String[] args) throws Exception { /** Callback to asynchronously delete a specific {@link KeyValue}. */ final class DeleteOutOfOrder implements Callback<Deferred<Object>, Object> { private final KeyValue kv; public DeleteOutOfOrder(final KeyValue kv) { this.kv = kv; } public Deferred<Object> call(final Object arg) { return client.delete(new DeleteRequest(table, kv.key(), kv.family(), kv.qualifier())); } public String toString() { return "delete out-of-order data"; } } int errors = 0; int correctable = 0; final short metric_width = width(tsdb, "metrics"); final short name_width = width(tsdb, "tag_names"); final short value_width = width(tsdb, "tag_values"); final ArrayList<Query> queries = new ArrayList<Query>(); CliQuery.parseCommandLineQuery(args, tsdb, queries, null, null); final StringBuilder buf = new StringBuilder(); for (final Query query : queries) { final long start_time = System.nanoTime(); long ping_start_time = start_time; LOG.info("Starting to fsck data covered by " + query); long kvcount = 0; long rowcount = 0; final Bytes.ByteMap<Seen> seen = new Bytes.ByteMap<Seen>(); final Scanner scanner = Core.getScanner(query); ArrayList<ArrayList<KeyValue>> rows; while ((rows = scanner.nextRows().joinUninterruptibly()) != null) { for (final ArrayList<KeyValue> row : rows) { rowcount++; // Take a copy of the row-key because we're going to zero-out the // timestamp and use that as a key in our `seen' map. 
final byte[] key = row.get(0).key().clone(); final long base_time = Bytes.getUnsignedInt(key, metric_width); for (int i = metric_width; i < metric_width + Const.TIMESTAMP_BYTES; i++) { key[i] = 0; } Seen prev = seen.get(key); if (prev == null) { prev = new Seen(base_time - 1, row.get(0)); seen.put(key, prev); } for (final KeyValue kv : row) { kvcount++; if (kvcount % 100000 == 0) { final long now = System.nanoTime(); ping_start_time = (now - ping_start_time) / 1000000; LOG.info( "... " + kvcount + " KV analyzed in " + ping_start_time + "ms (" + (100000 * 1000 / ping_start_time) + " KVs/s)"); ping_start_time = now; } if (kv.qualifier().length != 2) { LOG.warn( "Ignoring unsupported KV with a qualifier of " + kv.qualifier().length + " bytes:" + kv); continue; } final short qualifier = Bytes.getShort(kv.qualifier()); final short delta = (short) ((qualifier & 0xFFFF) >>> FLAG_BITS); final long timestamp = base_time + delta; byte[] value = kv.value(); if (value.length > 8) { errors++; LOG.error("Value more than 8 byte long with a 2-byte" + " qualifier.\n\t" + kv); } // TODO(tsuna): Don't hardcode 0x8 / 0x3 here. if ((qualifier & (0x8 | 0x3)) == (0x8 | 0x3)) { // float | 4 bytes // The qualifier says the value is on 4 bytes, and the value is // on 8 bytes, then the 4 MSBs must be 0s. Old versions of the // code were doing this. It's kinda sad. Some versions had a // bug whereby the value would be sign-extended, so we can // detect these values and fix them here. if (value.length == 8) { if (value[0] == -1 && value[1] == -1 && value[2] == -1 && value[3] == -1) { errors++; correctable++; if (fix) { value = value.clone(); // We're going to change it. 
value[0] = value[1] = value[2] = value[3] = 0; client.put(new PutRequest(table, kv.key(), kv.family(), kv.qualifier(), value)); } else { LOG.error( "Floating point value with 0xFF most significant" + " bytes, probably caused by sign extension bug" + " present in revisions [96908436..607256fc].\n" + "\t" + kv); } } else if (value[0] != 0 || value[1] != 0 || value[2] != 0 || value[3] != 0) { errors++; } } else if (value.length != 4) { errors++; LOG.error( "This floating point value must be encoded either on" + " 4 or 8 bytes, but it's on " + value.length + " bytes.\n\t" + kv); } } if (timestamp <= prev.timestamp()) { errors++; correctable++; if (fix) { final byte[] newkey = kv.key().clone(); // Fix the timestamp in the row key. final long new_base_time = (timestamp - (timestamp % Const.MAX_TIMESPAN)); Bytes.setInt(newkey, (int) new_base_time, metric_width); final short newqual = (short) ((timestamp - new_base_time) << FLAG_BITS | (qualifier & FLAGS_MASK)); final DeleteOutOfOrder delooo = new DeleteOutOfOrder(kv); if (timestamp < prev.timestamp()) { client .put( new PutRequest( table, newkey, kv.family(), Bytes.fromShort(newqual), value)) // Only delete the offending KV once we're sure that the new // KV has been persisted in HBase. .addCallbackDeferring(delooo); } else { // We have two data points at exactly the same timestamp. // This can happen when only the flags differ. This is // typically caused by one data point being an integer and // the other being a floating point value. In this case // we just delete the duplicate data point and keep the // first one we saw. delooo.call(null); } } else { buf.setLength(0); buf.append( timestamp < prev.timestamp() ? 
"Out of order data.\n\t" : "Duplicate data point with different flags.\n\t") .append(timestamp) .append(" (") .append(DumpSeries.date(timestamp)) .append(") @ ") .append(kv) .append("\n\t"); DumpSeries.formatKeyValue(buf, tsdb, kv, base_time); buf.append("\n\t was found after\n\t") .append(prev.timestamp) .append(" (") .append(DumpSeries.date(prev.timestamp)) .append(") @ ") .append(prev.kv) .append("\n\t"); DumpSeries.formatKeyValue( buf, tsdb, prev.kv, Bytes.getUnsignedInt(prev.kv.key(), metric_width)); LOG.error(buf.toString()); } } else { prev.setTimestamp(timestamp); prev.kv = kv; } } } } final long timing = (System.nanoTime() - start_time) / 1000000; System.out.println( kvcount + " KVs (in " + rowcount + " rows) analyzed in " + timing + "ms (~" + (kvcount * 1000 / timing) + " KV/s)"); } System.out.println(errors != 0 ? "Found " + errors + " errors." : "No error found."); if (!fix && correctable > 0) { System.out.println( correctable + " of these errors are automatically" + " correctable, re-run with --fix.\n" + "Make sure you understand the errors above and you" + " know what you're doing before using --fix."); } return errors; }