Пример #1
0
  /**
   * Builds the {@link Scanner} that covers the row-key range of this query.
   *
   * <p>Both boundary keys consist of this query's metric ID followed by a timestamp. The
   * scanner is restricted to {@code TSDB.FAMILY}, and a row-key filter is installed only when
   * the query has tags or group-bys.
   *
   * @return A scanner positioned on {@code tsdb.table}, ready to be iterated.
   * @throws HBaseException as declared; nothing in this method throws it directly.
   */
  Scanner getScanner() throws HBaseException {
    final short metric_width = tsdb.metrics.width();
    final int key_length = metric_width + Const.TIMESTAMP_BYTES;
    final byte[] start_row = new byte[key_length];
    final byte[] end_row = new byte[key_length];

    // The scan is deliberately widened by at least one row on each side of the
    // requested time range: the exact timestamps we were given most likely
    // fall in the middle of a row, and rate calculation / downsampling need a
    // few extra data points around the "edges" of the graph.
    Bytes.setInt(start_row, (int) getScanStartTime(), metric_width);

    final int stop_time;
    if (end_time == UNSET) {
      stop_time = -1; // Will scan until the end (0xFFF...).
    } else {
      stop_time = (int) getScanEndTime();
    }
    Bytes.setInt(end_row, stop_time, metric_width);

    // Both boundaries share the same metric ID prefix.
    System.arraycopy(metric, 0, start_row, 0, metric_width);
    System.arraycopy(metric, 0, end_row, 0, metric_width);

    final Scanner scanner = tsdb.client.newScanner(tsdb.table);
    scanner.setStartKey(start_row);
    scanner.setStopKey(end_row);
    // Without tags or group-bys every row of the metric is wanted: no filter.
    if (tags.size() > 0 || group_bys != null) {
      createAndSetFilter(scanner);
    }
    scanner.setFamily(TSDB.FAMILY);
    return scanner;
  }
Пример #2
0
 /**
  * Scans the HBase table for this query and groups every returned row into a {@link Span}.
  *
  * <p>The time spent waiting on HBase is accumulated (in milliseconds) and recorded in
  * {@code scanlatency} once the scan ends, whether it succeeded or not.
  *
  * @return A map from HBase row key to the {@link Span} holding the rows for that key, or
  *     {@code null} if the scan returned no row at all. Keys are compared with {@code SpanCmp},
  *     so the key stored for a span is the key of the first row that was added to it.
  * @throws HBaseException if there was a problem communicating with HBase to perform the search.
  * @throws IllegalDataException if HBase returned a row whose key does not start with the metric
  *     ID of this query.
  */
 private TreeMap<byte[], Span> findSpans() throws HBaseException {
   final short metric_width = tsdb.metrics.width();
   // Keyed by HBase row key; key ordering and equality are delegated to SpanCmp
   // (presumably ignoring the timestamp part of the key -- confirm in SpanCmp).
   final TreeMap<byte[], Span> spans = new TreeMap<byte[], Span>(new SpanCmp(metric_width));
   int row_count = 0;
   int elapsed_ms = 0; // Time attributed to HBase, in milliseconds.
   long last_mark = System.nanoTime();
   final Scanner scanner = getScanner();
   // NOTE(review): the scanner is never explicitly closed here if an exception
   // interrupts the scan -- confirm whether that is acceptable.
   try {
     for (;;) {
       final ArrayList<ArrayList<KeyValue>> batch = scanner.nextRows().joinUninterruptibly();
       if (batch == null) {
         break; // The scanner is exhausted.
       }
       elapsed_ms += (System.nanoTime() - last_mark) / 1000000;
       for (final ArrayList<KeyValue> row : batch) {
         final byte[] key = row.get(0).key();
         // Sanity check: every row must belong to the metric we asked for.
         final boolean same_metric = Bytes.memcmp(metric, key, 0, metric_width) == 0;
         if (!same_metric) {
           throw new IllegalDataException(
               "HBase returned a row that doesn't match our scanner ("
                   + scanner
                   + ")! "
                   + row
                   + " does not start with "
                   + Arrays.toString(metric));
         }
         Span span = spans.get(key);
         if (span == null) {
           span = new Span(tsdb);
           spans.put(key, span);
         }
         span.addRow(tsdb.compact(row));
         row_count++;
         // Restart the clock so that local processing isn't billed to HBase.
         last_mark = System.nanoTime();
       }
     }
   } catch (RuntimeException e) {
     throw e;
   } catch (Exception e) {
     throw new RuntimeException("Should never be here", e);
   } finally {
     elapsed_ms += (System.nanoTime() - last_mark) / 1000000;
     scanlatency.add(elapsed_ms);
   }
   LOG.info(this + " matched " + row_count + " rows in " + spans.size() + " spans");
   return row_count == 0 ? null : spans;
 }
Пример #3
0
  /**
   * Installs a server-side row-key regular expression on the given scanner.
   *
   * <p>The expression skips the metric ID and timestamp at the start of the key, then requires
   * each of this query's tags and group-by tag names to appear, in sorted order, with any number
   * of other tags allowed before, between and after them. A group-by matches any value ID unless
   * specific values are listed for it in {@code group_by_values}.
   *
   * <p>Side effect: {@code group_bys}, when non-null, is sorted in place with
   * {@code Bytes.MEMCMP}.
   *
   * @param scanner The scanner on which to add the filter.
   */
  void createAndSetFilter(final Scanner scanner) {
    if (group_bys != null) {
      Collections.sort(group_bys, Bytes.MEMCMP);
    }
    final short name_width = tsdb.tag_names.width();
    final short value_width = tsdb.tag_values.width();
    final short tagsize = (short) (name_width + value_width);

    // Generate a regexp for our tags.  Say we have 2 tags: { 0 0 1 0 0 2 }
    // and { 4 5 6 9 8 7 }, the regexp will be:
    // "^.{7}(?:.{6})*\\Q\000\000\001\000\000\002\\E(?:.{6})*\\Q\004\005\006\011\010\007\\E(?:.{6})*$"
    //
    // Pre-size the buffer to avoid re-allocations:
    //   15             for "^.{N}" + "(?:.{M})*" + "$"
    //   13 + tagsize   for "(?:.{M})*\\Q" + tagsize bytes + "\\E", once per tag
    // Group-bys are counted three times to leave room for value alternatives.
    final int per_tag_length = 13 + tagsize;
    final int tag_count = tags.size();
    final int group_by_slots = (group_bys == null ? 0 : group_bys.size() * 3);
    final StringBuilder buf =
        new StringBuilder(15 + (per_tag_length * (tag_count + group_by_slots)));

    // Ensure we use the DOTALL flag, then skip the metric ID and timestamp.
    buf.append("(?s)^.{");
    buf.append(tsdb.metrics.width() + Const.TIMESTAMP_BYTES);
    buf.append("}");

    final Iterator<byte[]> tag_iter = tags.iterator();
    final Iterator<byte[]> group_by_iter;
    if (group_bys == null) {
      group_by_iter = Collections.<byte[]>emptyList().iterator();
    } else {
      group_by_iter = group_bys.iterator();
    }
    byte[] tag = tag_iter.hasNext() ? tag_iter.next() : null;
    byte[] group_by = group_by_iter.hasNext() ? group_by_iter.next() : null;

    // Tags and group_bys are already sorted.  They must appear in the regexp
    // in order by ID, so this is a plain merge of two sorted lists.
    do {
      // Skip any number of tags, then open a quoted section for the next ID.
      buf.append("(?:.{").append(tagsize).append("})*\\Q");

      if (isTagNext(name_width, tag, group_by)) {
        addId(buf, tag);
        tag = tag_iter.hasNext() ? tag_iter.next() : null;
        continue;
      }

      // Otherwise the next ID to emit is a group_by tag name.
      addId(buf, group_by);
      final byte[][] value_ids;
      if (group_by_values == null) {
        value_ids = null;
      } else {
        value_ids = group_by_values.get(group_by);
      }
      if (value_ids == null) {
        // We don't want any specific ID: accept any value ID.
        buf.append(".{").append(value_width).append('}');
      } else {
        // We want specific IDs.  List them: /(AAA|BBB|CCC|..)/
        buf.append("(?:");
        for (final byte[] value_id : value_ids) {
          buf.append("\\Q");
          addId(buf, value_id);
          buf.append('|');
        }
        // Turn the pipe left by the last iteration into the closing paren.
        buf.setCharAt(buf.length() - 1, ')');
      }
      group_by = group_by_iter.hasNext() ? group_by_iter.next() : null;
    } while (tag != group_by); // Reference comparison: stop when both become null.

    // Skip any number of tags before the end.
    buf.append("(?:.{").append(tagsize).append("})*$");
    scanner.setKeyRegexp(buf.toString(), CHARSET);
  }
Пример #4
0
  /**
   * Checks the data points covered by the queries given on the command line and reports (or,
   * optionally, repairs) the inconsistencies found.
   *
   * <p>For every {@link KeyValue} returned by the scanners, this looks for:
   *
   * <ul>
   *   <li>values longer than 8 bytes behind a 2-byte qualifier;
   *   <li>floating point values flagged as 4 bytes long but stored on a different length, or
   *       stored on 8 bytes with non-zero most significant bytes;
   *   <li>data points that are out of order, or duplicated, within a time series.
   * </ul>
   *
   * <p>When {@code fix} is set, puts and deletes are issued through {@code client}; the
   * resulting {@code Deferred}s are not waited on by this method.
   *
   * @param tsdb The TSDB instance, used to obtain ID widths and to parse / format data.
   * @param client The HBase client used to write fixes.
   * @param table The table in which fixes are written.
   * @param fix Whether correctable errors should be repaired instead of just being logged.
   * @param args The command line arguments describing the queries to check.
   * @return The number of errors found.
   * @throws Exception if parsing the queries or scanning fails.
   */
  private static int fsck(
      final TSDB tsdb,
      final HBaseClient client,
      final byte[] table,
      final boolean fix,
      final String[] args)
      throws Exception {

    /** Callback to asynchronously delete a specific {@link KeyValue}. */
    final class DeleteOutOfOrder implements Callback<Deferred<Object>, Object> {

      private final KeyValue kv;

      public DeleteOutOfOrder(final KeyValue kv) {
        this.kv = kv;
      }

      public Deferred<Object> call(final Object arg) {
        return client.delete(new DeleteRequest(table, kv.key(), kv.family(), kv.qualifier()));
      }

      public String toString() {
        return "delete out-of-order data";
      }
    }

    int errors = 0;
    int correctable = 0;

    final short metric_width = width(tsdb, "metrics");
    final short name_width = width(tsdb, "tag_names");
    final short value_width = width(tsdb, "tag_values");

    final ArrayList<Query> queries = new ArrayList<Query>();
    CliQuery.parseCommandLineQuery(args, tsdb, queries, null, null);
    final StringBuilder buf = new StringBuilder();
    for (final Query query : queries) {
      final long start_time = System.nanoTime();
      long ping_start_time = start_time;
      LOG.info("Starting to fsck data covered by " + query);
      long kvcount = 0;
      long rowcount = 0;
      // Last data point seen for each time series, keyed by the row key with
      // its timestamp zeroed out.
      final Bytes.ByteMap<Seen> seen = new Bytes.ByteMap<Seen>();
      final Scanner scanner = Core.getScanner(query);
      ArrayList<ArrayList<KeyValue>> rows;
      while ((rows = scanner.nextRows().joinUninterruptibly()) != null) {
        for (final ArrayList<KeyValue> row : rows) {
          rowcount++;
          // Take a copy of the row-key because we're going to zero-out the
          // timestamp and use that as a key in our `seen' map.
          final byte[] key = row.get(0).key().clone();
          final long base_time = Bytes.getUnsignedInt(key, metric_width);
          for (int i = metric_width; i < metric_width + Const.TIMESTAMP_BYTES; i++) {
            key[i] = 0;
          }
          Seen prev = seen.get(key);
          if (prev == null) {
            // First row of this series: start just before the row's base time
            // so that a data point at delta 0 isn't flagged as a duplicate.
            prev = new Seen(base_time - 1, row.get(0));
            seen.put(key, prev);
          }
          for (final KeyValue kv : row) {
            kvcount++;
            if (kvcount % 100000 == 0) {
              final long now = System.nanoTime();
              final long elapsed_ms = (now - ping_start_time) / 1000000;
              // Clamp the divisor: an elapsed time below 1ms would otherwise
              // cause a division by zero.
              final long rate = 100000 * 1000 / Math.max(1L, elapsed_ms);
              LOG.info(
                  "... "
                      + kvcount
                      + " KV analyzed in "
                      + elapsed_ms
                      + "ms ("
                      + rate
                      + " KVs/s)");
              ping_start_time = now;
            }
            if (kv.qualifier().length != 2) {
              LOG.warn(
                  "Ignoring unsupported KV with a qualifier of "
                      + kv.qualifier().length
                      + " bytes:"
                      + kv);
              continue;
            }
            // The qualifier packs the time delta (upper bits) and the flags
            // (lower FLAG_BITS bits).
            final short qualifier = Bytes.getShort(kv.qualifier());
            final short delta = (short) ((qualifier & 0xFFFF) >>> FLAG_BITS);
            final long timestamp = base_time + delta;
            byte[] value = kv.value();
            if (value.length > 8) {
              errors++;
              LOG.error("Value more than 8 byte long with a 2-byte" + " qualifier.\n\t" + kv);
            }
            // TODO(tsuna): Don't hardcode 0x8 / 0x3 here.
            if ((qualifier & (0x8 | 0x3)) == (0x8 | 0x3)) { // float | 4 bytes
              // The qualifier says the value is on 4 bytes, and the value is
              // on 8 bytes, then the 4 MSBs must be 0s.  Old versions of the
              // code were doing this.  It's kinda sad.  Some versions had a
              // bug whereby the value would be sign-extended, so we can
              // detect these values and fix them here.
              if (value.length == 8) {
                if (value[0] == -1 && value[1] == -1 && value[2] == -1 && value[3] == -1) {
                  errors++;
                  correctable++;
                  if (fix) {
                    value = value.clone(); // We're going to change it.
                    value[0] = value[1] = value[2] = value[3] = 0;
                    client.put(new PutRequest(table, kv.key(), kv.family(), kv.qualifier(), value));
                  } else {
                    LOG.error(
                        "Floating point value with 0xFF most significant"
                            + " bytes, probably caused by sign extension bug"
                            + " present in revisions [96908436..607256fc].\n"
                            + "\t"
                            + kv);
                  }
                } else if (value[0] != 0 || value[1] != 0 || value[2] != 0 || value[3] != 0) {
                  errors++;
                  // Report the offending KV so that the error count printed at
                  // the end can be traced back to actual data.
                  LOG.error(
                      "Floating point value flagged as being on 4 bytes is stored"
                          + " on 8 bytes and its 4 most significant bytes are not 0.\n\t"
                          + kv);
                }
              } else if (value.length != 4) {
                errors++;
                LOG.error(
                    "This floating point value must be encoded either on"
                        + " 4 or 8 bytes, but it's on "
                        + value.length
                        + " bytes.\n\t"
                        + kv);
              }
            }
            if (timestamp <= prev.timestamp()) {
              errors++;
              correctable++;
              if (fix) {
                final byte[] newkey = kv.key().clone();
                // Fix the timestamp in the row key.
                final long new_base_time = (timestamp - (timestamp % Const.MAX_TIMESPAN));
                Bytes.setInt(newkey, (int) new_base_time, metric_width);
                final short newqual =
                    (short) ((timestamp - new_base_time) << FLAG_BITS | (qualifier & FLAGS_MASK));
                final DeleteOutOfOrder delooo = new DeleteOutOfOrder(kv);
                if (timestamp < prev.timestamp()) {
                  client
                      .put(
                          new PutRequest(
                              table, newkey, kv.family(), Bytes.fromShort(newqual), value))
                      // Only delete the offending KV once we're sure that the new
                      // KV has been persisted in HBase.
                      .addCallbackDeferring(delooo);
                } else {
                  // We have two data points at exactly the same timestamp.
                  // This can happen when only the flags differ.  This is
                  // typically caused by one data point being an integer and
                  // the other being a floating point value.  In this case
                  // we just delete the duplicate data point and keep the
                  // first one we saw.
                  delooo.call(null);
                }
              } else {
                buf.setLength(0);
                buf.append(
                        timestamp < prev.timestamp()
                            ? "Out of order data.\n\t"
                            : "Duplicate data point with different flags.\n\t")
                    .append(timestamp)
                    .append(" (")
                    .append(DumpSeries.date(timestamp))
                    .append(") @ ")
                    .append(kv)
                    .append("\n\t");
                DumpSeries.formatKeyValue(buf, tsdb, kv, base_time);
                buf.append("\n\t  was found after\n\t")
                    .append(prev.timestamp)
                    .append(" (")
                    .append(DumpSeries.date(prev.timestamp))
                    .append(") @ ")
                    .append(prev.kv)
                    .append("\n\t");
                DumpSeries.formatKeyValue(
                    buf, tsdb, prev.kv, Bytes.getUnsignedInt(prev.kv.key(), metric_width));
                LOG.error(buf.toString());
              }
            } else {
              prev.setTimestamp(timestamp);
              prev.kv = kv;
            }
          }
        }
      }
      final long timing = (System.nanoTime() - start_time) / 1000000;
      // Clamp the divisor: a scan that completes in under 1ms (e.g. one that
      // matches nothing) would otherwise cause a division by zero.
      final long kv_per_sec = kvcount * 1000 / Math.max(1L, timing);
      System.out.println(
          kvcount
              + " KVs (in "
              + rowcount
              + " rows) analyzed in "
              + timing
              + "ms (~"
              + kv_per_sec
              + " KV/s)");
    }

    System.out.println(errors != 0 ? "Found " + errors + " errors." : "No error found.");
    if (!fix && correctable > 0) {
      System.out.println(
          correctable
              + " of these errors are automatically"
              + " correctable, re-run with --fix.\n"
              + "Make sure you understand the errors above and you"
              + " know what you're doing before using --fix.");
    }
    return errors;
  }