Пример #1
0
  /**
   * Loads the blog post stored under {@code blogid} and opens a scanner over its comments.
   *
   * <p>The post is read from the "blogs" table: the text comes from the "body" family (empty
   * qualifier) and the title from "meta:title". The post date is derived from the number that
   * follows the first four characters of the row key, which is treated as a reverse timestamp.
   * Finally a scan over the "comment" family is opened for every row whose key starts with the
   * post's row key.
   *
   * @param blogid row key of the blog post to load
   * @throws IOException if HBase reports an error or no row exists for {@code blogid}
   */
  public Blog(String blogid) throws IOException {

    Configuration conf = HBaseConfiguration.create();
    table = new HTable(conf, "blogs");

    // If anything below fails the half-built object is unreachable, so nobody else could
    // ever close the table; release it here in that case.
    boolean initialized = false;
    try {
      // 1. Get the row whose row key is blogid from above
      Get g = new Get(Bytes.toBytes(blogid));
      Result r = table.get(g);
      if (r.isEmpty()) {
        // Without this check the missing row surfaces later as an unexplained
        // NullPointerException when the row key is parsed.
        throw new IOException("No blog found for row key " + blogid);
      }

      // 2. Extract the rowkey, blog text (column "body") and blog title
      // (column "meta:title")
      key = r.getRow();
      keyStr = Bytes.toString(key);
      blogText = Bytes.toString(r.getValue(Bytes.toBytes("body"), Bytes.toBytes("")));
      blogTitle = Bytes.toString(r.getValue(Bytes.toBytes("meta"), Bytes.toBytes("title")));
      long reverseTimestamp = Long.parseLong(keyStr.substring(4));
      long epoch = Math.abs(reverseTimestamp - Long.MAX_VALUE);
      dateOfPost = new Date(epoch);

      // Get an iterator for the comments
      Scan s = new Scan();
      // Every row matching the prefix sorts at or after the prefix itself, so starting the
      // scan there returns the same rows without reading the table from its first row.
      s.setStartRow(key);
      s.addFamily(Bytes.toBytes("comment"));
      // Use a PrefixFilter
      PrefixFilter filter = new PrefixFilter(key);
      s.setFilter(filter);
      scanner = table.getScanner(s);
      resultIterator = scanner.iterator();
      initialized = true;
    } finally {
      if (!initialized) {
        try {
          table.close();
        } catch (IOException ignored) {
          // Best effort: the failure that brought us here is the one worth reporting.
        }
      }
    }
  }
 /**
  * Scans the input and output families of {@code table} and checks, row by row, that the
  * second cell's value is the byte-wise reverse of the first cell's value.
  *
  * <p>The test fails if the scan returns no rows or if any row's values do not match.
  *
  * @param table Table to scan.
  * @throws IOException if the scan fails or a row holds more than two cells
  * @throws NullPointerException if we failed to find a cell value
  */
 private void verifyAttempt(final Table table) throws IOException, NullPointerException {
   Scan scan = new Scan();
   scan.addFamily(INPUT_FAMILY);
   scan.addFamily(OUTPUT_FAMILY);
   ResultScanner scanner = table.getScanner(scan);
   try {
     Iterator<Result> itr = scanner.iterator();
     assertTrue(itr.hasNext());
     while (itr.hasNext()) {
       Result r = itr.next();
       // This check must not depend on the log level: verification has to behave the same
       // whether or not debug logging happens to be enabled.
       if (r.size() > 2) {
         throw new IOException("Too many results, expected 2 got " + r.size());
       }

       // Pick up the first two cell values in the order the row returns them.
       byte[] firstValue = null;
       byte[] secondValue = null;
       int count = 0;
       for (Cell kv : r.listCells()) {
         if (count == 0) {
           firstValue = CellUtil.cloneValue(kv);
         } else if (count == 1) {
           secondValue = CellUtil.cloneValue(kv);
         }
         count++;
       }
       if (firstValue == null) {
         throw new NullPointerException(Bytes.toString(r.getRow()) + ": first value is null");
       }
       if (secondValue == null) {
         throw new NullPointerException(Bytes.toString(r.getRow()) + ": second value is null");
       }

       // Reverse the second value byte by byte so it can be compared with the first.
       byte[] secondReversed = new byte[secondValue.length];
       for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
         secondReversed[i] = secondValue[j];
       }
       String first = Bytes.toString(firstValue);
       String second = Bytes.toString(secondReversed);

       if (!first.equals(second)) {
         String mismatch =
             "second key is not the reverse of first. row="
                 + Bytes.toStringBinary(r.getRow())
                 + ", first value="
                 + first
                 + ", second value="
                 + second;
         if (LOG.isDebugEnabled()) {
           LOG.debug(mismatch);
         }
         // Carry the details in the failure itself so they are visible without debug logging.
         fail(mismatch);
       }
     }
   } finally {
     scanner.close();
   }
 }
Пример #3
0
 /**
  * Stores the graph and transaction and obtains the two iterators this iterable works with:
  * one over the rows of {@code resultScanner} and one over {@code transaction.addedEdges()}.
  *
  * @param graph graph this iterable belongs to
  * @param transaction transaction whose added edges are iterated
  * @param resultScanner scanner supplying the stored rows
  */
 public AttachedEdgeIterable(
     DuctileDBGraphImpl graph, DuctileDBTransactionImpl transaction, ResultScanner resultScanner) {
   // The superclass no-arg constructor is invoked implicitly.
   this.graph = graph;
   this.transaction = transaction;
   this.resultIterator = resultScanner.iterator();
   this.addedIterator = transaction.addedEdges().iterator();
 }
Пример #4
0
 /**
  * Converts every row produced by {@code resultScanner} with {@code mapper} and collects the
  * converted values in iteration order.
  *
  * <p>The scanner is only iterated here; it is not closed by this method.
  *
  * @param resultScanner source of the rows to convert
  * @param mapper converter applied to each row through {@code mapApi}
  * @param <V> type of the converted values
  * @return list holding one converted value per row
  * @throws Exception declared by the signature; NOTE(review): presumably raised by the mapper
  */
 private <V> List<V> trasfer(ResultScanner resultScanner, HbaseMapper<V> mapper) throws Exception {
   final List<V> mapped = new ArrayList<V>();
   for (Result row : resultScanner) {
     mapped.add(mapper.mapApi(row));
   }
   return mapped;
 }
Пример #5
0
  /**
   * Looks up the GC bin for a chromosome and GC content value.
   *
   * <p>The first scan requires "chr:name" to equal {@code chr} and applies a
   * {@code LESS_OR_EQUAL} filter on "gc:min" and a {@code GREATER_OR_EQUAL} filter on "gc:max",
   * both against {@code gcContent}. If that finds nothing and {@code gcContent > 0}, a second
   * scan is run without the "gc:min" filter. When several rows match, only the first one is
   * returned and a warning is logged.
   *
   * <p>Every scanner opened here is closed before the method returns or throws.
   *
   * @param chr chromosome name to match against "chr:name"
   * @param gcContent GC content value to locate a bin for
   * @return the result built from the first matching row
   * @throws IOException if no row matches or the scan itself fails
   */
  public GCResult getBinFor(String chr, int gcContent) throws IOException {
    log.debug("Get GC bin: " + chr + " " + gcContent);
    FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filters.addFilter(
        new SingleColumnValueFilter(
            Bytes.toBytes("chr"),
            Bytes.toBytes("name"),
            CompareFilter.CompareOp.EQUAL,
            Bytes.toBytes(chr)));
    filters.addFilter(
        new SingleColumnValueFilter(
            Bytes.toBytes("gc"),
            Bytes.toBytes("min"),
            CompareFilter.CompareOp.LESS_OR_EQUAL,
            Bytes.toBytes(gcContent)));
    filters.addFilter(
        new SingleColumnValueFilter(
            Bytes.toBytes("gc"),
            Bytes.toBytes("max"),
            CompareFilter.CompareOp.GREATER_OR_EQUAL,
            Bytes.toBytes(gcContent)));

    Scan scan = new Scan();
    scan.setFilter(filters);

    ResultScanner scanner = this.getScanner(scan);
    try {
      Iterator<Result> rI = scanner.iterator();
      if (!rI.hasNext() && gcContent > 0) {
        // it's possible that I've hit the max, might be a smarter way to do this with filters
        // but I can't think of one right now
        filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filters.addFilter(
            new SingleColumnValueFilter(
                Bytes.toBytes("chr"),
                Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes(chr)));
        filters.addFilter(
            new SingleColumnValueFilter(
                Bytes.toBytes("gc"),
                Bytes.toBytes("max"),
                CompareFilter.CompareOp.GREATER_OR_EQUAL,
                Bytes.toBytes(gcContent)));

        scan = new Scan();
        scan.setFilter(filters);

        // Release the exhausted first scanner before replacing it; clearing the reference
        // keeps the finally block from closing it a second time if the next call throws.
        scanner.close();
        scanner = null;
        scanner = this.getScanner(scan);
        rI = scanner.iterator();

        if (!rI.hasNext()) {
          log.warn("No GC bin for " + chr + " " + gcContent);
        }
      }

      if (!rI.hasNext()) {
        throw new IOException("Failed to retrieve any GC bins for chr " + chr + " GC=" + gcContent);
      }

      // only expect one result
      GCResult gcResult = createResult(rI.next());
      if (rI.hasNext()) {
        log.warn(
            "Found multiple matches for " + chr + " " + gcContent + " returning only the first.");
      }

      return gcResult;
    } finally {
      // Runs on both the success and the failure path so the scanner is never leaked.
      if (scanner != null) {
        scanner.close();
      }
    }
  }
Пример #6
0
  /**
   * Brings the backfilled data into the live cube table.
   *
   * <p>When a scan of column family {@code cf} on the live cube table yields no rows, the work
   * is handed to an {@code HBaseSnapshotter} and its result is returned. Otherwise a map-only
   * job using {@code HBaseBackfillMergeMapper} is configured from scans over the backfilled
   * table's region boundaries and run to completion.
   *
   * @return the snapshotter's result for an empty live cube, otherwise whether the job succeeded
   * @throws IOException if an HBase or job operation fails
   * @throws InterruptedException if waiting for the job is interrupted
   */
  public boolean runWithCheckedExceptions() throws IOException, InterruptedException {
    HTable liveCubeHTable = null;
    HTable backfilledHTable = null;
    ResultScanner liveCubeScanner = null;
    try {
      // If the live table is empty, we just efficiently copy the backfill in
      liveCubeHTable = new HTable(conf, liveCubeTableName);
      liveCubeScanner = liveCubeHTable.getScanner(cf);
      final boolean liveCubeHasRows = liveCubeScanner.iterator().hasNext();
      liveCubeScanner.close();

      if (!liveCubeHasRows) {
        log.info("Live cube is empty, running a straight copy from the backfill table");

        final Path snapshotHFiles = new Path("/tmp/backfill_snapshot_hfiles");
        HBaseSnapshotter snapshotter =
            new HBaseSnapshotter(
                conf, backfilledTableName, cf, liveCubeTableName, snapshotHFiles, true, null, null);
        return snapshotter.runWithCheckedExceptions();
      }

      // The live cube already holds data: merge the backfill in with a map-only job.
      Job job = new Job(conf);
      backfilledHTable = new HTable(conf, backfilledTableName);

      // Get the scans that will cover this table, and store them in the job configuration to
      // be used as input splits.
      Pair<byte[][], byte[][]> regionBoundaries = backfilledHTable.getStartEndKeys();
      byte[][] splitKeys = BackfillUtil.getSplitKeys(regionBoundaries);
      Collection<Scan> cubeScans = scansThisCubeOnly(cubeNameKeyPrefix, splitKeys);

      if (log.isDebugEnabled()) {
        log.debug("Scans: " + cubeScans);
      }
      CollectionInputFormat.setCollection(job, Scan.class, cubeScans);

      // We cannot allow map tasks to retry, or we could increment the same key multiple times.
      job.getConfiguration().set("mapred.map.max.attempts", "1");

      job.setJobName("DataCube HBase backfiller");
      job.setJarByClass(HBaseBackfillMerger.class);
      job.getConfiguration().set(CONFKEY_DESERIALIZER, opDeserializer.getName());
      job.setMapperClass(HBaseBackfillMergeMapper.class);
      job.setInputFormatClass(CollectionInputFormat.class);
      job.setNumReduceTasks(0); // No reducers, mappers do all the work
      job.setOutputFormatClass(NullOutputFormat.class);
      job.getConfiguration().set(CONFKEY_LIVECUBE_TABLE_NAME, new String(liveCubeTableName));
      job.getConfiguration().set(CONFKEY_SNAPSHOT_TABLE_NAME, new String(snapshotTableName));
      job.getConfiguration().set(CONFKEY_BACKFILLED_TABLE_NAME, new String(backfilledTableName));
      job.getConfiguration().set(CONFKEY_COLUMN_FAMILY, new String(cf));
      job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
      job.getConfiguration().set("mapred.reduce.tasks.speculative.execution", "false");

      try {
        job.waitForCompletion(true);
        return job.isSuccessful();
      } catch (ClassNotFoundException e) {
        log.error("", e);
        throw new RuntimeException(e);
      }
    } finally {
      // Release whatever was opened, in the order: scanner, live table, backfilled table.
      if (liveCubeScanner != null) {
        liveCubeScanner.close();
      }
      if (liveCubeHTable != null) {
        liveCubeHTable.close();
      }
      if (backfilledHTable != null) {
        backfilledHTable.close();
      }
    }
  }