public Blog(String blogid) throws IOException {
  Configuration conf = HBaseConfiguration.create();
  table = new HTable(conf, "blogs");

  // 1. Get the row whose row key is blogid from above
  Get g = new Get(Bytes.toBytes(blogid));
  Result r = table.get(g);

  // 2. Extract the rowkey, blog text (column "body") and blog title
  // (column "meta:title")
  key = r.getRow();
  keyStr = Bytes.toString(key);
  blogText = Bytes.toString(r.getValue(Bytes.toBytes("body"), Bytes.toBytes("")));
  blogTitle = Bytes.toString(r.getValue(Bytes.toBytes("meta"), Bytes.toBytes("title")));

  // Decode the reverse timestamp embedded in the row key back into an epoch time.
  Long reverseTimestamp = Long.parseLong(keyStr.substring(4));
  Long epoch = Math.abs(reverseTimestamp - Long.MAX_VALUE);
  dateOfPost = new Date(epoch);

  // Get an iterator for the comments
  Scan s = new Scan();
  s.addFamily(Bytes.toBytes("comment"));
  // Use a PrefixFilter so the scan only returns this blog's comment rows
  PrefixFilter filter = new PrefixFilter(key);
  s.setFilter(filter);
  scanner = table.getScanner(s);
  resultIterator = scanner.iterator();
}
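// A minimal sketch of how a matching row key could be built when a blog is first
// stored, assuming a 4-character "blog" prefix (implied by keyStr.substring(4)
// above) and a reverse timestamp of Long.MAX_VALUE - creationTimeMillis so that
// newer posts sort first. The prefix and layout are assumptions, not confirmed
// by the source.
String makeBlogRowKey(long creationTimeMillis) {
  long reverseTimestamp = Long.MAX_VALUE - creationTimeMillis;
  return "blog" + reverseTimestamp; // decoded above as Math.abs(reverseTimestamp - Long.MAX_VALUE)
}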
/**
 * Looks at every value of the mapreduce output and verifies that indeed the values have been
 * reversed.
 *
 * @param table Table to scan.
 * @throws IOException
 * @throws NullPointerException if we failed to find a cell value
 */
private void verifyAttempt(final Table table) throws IOException, NullPointerException {
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  scan.addFamily(OUTPUT_FAMILY);
  ResultScanner scanner = table.getScanner(scan);
  try {
    Iterator<Result> itr = scanner.iterator();
    assertTrue(itr.hasNext());
    while (itr.hasNext()) {
      Result r = itr.next();
      if (LOG.isDebugEnabled()) {
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " + r.size());
        }
      }
      byte[] firstValue = null;
      byte[] secondValue = null;
      int count = 0;
      for (Cell kv : r.listCells()) {
        if (count == 0) {
          firstValue = CellUtil.cloneValue(kv);
        } else if (count == 1) {
          secondValue = CellUtil.cloneValue(kv);
        } else if (count == 2) {
          break;
        }
        count++;
      }
      String first = "";
      if (firstValue == null) {
        throw new NullPointerException(Bytes.toString(r.getRow()) + ": first value is null");
      }
      first = Bytes.toString(firstValue);
      String second = "";
      if (secondValue == null) {
        throw new NullPointerException(Bytes.toString(r.getRow()) + ": second value is null");
      }
      // Reverse the bytes of the second value before comparing it against the first.
      byte[] secondReversed = new byte[secondValue.length];
      for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
        secondReversed[i] = secondValue[j];
      }
      second = Bytes.toString(secondReversed);
      if (first.compareTo(second) != 0) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("second key is not the reverse of first. row=" + Bytes.toStringBinary(r.getRow())
              + ", first value=" + first + ", second value=" + second);
        }
        fail();
      }
    }
  } finally {
    scanner.close();
  }
}
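// A minimal sketch of the kind of transformation this test would verify,
// assuming a map step that reads a cell from INPUT_FAMILY and writes its
// byte-reversed value to OUTPUT_FAMILY. Illustrative only; the actual mapper
// under test is not shown in the source.
private static byte[] reverse(byte[] value) {
  byte[] out = new byte[value.length];
  for (int i = 0; i < value.length; i++) {
    out[i] = value[value.length - 1 - i]; // mirror the byte order
  }
  return out;
}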
public AttachedEdgeIterable(DuctileDBGraphImpl graph, DuctileDBTransactionImpl transaction,
    ResultScanner resultScanner) {
  super();
  this.graph = graph;
  this.transaction = transaction;
  // Keep two iterators: one over the persisted edges from the HBase scan,
  // one over the edges added within the current transaction.
  resultIterator = resultScanner.iterator();
  addedIterator = transaction.addedEdges().iterator();
}
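// A plausible iteration pattern for this iterable, assuming it first drains the
// persisted edges coming back from the HBase scan and then yields the edges
// added within the current transaction. This is a sketch; the actual
// Result-to-edge conversion is specific to DuctileDB and not shown here.
public boolean hasNext() {
  return resultIterator.hasNext() || addedIterator.hasNext();
}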
private <V> List<V> transfer(ResultScanner resultScanner, HbaseMapper<V> mapper) throws Exception {
  Iterator<Result> it = resultScanner.iterator();
  List<V> result = new ArrayList<V>();
  while (it.hasNext()) {
    // Map each scanned row to a value object.
    result.add(mapper.mapApi(it.next()));
  }
  return result;
}
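// A hedged usage sketch. HbaseMapper is known here only through the
// mapper.mapApi(Result) call above, so this hypothetical implementation assumes
// a single-method interface of that shape; "table" stands in for an
// already-opened HBase table handle.
HbaseMapper<String> rowKeyMapper = new HbaseMapper<String>() {
  @Override
  public String mapApi(Result result) {
    return Bytes.toString(result.getRow()); // map each row to its row key
  }
};
List<String> rowKeys = transfer(table.getScanner(new Scan()), rowKeyMapper);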
public GCResult getBinFor(String chr, int gcContent) throws IOException {
  log.debug("Get GC bin: " + chr + " " + gcContent);

  FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
  filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("chr"), Bytes.toBytes("name"),
      CompareFilter.CompareOp.EQUAL, Bytes.toBytes(chr)));
  filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("gc"), Bytes.toBytes("min"),
      CompareFilter.CompareOp.LESS_OR_EQUAL, Bytes.toBytes(gcContent)));
  filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("gc"), Bytes.toBytes("max"),
      CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(gcContent)));

  Scan scan = new Scan();
  scan.setFilter(filters);
  ResultScanner scanner = this.getScanner(scan);
  Iterator<Result> rI = scanner.iterator();
  if (!rI.hasNext() && gcContent > 0) {
    // It's possible that I've hit the max bin; there might be a smarter way to do this with
    // filters, but I can't think of one right now. Drop the "min" constraint and retry.
    scanner.close();
    filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("chr"), Bytes.toBytes("name"),
        CompareFilter.CompareOp.EQUAL, Bytes.toBytes(chr)));
    filters.addFilter(new SingleColumnValueFilter(Bytes.toBytes("gc"), Bytes.toBytes("max"),
        CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(gcContent)));
    scan = new Scan();
    scan.setFilter(filters);
    scanner = this.getScanner(scan);
    rI = scanner.iterator();
    if (!rI.hasNext()) {
      log.warn("No GC bin for " + chr + " " + gcContent);
    }
  }
  if (!rI.hasNext()) {
    scanner.close();
    throw new IOException("Failed to retrieve any GC bins for chr " + chr + " GC=" + gcContent);
  }

  // Only expect one result.
  GCResult gcResult = createResult(rI.next());
  if (rI.hasNext()) {
    log.warn("Found multiple matches for " + chr + " " + gcContent + " returning only the first.");
  }
  scanner.close();
  return gcResult;
}
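// Worth noting: by default a SingleColumnValueFilter also passes rows that lack
// the tested column entirely. If the "gc" family can be sparse, a stricter
// variant of the filters above would set filterIfMissing. A minimal sketch,
// not taken from the source:
private SingleColumnValueFilter strictMinFilter(int gcContent) {
  SingleColumnValueFilter f = new SingleColumnValueFilter(Bytes.toBytes("gc"),
      Bytes.toBytes("min"), CompareFilter.CompareOp.LESS_OR_EQUAL, Bytes.toBytes(gcContent));
  f.setFilterIfMissing(true); // drop rows with no gc:min column instead of passing them
  return f;
}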
public boolean runWithCheckedExceptions() throws IOException, InterruptedException {
  HTable backfilledHTable = null;
  HTable liveCubeHTable = null;
  ResultScanner liveCubeScanner = null;
  try {
    // If the live table is empty, we just efficiently copy the backfill in
    liveCubeHTable = new HTable(conf, liveCubeTableName);
    liveCubeScanner = liveCubeHTable.getScanner(cf);
    boolean liveCubeIsEmpty = !liveCubeScanner.iterator().hasNext();
    liveCubeScanner.close();

    if (liveCubeIsEmpty) {
      log.info("Live cube is empty, running a straight copy from the backfill table");
      HBaseSnapshotter hbaseSnapshotter = new HBaseSnapshotter(conf, backfilledTableName, cf,
          liveCubeTableName, new Path("/tmp/backfill_snapshot_hfiles"), true, null, null);
      return hbaseSnapshotter.runWithCheckedExceptions();
    } else {
      Job job = new Job(conf);
      backfilledHTable = new HTable(conf, backfilledTableName);
      Pair<byte[][], byte[][]> allRegionsStartAndEndKeys = backfilledHTable.getStartEndKeys();
      byte[][] internalSplitKeys = BackfillUtil.getSplitKeys(allRegionsStartAndEndKeys);

      // Get the scans that will cover this table, and store them in the job configuration to be
      // used as input splits.
      Collection<Scan> scans = scansThisCubeOnly(cubeNameKeyPrefix, internalSplitKeys);
      if (log.isDebugEnabled()) {
        log.debug("Scans: " + scans);
      }
      CollectionInputFormat.setCollection(job, Scan.class, scans);

      // We cannot allow map tasks to retry, or we could increment the same key multiple times.
      job.getConfiguration().set("mapred.map.max.attempts", "1");

      job.setJobName("DataCube HBase backfiller");
      job.setJarByClass(HBaseBackfillMerger.class);
      job.getConfiguration().set(CONFKEY_DESERIALIZER, opDeserializer.getName());
      job.setMapperClass(HBaseBackfillMergeMapper.class);
      job.setInputFormatClass(CollectionInputFormat.class);
      job.setNumReduceTasks(0); // No reducers, mappers do all the work
      job.setOutputFormatClass(NullOutputFormat.class);
      job.getConfiguration().set(CONFKEY_LIVECUBE_TABLE_NAME, new String(liveCubeTableName));
      job.getConfiguration().set(CONFKEY_SNAPSHOT_TABLE_NAME, new String(snapshotTableName));
      job.getConfiguration().set(CONFKEY_BACKFILLED_TABLE_NAME, new String(backfilledTableName));
      job.getConfiguration().set(CONFKEY_COLUMN_FAMILY, new String(cf));
      job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false");
      job.getConfiguration().set("mapred.reduce.tasks.speculative.execution", "false");

      try {
        job.waitForCompletion(true);
        return job.isSuccessful();
      } catch (ClassNotFoundException e) {
        log.error("", e);
        throw new RuntimeException(e);
      }
    }
  } finally {
    if (liveCubeScanner != null) {
      liveCubeScanner.close();
    }
    if (liveCubeHTable != null) {
      liveCubeHTable.close();
    }
    if (backfilledHTable != null) {
      backfilledHTable.close();
    }
  }
}
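// A hedged refinement of the emptiness probe above: scanning with a
// FirstKeyOnlyFilter and a caching size of 1 avoids transferring whole rows just
// to learn whether the table holds any data. This is a sketch against the same
// old-style HTable API used above, not the author's code.
private static boolean tableIsEmpty(HTable table, byte[] family) throws IOException {
  Scan probe = new Scan();
  probe.addFamily(family);
  probe.setCaching(1);                       // fetch at most one row per RPC
  probe.setFilter(new FirstKeyOnlyFilter()); // return only the first cell of each row
  ResultScanner scanner = table.getScanner(probe);
  try {
    return !scanner.iterator().hasNext();
  } finally {
    scanner.close();
  }
}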