// @Override // protected void map(ImmutableBytesWritable key, Text value, Context context) throws // IOException, InterruptedException { // Text combinedKeyValue = new Text(); // //the structure is key###value // combinedKeyValue.set(Bytes.toString(key.get()) + "###" + value.toString()); // context.write(one, combinedKeyValue); // } @Override protected void map(ImmutableBytesWritable key, Result columns, Context context) throws IOException, InterruptedException { Text combinedKeyValue = new Text(); // the structure is key###value String value = null; try { for (KeyValue kv : columns.list()) { byte[] gmmData = kv.getValue(); String gmmString = Bytes.toStringBinary(gmmData); // /* just for checking that gmm is correctly constructed MixtureModel m = null; m = (MixtureModel) ObjectAndByte.byteArrayToObject(Bytes.toBytesBinary(gmmString)); System.out.println("m.size:" + m.size); // */ combinedKeyValue.set(Bytes.toString(key.get()) + "###" + gmmString); context.write(one, combinedKeyValue); // context.write(key, new Text(gmmString)); } } catch (Exception e) { e.printStackTrace(); } }
public void map(ImmutableBytesWritable row, Result value, Context context)
    throws InterruptedException, IOException {
  /* BERLIN SPARQL BENCHMARK QUERY 11
     ----------------------------------------
     SELECT ?property ?hasValue ?isValueOf
     WHERE {
       [TP-01] { %OfferXYZ% ?property ?hasValue }
       UNION
       [TP-02] { ?isValueOf ?property %OfferXYZ% }
     }
     --------------------------------------- */
  // TP-01
  if (isPartOfFirstUnion(value)) {
    List<KeyValue> entireRowAsList = value.list();
    KeyValue[] kvsAsArray = new KeyValue[entireRowAsList.size()];
    for (int i = 0; i < entireRowAsList.size(); i++) {
      kvsAsArray[i] = entireRowAsList.get(i);
    }
    context.write(
        new CompositeKeyWritable(new String(value.getRow()), 1),
        new KeyValueArrayWritable(kvsAsArray));
    return;
  }
  // TP-02
  else {
    List<KeyValue> entireRowAsList = value.list();
    List<KeyValue> kvsToTransmit = new LinkedList<KeyValue>();
    // Check all cells and see if the OFFER is part of the value
    for (KeyValue kv : entireRowAsList) {
      if (new String(kv.getValue()).equals(OfferXYZ)) {
        kvsToTransmit.add(kv);
      }
    }
    KeyValue[] kvsAsArray = new KeyValue[kvsToTransmit.size()];
    for (int i = 0; i < kvsToTransmit.size(); i++) {
      kvsAsArray[i] = kvsToTransmit.get(i);
    }
    if (kvsAsArray.length > 0) {
      context.write(
          new CompositeKeyWritable(new String(value.getRow()), 2),
          new KeyValueArrayWritable(kvsAsArray));
    } else {
      return;
    }
  }
}
/**
 * Maps the data.
 *
 * @param row The current table row key.
 * @param values The columns.
 * @param context The current context.
 * @throws IOException When something is broken with the data.
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
 *      org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
  for (KeyValue value : values.list()) {
    if (value.getValue().length > 0) {
      context.getCounter(Counters.ROWS).increment(1);
      break;
    }
  }
}
@Override
public boolean next() {
  readBuffer.clear();
  searchRow = null;
  row = null;
  index++;
  if (result != null && index < result.length) {
    setSearchRow();
    return true;
  }
  Transaction transaction = session.getTransaction();
  List<KeyValue> kvs;
  KeyValue kv;
  Result r;
  long queryTimestamp;
  try {
    result = session.getRegionServer().next(scannerId, fetchSize);
    ArrayList<Result> list = new ArrayList<Result>(result.length);
    for (int i = 0; i < result.length; i++) {
      r = result[i];
      kvs = r.list();
      // When Result.isEmpty() is true, r.list() also returns null,
      // so there is no need to check kvs.isEmpty() here.
      if (kvs != null) {
        kv = kvs.get(0);
        queryTimestamp = kv.getTimestamp();
        if (queryTimestamp < transaction.getStartTimestamp() && queryTimestamp % 2 == 0) {
          if (kv.getValueLength() != 0) // a zero-length value means the kv was deleted and can be skipped
            list.add(r);
          continue;
        }
      }
      // TODO Filter.filter is slow
      r = new Result(Filter.filter(session.getRegionServer(), regionName, transaction, kvs, 1));
      if (!r.isEmpty())
        list.add(r);
    }
    result = list.toArray(new Result[0]);
  } catch (Exception e) {
    close();
    throw DbException.convert(e);
  }
  index = 0;
  if (result != null && result.length > 0) {
    setSearchRow();
    return true;
  }
  close();
  return false;
}
/**
 * Maps the data.
 *
 * @param row The current table row key.
 * @param values The columns.
 * @param context The current context.
 * @throws IOException When something is broken with the data.
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
 *      org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
  String currentFamilyName = null;
  String currentQualifierName = null;
  String currentRowKey = null;
  Configuration config = context.getConfiguration();
  String separator = config.get("ReportSeparator", ":");
  try {
    if (values != null) {
      context.getCounter(Counters.ROWS).increment(1);
      context.write(new Text("Total ROWS"), new IntWritable(1));
    }
    for (KeyValue value : values.list()) {
      currentRowKey = Bytes.toStringBinary(value.getRow());
      String thisRowFamilyName = Bytes.toStringBinary(value.getFamily());
      if (thisRowFamilyName != null && !thisRowFamilyName.equals(currentFamilyName)) {
        currentFamilyName = thisRowFamilyName;
        context.getCounter("CF", thisRowFamilyName).increment(1);
        context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
        context.write(new Text(thisRowFamilyName), new IntWritable(1));
      }
      String thisRowQualifierName =
          thisRowFamilyName + separator + Bytes.toStringBinary(value.getQualifier());
      if (thisRowQualifierName != null && !thisRowQualifierName.equals(currentQualifierName)) {
        currentQualifierName = thisRowQualifierName;
        context.getCounter("CFQL", thisRowQualifierName).increment(1);
        context.write(new Text("Total Qualifiers across all Rows"), new IntWritable(1));
        context.write(new Text(thisRowQualifierName), new IntWritable(1));
        // Initialize versions
        context.getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
            .increment(1);
        context.write(
            new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
            new IntWritable(1));
      } else {
        // Increment versions
        currentQualifierName = thisRowQualifierName;
        context.getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
            .increment(1);
        context.write(
            new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
            new IntWritable(1));
      }
    }
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
/*
 * Query by row key
 *
 * @rowKey rowKey
 *
 * @tableName table name
 */
public static Result getResult(String tableName, String rowKey) throws IOException {
  Get get = new Get(Bytes.toBytes(rowKey));
  HTable table = new HTable(conf, Bytes.toBytes(tableName)); // get the table handle
  Result result = table.get(get);
  for (KeyValue kv : result.list()) {
    System.out.println("family:" + Bytes.toString(kv.getFamily()));
    System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
    System.out.println("value:" + Bytes.toString(kv.getValue()));
    System.out.println("Timestamp:" + kv.getTimestamp());
    System.out.println("-------------------------------------------");
  }
  return result;
}
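// A hedged usage sketch for getResult above; the table name, row key, and column
// coordinates below are placeholders, not taken from the original code.
Result r = getResult("user_table", "row-0001");
// Once the Result is in hand, a single cell can also be read without iterating:
byte[] cell = r.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")); // newest version only
System.out.println(cell == null ? "cell missing" : Bytes.toString(cell));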
/*
 * Query a single column of a table
 *
 * @tableName table name
 *
 * @rowKey rowKey
 */
public static void getResultByColumn(
    String tableName, String rowKey, String familyName, String columnName) throws IOException {
  HTable table = new HTable(conf, Bytes.toBytes(tableName));
  Get get = new Get(Bytes.toBytes(rowKey));
  get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName)); // fetch the column for the given family and qualifier
  Result result = table.get(get);
  for (KeyValue kv : result.list()) {
    System.out.println("family:" + Bytes.toString(kv.getFamily()));
    System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
    System.out.println("value:" + Bytes.toString(kv.getValue()));
    System.out.println("Timestamp:" + kv.getTimestamp());
    System.out.println("-------------------------------------------");
  }
}
@Override
protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
    throws IOException, InterruptedException {
  List<KeyValue> kvList = result.list();
  // boolean isTextIndex = tableName.equals(Constants.TEXTS_INDEX_TABLE_NAME);
  long totalFreq = 0;
  Iterator<KeyValue> iter = kvList.iterator();
  while (iter.hasNext()) {
    byte[] value = iter.next().getValue();
    totalFreq += Bytes.toInt(value);
  }
  Text term = new Text(Bytes.toString(rowKey.get()));
  context.write(term, new LongWritable(totalFreq));
}
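// A minimal reducer sketch that could pair with the frequency-summing mapper above.
// The class name and the assumption that the job's output value type is LongWritable
// are mine for illustration; they are not taken from the original code.
public static class TermFrequencySumReducer
    extends Reducer<Text, LongWritable, Text, LongWritable> {
  @Override
  protected void reduce(Text term, Iterable<LongWritable> partialFreqs, Context context)
      throws IOException, InterruptedException {
    long total = 0;
    for (LongWritable freq : partialFreqs) {
      total += freq.get(); // accumulate the per-row totals emitted by the mapper
    }
    context.write(term, new LongWritable(total));
  }
}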
/*
 * Scan and iterate over an HBase table
 *
 * @tableName table name
 */
public static void getResultScann(String tableName) throws IOException {
  Scan scan = new Scan();
  ResultScanner rs = null;
  HTableInterface table = conn.getTable(tableName);
  try {
    // String split = StringUtils.S001;
    // QualifierFilter ff = new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(
    //     Bytes.toBytes("A")));
    // scan.setFilter(ff);
    rs = table.getScanner(scan);
    int count = 0;
    for (Result r : rs) {
      count++;
      for (KeyValue kv : r.list()) {
        System.out.println("row:" + Bytes.toString(kv.getRow()));
        // System.out.println("family:" + Bytes.toString(kv.getFamily()));
        // System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
        System.out.println("value:" + Bytes.toString(kv.getValue()));
        // System.out.println("timestamp:" + kv.getTimestamp());
        // StringBuilder sb = new StringBuilder();
        // sb.append(Bytes.toString(r.getRow()));
        // sb.append(split);
        // sb.append(Bytes.toString(kv.getValue()));
        // EntBaseinfo baseInfo = new EntBaseinfo();
        // baseInfo.parseFromString(sb.toString());
        // System.out.println(baseInfo.getENTNAME());
        // if (baseInfo.getNAME() != null && baseInfo.getNAME().isEmpty()) {
        //   System.out.println(baseInfo.getENTNAME());
        // }
        // if (baseInfo.getDOM() != null && baseInfo.getNAME().isEmpty()) {
        //   System.out.println(baseInfo.getENTNAME());
        // }
      }
      if (count > 1000) {
        return;
      }
    }
  } finally {
    if (rs != null) {
      rs.close();
    }
  }
}
@SuppressWarnings("resource") public static void searchByRowKey(String tableName, String rowKey) { try { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Get g = new Get(Bytes.toBytes(rowKey)); Result rs = table.get(g); for (KeyValue kv : rs.list()) { System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out.println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out.println("value:" + Bytes.toString(kv.getValue())); System.out.println("Timestamp:" + kv.getTimestamp()); System.out.println("-------------------------------------------"); } } catch (IOException e) { e.printStackTrace(); } }
private void printResult(Result result) {
  if (result != null) {
    Map<String, String> map = new HashMap<String, String>();
    for (KeyValue kv : result.list()) { // result.list() should be checked for null here
      map.put(Bytes.toString(kv.getQualifier()), Bytes.toString(kv.getValue()));
    }
    logger.info(map.toString());
    NavigableMap<byte[], NavigableMap<byte[], byte[]>> map2 = result.getNoVersionMap();
    for (byte[] key : map2.keySet()) {
      Map<String, String> mm = new HashMap<String, String>();
      mm.put("family", Bytes.toString(key));
      NavigableMap<byte[], byte[]> navigableMap = map2.get(key);
      for (byte[] key2 : navigableMap.keySet()) {
        mm.put(Bytes.toString(key2), Bytes.toString(navigableMap.get(key2)));
      }
      logger.info(mm.toString());
    }
  }
}
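// A small sketch, under the same assumptions as printResult above, of a third way to
// read a Result: getFamilyMap(family) returns qualifier -> newest value for a single
// column family. The family name "info" is a placeholder.
NavigableMap<byte[], byte[]> familyMap = result.getFamilyMap(Bytes.toBytes("info"));
if (familyMap != null) {
  for (Map.Entry<byte[], byte[]> entry : familyMap.entrySet()) {
    logger.info(Bytes.toString(entry.getKey()) + "=" + Bytes.toString(entry.getValue()));
  }
}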
/*
 * Query by row key
 *
 * @rowKey rowKey
 *
 * @tableName table name
 */
public static Result getResult(String tableName, String rowKey, String qualifierName)
    throws IOException {
  Get get = new Get(Bytes.toBytes(rowKey));
  QualifierFilter filter =
      new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(qualifierName)));
  get.setFilter(filter);
  @SuppressWarnings("resource")
  HTable table = new HTable(conf, Bytes.toBytes(tableName)); // get the table handle
  Result result = table.get(get);
  for (KeyValue kv : result.list()) {
    System.out.println("family:" + Bytes.toString(kv.getFamily()));
    System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
    System.out.println("value:" + Bytes.toString(kv.getValue()));
    System.out.println("Timestamp:" + kv.getTimestamp());
    System.out.println("-------------------------------------------");
  }
  return result;
}
/*
 * Query multiple versions of a column
 *
 * @tableName table name
 *
 * @rowKey rowKey
 *
 * @familyName column family name
 *
 * @columnName column name
 */
public static void getResultByVersion(
    String tableName, String rowKey, String familyName, String columnName) throws IOException {
  HTable table = new HTable(conf, Bytes.toBytes(tableName));
  Get get = new Get(Bytes.toBytes(rowKey));
  get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
  get.setMaxVersions(5);
  Result result = table.get(get);
  for (KeyValue kv : result.list()) {
    System.out.println("family:" + Bytes.toString(kv.getFamily()));
    System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
    System.out.println("value:" + Bytes.toString(kv.getValue()));
    System.out.println("Timestamp:" + kv.getTimestamp());
    System.out.println("-------------------------------------------");
  }
  /*
   * List<?> results = table.get(get).list();
   * Iterator<?> it = results.iterator();
   * while (it.hasNext()) {
   *   System.out.println(it.next().toString());
   * }
   */
}
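// A hedged sketch of the scan-side counterpart to getResultByVersion: requesting
// several versions of one column during a table scan. The table, family, and column
// names are placeholders, and the snippet assumes the same static conf field used above.
Scan versionScan = new Scan();
versionScan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col"));
versionScan.setMaxVersions(5); // keep up to five versions per cell
HTable scanTable = new HTable(conf, Bytes.toBytes("myTable"));
ResultScanner scanner = scanTable.getScanner(versionScan);
try {
  for (Result r : scanner) {
    for (KeyValue kv : r.list()) { // one KeyValue per retained version
      System.out.println(Bytes.toString(kv.getRow()) + " @ " + kv.getTimestamp()
          + " = " + Bytes.toString(kv.getValue()));
    }
  }
} finally {
  scanner.close();
}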
/*
 * Scan and iterate over an HBase table
 *
 * @tableName table name
 */
public static void getResultScann(String tableName) throws IOException {
  Scan scan = new Scan();
  ResultScanner rs = null;
  HTable table = new HTable(conf, Bytes.toBytes(tableName));
  try {
    rs = table.getScanner(scan);
    for (Result r : rs) {
      for (KeyValue kv : r.list()) {
        System.out.println("row:" + Bytes.toString(kv.getRow()));
        System.out.println("family:" + Bytes.toString(kv.getFamily()));
        System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
        System.out.println("value:" + Bytes.toString(kv.getValue()));
        System.out.println("timestamp:" + kv.getTimestamp());
        System.out.println("-------------------------------------------");
      }
    }
  } finally {
    if (rs != null) {
      rs.close();
    }
  }
}
/**
 * Query a single column of a table
 *
 * @tableName table name
 * @rowKey rowKey
 */
public static byte[] getResultByColumn(
    String tableName, String rowKey, String familyName, String columnName) throws IOException {
  System.out.println("getResultByColumn------------------");
  @SuppressWarnings("resource")
  HTable table = new HTable(conf, Bytes.toBytes(tableName));
  Get get = new Get(Bytes.toBytes(rowKey));
  get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName)); // fetch the column for the given family and qualifier
  Result result = table.get(get);
  byte[] value = null;
  for (KeyValue kv : result.list()) {
    System.out.println("family:" + Bytes.toString(kv.getFamily()));
    System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
    System.out.println("value:" + Bytes.toString(kv.getValue()));
    System.out.println("Timestamp:" + kv.getTimestamp());
    System.out.println("-------------------------------------------");
    if (Bytes.toString(kv.getQualifier()).equals(columnName)) {
      value = kv.getValue();
    }
  }
  return value;
}
private Result filter( TransactionState state, Result result, long startTimestamp, int localVersions) throws IOException { if (result == null) { return null; } List<KeyValue> kvs = result.list(); if (kvs == null) { return result; } Map<ByteArray, Map<ByteArray, Integer>> occurrences = new HashMap<TransactionalTable.ByteArray, Map<ByteArray, Integer>>(); Map<ByteArray, Map<ByteArray, Long>> minTimestamp = new HashMap<TransactionalTable.ByteArray, Map<ByteArray, Long>>(); List<KeyValue> nonDeletes = new ArrayList<KeyValue>(); List<KeyValue> filtered = new ArrayList<KeyValue>(); Map<ByteArray, Set<ByteArray>> read = new HashMap<ByteArray, Set<ByteArray>>(); DeleteTracker tracker = new DeleteTracker(); for (KeyValue kv : kvs) { ByteArray family = new ByteArray(kv.getFamily()); ByteArray qualifier = new ByteArray(kv.getQualifier()); Set<ByteArray> readQualifiers = read.get(family); if (readQualifiers == null) { readQualifiers = new HashSet<TransactionalTable.ByteArray>(); read.put(family, readQualifiers); } else if (readQualifiers.contains(qualifier)) continue; // RowKey rk = new RowKey(kv.getRow(), getTableName()); if (state.tsoclient.validRead(kv.getTimestamp(), startTimestamp)) { if (!tracker.addDeleted(kv)) nonDeletes.add(kv); { // Read valid value readQualifiers.add(qualifier); // statistics // elementsGotten++; Map<ByteArray, Integer> occurrencesCols = occurrences.get(family); Integer times = null; if (occurrencesCols != null) { times = occurrencesCols.get(qualifier); } if (times != null) { // elementsRead += times; versionsAvg = times > versionsAvg ? times : alpha * versionsAvg + (1 - alpha) * times; // extraVersionsAvg = times > extraVersionsAvg ? times : alpha * // extraVersionsAvg + (1 - alpha) * times; } else { // elementsRead++; versionsAvg = alpha * versionsAvg + (1 - alpha); // extraVersionsAvg = alpha * extraVersionsAvg + (1 - alpha); } } } else { Map<ByteArray, Integer> occurrencesCols = occurrences.get(family); Map<ByteArray, Long> minTimestampCols = minTimestamp.get(family); if (occurrencesCols == null) { occurrencesCols = new HashMap<TransactionalTable.ByteArray, Integer>(); minTimestampCols = new HashMap<TransactionalTable.ByteArray, Long>(); occurrences.put(family, occurrencesCols); minTimestamp.put(family, minTimestampCols); } Integer times = occurrencesCols.get(qualifier); Long timestamp = minTimestampCols.get(qualifier); if (times == null) { times = 0; timestamp = kv.getTimestamp(); } times++; timestamp = Math.min(timestamp, kv.getTimestamp()); if (times == localVersions) { // We need to fetch more versions Get get = new Get(kv.getRow()); get.addColumn(kv.getFamily(), kv.getQualifier()); get.setMaxVersions(localVersions); Result r; GOTRESULT: do { extraGetsPerformed++; get.setTimeRange(0, timestamp); r = this.get(get); List<KeyValue> list = r.list(); if (list == null) break; for (KeyValue t : list) { times++; timestamp = Math.min(timestamp, t.getTimestamp()); // rk = new RowKey(kv.getRow(), getTableName()); if (state.tsoclient.validRead(t.getTimestamp(), startTimestamp)) { if (!tracker.addDeleted(t)) nonDeletes.add(t); readQualifiers.add(qualifier); elementsGotten++; elementsRead += times; versionsAvg = times > versionsAvg ? times : alpha * versionsAvg + (1 - alpha) * times; extraVersionsAvg = times > extraVersionsAvg ? 
times : alpha * extraVersionsAvg + (1 - alpha) * times; break GOTRESULT; } } } while (r.size() == localVersions); } else { occurrencesCols.put(qualifier, times); minTimestampCols.put(qualifier, timestamp); } } } for (KeyValue kv : nonDeletes) { if (!tracker.isDeleted(kv)) { filtered.add(kv); } } // cacheVersions = (int) versionsAvg; if (filtered.isEmpty()) { return null; } return new Result(filtered); }
/** * Looks at every value of the mapreduce output and verifies that indeed the values have been * reversed. * * @param table Table to scan. * @throws IOException * @throws NullPointerException if we failed to find a cell value */ private void verifyAttempt(final HTable table) throws IOException, NullPointerException { Scan scan = new Scan(); scan.addFamily(INPUT_FAMILY); scan.addFamily(OUTPUT_FAMILY); ResultScanner scanner = table.getScanner(scan); try { Iterator<Result> itr = scanner.iterator(); assertTrue(itr.hasNext()); while (itr.hasNext()) { Result r = itr.next(); if (LOG.isDebugEnabled()) { if (r.size() > 2) { throw new IOException("Too many results, expected 2 got " + r.size()); } } byte[] firstValue = null; byte[] secondValue = null; int count = 0; for (KeyValue kv : r.list()) { if (count == 0) { firstValue = kv.getValue(); } if (count == 1) { secondValue = kv.getValue(); } count++; if (count == 2) { break; } } String first = ""; if (firstValue == null) { throw new NullPointerException(Bytes.toString(r.getRow()) + ": first value is null"); } first = Bytes.toString(firstValue); String second = ""; if (secondValue == null) { throw new NullPointerException(Bytes.toString(r.getRow()) + ": second value is null"); } byte[] secondReversed = new byte[secondValue.length]; for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) { secondReversed[i] = secondValue[j]; } second = Bytes.toString(secondReversed); if (first.compareTo(second) != 0) { if (LOG.isDebugEnabled()) { LOG.debug( "second key is not the reverse of first. row=" + Bytes.toStringBinary(r.getRow()) + ", first value=" + first + ", second value=" + second); } fail(); } } } finally { scanner.close(); } }
public void reduce(
    CompositeKeyWritable key, Iterable<KeyValueArrayWritable> values, Context context)
    throws IOException, InterruptedException {
  /* BERLIN SPARQL BENCHMARK QUERY 8
     ----------------------------------------
     SELECT ?title ?text ?reviewDate ?reviewer ?reviewerName
            ?rating1 ?rating2 ?rating3 ?rating4
     WHERE {
       [TP-01] ?review bsbm:reviewFor %ProductXYZ% .
       [TP-02] ?review dc:title ?title .
       [TP-03] ?review rev:text ?text .
       [TP-04] FILTER langMatches( lang(?text), "EN" )
       [TP-05] ?review bsbm:reviewDate ?reviewDate .
       [TP-06] ?review rev:reviewer ?reviewer .
       [TP-07] ?reviewer foaf:name ?reviewerName .
       [TP-08] OPTIONAL { ?review bsbm:rating1 ?rating1 . }
       [TP-09] OPTIONAL { ?review bsbm:rating2 ?rating2 . }
       [TP-10] OPTIONAL { ?review bsbm:rating3 ?rating3 . }
       [TP-11] OPTIONAL { ?review bsbm:rating4 ?rating4 . }
     }
     ORDER BY DESC(?reviewDate)
     LIMIT 20
     --------------------------------------- */
  List<KeyValue> finalKeyValues = new ArrayList<KeyValue>();
  // Find the keys for the vendor/publisher
  KeyValue kv_reviewer = null;
  for (KeyValueArrayWritable array : values) {
    for (KeyValue kv : (KeyValue[]) array.toArray()) {
      if (Arrays.equals(kv.getValue(), "rev_reviewer".getBytes())) {
        kv_reviewer = kv;
        finalKeyValues.add(kv);
      } else {
        finalKeyValues.add(kv);
      }
    }
  }
  // Without a reviewer cell, TP-07 cannot be resolved
  if (kv_reviewer == null) {
    return;
  }
  // TP-07
  Result reviewerResult = table.get(new Get(kv_reviewer.getQualifier()));
  boolean foundReviewerName = false;
  for (KeyValue kv : reviewerResult.list()) {
    if (Arrays.equals(kv.getQualifier(), "foaf_name".getBytes())) {
      finalKeyValues.add(kv);
      foundReviewerName = true;
      break;
    }
  }
  if (!foundReviewerName) {
    return;
  }
  // Format and output the values
  StringBuilder builder = new StringBuilder();
  builder.append("\n");
  for (KeyValue kv : finalKeyValues) {
    String[] triple = null;
    try {
      triple = SharedServices.keyValueToTripleString(kv);
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    }
    builder.append(triple[0] + "\t" + triple[1] + "\t" + triple[2] + "\n");
  }
  context.write(new Text(key.getValue()), new Text(builder.toString()));
}
public void map(ImmutableBytesWritable row, Result value, Context context)
    throws InterruptedException, IOException {
  /* BERLIN SPARQL BENCHMARK QUERY 8
     ----------------------------------------
     SELECT ?title ?text ?reviewDate ?reviewer ?reviewerName
            ?rating1 ?rating2 ?rating3 ?rating4
     WHERE {
       [TP-01] ?review bsbm:reviewFor %ProductXYZ% .
       [TP-02] ?review dc:title ?title .
       [TP-03] ?review rev:text ?text .
       [TP-04] FILTER langMatches( lang(?text), "EN" )
       [TP-05] ?review bsbm:reviewDate ?reviewDate .
       [TP-06] ?review rev:reviewer ?reviewer .
       [TP-07] ?reviewer foaf:name ?reviewerName .
       [TP-08] OPTIONAL { ?review bsbm:rating1 ?rating1 . }
       [TP-09] OPTIONAL { ?review bsbm:rating2 ?rating2 . }
       [TP-10] OPTIONAL { ?review bsbm:rating3 ?rating3 . }
       [TP-11] OPTIONAL { ?review bsbm:rating4 ?rating4 . }
     }
     ORDER BY DESC(?reviewDate)
     LIMIT 20
     --------------------------------------- */
  String rowKey = new String(value.getRow());
  ArrayList<KeyValue> keyValuesToTransmit = new ArrayList<KeyValue>();
  List<KeyValue> reviewRow = value.list();

  byte[] predicate = value.getValue(SharedServices.CF_AS_BYTES, ProductXYZ.getBytes());
  if (!Arrays.equals(predicate, "bsbm-voc_reviewFor".getBytes())) {
    return;
  }

  int requiredColumns = 0;
  for (KeyValue kv : reviewRow) {
    // TP-01
    if (Arrays.equals(kv.getValue(), "bsbm-voc_reviewFor".getBytes())) {
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // TP-02
    else if (Arrays.equals(kv.getQualifier(), "dc_title".getBytes())) {
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // TP-03
    else if (Arrays.equals(kv.getQualifier(), "rev_text".getBytes())) {
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // TP-04
    else if (Arrays.equals(kv.getValue(), "rdfs_lang".getBytes())) {
      if (!Arrays.equals(kv.getQualifier(), "@en".getBytes())) {
        return;
      }
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // TP-05
    else if (Arrays.equals(kv.getQualifier(), "bsbm-voc_reviewDate".getBytes())) {
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // TP-06
    else if (Arrays.equals(kv.getValue(), "rev_reviewer".getBytes())) {
      keyValuesToTransmit.add(kv);
      requiredColumns++;
    }
    // OPTIONAL TP-08, TP-09, TP-10, TP-11
    else if (Arrays.equals(kv.getQualifier(), "bsbm-voc_rating1".getBytes())) {
      keyValuesToTransmit.add(kv);
    } else if (Arrays.equals(kv.getQualifier(), "bsbm-voc_rating2".getBytes())) {
      keyValuesToTransmit.add(kv);
    } else if (Arrays.equals(kv.getQualifier(), "bsbm-voc_rating3".getBytes())) {
      keyValuesToTransmit.add(kv);
    } else if (Arrays.equals(kv.getQualifier(), "bsbm-voc_rating4".getBytes())) {
      keyValuesToTransmit.add(kv);
    }
  }
  if (requiredColumns < 6) {
    return;
  }

  context.write(
      new CompositeKeyWritable(rowKey, 1),
      new KeyValueArrayWritable(SharedServices.listToArray(keyValuesToTransmit)));
}
/**
 * Get an estimate of the number of rows and bytes per row in regions between startRowKey and
 * endRowKey. The more store files there are the more this will be off. Also, this does not take
 * into account any rows that are in the memstore.
 *
 * <p>The values computed here should be cached so that in high qps workloads the nn is not
 * overwhelmed. Could be done in load(); Synchronized to make sure that only one thread at a time
 * is using the htable.
 *
 * @param startRowKey First row key in the range
 * @param endRowKey Last row key in the range
 * @return The estimated number of rows in the regions between the row keys (first) and the
 *     estimated row size in bytes (second).
 */
public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, byte[] endRowKey) {
  Preconditions.checkNotNull(startRowKey);
  Preconditions.checkNotNull(endRowKey);

  long rowSize = 0;
  long rowCount = 0;
  long hdfsSize = 0;
  boolean isCompressed = false;

  try {
    // Check to see if things are compressed.
    // If they are we'll estimate a compression factor.
    if (columnFamilies_ == null) {
      columnFamilies_ = hTable_.getTableDescriptor().getColumnFamilies();
    }
    Preconditions.checkNotNull(columnFamilies_);
    for (HColumnDescriptor desc : columnFamilies_) {
      isCompressed |= desc.getCompression() != Compression.Algorithm.NONE;
    }

    // For every region in the range.
    List<HRegionLocation> locations = getRegionsInRange(hTable_, startRowKey, endRowKey);
    for (HRegionLocation location : locations) {
      long currentHdfsSize = 0;
      long currentRowSize = 0;
      long currentRowCount = 0;

      HRegionInfo info = location.getRegionInfo();
      // Get the size on hdfs
      currentHdfsSize += getHdfsSize(info);

      Scan s = new Scan(info.getStartKey());
      // Get a small sample of rows
      s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE);
      // Try and get every version so the row's size can be used to estimate.
      s.setMaxVersions(Short.MAX_VALUE);
      // Don't cache the blocks as we don't think these are
      // necessarily important blocks.
      s.setCacheBlocks(false);
      // Try and get deletes too so their size can be counted.
      s.setRaw(true);
      ResultScanner rs = hTable_.getScanner(s);
      try {
        // And get the ROW_COUNT_ESTIMATE_BATCH_SIZE fetched rows
        // for a representative sample
        for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; i++) {
          Result r = rs.next();
          if (r == null) break;
          currentRowCount += 1;
          for (KeyValue kv : r.list()) {
            // some extra row size added to make up for shared overhead
            currentRowSize += kv.getRowLength() // row key
                + 4 // row key length field
                + kv.getFamilyLength() // Column family bytes
                + 4 // family length field
                + kv.getQualifierLength() // qualifier bytes
                + 4 // qualifier length field
                + kv.getValueLength() // length of the value
                + 4 // value length field
                + 10; // extra overhead for hfile index, checksums, metadata, etc
          }
        }
        // add these values to the cumulative totals in one shot just
        // in case there was an error in between getting the hdfs
        // size and the row/column sizes.
        hdfsSize += currentHdfsSize;
        rowCount += currentRowCount;
        rowSize += currentRowSize;
      } finally {
        rs.close();
      }
    }
  } catch (IOException ioe) {
    // Print the stack trace, but we'll ignore it
    // as this is just an estimate.
    // TODO: Put this into the per query log.
    LOG.error("Error computing HBase row count estimate", ioe);
  }

  // If there are no rows then no need to estimate.
  if (rowCount == 0) return new Pair<Long, Long>(0L, 0L);
  // if something went wrong then set a signal value.
  if (rowSize <= 0 || hdfsSize <= 0) return new Pair<Long, Long>(-1L, -1L);
  // estimate the number of rows.
  double bytesPerRow = rowSize / (double) rowCount;
  long estimatedRowCount = (long) ((isCompressed ? 2 : 1) * (hdfsSize / bytesPerRow));
  return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow);
}
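// A hedged usage sketch for getEstimatedRowStats above, assuming it is called from within
// the same class that owns hTable_. The row keys are placeholders, and the assumption that
// this Pair type exposes getFirst()/getSecond() accessors is mine, not from the original code.
byte[] startKey = Bytes.toBytes("row-000");
byte[] endKey = Bytes.toBytes("row-999");
Pair<Long, Long> stats = getEstimatedRowStats(startKey, endKey);
long estimatedRowCount = stats.getFirst();     // 0 if the range held no rows, -1 on failure
long estimatedBytesPerRow = stats.getSecond(); // -1 signals the estimate could not be made
LOG.info("~" + estimatedRowCount + " rows, ~" + estimatedBytesPerRow + " bytes/row");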