//        @Override
    //        protected void map(ImmutableBytesWritable key, Text value, Context context) throws
    // IOException, InterruptedException {
    //            Text combinedKeyValue = new Text();
    //            //the structure is key###value
    //            combinedKeyValue.set(Bytes.toString(key.get()) + "###" + value.toString());
    //            context.write(one, combinedKeyValue);
    //        }
    /**
     * Emits every cell of the row as one {@code rowKey###cellValue} text record under the shared
     * output key {@code one}.
     *
     * <p>The cell value is rendered with {@code Bytes.toStringBinary}. Before a record is written
     * the rendered string is decoded again and deserialized into a {@code MixtureModel} as a
     * sanity check; if that check fails, the stack trace is printed and the remaining cells of
     * the row are skipped.
     *
     * @param key the row key
     * @param columns the cells of the row
     * @param context the mapper context records are written to
     * @throws IOException if writing a record fails
     * @throws InterruptedException if writing a record is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result columns, Context context)
        throws IOException, InterruptedException {
      if (columns == null || columns.isEmpty()) {
        // Result.list() yields null for an empty row, so there is nothing to emit.
        return;
      }

      Text combinedKeyValue = new Text();
      for (KeyValue kv : columns.list()) {
        try {
          String gmmString = Bytes.toStringBinary(kv.getValue());

          // Just for checking that the GMM round-trips through its string form correctly.
          MixtureModel m =
              (MixtureModel) ObjectAndByte.byteArrayToObject(Bytes.toBytesBinary(gmmString));
          System.out.println("m.size:" + m.size);

          // the structure is key###value
          combinedKeyValue.set(Bytes.toString(key.get()) + "###" + gmmString);
        } catch (Exception e) {
          // Best effort: a cell that cannot be decoded aborts the rest of this row only.
          e.printStackTrace();
          return;
        }
        // Write failures are deliberately NOT swallowed: they must reach the framework so the
        // task can fail or be retried instead of silently dropping records.
        context.write(one, combinedKeyValue);
      }
    }
 /**
  * Mapper for Berlin SPARQL Benchmark query 11.
  *
  * <p>Rows accepted by {@code isPartOfFirstUnion} are forwarded whole, tagged with union branch
  * 1. For every other row only the cells whose value equals {@code OfferXYZ} are forwarded,
  * tagged with union branch 2; rows without such a cell produce no output.
  *
  * @param row the row key
  * @param value the cells of the row
  * @param context the mapper context records are written to
  * @throws InterruptedException if writing a record is interrupted
  * @throws IOException if writing a record fails
  */
 public void map(ImmutableBytesWritable row, Result value, Context context)
     throws InterruptedException, IOException {
   /* BERLIN SPARQL BENCHMARK QUERY 11
     ----------------------------------------
   SELECT ?property ?hasValue ?isValueOf
   WHERE {
   [TP-01]  { %OfferXYZ% ?property ?hasValue }
            UNION
   [TP-02]  { ?isValueOf ?property %OfferXYZ% }
   }
     ---------------------------------------
   */
   List<KeyValue> entireRowAsList = value.list();
   if (entireRowAsList == null) {
     // Result.list() yields null for an empty row; nothing can match either pattern.
     return;
   }

   // TP-01: transmit the entire row.
   if (isPartOfFirstUnion(value)) {
     KeyValue[] wholeRow = entireRowAsList.toArray(new KeyValue[entireRowAsList.size()]);
     context.write(
         new CompositeKeyWritable(new String(value.getRow()), 1),
         new KeyValueArrayWritable(wholeRow));
     return;
   }

   // TP-02: keep only the cells whose value is the offer itself.
   List<KeyValue> kvsToTransmit = new LinkedList<KeyValue>();
   for (KeyValue kv : entireRowAsList) {
     if (new String(kv.getValue()).equals(OfferXYZ)) {
       kvsToTransmit.add(kv);
     }
   }
   if (kvsToTransmit.isEmpty()) {
     return;
   }
   // toArray copies in one linear pass; indexed get(i) on a LinkedList would be quadratic.
   KeyValue[] matchingCells = kvsToTransmit.toArray(new KeyValue[kvsToTransmit.size()]);
   context.write(
       new CompositeKeyWritable(new String(value.getRow()), 2),
       new KeyValueArrayWritable(matchingCells));
 }
 /**
  * Counts a row once if at least one of its cells carries a non-empty value.
  *
  * @param row The current table row key.
  * @param values The columns of the row.
  * @param context The current context; its {@code Counters.ROWS} counter is incremented.
  * @throws IOException When something is broken with the data.
  * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
  *     org.apache.hadoop.mapreduce.Mapper.Context)
  */
 @Override
 public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
   for (KeyValue cell : values.list()) {
     if (cell.getValue().length == 0) {
       // Empty cell: keep looking for one that has content.
       continue;
     }
     // First non-empty cell found: count the row and stop.
     context.getCounter(Counters.ROWS).increment(1);
     return;
   }
 }
  /**
   * Advances the cursor to the next row.
   *
   * <p>Rows still buffered in {@code result} are served first. Once the buffer is exhausted, the
   * next batch of up to {@code fetchSize} rows is requested from the region server, filtered,
   * and becomes the new buffer. The cursor is closed when a fetch yields no rows or fails.
   *
   * @return {@code true} if the cursor is positioned on a row, {@code false} when no rows remain
   */
  @Override
  public boolean next() {
    readBuffer.clear();
    searchRow = null;
    row = null;
    index++;
    if (result != null && index < result.length) {
      setSearchRow();
      return true;
    }

    Transaction transaction = session.getTransaction();
    try {
      result = session.getRegionServer().next(scannerId, fetchSize);
      ArrayList<Result> list = new ArrayList<Result>(result.length);
      for (int i = 0; i < result.length; i++) {
        Result r = result[i];
        List<KeyValue> kvs = r.list();
        // When Result.isEmpty() is true, r.list() also returns null, so kvs.isEmpty() does not
        // need to be checked separately here.
        if (kvs != null) {
          KeyValue kv = kvs.get(0);
          long queryTimestamp = kv.getTimestamp();
          // Fast path: the first cell is older than the transaction start and has an even
          // timestamp. NOTE(review): presumably an even timestamp marks a committed write —
          // confirm against the timestamp allocation scheme.
          if (queryTimestamp < transaction.getStartTimestamp() && queryTimestamp % 2 == 0) {
            // A zero-length value means the kv was deleted and needs no further processing.
            if (kv.getValueLength() != 0) {
              list.add(r);
            }
            continue;
          }
        }

        // TODO Filter.filter is slow
        r = new Result(Filter.filter(session.getRegionServer(), regionName, transaction, kvs, 1));
        if (!r.isEmpty()) {
          list.add(r);
        }
      }

      result = list.toArray(new Result[0]);
    } catch (Exception e) {
      close();
      throw DbException.convert(e);
    }

    index = 0;

    if (result != null && result.length > 0) {
      setSearchRow();
      return true;
    }

    close();
    return false;
  }
Esempio n. 5
0
    /**
     * Emits per-row statistics: a total row count, one record per column family and per
     * family/qualifier pair whenever either changes between consecutive cells, and one
     * {@code _Versions} record per cell. Matching counters are incremented alongside each record.
     *
     * <p>The separator placed between family, qualifier and row key is read from the
     * {@code ReportSeparator} configuration entry and defaults to {@code ":"}.
     *
     * @param row The current table row key.
     * @param values The columns; a {@code null} value is ignored.
     * @param context The current context.
     * @throws IOException When something is broken with the data.
     * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
     *     org.apache.hadoop.mapreduce.Mapper.Context)
     */
    @Override
    public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
      if (values == null) {
        return;
      }

      String currentFamilyName = null;
      String currentQualifierName = null;
      Configuration config = context.getConfiguration();
      String separator = config.get("ReportSeparator", ":");

      try {
        context.getCounter(Counters.ROWS).increment(1);
        context.write(new Text("Total ROWS"), new IntWritable(1));

        if (values.isEmpty()) {
          // Result.list() yields null for an empty row; the row itself has been counted above.
          return;
        }

        for (KeyValue value : values.list()) {
          String currentRowKey = Bytes.toStringBinary(value.getRow());
          String thisRowFamilyName = Bytes.toStringBinary(value.getFamily());
          if (thisRowFamilyName != null && !thisRowFamilyName.equals(currentFamilyName)) {
            currentFamilyName = thisRowFamilyName;
            context.getCounter("CF", thisRowFamilyName).increment(1);
            context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
            context.write(new Text(thisRowFamilyName), new IntWritable(1));
          }

          // A string concatenation is never null, so only the change test is needed.
          String thisRowQualifierName =
              thisRowFamilyName + separator + Bytes.toStringBinary(value.getQualifier());
          if (!thisRowQualifierName.equals(currentQualifierName)) {
            currentQualifierName = thisRowQualifierName;
            context.getCounter("CFQL", thisRowQualifierName).increment(1);
            context.write(new Text("Total Qualifiers across all Rows"), new IntWritable(1));
            context.write(new Text(thisRowQualifierName), new IntWritable(1));
          }

          // Every cell counts as one version of its qualifier, whether or not the qualifier
          // was just seen for the first time.
          String versionKey = currentRowKey + separator + thisRowQualifierName;
          context.getCounter("QL_VERSIONS", versionKey).increment(1);
          context.write(new Text(versionKey + "_Versions"), new IntWritable(1));
        }
      } catch (InterruptedException e) {
        e.printStackTrace();
        // Restore the interrupt flag so the framework can still observe the interruption.
        Thread.currentThread().interrupt();
      }
    }
Esempio n. 6
0
 /**
  * Looks up a single row by its row key and prints every cell of it to standard output.
  *
  * @param tableName name of the table to query
  * @param rowKey key of the row to fetch
  * @return the fetched row; nothing is printed when it is empty
  * @throws IOException if the table cannot be opened, read or closed
  */
 public static Result getResult(String tableName, String rowKey) throws IOException {
   Get get = new Get(Bytes.toBytes(rowKey));
   HTable table = new HTable(conf, Bytes.toBytes(tableName)); // obtain the table
   try {
     Result result = table.get(get);
     // Result.list() yields null for an empty result, so guard before iterating.
     if (!result.isEmpty()) {
       for (KeyValue kv : result.list()) {
         System.out.println("family:" + Bytes.toString(kv.getFamily()));
         System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
         System.out.println("value:" + Bytes.toString(kv.getValue()));
         System.out.println("Timestamp:" + kv.getTimestamp());
         System.out.println("-------------------------------------------");
       }
     }
     return result;
   } finally {
     // Always release the table's resources, even when the lookup fails.
     table.close();
   }
 }
Esempio n. 7
0
 /**
  * Looks up one column of a single row and prints every returned cell to standard output.
  *
  * @param tableName name of the table to query
  * @param rowKey key of the row to fetch
  * @param familyName column family of the requested column
  * @param columnName qualifier of the requested column
  * @throws IOException if the table cannot be opened, read or closed
  */
 public static void getResultByColumn(
     String tableName, String rowKey, String familyName, String columnName) throws IOException {
   HTable table = new HTable(conf, Bytes.toBytes(tableName));
   try {
     Get get = new Get(Bytes.toBytes(rowKey));
     // Restrict the lookup to the column identified by family and qualifier.
     get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
     Result result = table.get(get);
     if (result.isEmpty()) {
       // Result.list() yields null for an empty result; there is nothing to print.
       return;
     }
     for (KeyValue kv : result.list()) {
       System.out.println("family:" + Bytes.toString(kv.getFamily()));
       System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
       System.out.println("value:" + Bytes.toString(kv.getValue()));
       System.out.println("Timestamp:" + kv.getTimestamp());
       System.out.println("-------------------------------------------");
     }
   } finally {
     // Always release the table's resources, even when the lookup fails.
     table.close();
   }
 }
    /**
     * Adds up the int-encoded values of all cells in the row and emits the row key, as text,
     * together with that sum.
     *
     * @param rowKey the row key, emitted as the output key
     * @param result the cells of the row, each holding an int-encoded value
     * @param context the mapper context the sum is written to
     * @throws IOException if writing the record fails
     * @throws InterruptedException if writing the record is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
        throws IOException, InterruptedException {
      long frequencySum = 0;
      for (KeyValue cell : result.list()) {
        frequencySum += Bytes.toInt(cell.getValue());
      }

      context.write(new Text(Bytes.toString(rowKey.get())), new LongWritable(frequencySum));
    }
Esempio n. 9
0
  /**
   * Scans an HBase table and prints the row key and value of every cell to standard output.
   * Scanning stops once more than 1000 rows have been printed.
   *
   * @param tableName name of the table to scan
   * @throws IOException if the table cannot be opened, scanned or closed
   */
  public static void getResultScann(String tableName) throws IOException {
    Scan scan = new Scan();
    ResultScanner rs = null;
    HTableInterface table = conn.getTable(tableName);
    try {
      rs = table.getScanner(scan);
      int count = 0;
      for (Result r : rs) {
        count++;
        for (KeyValue kv : r.list()) {
          System.out.println("row:" + Bytes.toString(kv.getRow()));
          System.out.println("value:" + Bytes.toString(kv.getValue()));
        }
        if (count > 1000) {
          return;
        }
      }
    } finally {
      try {
        // rs is still null when getScanner itself failed; closing it blindly would throw a
        // NullPointerException that masks the original error.
        if (rs != null) {
          rs.close();
        }
      } finally {
        table.close();
      }
    }
  }
Esempio n. 10
0
 /**
  * Looks up a single row by its row key and prints every cell of it to standard output.
  * I/O failures are reported on standard error and otherwise ignored.
  *
  * @param tableName name of the table to query
  * @param rowKey key of the row to fetch
  */
 public static void searchByRowKey(String tableName, String rowKey) {
   HTable table = null;
   try {
     table = new HTable(conf, Bytes.toBytes(tableName));
     Get g = new Get(Bytes.toBytes(rowKey));
     Result rs = table.get(g);
     // Result.list() yields null for an empty result, so guard before iterating.
     if (!rs.isEmpty()) {
       for (KeyValue kv : rs.list()) {
         System.out.println("family:" + Bytes.toString(kv.getFamily()));
         System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
         System.out.println("value:" + Bytes.toString(kv.getValue()));
         System.out.println("Timestamp:" + kv.getTimestamp());
         System.out.println("-------------------------------------------");
       }
     }
   } catch (IOException e) {
     e.printStackTrace();
   } finally {
     // Close the table instead of suppressing the resource-leak warning.
     if (table != null) {
       try {
         table.close();
       } catch (IOException e) {
         e.printStackTrace();
       }
     }
   }
 }
Esempio n. 11
0
 /**
  * Logs the content of a result: first one map of qualifier to value across all cells, then one
  * map per column family holding the family name under the key {@code "family"} plus its
  * qualifier/value pairs.
  *
  * @param result the result to log; {@code null} and empty results are ignored
  */
 private void printResult(Result result) {
   // list() and getNoVersionMap() cannot be iterated for an empty Result, so bail out early.
   if (result == null || result.isEmpty()) {
     return;
   }

   Map<String, String> map = new HashMap<String, String>();
   for (KeyValue kv : result.list()) {
     map.put(Bytes.toString(kv.getQualifier()), Bytes.toString(kv.getValue()));
   }
   logger.info(map.toString());

   NavigableMap<byte[], NavigableMap<byte[], byte[]>> map2 = result.getNoVersionMap();
   for (Map.Entry<byte[], NavigableMap<byte[], byte[]>> familyEntry : map2.entrySet()) {
     Map<String, String> mm = new HashMap<String, String>();
     mm.put("family", Bytes.toString(familyEntry.getKey()));
     for (Map.Entry<byte[], byte[]> columnEntry : familyEntry.getValue().entrySet()) {
       mm.put(Bytes.toString(columnEntry.getKey()), Bytes.toString(columnEntry.getValue()));
     }
     logger.info(mm.toString());
   }
 }
Esempio n. 12
0
  /**
   * Looks up a single row by its row key, keeping only the cells whose qualifier equals the
   * given name, and prints every returned cell to standard output.
   *
   * @param tableName name of the table to query
   * @param rowKey key of the row to fetch
   * @param qualifierName qualifier the returned cells must match exactly
   * @return the fetched row; nothing is printed when it is empty
   * @throws IOException if the table cannot be opened, read or closed
   */
  public static Result getResult(String tableName, String rowKey, String qualifierName)
      throws IOException {

    Get get = new Get(Bytes.toBytes(rowKey));
    QualifierFilter filter =
        new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(qualifierName)));
    get.setFilter(filter);
    HTable table = new HTable(conf, Bytes.toBytes(tableName)); // obtain the table
    try {
      Result result = table.get(get);
      // Result.list() yields null for an empty result, so guard before iterating.
      if (!result.isEmpty()) {
        for (KeyValue kv : result.list()) {
          System.out.println("family:" + Bytes.toString(kv.getFamily()));
          System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
          System.out.println("value:" + Bytes.toString(kv.getValue()));
          System.out.println("Timestamp:" + kv.getTimestamp());
          System.out.println("-------------------------------------------");
        }
      }
      return result;
    } finally {
      // Close the table instead of suppressing the resource-leak warning.
      table.close();
    }
  }
Esempio n. 13
0
 /**
  * Looks up to five versions of one column of a single row and prints every returned cell to
  * standard output.
  *
  * @param tableName name of the table to query
  * @param rowKey key of the row to fetch
  * @param familyName column family of the requested column
  * @param columnName qualifier of the requested column
  * @throws IOException if the table cannot be opened, read or closed
  */
 public static void getResultByVersion(
     String tableName, String rowKey, String familyName, String columnName) throws IOException {
   HTable table = new HTable(conf, Bytes.toBytes(tableName));
   try {
     Get get = new Get(Bytes.toBytes(rowKey));
     get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
     get.setMaxVersions(5);
     Result result = table.get(get);
     if (result.isEmpty()) {
       // Result.list() yields null for an empty result; there is nothing to print.
       return;
     }
     for (KeyValue kv : result.list()) {
       System.out.println("family:" + Bytes.toString(kv.getFamily()));
       System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
       System.out.println("value:" + Bytes.toString(kv.getValue()));
       System.out.println("Timestamp:" + kv.getTimestamp());
       System.out.println("-------------------------------------------");
     }
   } finally {
     // Always release the table's resources, even when the lookup fails.
     table.close();
   }
 }
Esempio n. 14
0
 /**
  * Scans an entire HBase table and prints every cell of every row to standard output.
  *
  * @param tableName name of the table to scan
  * @throws IOException if the table cannot be opened, scanned or closed
  */
 public static void getResultScann(String tableName) throws IOException {
   Scan scan = new Scan();
   ResultScanner rs = null;
   HTable table = new HTable(conf, Bytes.toBytes(tableName));
   try {
     rs = table.getScanner(scan);
     for (Result r : rs) {
       for (KeyValue kv : r.list()) {
         System.out.println("row:" + Bytes.toString(kv.getRow()));
         System.out.println("family:" + Bytes.toString(kv.getFamily()));
         System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
         System.out.println("value:" + Bytes.toString(kv.getValue()));
         System.out.println("timestamp:" + kv.getTimestamp());
         System.out.println("-------------------------------------------");
       }
     }
   } finally {
     try {
       // rs is still null when getScanner itself failed; closing it blindly would throw a
       // NullPointerException that masks the original error.
       if (rs != null) {
         rs.close();
       }
     } finally {
       table.close();
     }
   }
 }
Esempio n. 15
0
  /**
   * Looks up one column of a single row, prints every returned cell to standard output and
   * returns the column's value.
   *
   * @param tableName name of the table to query
   * @param rowKey key of the row to fetch
   * @param familyName column family of the requested column
   * @param columnName qualifier of the requested column
   * @return the value of the last returned cell whose qualifier equals {@code columnName}, or
   *     {@code null} when no such cell exists
   * @throws IOException if the table cannot be opened, read or closed
   */
  public static byte[] getResultByColumn(
      String tableName, String rowKey, String familyName, String columnName) throws IOException {
    System.out.println("getResultByColumn------------------");

    HTable table = new HTable(conf, Bytes.toBytes(tableName));
    try {
      Get get = new Get(Bytes.toBytes(rowKey));
      // Restrict the lookup to the column identified by family and qualifier.
      get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
      Result result = table.get(get);
      byte[] value = null;
      // Result.list() yields null for an empty result, so guard before iterating.
      if (!result.isEmpty()) {
        for (KeyValue kv : result.list()) {
          System.out.println("family:" + Bytes.toString(kv.getFamily()));
          System.out.println("qualifier:" + Bytes.toString(kv.getQualifier()));
          System.out.println("value:" + Bytes.toString(kv.getValue()));
          System.out.println("Timestamp:" + kv.getTimestamp());
          System.out.println("-------------------------------------------");
          if (Bytes.toString(kv.getQualifier()).equals(columnName)) {
            value = kv.getValue();
          }
        }
      }

      return value;
    } finally {
      // Close the table instead of suppressing the resource-leak warning.
      table.close();
    }
  }
Esempio n. 16
0
  /**
   * Reduces a raw result to the cells that {@code state.tsoclient.validRead} accepts for the
   * given start timestamp and that the delete tracker does not report as deleted.
   *
   * <p>Cells are processed in the order returned by {@code result.list()}. Once a valid version
   * of a family/qualifier pair has been read, later cells of the same pair are skipped. When the
   * number of invalid versions seen for a column reaches {@code localVersions}, additional gets
   * over progressively older time ranges are issued until a valid version is found or no more
   * versions come back. Several statistics fields of the enclosing class ({@code versionsAvg},
   * {@code extraVersionsAvg}, {@code extraGetsPerformed}, {@code elementsGotten},
   * {@code elementsRead}) are updated along the way.
   *
   * @param state transaction state whose {@code tsoclient} decides whether a timestamp is
   *     readable
   * @param result the raw result to filter; may be {@code null}
   * @param startTimestamp start timestamp passed to every {@code validRead} check
   * @param localVersions number of invalid versions of a column that triggers fetching more
   *     versions; also the max-versions setting of those extra gets
   * @return {@code null} if {@code result} is {@code null} or nothing survives filtering; the
   *     unchanged {@code result} if it holds no cells; otherwise a new result holding the
   *     surviving cells
   * @throws IOException declared by the signature; NOTE(review): presumably raised by the extra
   *     gets or the validity checks — confirm
   */
  private Result filter(
      TransactionState state, Result result, long startTimestamp, int localVersions)
      throws IOException {
    if (result == null) {
      return null;
    }
    List<KeyValue> kvs = result.list();
    if (kvs == null) {
      return result;
    }
    // family -> qualifier -> number of invalid versions seen so far
    Map<ByteArray, Map<ByteArray, Integer>> occurrences =
        new HashMap<TransactionalTable.ByteArray, Map<ByteArray, Integer>>();
    // family -> qualifier -> smallest timestamp among the invalid versions seen so far
    Map<ByteArray, Map<ByteArray, Long>> minTimestamp =
        new HashMap<TransactionalTable.ByteArray, Map<ByteArray, Long>>();
    // valid cells that addDeleted() did not consume
    List<KeyValue> nonDeletes = new ArrayList<KeyValue>();
    // final output: members of nonDeletes that are not reported as deleted
    List<KeyValue> filtered = new ArrayList<KeyValue>();
    // family -> qualifiers for which a valid version has already been read
    Map<ByteArray, Set<ByteArray>> read = new HashMap<ByteArray, Set<ByteArray>>();
    DeleteTracker tracker = new DeleteTracker();
    for (KeyValue kv : kvs) {
      ByteArray family = new ByteArray(kv.getFamily());
      ByteArray qualifier = new ByteArray(kv.getQualifier());
      Set<ByteArray> readQualifiers = read.get(family);
      if (readQualifiers == null) {
        readQualifiers = new HashSet<TransactionalTable.ByteArray>();
        read.put(family, readQualifiers);
      } else if (readQualifiers.contains(qualifier)) continue; // column already resolved
      //         RowKey rk = new RowKey(kv.getRow(), getTableName());
      if (state.tsoclient.validRead(kv.getTimestamp(), startTimestamp)) {
        if (!tracker.addDeleted(kv)) nonDeletes.add(kv);
        // NOTE: the braces below form a plain block, not the body of the if above; it runs for
        // every valid read.
        {
          // Read valid value
          readQualifiers.add(qualifier);

          //                statistics
          //               elementsGotten++;
          Map<ByteArray, Integer> occurrencesCols = occurrences.get(family);
          Integer times = null;
          if (occurrencesCols != null) {
            times = occurrencesCols.get(qualifier);
          }
          if (times != null) {
            //                  elementsRead += times;
            versionsAvg = times > versionsAvg ? times : alpha * versionsAvg + (1 - alpha) * times;
            //                  extraVersionsAvg = times > extraVersionsAvg ? times : alpha *
            // extraVersionsAvg + (1 - alpha) * times;
          } else {
            //                  elementsRead++;
            versionsAvg = alpha * versionsAvg + (1 - alpha);
            //                  extraVersionsAvg = alpha * extraVersionsAvg + (1 - alpha);
          }
        }
      } else {
        // Invalid version: record it and, if needed, go looking for older versions.
        Map<ByteArray, Integer> occurrencesCols = occurrences.get(family);
        Map<ByteArray, Long> minTimestampCols = minTimestamp.get(family);
        if (occurrencesCols == null) {
          occurrencesCols = new HashMap<TransactionalTable.ByteArray, Integer>();
          minTimestampCols = new HashMap<TransactionalTable.ByteArray, Long>();
          occurrences.put(family, occurrencesCols);
          minTimestamp.put(family, minTimestampCols);
        }
        Integer times = occurrencesCols.get(qualifier);
        Long timestamp = minTimestampCols.get(qualifier);
        if (times == null) {
          times = 0;
          timestamp = kv.getTimestamp();
        }
        times++;
        timestamp = Math.min(timestamp, kv.getTimestamp());
        if (times == localVersions) {
          // We need to fetch more versions
          Get get = new Get(kv.getRow());
          get.addColumn(kv.getFamily(), kv.getQualifier());
          get.setMaxVersions(localVersions);
          Result r;
          GOTRESULT:
          do {
            extraGetsPerformed++;
            // Only versions older than the oldest one seen so far are requested.
            get.setTimeRange(0, timestamp);
            r = this.get(get);
            List<KeyValue> list = r.list();
            if (list == null) break;
            for (KeyValue t : list) {
              times++;
              timestamp = Math.min(timestamp, t.getTimestamp());
              //                     rk = new RowKey(kv.getRow(), getTableName());
              if (state.tsoclient.validRead(t.getTimestamp(), startTimestamp)) {
                if (!tracker.addDeleted(t)) nonDeletes.add(t);
                readQualifiers.add(qualifier);
                elementsGotten++;
                elementsRead += times;
                versionsAvg =
                    times > versionsAvg ? times : alpha * versionsAvg + (1 - alpha) * times;
                extraVersionsAvg =
                    times > extraVersionsAvg
                        ? times
                        : alpha * extraVersionsAvg + (1 - alpha) * times;
                // A valid version was found: leave both the for loop and the do/while.
                break GOTRESULT;
              }
            }
            // A full batch suggests more versions may exist; a short batch ends the search.
          } while (r.size() == localVersions);
        } else {
          occurrencesCols.put(qualifier, times);
          minTimestampCols.put(qualifier, timestamp);
        }
      }
    }
    // Drop every collected cell that the tracker reports as deleted.
    for (KeyValue kv : nonDeletes) {
      if (!tracker.isDeleted(kv)) {
        filtered.add(kv);
      }
    }
    //      cacheVersions = (int) versionsAvg;
    if (filtered.isEmpty()) {
      return null;
    }
    return new Result(filtered);
  }
Esempio n. 17
0
  /**
   * Scans the input and output families of the table and checks, row by row, that the second
   * cell value is the byte-wise reverse of the first one.
   *
   * @param table Table to scan.
   * @throws IOException if scanning fails, or if debug logging is enabled and a row holds more
   *     than two cells
   * @throws NullPointerException if we failed to find a cell value
   */
  private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> rows = scanner.iterator();
      assertTrue(rows.hasNext());
      while (rows.hasNext()) {
        Result r = rows.next();
        if (LOG.isDebugEnabled() && r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " + r.size());
        }

        // Pick up the values of the first two cells of the row, in order.
        byte[] firstValue = null;
        byte[] secondValue = null;
        int seen = 0;
        for (KeyValue kv : r.list()) {
          if (seen == 0) {
            firstValue = kv.getValue();
          } else {
            secondValue = kv.getValue();
          }
          seen++;
          if (seen == 2) {
            break;
          }
        }

        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) + ": first value is null");
        }
        String first = Bytes.toString(firstValue);

        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) + ": second value is null");
        }
        // Reverse the second value so it can be compared with the first one directly.
        int length = secondValue.length;
        byte[] secondReversed = new byte[length];
        for (int i = 0; i < length; i++) {
          secondReversed[i] = secondValue[length - 1 - i];
        }
        String second = Bytes.toString(secondReversed);

        if (!first.equals(second)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug(
                "second key is not the reverse of first. row="
                    + Bytes.toStringBinary(r.getRow())
                    + ", first value="
                    + first
                    + ", second value="
                    + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }
    /**
     * Reducer for Berlin SPARQL Benchmark query 8.
     *
     * <p>Collects all cells transmitted for a review, looks up the reviewer referenced by the
     * {@code rev_reviewer} cell to resolve the {@code foaf_name} cell (TP-07), and writes the
     * collected cells as tab-separated triples, one per line. Reviews without a reviewer cell or
     * without a reviewer name produce no output.
     *
     * @param key composite key of the review
     * @param values the cell arrays transmitted for this key
     * @param context the reducer context the formatted triples are written to
     * @throws IOException if the reviewer lookup or the write fails, or if a cell cannot be
     *     converted to a triple
     * @throws InterruptedException if the write is interrupted
     */
    public void reduce(
        CompositeKeyWritable key, Iterable<KeyValueArrayWritable> values, Context context)
        throws IOException, InterruptedException {
      /* BERLIN SPARQL BENCHMARK QUERY 8
         ----------------------------------------
      SELECT ?title ?text ?reviewDate ?reviewer ?reviewerName ?rating1 ?rating2 ?rating3 ?rating4
      WHERE {
        [TP-01] ?review bsbm:reviewFor %ProductXYZ% .
        [TP-02] ?review dc:title ?title .
        [TP-03] ?review rev:text ?text .
        [TP-04] FILTER langMatches( lang(?text), "EN" )
        [TP-05] ?review bsbm:reviewDate ?reviewDate .
        [TP-06] ?review rev:reviewer ?reviewer .
        [TP-07] ?reviewer foaf:name ?reviewerName .
        [TP-08] OPTIONAL { ?review bsbm:rating1 ?rating1 . }
        [TP-09] OPTIONAL { ?review bsbm:rating2 ?rating2 . }
        [TP-10] OPTIONAL { ?review bsbm:rating3 ?rating3 . }
        [TP-11] OPTIONAL { ?review bsbm:rating4 ?rating4 . }
      }
      ORDER BY DESC(?reviewDate)
      LIMIT 20
         --------------------------------------- */

      List<KeyValue> finalKeyValues = new ArrayList<KeyValue>();

      // Collect every transmitted cell and remember the one that references the reviewer.
      KeyValue kv_reviewer = null;
      for (KeyValueArrayWritable array : values) {
        for (KeyValue kv : (KeyValue[]) array.toArray()) {
          if (Arrays.equals(kv.getValue(), "rev_reviewer".getBytes())) {
            kv_reviewer = kv;
          }
          finalKeyValues.add(kv);
        }
      }
      if (kv_reviewer == null) {
        // Without a reviewer cell TP-06/TP-07 cannot be satisfied.
        return;
      }

      // TP-07: the qualifier of the reviewer cell is the row key of the reviewer.
      Result reviewerResult = table.get(new Get(kv_reviewer.getQualifier()));
      if (reviewerResult == null || reviewerResult.isEmpty()) {
        // Result.list() yields null for an empty result: the reviewer row does not exist.
        return;
      }
      boolean foundReviewerName = false;
      for (KeyValue kv : reviewerResult.list()) {
        if (Arrays.equals(kv.getQualifier(), "foaf_name".getBytes())) {
          finalKeyValues.add(kv);
          foundReviewerName = true;
          break;
        }
      }
      if (!foundReviewerName) {
        return;
      }

      // Format and output the values
      StringBuilder builder = new StringBuilder();
      builder.append("\n");
      for (KeyValue kv : finalKeyValues) {
        String[] triple;
        try {
          triple = SharedServices.keyValueToTripleString(kv);
        } catch (ClassNotFoundException e) {
          // Fail with the real cause instead of the NullPointerException a null triple would
          // trigger on the next line.
          throw new IOException("Unable to convert a cell to a triple", e);
        }
        builder.append(triple[0] + "\t" + triple[1] + "\t" + triple[2] + "\n");
      }

      context.write(new Text(key.getValue()), new Text(builder.toString()));
    }
    /**
     * Mapper for Berlin SPARQL Benchmark query 8.
     *
     * <p>Only rows whose {@code ProductXYZ} column holds {@code bsbm-voc_reviewFor} are
     * considered. From such a row the cells matching the required patterns (TP-01 to TP-06) and
     * the optional rating patterns (TP-08 to TP-11) are collected. The row is dropped if its
     * language cell is not {@code @en} or if fewer than six required cells matched; otherwise the
     * collected cells are emitted under the row key.
     *
     * @param row the row key
     * @param value the cells of the row
     * @param context the mapper context records are written to
     * @throws InterruptedException if writing the record is interrupted
     * @throws IOException if writing the record fails
     */
    public void map(ImmutableBytesWritable row, Result value, Context context)
        throws InterruptedException, IOException {
      /* BERLIN SPARQL BENCHMARK QUERY 8
        ----------------------------------------
      SELECT ?title ?text ?reviewDate ?reviewer ?reviewerName ?rating1 ?rating2 ?rating3 ?rating4
      WHERE {
        [TP-01] ?review bsbm:reviewFor %ProductXYZ% .
        [TP-02] ?review dc:title ?title .
        [TP-03] ?review rev:text ?text .
        [TP-04] FILTER langMatches( lang(?text), "EN" )
        [TP-05] ?review bsbm:reviewDate ?reviewDate .
        [TP-06] ?review rev:reviewer ?reviewer .
        [TP-07] ?reviewer foaf:name ?reviewerName .
        [TP-08] OPTIONAL { ?review bsbm:rating1 ?rating1 . }
        [TP-09] OPTIONAL { ?review bsbm:rating2 ?rating2 . }
        [TP-10] OPTIONAL { ?review bsbm:rating3 ?rating3 . }
        [TP-11] OPTIONAL { ?review bsbm:rating4 ?rating4 . }
      }
      ORDER BY DESC(?reviewDate)
      LIMIT 20
        ---------------------------------------*/
      String rowKey = new String(value.getRow());

      ArrayList<KeyValue> keyValuesToTransmit = new ArrayList<KeyValue>();
      List<KeyValue> reviewRow = value.list();
      byte[] predicate = value.getValue(SharedServices.CF_AS_BYTES, ProductXYZ.getBytes());
      if (!Arrays.equals(predicate, "bsbm-voc_reviewFor".getBytes())) {
        return;
      }

      int requiredColumns = 0;
      for (KeyValue kv : reviewRow) {
        byte[] cellValue = kv.getValue();
        byte[] qualifier = kv.getQualifier();

        // TP-01, TP-02, TP-03 take precedence over the language filter.
        boolean required =
            Arrays.equals(cellValue, "bsbm-voc_reviewFor".getBytes())
                || Arrays.equals(qualifier, "dc_title".getBytes())
                || Arrays.equals(qualifier, "rev_text".getBytes());

        // TP-04: a language cell must be English, otherwise the whole row is rejected.
        if (!required && Arrays.equals(cellValue, "rdfs_lang".getBytes())) {
          if (!Arrays.equals(qualifier, "@en".getBytes())) {
            return;
          }
          required = true;
        }

        // TP-05, TP-06
        if (!required) {
          required =
              Arrays.equals(qualifier, "bsbm-voc_reviewDate".getBytes())
                  || Arrays.equals(cellValue, "rev_reviewer".getBytes());
        }

        if (required) {
          keyValuesToTransmit.add(kv);
          requiredColumns++;
          continue;
        }

        // OPTIONAL TP-08, TP-09, TP-10, TP-11: transmitted but not counted.
        boolean optionalRating =
            Arrays.equals(qualifier, "bsbm-voc_rating1".getBytes())
                || Arrays.equals(qualifier, "bsbm-voc_rating2".getBytes())
                || Arrays.equals(qualifier, "bsbm-voc_rating3".getBytes())
                || Arrays.equals(qualifier, "bsbm-voc_rating4".getBytes());
        if (optionalRating) {
          keyValuesToTransmit.add(kv);
        }
      }
      if (requiredColumns < 6) {
        return;
      }
      context.write(
          new CompositeKeyWritable(rowKey, 1),
          new KeyValueArrayWritable(SharedServices.listToArray(keyValuesToTransmit)));
    }
Esempio n. 20
0
  /**
   * Estimates how many rows the regions between two row keys hold and how many bytes a row
   * takes. The more store files there are the more this will be off, and rows that only exist in
   * the memstore are not taken into account.
   *
   * <p>For every region in the range a small sample of rows is scanned and sized; the on-HDFS
   * size of the regions divided by the sampled bytes per row gives the row count, doubled when
   * any column family is compressed.
   *
   * <p>The values computed here should be cached so that in high qps workloads the nn is not
   * overwhelmed. Could be done in load(); Synchronized to make sure that only one thread at a
   * time is using the htable.
   *
   * @param startRowKey First row key in the range
   * @param endRowKey Last row key in the range
   * @return The estimated number of rows in the regions between the row keys (first) and the
   *     estimated row size in bytes (second); (0, 0) when no rows were sampled and (-1, -1) when
   *     the sizes could not be determined.
   */
  public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, byte[] endRowKey) {
    Preconditions.checkNotNull(startRowKey);
    Preconditions.checkNotNull(endRowKey);

    // Fixed per-cell overhead: four 4-byte length fields (row key, family, qualifier, value)
    // plus 10 bytes for hfile index, checksums, metadata, etc.
    final int perCellOverhead = 4 + 4 + 4 + 4 + 10;

    long sampledBytes = 0;
    long sampledRows = 0;
    long totalHdfsBytes = 0;
    boolean anyFamilyCompressed = false;

    try {
      // Find out whether anything is compressed; if so a compression factor is applied below.
      if (columnFamilies_ == null) {
        columnFamilies_ = hTable_.getTableDescriptor().getColumnFamilies();
      }
      Preconditions.checkNotNull(columnFamilies_);
      for (HColumnDescriptor family : columnFamilies_) {
        if (family.getCompression() != Compression.Algorithm.NONE) {
          anyFamilyCompressed = true;
        }
      }

      // Sample every region in the range.
      for (HRegionLocation location : getRegionsInRange(hTable_, startRowKey, endRowKey)) {
        HRegionInfo info = location.getRegionInfo();
        long regionHdfsBytes = getHdfsSize(info);
        long regionSampledBytes = 0;
        long regionSampledRows = 0;

        Scan s = new Scan(info.getStartKey());
        // Get a small sample of rows
        s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE);
        // Try and get every version so the row's size can be used to estimate.
        s.setMaxVersions(Short.MAX_VALUE);
        // Don't cache the blocks as we don't think these are necessarily important blocks.
        s.setCacheBlocks(false);
        // Try and get deletes too so their size can be counted.
        s.setRaw(true);
        ResultScanner rs = hTable_.getScanner(s);
        try {
          // Fetch at most ROW_COUNT_ESTIMATE_BATCH_SIZE rows as a representative sample.
          Result r;
          int fetched = 0;
          while (fetched < ROW_COUNT_ESTIMATE_BATCH_SIZE && (r = rs.next()) != null) {
            fetched++;
            regionSampledRows += 1;
            for (KeyValue kv : r.list()) {
              regionSampledBytes +=
                  kv.getRowLength()
                      + kv.getFamilyLength()
                      + kv.getQualifierLength()
                      + kv.getValueLength()
                      + perCellOverhead;
            }
          }
          // Fold the region into the totals in one shot, so that an error between reading the
          // hdfs size and sampling the rows leaves the totals consistent.
          totalHdfsBytes += regionHdfsBytes;
          sampledRows += regionSampledRows;
          sampledBytes += regionSampledBytes;
        } finally {
          rs.close();
        }
      }
    } catch (IOException ioe) {
      // Log the error but otherwise ignore it, as this is just an estimate.
      // TODO: Put this into the per query log.
      LOG.error("Error computing HBase row count estimate", ioe);
    }

    // If there are no rows then no need to estimate.
    if (sampledRows == 0) {
      return new Pair<Long, Long>(0L, 0L);
    }

    // if something went wrong then set a signal value.
    if (sampledBytes <= 0 || totalHdfsBytes <= 0) {
      return new Pair<Long, Long>(-1L, -1L);
    }

    // estimate the number of rows.
    double bytesPerRow = sampledBytes / (double) sampledRows;
    int compressionFactor = anyFamilyCompressed ? 2 : 1;
    long estimatedRowCount = (long) (compressionFactor * (totalHdfsBytes / bytesPerRow));

    return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow);
  }