Beispiel #1
0
  /**
   * Builds the hbase scanner for this reader.
   *
   * <p>Starts from the scanner supplied by the base class and, when a query is present, narrows it
   * with the query's row-key range, time range and — when a word or a unary operator is given — a
   * single-column value filter.
   *
   * @return A configured hbase scanner.
   * @throws IOException Error accessing hbase.
   */
  @Override
  protected Scan getScanner() throws IOException {
    Scan scan = super.getScanner();
    if (this.query == null) {
      return scan;
    }

    if (this.query.getStartKey() != null) {
      scan.setStartRow(this.query.getStartKey());
    }
    if (this.query.getEndKey() != null) {
      // Pad the stop row so every row sharing the end-key prefix stays inside the range.
      scan.setStopRow(padWithMaxUnicode(this.query.getEndKey()));
    }
    if (this.query.getStartDate() != null && this.query.getEndDate() != null) {
      scan.setTimeRange(this.query.getStartDate().getTime(), this.query.getEndDate().getTime());
    }

    // A value filter is only applicable when there is a word to match, or when the
    // operator (IsNull / IsNotNull) needs no word at all.
    if (this.query.getWord() == null && !this.query.getOperator().isUnary()) {
      return scan;
    }

    WritableByteArrayComparable valueComparator;
    switch (this.query.getOperator()) {
      case Contains:
        valueComparator = new SubstringComparator(this.query.getWord());
        break;
      case StartsWith:
        valueComparator = new RegexStringComparator(String.format("^%s.*", this.query.getWord()));
        break;
      case EndsWith:
        valueComparator = new RegexStringComparator(String.format(".*%s$", this.query.getWord()));
        break;
      case Less:
      case LessOrEqual:
      case Equal:
      case NotEqual:
      case GreaterOrEqual:
      case Greater:
        // Relational operators compare against the raw bytes of the word.
        valueComparator = new BinaryComparator(this.query.getWordAsByteArray());
        break;
      case IsNull:
      case IsNotNull:
        // Unary operators compare against the empty value.
        valueComparator = new BinaryComparator(EMPTY_BYTES_ARRAY);
        break;
      default:
        throw new IllegalArgumentException(
            String.format(
                "The specified operator type '%s' is not supported.",
                this.query.getOperator()));
    }

    scan.setFilter(
        new SingleColumnValueFilter(
            Bytes.toBytesBinary(this.query.getFamily()),
            Bytes.toBytesBinary(this.query.getColumn()),
            this.query.getOperator().toFilter(),
            valueComparator));
    return scan;
  }
Beispiel #2
0
  @Test
  public void testStartStopRow() throws Exception {
    final TableName TABLENAME1 = TableName.valueOf("testStartStopRow1");
    final TableName TABLENAME2 = TableName.valueOf("testStartStopRow2");
    final byte[] FAMILY = Bytes.toBytes("family");
    final byte[] COLUMN1 = Bytes.toBytes("c1");
    final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
    final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
    final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");

    Table t1 = TEST_UTIL.createTable(TABLENAME1, FAMILY);
    Table t2 = TEST_UTIL.createTable(TABLENAME2, FAMILY);

    // Populate the source table with three adjacent rows.
    for (byte[] rowKey : new byte[][] {ROW0, ROW1, ROW2}) {
      Put put = new Put(rowKey);
      put.addColumn(FAMILY, COLUMN1, COLUMN1);
      t1.put(put);
    }

    // Copy only the half-open key range [ROW1, ROW2) into the second table.
    CopyTable copy = new CopyTable();
    int exitCode =
        ToolRunner.run(
            new Configuration(TEST_UTIL.getConfiguration()),
            copy,
            new String[] {
              "--new.name=" + TABLENAME2,
              "--startrow=\\x01row1",
              "--stoprow=\\x01row2",
              TABLENAME1.getNameAsString()
            });
    assertEquals(0, exitCode);

    // verify the data was copied into table 2
    // row1 exist, row0, row2 do not exist
    Result copied = t2.get(new Get(ROW1));
    assertEquals(1, copied.size());
    assertTrue(CellUtil.matchingQualifier(copied.rawCells()[0], COLUMN1));

    assertEquals(0, t2.get(new Get(ROW0)).size());
    assertEquals(0, t2.get(new Get(ROW2)).size());

    t1.close();
    t2.close();
    TEST_UTIL.deleteTable(TABLENAME1);
    TEST_UTIL.deleteTable(TABLENAME2);
  }
Beispiel #3
0
 /**
  * Checks if the row is valid according to the query. If a query is done on a specific column and
  * the row does not contain this column the row is considered invalid.
  *
  * @param row The row to check.
  * @return True if the query doesn't have a specific column or a row contains the column used with
  *     the query or False otherwise.
  */
 @Override
 protected boolean isValidRow(Result row) {
   // Snapshot the field once so the null checks and the reads agree.
   Query activeQuery = this.query;
   if (activeQuery == null || activeQuery.getWord() == null) {
     // No column-specific query: every row is acceptable.
     return true;
   }
   return row.containsColumn(
       Bytes.toBytesBinary(activeQuery.getFamily()),
       Bytes.toBytesBinary(activeQuery.getColumn()));
 }
    //        @Override
    //        protected void map(ImmutableBytesWritable key, Text value, Context context) throws
    // IOException, InterruptedException {
    //            Text combinedKeyValue = new Text();
    //            //the structure is key###value
    //            combinedKeyValue.set(Bytes.toString(key.get()) + "###" + value.toString());
    //            context.write(one, combinedKeyValue);
    //        }
    /**
     * Emits one {@code key###value} record per cell of the input row, all under the shared
     * {@code one} key so the reducer sees every record together.
     *
     * @param key The row key of the scanned hbase row.
     * @param columns The cells of the row.
     * @param context The mapreduce context records are written to.
     * @throws IOException Error writing to the context.
     * @throws InterruptedException If the task is interrupted while writing.
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result columns, Context context)
        throws IOException, InterruptedException {
      // Reused across cells to avoid a per-cell allocation.
      Text combinedKeyValue = new Text();
      for (KeyValue kv : columns.list()) {
        String gmmString = Bytes.toStringBinary(kv.getValue());
        // the structure is key###value
        combinedKeyValue.set(Bytes.toString(key.get()) + "###" + gmmString);
        // BUGFIX: the previous version wrapped this loop in catch (Exception) with only
        // printStackTrace(), which swallowed IOException/InterruptedException from
        // context.write and silently dropped records. Both are declared by this method,
        // so let them propagate and fail the task. The debug round-trip deserialization
        // ("just for checking") and its System.out noise were removed from the hot path.
        context.write(one, combinedKeyValue);
      }
    }
    /**
     * Collects every parseable {@code key###value} record for this reduce key and emits all
     * unordered pairs of them ({@code keyA:keyB -> valueA###valueB}).
     *
     * @param key The grouping key (all records share it).
     * @param values The {@code key###value} records produced by the mapper.
     * @param context The mapreduce context pairs are written to.
     * @throws IOException Error writing to the context.
     * @throws InterruptedException If the task is interrupted while writing.
     */
    @Override
    protected void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      // BUGFIX: keyList/valueList are instance fields. Without clearing them here, entries
      // accumulated for an earlier reduce key leak into this key's pair generation, producing
      // duplicate and cross-key pairs.
      keyList.clear();
      valueList.clear();

      for (Text value : values) {
        String kv = value.toString();
        // kv is of form key###value
        int indexSeparator = kv.indexOf("###");
        String keyOnly = kv.substring(0, indexSeparator);
        String gmmString = kv.substring(indexSeparator + 3);

        // Deserialize once as a sanity gate: only records whose payload decodes to a
        // MixtureModel are kept for pairing.
        byte[] gmmBinary = Bytes.toBytesBinary(gmmString);
        try {
          MixtureModel m = (MixtureModel) ObjectAndByte.byteArrayToObject(gmmBinary);
          System.out.println("Gmm in good state:" + keyOnly);
          keyList.add(keyOnly);
          valueList.add(gmmString);
        } catch (ClassNotFoundException ex) {
          ex.printStackTrace();
        } catch (ArrayStoreException ase) {
          System.out.println("array store exception:");
          System.out.println("gmm corrupted :" + keyOnly);
        }
      }

      // Emit every unordered pair of valid records.
      int numOfGmm = keyList.size();
      for (int i = 0; i < numOfGmm; i++) {
        for (int j = i + 1; j < numOfGmm; j++) {
          outKey.set(keyList.get(i) + ":" + keyList.get(j));
          outValue.set(valueList.get(i) + "###" + valueList.get(j));
          context.write(outKey, outValue);
        }
      }
    }
Beispiel #6
0
  /**
   * Verifies reducer-count estimation for mapreduce jobs: bytes-per-reducer estimation on a dfs
   * input, PARALLEL overriding the estimate, and the estimate falling back to 1 on non-dfs
   * (hbase) inputs.
   *
   * @throws Exception Any failure starting the mini clusters or compiling the plans.
   */
  @Test
  public void testReducerNumEstimation() throws Exception {
    // Skip the test for Tez. Tez use a different mechanism.
    // Equivalent test is in TestTezAutoParallelism
    Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
    // use the estimation
    Configuration conf = HBaseConfiguration.create(new Configuration());
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    // BUGFIX: teardown previously ran only when every assertion passed, leaking the mini
    // HBase/ZK clusters into the rest of the test run on any failure. The try/finally
    // guarantees cleanup.
    try {
      util.startMiniHBaseCluster(1, 1);

      String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
      PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
      PhysicalPlan pp = Util.buildPp(ps, query);
      MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");
      pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      JobControlCompiler jcc = new JobControlCompiler(pc, conf);
      JobControl jc = jcc.compile(mrPlan, "Test");
      Job job = jc.getWaitingJobs().get(0);
      long reducer =
          Math.min(
              (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
              10);

      Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

      // use the PARALLEL key word, it will override the estimated reducer number
      query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
      pp = Util.buildPp(ps, query);
      mrPlan = Util.buildMRPlan(pp, pc);

      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");
      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      jcc = new JobControlCompiler(pc, conf);
      jc = jcc.compile(mrPlan, "Test");
      job = jc.getWaitingJobs().get(0);

      Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

      final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
      util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

      // the estimation won't take effect when it apply to non-dfs or the files doesn't exist,
      // such as hbase
      query =
          "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
              + "b = group a by $0 ;"
              + "store b into 'output';";
      pp = Util.buildPp(ps, query);
      mrPlan = Util.buildMRPlan(pp, pc);

      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");

      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      jcc = new JobControlCompiler(pc, conf);
      jc = jcc.compile(mrPlan, "Test");
      job = jc.getWaitingJobs().get(0);

      Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

      util.deleteTable(Bytes.toBytesBinary("test_table"));
    } finally {
      // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
      // here instead.
      MiniHBaseCluster hbc = util.getHBaseCluster();
      if (hbc != null) {
        hbc.shutdown();
        hbc.join();
      }
      util.shutdownMiniZKCluster();
    }
  }