/**
 * Gets an hbase scanner, constraining it with the current query when one is set: the row
 * key range, the timestamp range, and a single column value filter derived from the
 * query's operator and word.
 *
 * @return An hbase scanner.
 * @throws IOException Error accessing hbase.
 */
@Override
protected Scan getScanner() throws IOException {
  Scan scan = super.getScanner();
  if (this.query == null) {
    return scan;
  }
  if (this.query.getStartKey() != null) {
    scan.setStartRow(this.query.getStartKey());
  }
  if (this.query.getEndKey() != null) {
    // The stop row is exclusive in hbase; presumably padWithMaxUnicode widens the end
    // key so rows sharing that prefix are still scanned.
    scan.setStopRow(padWithMaxUnicode(this.query.getEndKey()));
  }
  if (this.query.getStartDate() != null && this.query.getEndDate() != null) {
    scan.setTimeRange(this.query.getStartDate().getTime(), this.query.getEndDate().getTime());
  }
  // A column filter applies only when a search word exists or the operator is unary
  // (IsNull / IsNotNull need no word).
  if (this.query.getWord() == null && !this.query.getOperator().isUnary()) {
    return scan;
  }
  WritableByteArrayComparable valueComparator;
  switch (this.query.getOperator()) {
    case Contains:
      valueComparator = new SubstringComparator(this.query.getWord());
      break;
    case StartsWith:
      valueComparator = new RegexStringComparator(String.format("^%s.*", this.query.getWord()));
      break;
    case EndsWith:
      valueComparator = new RegexStringComparator(String.format(".*%s$", this.query.getWord()));
      break;
    case Less:
    case LessOrEqual:
    case Equal:
    case NotEqual:
    case GreaterOrEqual:
    case Greater:
      // Binary relational operators compare against the raw word bytes.
      valueComparator = new BinaryComparator(this.query.getWordAsByteArray());
      break;
    case IsNull:
    case IsNotNull:
      valueComparator = new BinaryComparator(EMPTY_BYTES_ARRAY);
      break;
    default:
      throw new IllegalArgumentException(
          String.format(
              "The specified operator type '%s' is not supported.", this.query.getOperator()));
  }
  scan.setFilter(
      new SingleColumnValueFilter(
          Bytes.toBytesBinary(this.query.getFamily()),
          Bytes.toBytesBinary(this.query.getColumn()),
          this.query.getOperator().toFilter(),
          valueComparator));
  return scan;
}
@Test public void testStartStopRow() throws Exception { final TableName TABLENAME1 = TableName.valueOf("testStartStopRow1"); final TableName TABLENAME2 = TableName.valueOf("testStartStopRow2"); final byte[] FAMILY = Bytes.toBytes("family"); final byte[] COLUMN1 = Bytes.toBytes("c1"); final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0"); final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1"); final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2"); Table t1 = TEST_UTIL.createTable(TABLENAME1, FAMILY); Table t2 = TEST_UTIL.createTable(TABLENAME2, FAMILY); // put rows into the first table Put p = new Put(ROW0); p.addColumn(FAMILY, COLUMN1, COLUMN1); t1.put(p); p = new Put(ROW1); p.addColumn(FAMILY, COLUMN1, COLUMN1); t1.put(p); p = new Put(ROW2); p.addColumn(FAMILY, COLUMN1, COLUMN1); t1.put(p); CopyTable copy = new CopyTable(); assertEquals( 0, ToolRunner.run( new Configuration(TEST_UTIL.getConfiguration()), copy, new String[] { "--new.name=" + TABLENAME2, "--startrow=\\x01row1", "--stoprow=\\x01row2", TABLENAME1.getNameAsString() })); // verify the data was copied into table 2 // row1 exist, row0, row2 do not exist Get g = new Get(ROW1); Result r = t2.get(g); assertEquals(1, r.size()); assertTrue(CellUtil.matchingQualifier(r.rawCells()[0], COLUMN1)); g = new Get(ROW0); r = t2.get(g); assertEquals(0, r.size()); g = new Get(ROW2); r = t2.get(g); assertEquals(0, r.size()); t1.close(); t2.close(); TEST_UTIL.deleteTable(TABLENAME1); TEST_UTIL.deleteTable(TABLENAME2); }
/**
 * Checks if the row is valid according to the query. If a query is done on a specific column and
 * the row does not contain this column the row is considered invalid.
 *
 * @param row The row to check.
 * @return True if the query doesn't have a specific column or a row contains the column used with
 *     the query or False otherwise.
 */
@Override
protected boolean isValidRow(Result row) {
  // Snapshot the field once so the null check and the use see the same reference.
  Query activeQuery = this.query;
  if (activeQuery == null || activeQuery.getWord() == null) {
    // No word-based filtering requested — every row qualifies.
    return true;
  }
  byte[] family = Bytes.toBytesBinary(activeQuery.getFamily());
  byte[] column = Bytes.toBytesBinary(activeQuery.getColumn());
  return row.containsColumn(family, column);
}
// @Override // protected void map(ImmutableBytesWritable key, Text value, Context context) throws // IOException, InterruptedException { // Text combinedKeyValue = new Text(); // //the structure is key###value // combinedKeyValue.set(Bytes.toString(key.get()) + "###" + value.toString()); // context.write(one, combinedKeyValue); // } @Override protected void map(ImmutableBytesWritable key, Result columns, Context context) throws IOException, InterruptedException { Text combinedKeyValue = new Text(); // the structure is key###value String value = null; try { for (KeyValue kv : columns.list()) { byte[] gmmData = kv.getValue(); String gmmString = Bytes.toStringBinary(gmmData); // /* just for checking that gmm is correctly constructed MixtureModel m = null; m = (MixtureModel) ObjectAndByte.byteArrayToObject(Bytes.toBytesBinary(gmmString)); System.out.println("m.size:" + m.size); // */ combinedKeyValue.set(Bytes.toString(key.get()) + "###" + gmmString); context.write(one, combinedKeyValue); // context.write(key, new Text(gmmString)); } } catch (Exception e) { e.printStackTrace(); } }
/**
 * Collects every well-formed serialized mixture model from the group, then emits all
 * unordered pairs (i, j) with i &lt; j as {@code "keyI:keyJ" -> "gmmI###gmmJ"}.
 *
 * <p>Relies on instance fields declared elsewhere in this class: {@code keyList} and
 * {@code valueList} (accumulators) and {@code outKey}/{@code outValue} (reused output
 * writables).
 * NOTE(review): keyList/valueList are never cleared in this method — if reduce runs for
 * more than one key, models accumulate across groups; confirm that is intended.
 *
 * @param key The grouping key produced by the mapper.
 * @param values Values of the form {@code rowKey###encodedGmm}.
 * @param context The mapreduce context used to emit the pairs.
 * @throws IOException If emitting to the context fails.
 * @throws InterruptedException If the task is interrupted while emitting.
 */
@Override
protected void reduce(IntWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  for (Text value : values) {
    String kv = value.toString();
    // kv is of form key###value
    int indexSeparator = kv.indexOf("###");
    String keyOnly = kv.substring(0, indexSeparator);
    String gmmString = kv.substring(indexSeparator + 3);
    /*
     * just for testing that casting is error free
     */
    byte[] gmmBinary = Bytes.toBytesBinary(gmmString);
    MixtureModel m = null;
    try {
      m = (MixtureModel) ObjectAndByte.byteArrayToObject(gmmBinary);
      // System.out.println("m.size:" + m.size);
      System.out.println("Gmm in good state:" + keyOnly);
      // Only models that deserialize cleanly participate in the pairing below.
      keyList.add(keyOnly);
      valueList.add(gmmString);
    } catch (ClassNotFoundException ex) {
      ex.printStackTrace();
    } catch (ArrayStoreException ase) {
      System.out.println("array store exception:");
      System.out.println("gmm corrupted :" + keyOnly);
    }
    /*
     * testing ends here
     */
  }
  // Emit every unordered pair of valid models exactly once (i < j).
  int numOfGmm = keyList.size();
  for (int i = 0; i < numOfGmm; i++) {
    for (int j = i + 1; j < numOfGmm; j++) {
      outKey.set(keyList.get(i) + ":" + keyList.get(j));
      outValue.set(valueList.get(i) + "###" + valueList.get(j));
      context.write(outKey, outValue);
    }
  }
}
/**
 * Verifies how JobControlCompiler determines the reducer count for MR jobs:
 * estimated from input size for dfs inputs, overridden by the PARALLEL keyword,
 * and defaulting to 1 for non-dfs inputs such as hbase.
 *
 * @throws Exception Any failure building plans or running the mini clusters.
 */
@Test
public void testReducerNumEstimation() throws Exception {
  // Skip the test for Tez. Tez use a different mechanism.
  // Equivalent test is in TestTezAutoParallelism
  Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
  // use the estimation
  Configuration conf = HBaseConfiguration.create(new Configuration());
  HBaseTestingUtility util = new HBaseTestingUtility(conf);
  int clientPort = util.startMiniZKCluster().getClientPort();
  util.startMiniHBaseCluster(1, 1);

  // Case 1: plain dfs load with no PARALLEL — reducers are estimated from input bytes.
  String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
  PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
  PhysicalPlan pp = Util.buildPp(ps, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  JobControlCompiler jcc = new JobControlCompiler(pc, conf);
  JobControl jc = jcc.compile(mrPlan, "Test");
  Job job = jc.getWaitingJobs().get(0);
  // Expected estimate: ceil(inputBytes / bytesPerReducer) capped at the max of 10.
  long reducer =
      Math.min(
          (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
          10);

  Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

  // use the PARALLEL key word, it will override the estimated reducer number
  query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);

  Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

  final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
  util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

  // the estimation won't take effect when it apply to non-dfs or the files doesn't exist, such as
  // hbase
  query =
      "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
          + "b = group a by $0 ;"
          + "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);

  // Non-dfs input: estimation falls back to a single reducer.
  Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

  util.deleteTable(Bytes.toBytesBinary("test_table"));
  // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
  // here instead.
  MiniHBaseCluster hbc = util.getHBaseCluster();
  if (hbc != null) {
    hbc.shutdown();
    hbc.join();
  }
  util.shutdownMiniZKCluster();
}