private void testSimpleScanInternal( long origKeyPrefix, Scan scan, int numValues, int startWithValue, int seekIntervalMinValue, int seekIntervalMaxValue) throws IOException { int valuesCountInSeekInterval = writeTestData( origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue); // TODO: add some filters to the scan for better testing ResultScanner distributedScanner = DistributedScanner.create(hTable, scan, keyDistributor); Result previous = null; int countMatched = 0; for (Result current : distributedScanner) { countMatched++; if (previous != null) { byte[] currentRowOrigKey = keyDistributor.getOriginalKey(current.getRow()); byte[] previousRowOrigKey = keyDistributor.getOriginalKey(previous.getRow()); Assert.assertTrue(Bytes.compareTo(currentRowOrigKey, previousRowOrigKey) >= 0); int currentValue = Bytes.toInt(current.getValue(CF, QUAL)); Assert.assertTrue(currentValue >= seekIntervalMinValue); Assert.assertTrue(currentValue <= seekIntervalMaxValue); } previous = current; } Assert.assertEquals(valuesCountInSeekInterval, countMatched); }
/** Testing simple get. */ @Test public void testGet() throws IOException, InterruptedException { // Testing simple get byte[] key = new byte[] {123, 124, 122}; byte[] distributedKey = keyDistributor.getDistributedKey(key); byte[] value = Bytes.toBytes("some"); // No need to adjust key here as hasher doesn't change it hTable.put(new Put(distributedKey).add(CF, QUAL, value)); Result result = hTable.get(new Get(distributedKey)); Assert.assertArrayEquals(key, keyDistributor.getOriginalKey(result.getRow())); Assert.assertArrayEquals(value, result.getValue(CF, QUAL)); }
private void testMapReduceInternal( long origKeyPrefix, Scan scan, int numValues, int startWithValue, int seekIntervalMinValue, int seekIntervalMaxValue) throws IOException, InterruptedException, ClassNotFoundException { int valuesCountInSeekInterval = writeTestData( origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue); // Reading data Configuration conf = testingUtility.getConfiguration(); Job job = new Job(conf, "testMapReduceInternal()-Job"); job.setJarByClass(this.getClass()); TableMapReduceUtil.initTableMapperJob( TABLE_NAME, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); // Substituting standard TableInputFormat which was set in // TableMapReduceUtil.initTableMapperJob(...) job.setInputFormatClass(WdTableInputFormat.class); keyDistributor.addInfo(job.getConfiguration()); job.setOutputFormatClass(NullOutputFormat.class); job.setNumReduceTasks(0); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue(); Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords); }
/**
 * Applies the configuration and, when a row key distributor class is configured under
 * {@code ROW_KEY_DISTRIBUTOR_CLASS}, instantiates it reflectively and initializes it with
 * the value of {@code ROW_KEY_DISTRIBUTOR_PARAMS} if that is present.
 *
 * <p>When no distributor class is configured, the distributor field is left untouched.
 *
 * @param conf configuration to apply
 * @throws RuntimeException if the distributor cannot be created or initialized; the
 *     underlying exception is kept as the cause
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);

  String distributorClassName = conf.get(ROW_KEY_DISTRIBUTOR_CLASS);
  if (distributorClassName == null) {
    return;
  }

  try {
    Object instance = Class.forName(distributorClassName).newInstance();
    rowKeyDistributor = (AbstractRowKeyDistributor) instance;

    String distributorParams = conf.get(ROW_KEY_DISTRIBUTOR_PARAMS);
    if (distributorParams != null) {
      rowKeyDistributor.init(distributorParams);
    }
  } catch (Exception e) {
    throw new RuntimeException(
        "Cannot create row key distributor, "
            + ROW_KEY_DISTRIBUTOR_CLASS
            + ": "
            + distributorClassName,
        e);
  }
}
private int writeTestData( long origKeyPrefix, int numRows, int rowKeySeed, int seekIntervalMinValue, int seekIntervalMaxValue) throws IOException { int valuesCountInSeekInterval = 0; for (int i = 0; i < numRows; i++) { int val = rowKeySeed + i - i * (i % 2) * 2; // i.e. 500, 499, 502, 497, 504, ... valuesCountInSeekInterval += (val >= seekIntervalMinValue && val <= seekIntervalMaxValue) ? 1 : 0; byte[] key = Bytes.toBytes(origKeyPrefix + val); byte[] distributedKey = keyDistributor.getDistributedKey(key); byte[] value = Bytes.toBytes(val); hTable.put(new Put(distributedKey).add(CF, QUAL, value)); } return valuesCountInSeekInterval; }
@Override public List<InputSplit> getSplits(JobContext context) throws IOException { List<InputSplit> allSplits = new ArrayList<InputSplit>(); Scan originalScan = getScan(); Scan[] scans = rowKeyDistributor.getDistributedScans(originalScan); for (Scan scan : scans) { // Internally super.getSplits(...) uses scan object stored in private variable, // to re-use the code of super class we switch scan object with scans we setScan(scan); List<InputSplit> splits = super.getSplits(context); allSplits.addAll(splits); } // Setting original scan back setScan(originalScan); return allSplits; }