@Test public void testBulkSplitOptimization() throws Exception { final Connector c = getConnector(); final String tableName = getUniqueNames(1)[0]; c.tableOperations().create(tableName); c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "1000"); c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "1000"); c.tableOperations().setProperty(tableName, Property.TABLE_SPLIT_THRESHOLD.getKey(), "1G"); FileSystem fs = cluster.getFileSystem(); Path testDir = new Path(getUsableDir(), "testmf"); FunctionalTestUtils.createRFiles(c, fs, testDir.toString(), ROWS, SPLITS, 8); FileStatus[] stats = fs.listStatus(testDir); System.out.println("Number of generated files: " + stats.length); FunctionalTestUtils.bulkImport(c, fs, tableName, testDir.toString()); FunctionalTestUtils.checkSplits(c, tableName, 0, 0); FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 100, 100); // initiate splits getConnector() .tableOperations() .setProperty(tableName, Property.TABLE_SPLIT_THRESHOLD.getKey(), "100K"); sleepUninterruptibly(2, TimeUnit.SECONDS); // wait until over split threshold -- should be 78 splits while (getConnector().tableOperations().listSplits(tableName).size() < 75) { sleepUninterruptibly(500, TimeUnit.MILLISECONDS); } FunctionalTestUtils.checkSplits(c, tableName, 50, 100); VerifyIngest.Opts opts = new VerifyIngest.Opts(); opts.timestamp = 1; opts.dataSize = 50; opts.random = 56; opts.rows = 100000; opts.startRow = 0; opts.cols = 1; opts.setTableName(tableName); AuthenticationToken adminToken = getAdminToken(); if (adminToken instanceof PasswordToken) { PasswordToken token = (PasswordToken) getAdminToken(); opts.setPassword(new Password(new String(token.getPassword(), UTF_8))); opts.setPrincipal(getAdminPrincipal()); } else if (adminToken instanceof KerberosToken) { ClientConfiguration clientConf = cluster.getClientConfig(); opts.updateKerberosCredentials(clientConf); } else { Assert.fail("Unknown token type"); } 
VerifyIngest.verifyIngest(c, opts, new ScannerOpts()); // ensure each tablet does not have all map files, should be ~2.5 files per tablet FunctionalTestUtils.checkRFiles(c, tableName, 50, 100, 1, 4); }
/**
 * Exercises {@link RowDeletingIterator} attached at major-compaction scope on table
 * {@code rdel1}.
 *
 * <p>The test writes two entries for row {@code r1}, flushes, and expects a scan to return 2
 * entries. It then writes an entry carrying {@code RowDeletingIterator.DELETE_ROW_VALUE},
 * flushes, and expects 3 entries (the iterator is only attached for {@code majc}, so scans still
 * see everything). Finally it compacts the table and expects both the file count and the scan
 * count to drop to 0.
 *
 * @throws Exception if a table operation fails or an entry count differs from the expected value
 */
@Test(timeout = 60 * 1000)
public void run() throws Exception {
  final String tableName = "rdel1";

  Connector c = getConnector();
  c.tableOperations().create(tableName);

  Map<String, Set<Text>> groups = new HashMap<String, Set<Text>>();
  groups.put("lg1", Collections.singleton(new Text("foo")));
  groups.put("dg", Collections.<Text>emptySet());
  c.tableOperations().setLocalityGroups(tableName, groups);

  IteratorSetting setting = new IteratorSetting(30, RowDeletingIterator.class);
  c.tableOperations().attachIterator(tableName, setting, EnumSet.of(IteratorScope.majc));
  c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "100");

  BatchWriter bw = c.createBatchWriter(tableName, new BatchWriterConfig());
  try {
    // Phase 1: two ordinary entries, one file after flush.
    bw.addMutation(nm("r1", "foo", "cf1", "v1"));
    bw.addMutation(nm("r1", "bar", "cf1", "v2"));
    bw.flush();
    c.tableOperations().flush(tableName, null, null, true);
    checkRFiles(c, tableName, 1, 1, 1, 1);

    int count = countTableEntries(c, tableName);
    if (count != 2)
      throw new Exception("1 count=" + count);

    // Phase 2: add the row-delete marker; it is just a third entry until a major compaction.
    bw.addMutation(nm("r1", "", "", RowDeletingIterator.DELETE_ROW_VALUE));
    bw.flush();
    c.tableOperations().flush(tableName, null, null, true);
    checkRFiles(c, tableName, 1, 1, 2, 2);

    count = countTableEntries(c, tableName);
    if (count != 3)
      throw new Exception("2 count=" + count);

    // Phase 3: compact the whole table (null start/end); afterwards no files or entries remain.
    c.tableOperations().compact(tableName, null, null, false, true);
    checkRFiles(c, tableName, 1, 1, 0, 0);

    count = countTableEntries(c, tableName);
    if (count != 0)
      throw new Exception("3 count=" + count);
  } finally {
    // Close the writer on failure paths too, so a failed check does not leak it.
    bw.close();
  }
}

/** Scans the given table with empty authorizations and returns how many entries come back. */
private int countTableEntries(Connector c, String tableName) throws Exception {
  int count = 0;
  Scanner scanner = c.createScanner(tableName, Authorizations.EMPTY);
  for (@SuppressWarnings("unused")
  Entry<Key, Value> entry : scanner) {
    count++;
  }
  return count;
}
@Test(timeout = 3 * 60 * 1000) public void run() throws Exception { Connector c = getConnector(); c.tableOperations().create("test_ingest"); c.tableOperations().setProperty("test_ingest", Property.TABLE_MAJC_RATIO.getKey(), "10"); c.tableOperations() .addSplits("test_ingest", TestIngest.getSplitPoints(0, NUM_TO_INGEST, NUM_TABLETS)); // the following loop should create three tablets in each map file for (int i = 0; i < 3; i++) { TestIngest.Opts opts = new TestIngest.Opts(); opts.timestamp = i; opts.dataSize = 50; opts.rows = NUM_TO_INGEST; opts.cols = 1; opts.random = i; TestIngest.ingest(c, opts, new BatchWriterOpts()); c.tableOperations().flush("test_ingest", null, null, true); FunctionalTestUtils.checkRFiles(c, "test_ingest", NUM_TABLETS, NUM_TABLETS, i + 1, i + 1); } List<Range> ranges = new ArrayList<Range>(NUM_TO_INGEST); for (int i = 0; i < NUM_TO_INGEST; i++) { ranges.add(new Range(TestIngest.generateRow(i, 0))); } long time1 = batchScan(c, ranges, 1); // run it again, now that stuff is cached on the client and sever time1 = batchScan(c, ranges, 1); long time2 = batchScan(c, ranges, NUM_TABLETS); System.out.printf("Single thread scan time %6.2f %n", time1 / 1000.0); System.out.printf("Multiple thread scan time %6.2f %n", time2 / 1000.0); }