  @Test
  public void testBulkSplitOptimization() throws Exception {
    final Connector c = getConnector();
    final String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
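    // raise the compaction ratio, max files per tablet, and split threshold so the
    // bulk-imported files are neither compacted nor split while they are loaded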
    c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "1000");
    c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "1000");
    c.tableOperations().setProperty(tableName, Property.TABLE_SPLIT_THRESHOLD.getKey(), "1G");
    FileSystem fs = cluster.getFileSystem();
    Path testDir = new Path(getUsableDir(), "testmf");
    FunctionalTestUtils.createRFiles(c, fs, testDir.toString(), ROWS, SPLITS, 8);
    FileStatus[] stats = fs.listStatus(testDir);

    System.out.println("Number of generated files: " + stats.length);
    FunctionalTestUtils.bulkImport(c, fs, tableName, testDir.toString());
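    // everything should land in a single tablet (no splits) holding all 100 files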
    FunctionalTestUtils.checkSplits(c, tableName, 0, 0);
    FunctionalTestUtils.checkRFiles(c, tableName, 1, 1, 100, 100);

    // initiate splits
    getConnector()
        .tableOperations()
        .setProperty(tableName, Property.TABLE_SPLIT_THRESHOLD.getKey(), "100K");

    sleepUninterruptibly(2, TimeUnit.SECONDS);

    // wait until over split threshold -- should be 78 splits
    while (getConnector().tableOperations().listSplits(tableName).size() < 75) {
      sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
    }
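    // the table should now have between 50 and 100 splits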

    FunctionalTestUtils.checkSplits(c, tableName, 50, 100);
    VerifyIngest.Opts opts = new VerifyIngest.Opts();
    opts.timestamp = 1;
    opts.dataSize = 50;
    opts.random = 56;
    opts.rows = 100000;
    opts.startRow = 0;
    opts.cols = 1;
    opts.setTableName(tableName);

    AuthenticationToken adminToken = getAdminToken();
    if (adminToken instanceof PasswordToken) {
      PasswordToken token = (PasswordToken) adminToken;
      opts.setPassword(new Password(new String(token.getPassword(), UTF_8)));
      opts.setPrincipal(getAdminPrincipal());
    } else if (adminToken instanceof KerberosToken) {
      ClientConfiguration clientConf = cluster.getClientConfig();
      opts.updateKerberosCredentials(clientConf);
    } else {
      Assert.fail("Unknown token type");
    }

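    // verify all ingested data is still intact after the splits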
    VerifyIngest.verifyIngest(c, opts, new ScannerOpts());

    // ensure no tablet has all of the map files; there should be ~2.5 files per tablet
    FunctionalTestUtils.checkRFiles(c, tableName, 50, 100, 1, 4);
  }
Example 2
  @Test(timeout = 60 * 1000)
  public void run() throws Exception {
    Connector c = getConnector();
    c.tableOperations().create("rdel1");
    Map<String, Set<Text>> groups = new HashMap<String, Set<Text>>();
    groups.put("lg1", Collections.singleton(new Text("foo")));
    groups.put("dg", Collections.<Text>emptySet());
    c.tableOperations().setLocalityGroups("rdel1", groups);
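    // attach the row-deleting iterator at major-compaction scope only, so scans
    // continue to see raw entries (including delete markers) until a compaction runs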
    IteratorSetting setting = new IteratorSetting(30, RowDeletingIterator.class);
    c.tableOperations().attachIterator("rdel1", setting, EnumSet.of(IteratorScope.majc));
    c.tableOperations().setProperty("rdel1", Property.TABLE_MAJC_RATIO.getKey(), "100");

    BatchWriter bw = c.createBatchWriter("rdel1", new BatchWriterConfig());

    bw.addMutation(nm("r1", "foo", "cf1", "v1"));
    bw.addMutation(nm("r1", "bar", "cf1", "v2"));

    bw.flush();
    c.tableOperations().flush("rdel1", null, null, true);

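    // one tablet with exactly one RFile after the first flush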
    checkRFiles(c, "rdel1", 1, 1, 1, 1);

    int count = 0;
    Scanner scanner = c.createScanner("rdel1", Authorizations.EMPTY);
    for (@SuppressWarnings("unused") Entry<Key, Value> entry : scanner) {
      count++;
    }
    if (count != 2) throw new Exception("1 count=" + count);

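    // write the row-delete marker for r1; since the iterator only runs at majc,
    // a scan now sees three entries (the two values plus the marker)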
    bw.addMutation(nm("r1", "", "", RowDeletingIterator.DELETE_ROW_VALUE));

    bw.flush();
    c.tableOperations().flush("rdel1", null, null, true);

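    // flushing the delete marker adds a second RFile to the tablet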
    checkRFiles(c, "rdel1", 1, 1, 2, 2);

    count = 0;
    scanner = c.createScanner("rdel1", Authorizations.EMPTY);
    for (@SuppressWarnings("unused") Entry<Key, Value> entry : scanner) {
      count++;
    }
    if (count != 3) throw new Exception("2 count=" + count);

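    // force a full major compaction; the iterator drops the row and its delete marker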
    c.tableOperations().compact("rdel1", null, null, false, true);

    checkRFiles(c, "rdel1", 1, 1, 0, 0);

    count = 0;
    scanner = c.createScanner("rdel1", Authorizations.EMPTY);
    for (@SuppressWarnings("unused") Entry<Key, Value> entry : scanner) {
      count++;
    }
    if (count != 0) throw new Exception("3 count=" + count);

    bw.close();
  }
Example 3
  @Test(timeout = 3 * 60 * 1000)
  public void run() throws Exception {
    Connector c = getConnector();
    c.tableOperations().create("test_ingest");
    c.tableOperations().setProperty("test_ingest", Property.TABLE_MAJC_RATIO.getKey(), "10");
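    // pre-split the table so ingest is spread across NUM_TABLETS tablets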
    c.tableOperations()
        .addSplits("test_ingest", TestIngest.getSplitPoints(0, NUM_TO_INGEST, NUM_TABLETS));

    // the following loop should create three map files in each tablet
    for (int i = 0; i < 3; i++) {
      TestIngest.Opts opts = new TestIngest.Opts();
      opts.timestamp = i;
      opts.dataSize = 50;
      opts.rows = NUM_TO_INGEST;
      opts.cols = 1;
      opts.random = i;
      TestIngest.ingest(c, opts, new BatchWriterOpts());

      c.tableOperations().flush("test_ingest", null, null, true);
      FunctionalTestUtils.checkRFiles(c, "test_ingest", NUM_TABLETS, NUM_TABLETS, i + 1, i + 1);
    }

    List<Range> ranges = new ArrayList<Range>(NUM_TO_INGEST);

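    // build one exact single-row range per ingested row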
    for (int i = 0; i < NUM_TO_INGEST; i++) {
      ranges.add(new Range(TestIngest.generateRow(i, 0)));
    }

    long time1 = batchScan(c, ranges, 1);
    // run it again, now that data is cached on the client and server
    time1 = batchScan(c, ranges, 1);
    long time2 = batchScan(c, ranges, NUM_TABLETS);

    System.out.printf("Single thread scan time   %6.2f %n", time1 / 1000.0);
    System.out.printf("Multiple thread scan time %6.2f %n", time2 / 1000.0);
  }