@Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = "mytable_cfNameStartWithUnderScore";
    Table table = util.createTable(TableName.valueOf(tableName), family);
    HFileTestUtil.createHFile(
        conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family), QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = {dir.toString(), tableName};
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (null != table) {
        table.close();
      }
    }
  }
Example #2
0
  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
  @Test
  public void testLoadTooMayHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(
          util.getConfiguration(),
          fs,
          new Path(familyDir, "hfile_" + i),
          FAMILY,
          QUALIFIER,
          from,
          to,
          1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = {dir.toString(), "mytable_testLoadTooMayHFiles"};
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(
          ie.getMessage()
              .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    HFileTestUtil.createHFile(
        util.getConfiguration(),
        fs,
        testIn,
        FAMILY,
        QUALIFIER,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("zzz"),
        1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn, familyDesc, Bytes.toBytes("ggg"), bottomOut, topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }
  @Test
  public void bulkLoadHFileTest() throws Exception {
    String testName = TestRegionObserverInterface.class.getName() + ".bulkLoadHFileTest";
    TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + ".bulkLoadHFileTest");
    Configuration conf = util.getConfiguration();
    HTable table = util.createTable(tableName, new byte[][] {A, B, C});
    try {
      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"hadPreBulkLoadHFile", "hadPostBulkLoadHFile"},
          tableName,
          new Boolean[] {false, false});

      FileSystem fs = util.getTestFileSystem();
      final Path dir = util.getDataTestDirOnTestFS(testName).makeQualified(fs);
      Path familyDir = new Path(dir, Bytes.toString(A));

      createHFile(util.getConfiguration(), fs, new Path(familyDir, Bytes.toString(A)), A, A);

      // Bulk load
      new LoadIncrementalHFiles(conf).doBulkLoad(dir, new HTable(conf, tableName));

      verifyMethodResult(
          SimpleRegionObserver.class,
          new String[] {"hadPreBulkLoadHFile", "hadPostBulkLoadHFile"},
          tableName,
          new Boolean[] {true, true});
    } finally {
      util.deleteTable(tableName);
      table.close();
    }
  }
    public void doAnAction() throws Exception {
      long iteration = numBulkLoads.getAndIncrement();
      Path dir = UTIL.getDataTestDirOnTestFS(String.format("bulkLoad_%08d", iteration));

      // create HFiles for different column families
      FileSystem fs = UTIL.getTestFileSystem();
      byte[] val = Bytes.toBytes(String.format("%010d", iteration));
      final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>(NUM_CFS);
      for (int i = 0; i < NUM_CFS; i++) {
        Path hfile = new Path(dir, family(i));
        byte[] fam = Bytes.toBytes(family(i));
        createHFile(fs, hfile, fam, QUAL, val, 1000);
        famPaths.add(new Pair<>(fam, hfile.toString()));
      }

      // bulk load HFiles
      final ClusterConnection conn = (ClusterConnection) UTIL.getAdmin().getConnection();
      RegionServerCallable<Void> callable =
          new RegionServerCallable<Void>(conn, tableName, Bytes.toBytes("aaa")) {
            @Override
            public Void call(int callTimeout) throws Exception {
              LOG.debug(
                  "Going to connect to server "
                      + getLocation()
                      + " for row "
                      + Bytes.toStringBinary(getRow()));
              byte[] regionName = getLocation().getRegionInfo().getRegionName();
              BulkLoadHFileRequest request =
                  RequestConverter.buildBulkLoadHFileRequest(famPaths, regionName, true);
              getStub().bulkLoadHFile(null, request);
              return null;
            }
          };
      RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(conf);
      RpcRetryingCaller<Void> caller = factory.<Void>newCaller();
      caller.callWithRetries(callable, Integer.MAX_VALUE);

      // Periodically do compaction to reduce the number of open file handles.
      if (numBulkLoads.get() % 5 == 0) {
        // 5 * 50 = 250 open file handles!
        callable =
            new RegionServerCallable<Void>(conn, tableName, Bytes.toBytes("aaa")) {
              @Override
              public Void call(int callTimeout) throws Exception {
                LOG.debug(
                    "compacting " + getLocation() + " for row " + Bytes.toStringBinary(getRow()));
                AdminProtos.AdminService.BlockingInterface server =
                    conn.getAdmin(getLocation().getServerName());
                CompactRegionRequest request =
                    RequestConverter.buildCompactRegionRequest(
                        getLocation().getRegionInfo().getRegionName(), true, null);
                server.compactRegion(null, request);
                numCompactions.incrementAndGet();
                return null;
              }
            };
        caller.callWithRetries(callable, Integer.MAX_VALUE);
      }
    }
  private void runTest(
      String testName,
      HTableDescriptor htd,
      BloomType bloomType,
      boolean preCreateTable,
      byte[][] tableSplitKeys,
      byte[][][] hfileRanges)
      throws Exception {
    Path dir = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      HFileTestUtil.createHFile(
          util.getConfiguration(),
          fs,
          new Path(familyDir, "hfile_" + hfileIdx++),
          FAMILY,
          QUALIFIER,
          from,
          to,
          1000);
    }
    int expectedRows = hfileIdx * 1000;

    if (preCreateTable) {
      util.getHBaseAdmin().createTable(htd, tableSplitKeys);
    }

    final TableName tableName = htd.getTableName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = {dir.toString(), tableName.toString()};
    loader.run(args);

    Table table = new HTable(util.getConfiguration(), tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    // verify staging folder has been cleaned up
    Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        assertTrue(
            "Folder=" + file.getPath() + " is not cleaned up.",
            file.getPath().getName() != "DONOTERASE");
      }
    }

    util.deleteTable(tableName);
  }
Example #8
0
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    CONF = TEST_UTIL.getConfiguration();
    TEST_UTIL.startMiniDFSCluster(1);

    CLUSTER = TEST_UTIL.getDFSCluster();
    FS = CLUSTER.getFileSystem();
    DIR = TEST_UTIL.getDataTestDirOnTestFS("TestDurability");
    FSUtils.setRootDir(CONF, DIR);
  }
Example #9
0
  /** Run small MR job. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys but using it anyways
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte) 0);
    Arrays.fill(endKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }
  /**
   * Write a random data file and a non-file in a dir with a valid family name but not part of the
   * table families. we should we able to bulkload without getting the unmatched family exception.
   * HBASE-13037/HBASE-13227
   */
  private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tableName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);

    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(
        util.getConfiguration(),
        fs,
        new Path(familyDir, "hfile_0"),
        FAMILY,
        QUALIFIER,
        Bytes.toBytes("begin"),
        Bytes.toBytes("end"),
        500);
    createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    fs.mkdirs(new Path(nonFamilyDir, "non-file"));
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      if (preCreateTable) {
        table = util.createTable(TableName.valueOf(tableName), FAMILY);
      } else {
        table = util.getConnection().getTable(TableName.valueOf(tableName));
      }

      final String[] args = {dir.toString(), tableName};
      new LoadIncrementalHFiles(util.getConfiguration()).run(args);
      assertEquals(500, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }
  /**
   * Run an ImportTsv job and perform basic validation on the results. Returns the ImportTsv <code>
   * Tool</code> instance so that other tests can inspect it for further validation as necessary.
   * This method is static to insure non-reliance on instance's util/conf facilities.
   *
   * @param args Any arguments to pass BEFORE inputFile path is appended.
   * @param dataAvailable
   * @return The Tool instance used to run the test.
   */
  private Tool doMROnTableTest(
      HBaseTestingUtility util,
      String family,
      String data,
      String[] args,
      int valueMultiplier,
      boolean dataAvailable)
      throws Exception {
    String table = args[args.length - 1];
    Configuration conf = new Configuration(util.getConfiguration());

    // populate input file
    FileSystem fs = FileSystem.get(conf);
    Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
    FSDataOutputStream op = fs.create(inputPath, true);
    op.write(Bytes.toBytes(data));
    op.close();
    LOG.debug(String.format("Wrote test data to file: %s", inputPath));

    if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
      LOG.debug("Forcing combiner.");
      conf.setInt("mapreduce.map.combine.minspills", 1);
    }

    // run the import
    List<String> argv = new ArrayList<String>(Arrays.asList(args));
    argv.add(inputPath.toString());
    Tool tool = new ImportTsv();
    LOG.debug("Running ImportTsv with arguments: " + argv);
    assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));

    validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);

    if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
      LOG.debug("Deleting test subdirectory");
      util.cleanupDataTestDirOnTestFS(table);
    }
    return tool;
  }
  @Override
  public int run(String[] args) throws Exception {
    Path rootRegionDir = null;
    int numThreads = 1;
    long numIterations = 1000000;
    int numFamilies = 1;
    int syncInterval = 0;
    boolean noSync = false;
    boolean verify = false;
    boolean verbose = false;
    boolean cleanup = true;
    boolean noclosefs = false;
    long roll = Long.MAX_VALUE;
    boolean compress = false;
    String cipher = null;
    int numRegions = 1;
    String spanReceivers = getConf().get("hbase.trace.spanreceiver.classes");
    boolean trace = spanReceivers != null && !spanReceivers.isEmpty();
    double traceFreq = 1.0;
    // Process command line args
    for (int i = 0; i < args.length; i++) {
      String cmd = args[i];
      try {
        if (cmd.equals("-threads")) {
          numThreads = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-iterations")) {
          numIterations = Long.parseLong(args[++i]);
        } else if (cmd.equals("-path")) {
          rootRegionDir = new Path(args[++i]);
        } else if (cmd.equals("-families")) {
          numFamilies = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-qualifiers")) {
          numQualifiers = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-keySize")) {
          keySize = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-valueSize")) {
          valueSize = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-syncInterval")) {
          syncInterval = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-nosync")) {
          noSync = true;
        } else if (cmd.equals("-verify")) {
          verify = true;
        } else if (cmd.equals("-verbose")) {
          verbose = true;
        } else if (cmd.equals("-nocleanup")) {
          cleanup = false;
        } else if (cmd.equals("-noclosefs")) {
          noclosefs = true;
        } else if (cmd.equals("-roll")) {
          roll = Long.parseLong(args[++i]);
        } else if (cmd.equals("-compress")) {
          compress = true;
        } else if (cmd.equals("-encryption")) {
          cipher = args[++i];
        } else if (cmd.equals("-regions")) {
          numRegions = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-traceFreq")) {
          traceFreq = Double.parseDouble(args[++i]);
        } else if (cmd.equals("-h")) {
          printUsageAndExit();
        } else if (cmd.equals("--help")) {
          printUsageAndExit();
        } else {
          System.err.println("UNEXPECTED: " + cmd);
          printUsageAndExit();
        }
      } catch (Exception e) {
        printUsageAndExit();
      }
    }

    if (compress) {
      Configuration conf = getConf();
      conf.setBoolean(HConstants.ENABLE_WAL_COMPRESSION, true);
    }

    if (cipher != null) {
      // Set up WAL for encryption
      Configuration conf = getConf();
      conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
      conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
      conf.setClass(
          "hbase.regionserver.hlog.reader.impl", SecureProtobufLogReader.class, WAL.Reader.class);
      conf.setClass(
          "hbase.regionserver.hlog.writer.impl", SecureProtobufLogWriter.class, Writer.class);
      conf.setBoolean(HConstants.ENABLE_WAL_ENCRYPTION, true);
      conf.set(HConstants.CRYPTO_WAL_ALGORITHM_CONF_KEY, cipher);
    }

    if (numThreads < numRegions) {
      LOG.warn("Number of threads is less than the number of regions; some regions will sit idle.");
    }

    // Internal config. goes off number of threads; if more threads than handlers, stuff breaks.
    // In regionserver, number of handlers == number of threads.
    getConf().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, numThreads);

    // Run WAL Performance Evaluation
    // First set the fs from configs.  In case we are on hadoop1
    FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
    FileSystem fs = FileSystem.get(getConf());
    LOG.info("FileSystem: " + fs);

    SpanReceiverHost receiverHost = trace ? SpanReceiverHost.getInstance(getConf()) : null;
    TraceScope scope = Trace.startSpan("WALPerfEval", trace ? Sampler.ALWAYS : Sampler.NEVER);

    try {
      if (rootRegionDir == null) {
        rootRegionDir = TEST_UTIL.getDataTestDirOnTestFS("WALPerformanceEvaluation");
      }
      rootRegionDir = rootRegionDir.makeQualified(fs);
      cleanRegionRootDir(fs, rootRegionDir);
      FSUtils.setRootDir(getConf(), rootRegionDir);
      final WALFactory wals = new WALFactory(getConf(), null, "wals");
      final HRegion[] regions = new HRegion[numRegions];
      final Runnable[] benchmarks = new Runnable[numRegions];
      final MockRegionServerServices mockServices = new MockRegionServerServices(getConf());
      final LogRoller roller = new LogRoller(mockServices, mockServices);
      Threads.setDaemonThreadRunning(roller.getThread(), "WALPerfEval.logRoller");

      try {
        for (int i = 0; i < numRegions; i++) {
          // Initialize Table Descriptor
          // a table per desired region means we can avoid carving up the key space
          final HTableDescriptor htd = createHTableDescriptor(i, numFamilies);
          regions[i] = openRegion(fs, rootRegionDir, htd, wals, roll, roller);
          benchmarks[i] =
              Trace.wrap(
                  new WALPutBenchmark(
                      regions[i], htd, numIterations, noSync, syncInterval, traceFreq));
        }
        ConsoleReporter.enable(this.metrics, 30, TimeUnit.SECONDS);
        long putTime = runBenchmark(benchmarks, numThreads);
        logBenchmarkResult(
            "Summary: threads="
                + numThreads
                + ", iterations="
                + numIterations
                + ", syncInterval="
                + syncInterval,
            numIterations * numThreads,
            putTime);

        for (int i = 0; i < numRegions; i++) {
          if (regions[i] != null) {
            closeRegion(regions[i]);
            regions[i] = null;
          }
        }
        if (verify) {
          LOG.info("verifying written log entries.");
          Path dir =
              new Path(
                  FSUtils.getRootDir(getConf()), DefaultWALProvider.getWALDirectoryName("wals"));
          long editCount = 0;
          FileStatus[] fsss = fs.listStatus(dir);
          if (fsss.length == 0) throw new IllegalStateException("No WAL found");
          for (FileStatus fss : fsss) {
            Path p = fss.getPath();
            if (!fs.exists(p)) throw new IllegalStateException(p.toString());
            editCount += verify(wals, p, verbose);
          }
          long expected = numIterations * numThreads;
          if (editCount != expected) {
            throw new IllegalStateException("Counted=" + editCount + ", expected=" + expected);
          }
        }
      } finally {
        mockServices.stop("test clean up.");
        for (int i = 0; i < numRegions; i++) {
          if (regions[i] != null) {
            closeRegion(regions[i]);
          }
        }
        if (null != roller) {
          LOG.info("shutting down log roller.");
          Threads.shutdown(roller.getThread());
        }
        wals.shutdown();
        // Remove the root dir for this test region
        if (cleanup) cleanRegionRootDir(fs, rootRegionDir);
      }
    } finally {
      // We may be called inside a test that wants to keep on using the fs.
      if (!noclosefs) fs.close();
      scope.close();
      if (receiverHost != null) receiverHost.closeReceivers();
    }

    return (0);
  }
Example #13
0
  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
   * from the column family descriptor
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals(
            "Incorrect bloom filter used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getBloomFilterType(),
            BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals(
            "Incorrect compression used for column family "
                + familyStr
                + "(reader: "
                + reader
                + ")",
            hcd.getCompressionType(),
            reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
Example #14
0
  /**
   * This test is to test the scenario happened in HBASE-6901. All files are bulk loaded and
   * excluded from minor compaction. Without the fix of HBASE-6901, an
   * ArrayIndexOutOfBoundsException will be thrown.
   */
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection();
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(
            conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME), testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
              }
            },
            5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }
  @Override
  public int run(String[] args) throws Exception {
    Path rootRegionDir = null;
    int numThreads = 1;
    long numIterations = 10000;
    int numFamilies = 1;
    boolean noSync = false;
    boolean verify = false;
    boolean verbose = false;
    boolean cleanup = true;
    boolean noclosefs = false;
    long roll = Long.MAX_VALUE;
    // Process command line args
    for (int i = 0; i < args.length; i++) {
      String cmd = args[i];
      try {
        if (cmd.equals("-threads")) {
          numThreads = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-iterations")) {
          numIterations = Long.parseLong(args[++i]);
        } else if (cmd.equals("-path")) {
          rootRegionDir = new Path(args[++i]);
        } else if (cmd.equals("-families")) {
          numFamilies = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-qualifiers")) {
          numQualifiers = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-keySize")) {
          keySize = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-valueSize")) {
          valueSize = Integer.parseInt(args[++i]);
        } else if (cmd.equals("-nosync")) {
          noSync = true;
        } else if (cmd.equals("-verify")) {
          verify = true;
        } else if (cmd.equals("-verbose")) {
          verbose = true;
        } else if (cmd.equals("-nocleanup")) {
          cleanup = false;
        } else if (cmd.equals("-noclosefs")) {
          noclosefs = true;
        } else if (cmd.equals("-roll")) {
          roll = Long.parseLong(args[++i]);
        } else if (cmd.equals("-h")) {
          printUsageAndExit();
        } else if (cmd.equals("--help")) {
          printUsageAndExit();
        } else {
          System.err.println("UNEXPECTED: " + cmd);
          printUsageAndExit();
        }
      } catch (Exception e) {
        printUsageAndExit();
      }
    }

    // Run HLog Performance Evaluation
    // First set the fs from configs.  In case we are on hadoop1
    FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
    FileSystem fs = FileSystem.get(getConf());
    LOG.info("FileSystem: " + fs);
    try {
      if (rootRegionDir == null) {
        rootRegionDir = TEST_UTIL.getDataTestDirOnTestFS("HLogPerformanceEvaluation");
      }
      rootRegionDir = rootRegionDir.makeQualified(fs);
      cleanRegionRootDir(fs, rootRegionDir);
      // Initialize Table Descriptor
      HTableDescriptor htd = createHTableDescriptor(numFamilies);
      final long whenToRoll = roll;
      HLog hlog =
          new FSHLog(fs, rootRegionDir, "wals", getConf()) {
            int appends = 0;

            @Override
            protected void doWrite(
                HRegionInfo info, HLogKey logKey, WALEdit logEdit, HTableDescriptor htd)
                throws IOException {
              this.appends++;
              if (this.appends % whenToRoll == 0) {
                LOG.info("Rolling after " + appends + " edits");
                rollWriter();
              }
              super.doWrite(info, logKey, logEdit, htd);
            };
          };
      hlog.rollWriter();
      HRegion region = null;
      try {
        region = openRegion(fs, rootRegionDir, htd, hlog);
        long putTime =
            runBenchmark(new HLogPutBenchmark(region, htd, numIterations, noSync), numThreads);
        logBenchmarkResult(
            "Summary: threads=" + numThreads + ", iterations=" + numIterations,
            numIterations * numThreads,
            putTime);
        if (region != null) {
          closeRegion(region);
          region = null;
        }
        if (verify) {
          Path dir = ((FSHLog) hlog).getDir();
          long editCount = 0;
          FileStatus[] fsss = fs.listStatus(dir);
          if (fsss.length == 0) throw new IllegalStateException("No WAL found");
          for (FileStatus fss : fsss) {
            Path p = fss.getPath();
            if (!fs.exists(p)) throw new IllegalStateException(p.toString());
            editCount += verify(p, verbose);
          }
          long expected = numIterations * numThreads;
          if (editCount != expected) {
            throw new IllegalStateException("Counted=" + editCount + ", expected=" + expected);
          }
        }
      } finally {
        if (region != null) closeRegion(region);
        // Remove the root dir for this test region
        if (cleanup) cleanRegionRootDir(fs, rootRegionDir);
      }
    } finally {
      // We may be called inside a test that wants to keep on using the fs.
      if (!noclosefs) fs.close();
    }

    return (0);
  }
Example #16
0
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
        Admin admin = util.getConnection().getAdmin(); ) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
                != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }
Example #17
0
  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      Admin admin = util.getHBaseAdmin();
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          HStore.getStoreHomedir(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              admin.getTableRegions(TABLE_NAME).get(0),
              FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME);
      assertEquals(1, util.countRows(table));
      quickPoll(
          new Callable<Boolean>() {
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(
            new Callable<Boolean>() {
              public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
              }
            },
            5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(
          new Callable<Boolean>() {
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }
Example #18
0
  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] splitKeys = generateRandomSplitKeys(4);
    util.startMiniCluster();
    try {
      HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
      Admin admin = table.getConnection().getAdmin();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = -1;
      try (RegionLocator r = table.getRegionLocator()) {
        numRegions = r.getStartKeys().length;
      }
      assertEquals("Should make 5 regions", numRegions, 5);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        while (util.getMiniHBaseCluster()
            .getMaster()
            .getAssignmentManager()
            .getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        while (table.getRegionLocator().getAllRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals(
          "Data should remain after reopening of regions",
          tableDigestBefore,
          util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }
  @Test(timeout = 30000)
  public void testBulkLoad() throws IOException {
    // Create table then get the single region for our new table.
    LOG.debug("Creating test table");
    HTableDescriptor hdt = HTU.createTableDescriptor("testBulkLoad");
    hdt.setRegionReplication(NB_SERVERS);
    hdt.addCoprocessor(SlowMeCopro.class.getName());
    Table table = HTU.createTable(hdt, new byte[][] {f}, HTU.getConfiguration());

    // create hfiles to load.
    LOG.debug("Creating test data");
    Path dir = HTU.getDataTestDirOnTestFS("testBulkLoad");
    final int numRows = 10;
    final byte[] qual = Bytes.toBytes("qual");
    final byte[] val = Bytes.toBytes("val");
    final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>();
    for (HColumnDescriptor col : hdt.getColumnFamilies()) {
      Path hfile = new Path(dir, col.getNameAsString());
      TestHRegionServerBulkLoad.createHFile(
          HTU.getTestFileSystem(), hfile, col.getName(), qual, val, numRows);
      famPaths.add(new Pair<byte[], String>(col.getName(), hfile.toString()));
    }

    // bulk load HFiles
    LOG.debug("Loading test data");
    @SuppressWarnings("deprecation")
    final HConnection conn = HTU.getHBaseAdmin().getConnection();
    RegionServerCallable<Void> callable =
        new RegionServerCallable<Void>(
            conn, hdt.getTableName(), TestHRegionServerBulkLoad.rowkey(0)) {
          @Override
          public Void call(int timeout) throws Exception {
            LOG.debug(
                "Going to connect to server "
                    + getLocation()
                    + " for row "
                    + Bytes.toStringBinary(getRow()));
            byte[] regionName = getLocation().getRegionInfo().getRegionName();
            BulkLoadHFileRequest request =
                RequestConverter.buildBulkLoadHFileRequest(famPaths, regionName, true);
            getStub().bulkLoadHFile(null, request);
            return null;
          }
        };
    RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(HTU.getConfiguration());
    RpcRetryingCaller<Void> caller = factory.<Void>newCaller();
    caller.callWithRetries(callable, 10000);

    // verify we can read them from the primary
    LOG.debug("Verifying data load");
    for (int i = 0; i < numRows; i++) {
      byte[] row = TestHRegionServerBulkLoad.rowkey(i);
      Get g = new Get(row);
      Result r = table.get(g);
      Assert.assertFalse(r.isStale());
    }

    // verify we can read them from the replica
    LOG.debug("Verifying replica queries");
    try {
      SlowMeCopro.cdl.set(new CountDownLatch(1));
      for (int i = 0; i < numRows; i++) {
        byte[] row = TestHRegionServerBulkLoad.rowkey(i);
        Get g = new Get(row);
        g.setConsistency(Consistency.TIMELINE);
        Result r = table.get(g);
        Assert.assertTrue(r.isStale());
      }
      SlowMeCopro.cdl.get().countDown();
    } finally {
      SlowMeCopro.cdl.get().countDown();
      SlowMeCopro.sleepTime.set(0);
    }

    HTU.getHBaseAdmin().disableTable(hdt.getTableName());
    HTU.deleteTable(hdt.getTableName());
  }