Example #1
  /**
   * Run a local map reduce job to read records from an HCatalog table and verify that the count
   * matches the expected value.
   *
   * @param readCount expected number of records to be read
   * @param filter partition filter expression, or null to read all partitions
   * @return the records read from the table
   * @throws Exception if the job fails
   */
  List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
    MapRead.readCount = 0;
    readRecords.clear();

    Configuration conf = new Configuration();
    conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
    Job job = new Job(conf, "hcat mapreduce read test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapRead.class);

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName, filter);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
    if (fs.exists(path)) {
      fs.delete(path, true);
    }

    TextOutputFormat.setOutputPath(job, path);

    job.waitForCompletion(true);
    Assert.assertEquals(readCount, MapRead.readCount);

    return readRecords;
  }
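
For illustration, a caller might exercise the filter argument of runMRRead roughly as follows; the partition column name, value, and expected counts are hypothetical and not taken from the original test suite:

  // Hypothetical usage sketch: read everything, then re-read a single partition.
  List<HCatRecord> all = runMRRead(30, null); // no filter: all partitions, expect 30 records
  List<HCatRecord> onePartition = runMRRead(10, "part1 = \"p1value1\""); // filter on a hypothetical partition column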
Example #2
 @Test
 public void testVanishingTaskZNode() throws Exception {
   LOG.info("testVanishingTaskZNode");
   conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);
   slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
   slm.finishInitialization();
   FileSystem fs = TEST_UTIL.getTestFileSystem();
   final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
   fs.mkdirs(logDir);
   Path logFile = new Path(logDir, UUID.randomUUID().toString());
   fs.createNewFile(logFile);
   new Thread() {
     public void run() {
       try {
         // this call will block because there are no SplitLogWorkers
         slm.splitLogDistributed(logDir);
       } catch (Exception e) {
         LOG.warn("splitLogDistributed failed", e);
         fail();
       }
     }
   }.start();
   waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
   String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
   // remove the task znode
   ZKUtil.deleteNode(zkw, znode);
   waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
   waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
   assertTrue(fs.exists(logFile));
   fs.delete(logDir, true);
 }
  /*
   * (non-Javadoc)
   *
   * @see org.anon.smart.d2cache.store.StoreTransaction#commit()
   */
  @Override
  public void commit() throws CtxException {

    FileSystem hdfs = ((HadoopFileStoreConnection) _connection).getHadoopFS();

    assertion().assertNotNull(hdfs, "Hadoop FileSystem is null");

    String repo = hdfs.getWorkingDirectory().toUri().toString();

    for (Object fi : files.keySet()) {
      try {

        String[] params = (String[]) fi;

        Path destination = new Path(repo + "/" + params[1]);
        Path fldr = destination.getParent();
        if (!hdfs.exists(fldr)) hdfs.mkdirs(fldr);
        hdfs.copyFromLocalFile(true, new Path(params[0]), destination);

      } catch (Exception e) {
        // Log the failure for this entry and continue copying the remaining files.
        e.printStackTrace();
      }
    }
  }
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = "mytable_cfNameStartWithUnderScore";
    Table table = util.createTable(TableName.valueOf(tableName), family);
    HFileTestUtil.createHFile(
        conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family), QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = {dir.toString(), tableName};
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (null != table) {
        table.close();
      }
    }
  }
 /**
  * Return the current file system working directory as a path string.
  *
  * @return !null string
  */
 @Override
 public String pwd() {
   if (isRunningAsUser()) {
     return super.pwd();
   }
   final FileSystem fs = getDFS();
   return absolutePath(fs.getWorkingDirectory());
 }
  /** Test compressible {@link GridmixRecord}. */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);
    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
    DataOutputStream out = new DataOutputStream(outStream);
    record.write(out);
    out.close();
    outStream.close();

    // open the compressed stream for reading
    Path actualRecordFile = recordFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    GridmixRecord recordRead = new GridmixRecord();
    recordRead.readFields(new DataInputStream(in));

    assertEquals(
        "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
    assertTrue(
        "Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(
        CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
        1.0D);
  }
Example #7
 public void execute(Environment env, CommandLine cmd, ConsoleReader reader) {
   FileSystem hdfs = (FileSystem) env.getValue(Constants.HDFS);
   String wd = hdfs.getWorkingDirectory().toString();
   if (cmd.hasOption("l")) {
     log(cmd, wd);
   } else {
     log(cmd, wd.substring(env.getProperty(Constants.HDFS_URL).length()));
   }
   FSUtil.prompt(env);
 }
Example #8
  // jobSubmitDir is the job staging directory (under /tmp). Copy libs, files, etc. into jobSubmitDir.
  private void copyAndConfigureFiles(Job job, Path jobSubmitDir) throws IOException {
    Configuration conf = job.getConfiguration();
    short replication = (short) conf.getInt(Job.SUBMIT_REPLICATION, 10);
    copyAndConfigureFiles(job, jobSubmitDir, replication);

    // Set the working directory
    if (job.getWorkingDirectory() == null) {
      job.setWorkingDirectory(jtFs.getWorkingDirectory());
    }
  }
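
As a hedged caller-side sketch (the path below is a placeholder, not from the original code), a job can pin its working directory explicitly so the default taken from jtFs.getWorkingDirectory() above is never applied:

  Job job = Job.getInstance(new Configuration(), "example job");
  job.setWorkingDirectory(new Path("/user/example/workdir")); // hypothetical path; the default above is then skipped
  // ... continue configuring and submitting the job as usual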
  /** Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}. */
  @Test
  public void testRandomCompressedTextDataGenerator() throws Exception {
    int wordSize = 10;
    int listSize = 20;
    long dataSize = 10 * 1024 * 1024;

    Configuration conf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    // configure the RandomTextDataGenerator to generate desired sized data
    conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
    conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
    conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

    FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr");
    lfs.delete(tempDir, true);

    runDataGenJob(conf, tempDir);

    // validate the output data
    FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long size = 0;
    long maxLineSize = 0;

    for (FileStatus status : files) {
      InputStream in =
          CompressionEmulationUtil.getPossiblyDecompressedInputStream(status.getPath(), conf, 0);
      BufferedReader reader = new BufferedReader(new InputStreamReader(in));
      String line = reader.readLine();
      if (line != null) {
        long lineSize = line.getBytes().length;
        if (lineSize > maxLineSize) {
          maxLineSize = lineSize;
        }
        while (line != null) {
          for (String word : line.split("\\s")) {
            size += word.getBytes().length;
          }
          line = reader.readLine();
        }
      }
      reader.close();
    }

    assertTrue(size >= dataSize);
    assertTrue(size <= dataSize + maxLineSize);
  }
Example #10
 @Test
 public void testEmptyLogDir() throws Exception {
   LOG.info("testEmptyLogDir");
   slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
   slm.finishInitialization();
   FileSystem fs = TEST_UTIL.getTestFileSystem();
   Path emptyLogDirPath = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
   fs.mkdirs(emptyLogDirPath);
   slm.splitLogDistributed(emptyLogDirPath);
   assertFalse(fs.exists(emptyLogDirPath));
 }
Example #11
  /**
   * Test if the {@link JobConfigurationParser} can correctly extract key-value pairs from the
   * job configuration.
   */
  @Test
  public void testJobConfigurationParsing() throws Exception {
    final FileSystem lfs = FileSystem.getLocal(new Configuration());

    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    final Path tempDir = new Path(rootTempDir, "TestJobConfigurationParser");
    lfs.delete(tempDir, true);

    // Add some configuration parameters to the conf
    JobConf jConf = new JobConf(false);
    String key = "test.data";
    String value = "hello world";
    jConf.set(key, value);

    // create the job conf file
    Path jobConfPath = new Path(tempDir.toString(), "job.xml");
    lfs.delete(jobConfPath, false);
    DataOutputStream jobConfStream = lfs.create(jobConfPath);
    jConf.writeXml(jobConfStream);
    jobConfStream.close();

    // now read the job conf file using the job configuration parser
    Properties properties = JobConfigurationParser.parse(lfs.open(jobConfPath));

    // check if the required parameter is loaded
    assertEquals(
        "Total number of extracted properties ("
            + properties.size()
            + ") doesn't match the expected size of 1 ["
            + "JobConfigurationParser]",
        1,
        properties.size());
    // check if the key is present in the extracted configuration
    assertTrue(
        "Key " + key + " is missing in the configuration extracted " + "[JobConfigurationParser]",
        properties.keySet().contains(key));
    // check if the desired property has the correct value
    assertEquals(
        "JobConfigurationParser couldn't recover the parameters" + " correctly",
        value,
        properties.get(key));

    // Test ZombieJob
    LoggedJob job = new LoggedJob();
    job.setJobProperties(properties);

    ZombieJob zjob = new ZombieJob(job, null);
    Configuration zconf = zjob.getJobConf();
    // check if the required parameter is loaded
    assertEquals("ZombieJob couldn't recover the parameters correctly", value, zconf.get(key));
  }
Example #12
  @BeforeClass
  public static void setUpOneTime() throws Exception {
    fs = new LocalFileSystem();
    fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());

    HiveConf hiveConf = new HiveConf();
    hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0);
    // Hack: initialize the cache with a 0 expiry time so it returns a new hive client every time.
    // Otherwise the cache doesn't play well with the second test method, because the client gets
    // closed() in the tearDown() of the previous test.
    HCatUtil.getHiveMetastoreClient(hiveConf);

    MapCreate.writeCount = 0;
    MapRead.readCount = 0;
  }
  @Test(timeout = 45000)
  public void testVanishingTaskZNode() throws Exception {
    LOG.info("testVanishingTaskZNode");

    conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    FileSystem fs = TEST_UTIL.getTestFileSystem();
    final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    fs.mkdirs(logDir);
    Thread thread = null;
    try {
      Path logFile = new Path(logDir, UUID.randomUUID().toString());
      fs.createNewFile(logFile);
      thread =
          new Thread() {
            public void run() {
              try {
                // this call will block because there are no SplitLogWorkers,
                // until the task znode is deleted below. Then the call will
                // complete successfully, assuming the log is split.
                slm.splitLogDistributed(logDir);
              } catch (Exception e) {
                LOG.warn("splitLogDistributed failed", e);
              }
            }
          };
      thread.start();
      waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
      String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
      // remove the task znode, to finish the distributed log splitting
      ZKUtil.deleteNode(zkw, znode);
      waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
      waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
      assertTrue(fs.exists(logFile));
    } finally {
      if (thread != null) {
        // interrupt the thread in case the test fails in the middle.
        // it has no effect if the thread is already terminated.
        thread.interrupt();
      }
      fs.delete(logDir, true);
    }
  }
  /**
   * Test if {@link RandomTextDataGenerator} can generate random text data with the desired
   * compression ratio. This involves
   * <li>using {@link CompressionEmulationUtil} to configure the MR job to generate random text
   *     data with the desired compression ratio,
   * <li>running the MR job, and
   * <li>testing {@link RandomTextDataGenerator}'s output by matching the (compressed) output size
   *     against the expected compression ratio.
   */
  private void testCompressionRatioConfigure(float ratio) throws Exception {
    long dataSize = 10 * 1024 * 1024;

    Configuration conf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

    float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
    if (ratio > 0) {
      // set the compression ratio in the conf
      CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio);
      expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio);
    }

    // invoke the utility to map from ratio to word-size
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr");
    lfs.delete(tempDir, true);

    runDataGenJob(conf, tempDir);

    // validate the output data
    FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long size = 0;

    for (FileStatus status : files) {
      size += status.getLen();
    }

    float compressionRatio = ((float) size) / dataSize;
    float stdRatio = CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio);

    assertEquals(expectedRatio, stdRatio, 0.0D);
  }
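
A minimal sketch of how test methods might drive this helper; the method names and ratio values here are assumptions, not taken from the original suite:

  @Test
  public void testCompressionRatioWithDefault() throws Exception {
    testCompressionRatioConfigure(0F); // ratio <= 0 falls back to DEFAULT_COMPRESSION_RATIO
  }

  @Test
  public void testCompressionRatioWithCustomRatio() throws Exception {
    testCompressionRatioConfigure(0.45F); // hypothetical custom map-input compression ratio
  }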
  /**
   * Test if {@link FileQueue} can identify compressed files and provide readers that extract
   * uncompressed data only if input-compression is enabled.
   */
  @Test
  public void testFileQueueDecompression() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    compressedFile = compressedFile.suffix(".gz");
    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);
    assertEquals("Compression/Decompression error", inputLine, readLine);
  }
  /**
   * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration,
   * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path,
   * Configuration)}.
   */
  @Test
  public void testPossiblyCompressedDecompressedStreams() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    // now read back the data from the compressed stream
    compressedFile = compressedFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String readLine = reader.readLine();
    assertEquals("Compression/Decompression error", inputLine, readLine);
    reader.close();
  }
Example #17
  /**
   * Check if processing of input arguments is as expected by passing a globbed input path
   * <li>without -recursive option and
   * <li>with -recursive option.
   */
  @Test
  public void testProcessInputArgument() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temporary directory
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    // define the test's root input directory
    Path testRootInputDir = new Path(rootTempDir, "TestProcessInputArgument");
    // define the nested input directory
    Path nestedInputDir = new Path(testRootInputDir, "1/2/3/4");
    // define the globbed version of the nested input directory
    Path globbedInputNestedDir = lfs.makeQualified(new Path(testRootInputDir, "*/*/*/*/*"));
    try {
      lfs.delete(nestedInputDir, true);

      List<String> recursiveInputPaths = new ArrayList<String>();
      List<String> nonRecursiveInputPaths = new ArrayList<String>();
      // Create input files under the given path with multiple levels of
      // sub directories
      createHistoryLogsHierarchy(nestedInputDir, lfs, recursiveInputPaths, nonRecursiveInputPaths);

      // Check the case of globbed input path and without -recursive option
      List<Path> inputs =
          MyOptions.processInputArgument(globbedInputNestedDir.toString(), conf, false);
      validateHistoryLogPaths(inputs, nonRecursiveInputPaths);
      // Check the case of globbed input path and with -recursive option
      inputs = MyOptions.processInputArgument(globbedInputNestedDir.toString(), conf, true);
      validateHistoryLogPaths(inputs, recursiveInputPaths);

    } finally {
      lfs.delete(testRootInputDir, true);
    }
  }
Example #18
  @Test
  public void testPathParts() throws Exception { // see PathParts
    FileSystem fs = dfsCluster.getFileSystem();
    int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
    assertTrue(dfsClusterPort > 0);
    JobConf jobConf = getJobConf();
    Configuration simpleConf = new Configuration();

    for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
        for (String up : Arrays.asList("", "../")) {
          String down = up.length() == 0 ? "foo/" : "";
          String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
          PathParts parts = new PathParts(uploadURL, conf);
          assertEquals(uploadURL, parts.getUploadURL());
          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
          assertEquals("bar.txt", parts.getName());
          assertEquals("hdfs", parts.getScheme());
          assertEquals("localhost", parts.getHost());
          assertEquals(12345, parts.getPort());
          assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
          assertEquals(parts.getId(), parts.getDownloadURL());
          assertFileNotFound(parts);

          uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
          parts = new PathParts(uploadURL, conf);
          assertEquals(uploadURL, parts.getUploadURL());
          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
          assertEquals("bar.txt", parts.getName());
          assertEquals("hdfs", parts.getScheme());
          assertEquals("localhost", parts.getHost());
          assertEquals(8020, parts.getPort());
          assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
          assertEquals(parts.getId(), parts.getDownloadURL());
          assertFileNotFound(parts);
        }
      }
    }

    for (Configuration conf : Arrays.asList(jobConf)) {
      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
        for (String up : Arrays.asList("", "../")) {
          // verify using absolute path
          String down = up.length() == 0 ? "foo/" : "";
          String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
          PathParts parts = new PathParts(uploadURL, conf);
          assertEquals(uploadURL, parts.getUploadURL());
          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
          assertEquals("bar.txt", parts.getName());
          assertEquals("hdfs", parts.getScheme());
          assertTrue(
              "localhost".equals(parts.getHost())
                  || "localhost.localdomain".equals(parts.getHost()));
          assertEquals(dfsClusterPort, parts.getPort());
          assertTrue(
              parts
                      .getId()
                      .equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
                  || parts
                      .getId()
                      .equals(
                          "hdfs://localhost.localdomain:"
                              + dfsClusterPort
                              + "/user/"
                              + down
                              + "bar.txt"));
          assertFileNotFound(parts);

          // verify relative path is interpreted to be relative to user's home dir and resolved to
          // an absolute path
          uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
          parts = new PathParts(uploadURL, conf);
          assertEquals(uploadURL, parts.getUploadURL());
          String homeDir = "/user/" + System.getProperty("user.name");
          assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
          assertEquals("bar.txt", parts.getName());
          assertEquals("hdfs", parts.getScheme());
          assertTrue(
              "localhost".equals(parts.getHost())
                  || "localhost.localdomain".equals(parts.getHost()));
          assertEquals(dfsClusterPort, parts.getPort());
          assertTrue(
              parts
                      .getId()
                      .equals(
                          "hdfs://localhost:"
                              + dfsClusterPort
                              + homeDir
                              + "/xuser/"
                              + down
                              + "bar.txt")
                  || parts
                      .getId()
                      .equals(
                          "hdfs://localhost.localdomain:"
                              + dfsClusterPort
                              + homeDir
                              + "/xuser/"
                              + down
                              + "bar.txt"));
          assertFileNotFound(parts);
        }
      }
    }

    try {
      new PathParts("/user/foo/bar.txt", simpleConf);
      fail("host/port resolution requires minimr conf, not a simple conf");
    } catch (IllegalArgumentException e) {
      // expected
    }
  }
Example #19
  // TODO - Move this to MR!
  // Use TaskDistributedCacheManager.CacheFiles.makeCacheFiles(URI[],
  // long[], boolean[], Path[], FileType)
  private static void parseDistributedCacheArtifacts(
      Configuration conf,
      Map<String, LocalResource> localResources,
      LocalResourceType type,
      URI[] uris,
      long[] timestamps,
      long[] sizes,
      boolean[] visibilities,
      Path[] pathsToPutOnClasspath)
      throws IOException {

    if (uris != null) {
      // Sanity check
      if ((uris.length != timestamps.length)
          || (uris.length != sizes.length)
          || (uris.length != visibilities.length)) {
        throw new IllegalArgumentException(
            "Invalid specification for "
                + "distributed-cache artifacts of type "
                + type
                + " :"
                + " #uris="
                + uris.length
                + " #timestamps="
                + timestamps.length
                + " #sizes="
                + sizes.length
                + " #visibilities="
                + visibilities.length);
      }

      Map<String, Path> classPaths = new HashMap<String, Path>();
      if (pathsToPutOnClasspath != null) {
        for (Path p : pathsToPutOnClasspath) {
          FileSystem remoteFS = p.getFileSystem(conf);
          p =
              remoteFS.resolvePath(
                  p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
          classPaths.put(p.toUri().getPath().toString(), p);
        }
      }
      for (int i = 0; i < uris.length; ++i) {
        URI u = uris[i];
        Path p = new Path(u);
        FileSystem remoteFS = p.getFileSystem(conf);
        p =
            remoteFS.resolvePath(
                p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
        // Add URI fragment or just the filename
        Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
        if (name.isAbsolute()) {
          throw new IllegalArgumentException("Resource name must be relative");
        }
        String linkName = name.toUri().getPath();
        localResources.put(
            linkName,
            BuilderUtils.newLocalResource(
                p.toUri(),
                type,
                visibilities[i] ? LocalResourceVisibility.PUBLIC : LocalResourceVisibility.PRIVATE,
                sizes[i],
                timestamps[i]));
      }
    }
  }
Example #20
  /** Command-line interface */
  public static void main(final String[] args) throws Exception {
    final Configuration conf = new HdfsConfiguration();
    Options fetcherOptions = new Options();
    fetcherOptions.addOption(WEBSERVICE, true, "HTTPS url to reach the NameNode at");
    fetcherOptions.addOption(RENEWER, true, "Name of the delegation token renewer");
    fetcherOptions.addOption(CANCEL, false, "cancel the token");
    fetcherOptions.addOption(RENEW, false, "renew the token");
    fetcherOptions.addOption(PRINT, false, "print the token");
    GenericOptionsParser parser = new GenericOptionsParser(conf, fetcherOptions, args);
    CommandLine cmd = parser.getCommandLine();

    // get options
    final String webUrl = cmd.hasOption(WEBSERVICE) ? cmd.getOptionValue(WEBSERVICE) : null;
    final String renewer = cmd.hasOption(RENEWER) ? cmd.getOptionValue(RENEWER) : null;
    final boolean cancel = cmd.hasOption(CANCEL);
    final boolean renew = cmd.hasOption(RENEW);
    final boolean print = cmd.hasOption(PRINT);
    String[] remaining = parser.getRemainingArgs();

    // check option validity
    if (cancel && renew || cancel && print || renew && print || cancel && renew && print) {
      System.err.println("ERROR: Only specify cancel, renew or print.");
      printUsage(System.err);
    }
    if (remaining.length != 1 || remaining[0].charAt(0) == '-') {
      System.err.println("ERROR: Must specify exacltly one token file");
      printUsage(System.err);
    }
    // default to using the local file system
    FileSystem local = FileSystem.getLocal(conf);
    final Path tokenFile = new Path(local.getWorkingDirectory(), remaining[0]);

    // Login the current user
    UserGroupInformation.getCurrentUser()
        .doAs(
            new PrivilegedExceptionAction<Object>() {
              @SuppressWarnings("unchecked")
              @Override
              public Object run() throws Exception {

                if (print) {
                  DelegationTokenIdentifier id =
                      new DelegationTokenSecretManager(0, 0, 0, 0, null).createIdentifier();
                  for (Token<?> token : readTokens(tokenFile, conf)) {
                    DataInputStream in =
                        new DataInputStream(new ByteArrayInputStream(token.getIdentifier()));
                    id.readFields(in);
                    System.out.println("Token (" + id + ") for " + token.getService());
                  }
                } else if (cancel) {
                  for (Token<?> token : readTokens(tokenFile, conf)) {
                    if (token.isManaged()) {
                      token.cancel(conf);
                      if (LOG.isDebugEnabled()) {
                        LOG.debug("Cancelled token for " + token.getService());
                      }
                    }
                  }
                } else if (renew) {
                  for (Token<?> token : readTokens(tokenFile, conf)) {
                    if (token.isManaged()) {
                      long result = token.renew(conf);
                      if (LOG.isDebugEnabled()) {
                        LOG.debug(
                            "Renewed token for "
                                + token.getService()
                                + " until: "
                                + new Date(result));
                      }
                    }
                  }
                } else {
                  // otherwise we are fetching
                  if (webUrl != null) {
                    Credentials creds = getDTfromRemote(webUrl, renewer);
                    creds.writeTokenStorageFile(tokenFile, conf);
                    for (Token<?> token : creds.getAllTokens()) {
                      if (LOG.isDebugEnabled()) {
                        LOG.debug(
                            "Fetched token via "
                                + webUrl
                                + " for "
                                + token.getService()
                                + " into "
                                + tokenFile);
                      }
                    }
                  } else {
                    FileSystem fs = FileSystem.get(conf);
                    Token<?> token = fs.getDelegationToken(renewer);
                    Credentials cred = new Credentials();
                    cred.addToken(token.getService(), token);
                    cred.writeTokenStorageFile(tokenFile, conf);
                    if (LOG.isDebugEnabled()) {
                      LOG.debug("Fetched token for " + token.getService() + " into " + tokenFile);
                    }
                  }
                }
                return null;
              }
            });
  }
Example #21
  /**
   * Run a local map reduce job to load data from in-memory records to an HCatalog table.
   *
   * @param partitionValues partition specification for the output, or null for an unpartitioned
   *     write
   * @param partitionColumns schema of the columns being written
   * @param records data to be written to the HCatalog table
   * @param writeCount expected number of records to be written
   * @param assertWrite whether to assert the number of written records against writeCount
   * @param asSingleMapTask whether the job should run with a single map task
   * @param customDynamicPathPattern custom dynamic-partitioning path pattern, or null
   * @return the completed write job
   * @throws Exception if the job fails
   */
  Job runMRCreate(
      Map<String, String> partitionValues,
      List<HCatFieldSchema> partitionColumns,
      List<HCatRecord> records,
      int writeCount,
      boolean assertWrite,
      boolean asSingleMapTask,
      String customDynamicPathPattern)
      throws Exception {

    writeRecords = records;
    MapCreate.writeCount = 0;

    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce write test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapCreate.class);

    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);

    if (asSingleMapTask) {
      // One input path would mean only one map task
      Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
      createInputFile(path, writeCount);
      TextInputFormat.setInputPaths(job, path);
    } else {
      // Create two input paths so that two map tasks get triggered. There could be other ways
      // to trigger two map tasks.
      Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
      createInputFile(path, writeCount / 2);

      Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
      createInputFile(path2, (writeCount - writeCount / 2));

      TextInputFormat.setInputPaths(job, path, path2);
    }

    job.setOutputFormatClass(HCatOutputFormat.class);

    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    if (customDynamicPathPattern != null) {
      job.getConfiguration()
          .set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
    }
    HCatOutputFormat.setOutput(job, outputJobInfo);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);

    HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

    boolean success = job.waitForCompletion(true);

    // Ensure counters are set when data has actually been read.
    if (partitionValues != null) {
      assertTrue(
          job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue()
              > 0);
    }

    if (!HCatUtil.isHadoop23()) {
      // Local mode outputcommitter hook is not invoked in Hadoop 1.x
      if (success) {
        new FileOutputCommitterContainer(job, null).commitJob(job);
      } else {
        new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
      }
    }
    if (assertWrite) {
      // we assert only if we expected to assert with this call.
      Assert.assertEquals(writeCount, MapCreate.writeCount);
    }

    if (isTableExternal()) {
      externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
    }

    return job;
  }
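
A hedged round-trip sketch combining runMRCreate and runMRRead from these examples; the partition column/value, the prepared partitionColumns schema, and the 20-record list are placeholders, not taken from the original suite:

  // Hypothetical round trip: write 20 records into one partition, then read them back.
  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("part1", "p1value1"); // hypothetical partition column and value
  runMRCreate(partitionValues, partitionColumns, records, 20,
      true /* assertWrite */, true /* asSingleMapTask */, null /* customDynamicPathPattern */);
  List<HCatRecord> readBack = runMRRead(20, null);
  Assert.assertEquals(20, readBack.size());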