/**
   * Run a distributed job and verify that the TokenCache is available.
   *
   * @throws IOException
   */
  @Test
  public void testTokenCache() throws IOException {

    System.out.println("running dist job");

    // make sure JT starts
    jConf = mrCluster.createJobConf();

    // provide namenode names for the job to get delegation tokens for
    String nnUri = dfsCluster.getURI(0).toString();
    jConf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);
    // job tracker principal id
    jConf.set(JTConfig.JT_USER_NAME, "jt_id/foo@BAR");

    // pass the token cache file name as an argument
    String[] args = {
      "-tokenCacheFile", tokenFileName.toString(), "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
    };

    int res = -1;
    try {
      res = ToolRunner.run(jConf, new MySleepJob(), args);
    } catch (Exception e) {
      System.out.println("Job failed with " + e.getLocalizedMessage());
      e.printStackTrace(System.out);
      fail("Job failed");
    }
    assertEquals("dist job res is not 0", 0, res);
  }
  @Test(timeout = 60000)
  public void testSymlinkHdfsDisable() throws Exception {
    Configuration conf = new HdfsConfiguration();
    // disable symlink resolution
    conf.setBoolean(CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, false);
    // spin up minicluster, get dfs and filecontext
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem dfs = cluster.getFileSystem();
    FileContext fc = FileContext.getFileContext(cluster.getURI(0), conf);
    // Create test files/links
    FileContextTestHelper helper = new FileContextTestHelper("/tmp/TestSymlinkHdfsDisable");
    Path root = helper.getTestRootPath(fc);
    Path target = new Path(root, "target");
    Path link = new Path(root, "link");
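    // create a 4 KB target file (replication 1, fixed seed) and a symlink that points at it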
    DFSTestUtil.createFile(dfs, target, 4096, (short) 1, 0xDEADDEAD);
    fc.createSymlink(target, link, false);

    // Try to resolve links with FileSystem and FileContext
    try {
      fc.open(link);
      fail("Expected error when attempting to resolve link");
    } catch (IOException e) {
      GenericTestUtils.assertExceptionContains("resolution is disabled", e);
    }
    try {
      dfs.open(link);
      fail("Expected error when attempting to resolve link");
    } catch (IOException e) {
      GenericTestUtils.assertExceptionContains("resolution is disabled", e);
    }
  }
 @BeforeClass
 public static void setUpBeforeClass() throws Exception {
   File minidfsDir = new File("target/minidfs-" + UUID.randomUUID()).getAbsoluteFile();
   minidfsDir.mkdirs();
   Assert.assertTrue(minidfsDir.exists());
   System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
   Configuration conf = new HdfsConfiguration();
   conf.set("dfs.namenode.fs-limits.min-block-size", String.valueOf(32));
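   // skip fsync on edit log writes to speed up the mini DFS cluster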
   EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
   miniDFS = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
   dir = new Path(miniDFS.getURI() + "/dir");
   FileSystem fs = miniDFS.getFileSystem();
   fs.mkdirs(dir);
   writeFile(fs, new Path(dir + "/forAllTests/" + "path"), 1000);
   dummyEtc = new File(minidfsDir, "dummy-etc");
   dummyEtc.mkdirs();
   Assert.assertTrue(dummyEtc.exists());
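   // write minimal core/hdfs/mapred/yarn *-site.xml files so the stage has a Hadoop conf dir to load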
   Configuration dummyConf = new Configuration(false);
   for (String file : new String[] {"core", "hdfs", "mapred", "yarn"}) {
     File siteXml = new File(dummyEtc, file + "-site.xml");
     FileOutputStream out = new FileOutputStream(siteXml);
     dummyConf.writeXml(out);
     out.close();
   }
   resourcesDir = minidfsDir.getAbsolutePath();
   hadoopConfDir = dummyEtc.getName();
   System.setProperty("sdc.resources.dir", resourcesDir);
 }
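 // writeFile(...) is referenced above but is not part of this listing. A minimal
 // sketch, assuming it simply writes `size` bytes of dummy data to the given path:
 private static void writeFile(FileSystem fs, Path path, int size) throws IOException {
   byte[] data = new byte[size];
   Arrays.fill(data, (byte) 'a');
   FSDataOutputStream out = fs.create(path);
   try {
     // write the payload; it is flushed out to the datanodes on close
     out.write(data);
   } finally {
     out.close();
   }
 }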
  @Before
  public void createHDFS() {
    try {
      Configuration hdConf = new Configuration();

      File baseDir = new File("./target/hdfs/hdfsTest").getAbsoluteFile();
      FileUtil.fullyDelete(baseDir);
      hdConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath());
      MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(hdConf);
      hdfsCluster = builder.build();

      hdfsURI =
          "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/";

      hdPath = new org.apache.hadoop.fs.Path("/test");
      hdfs = hdPath.getFileSystem(hdConf);
      FSDataOutputStream stream = hdfs.create(hdPath);
      for (int i = 0; i < 10; i++) {
        stream.write("Hello HDFS\n".getBytes());
      }
      stream.close();

    } catch (Throwable e) {
      e.printStackTrace();
      Assert.fail("Test failed: " + e.getMessage());
    }
  }
 private void configure(ClusterHdfsDSource hdfsClusterSource, String dirLocation) {
   hdfsClusterSource.hdfsUri = miniDFS.getURI().toString();
   hdfsClusterSource.hdfsDirLocations = Arrays.asList(dirLocation);
   hdfsClusterSource.hdfsConfigs = new HashMap<String, String>();
   hdfsClusterSource.hdfsConfigs.put("x", "X");
   hdfsClusterSource.dataFormat = DataFormat.TEXT;
   hdfsClusterSource.textMaxLineLen = 1024;
 }
  @Test(timeout = 30000)
  public void testProduce() throws Exception {
    SourceRunner sourceRunner =
        new SourceRunner.Builder(ClusterHdfsDSource.class)
            .addOutputLane("lane")
            .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
            .addConfiguration("hdfsUri", miniDFS.getURI().toString())
            .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
            .addConfiguration("recursive", false)
            .addConfiguration("hdfsConfigs", new HashMap<String, String>())
            .addConfiguration("dataFormat", DataFormat.TEXT)
            .addConfiguration("textMaxLineLen", 1024)
            .addConfiguration("produceSingleRecordPerMessage", false)
            .addConfiguration("regex", null)
            .addConfiguration("grokPatternDefinition", null)
            .addConfiguration("enableLog4jCustomLogFormat", false)
            .addConfiguration("customLogFormat", null)
            .addConfiguration("fieldPathsToGroupName", null)
            .addConfiguration("log4jCustomLogFormat", null)
            .addConfiguration("grokPattern", null)
            .addConfiguration("hdfsKerberos", false)
            .addConfiguration("hdfsConfDir", hadoopConfDir)
            .setResourcesDir(resourcesDir)
            .build();
    sourceRunner.runInit();

    List<Map.Entry> list = new ArrayList<>();
    list.add(new Pair(new LongWritable(1), new Text("aaa")));
    list.add(new Pair(new LongWritable(2), new Text("bbb")));
    list.add(new Pair(new LongWritable(3), new Text("ccc")));

    // feed the batch to the cluster source from a separate thread (see the helper sketch below)
    Thread th = createThreadForAddingBatch(sourceRunner, list);
    try {
      StageRunner.Output output = sourceRunner.runProduce(null, 5);

      String newOffset = output.getNewOffset();
      Assert.assertEquals("3", newOffset);
      List<Record> records = output.getRecords().get("lane");
      Assert.assertEquals(3, records.size());

      for (int i = 0; i < records.size(); i++) {
        Assert.assertNotNull(records.get(i).get("/text"));
        LOG.info("Header " + records.get(i).getHeader().getSourceId());
        Assert.assertTrue(!records.get(i).get("/text").getValueAsString().isEmpty());
        Assert.assertEquals(
            list.get(i).getValue().toString(), records.get(i).get("/text").getValueAsString());
      }

      sourceRunner.runDestroy();
    } finally {
      th.interrupt();
    }
  }
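  // createThreadForAddingBatch(...) is called by the produce tests in this listing but is
  // not shown. A minimal sketch, assuming the StreamSets ClusterSource#put(List) API and
  // the LOG member used above:
  private Thread createThreadForAddingBatch(final SourceRunner sourceRunner, final List<Map.Entry> batch) {
    Thread sourceThread =
        new Thread() {
          @Override
          public void run() {
            try {
              // hand the batch to the running cluster source; runProduce() drains it
              ClusterHdfsSource source = (ClusterHdfsSource) sourceRunner.getStage();
              source.put(batch);
            } catch (Exception ex) {
              LOG.error("Error while adding batch: " + ex, ex);
            }
          }
        };
    sourceThread.start();
    return sourceThread;
  }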
  @BeforeClass
  public static void createHDFS() throws IOException {
    Configuration conf = new Configuration();

    File dataDir = tempFolder.newFolder();

    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dataDir.getAbsolutePath());
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    hdfsCluster = builder.build();

    dfs = hdfsCluster.getFileSystem();

    hdfsURI =
        "hdfs://"
            + NetUtils.hostAndPortToUrlString(
                hdfsCluster.getURI().getHost(), hdfsCluster.getNameNodePort())
            + "/";
  }
  @Test(timeout = 30000)
  public void testProduceAvroData() throws Exception {
    SourceRunner sourceRunner =
        new SourceRunner.Builder(ClusterHdfsDSource.class)
            .addOutputLane("lane")
            .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
            .addConfiguration("hdfsUri", miniDFS.getURI().toString())
            .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
            .addConfiguration("recursive", false)
            .addConfiguration("hdfsConfigs", new HashMap<String, String>())
            .addConfiguration("dataFormat", DataFormat.AVRO)
            .addConfiguration("csvFileFormat", CsvMode.CSV)
            .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
            .addConfiguration("csvMaxObjectLen", 4096)
            .addConfiguration("textMaxLineLen", 1024)
            .addConfiguration("produceSingleRecordPerMessage", false)
            .addConfiguration("regex", null)
            .addConfiguration("grokPatternDefinition", null)
            .addConfiguration("enableLog4jCustomLogFormat", false)
            .addConfiguration("customLogFormat", null)
            .addConfiguration("fieldPathsToGroupName", null)
            .addConfiguration("log4jCustomLogFormat", null)
            .addConfiguration("grokPattern", null)
            .addConfiguration("hdfsKerberos", false)
            .addConfiguration("hdfsConfDir", hadoopConfDir)
            .setResourcesDir(resourcesDir)
            .build();
    sourceRunner.runInit();

    List<Map.Entry> list = new ArrayList<>();
    list.add(
        new Pair(
            "path::" + "1" + "::1",
            createAvroData("a", 30, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
    list.add(
        new Pair(
            "path::" + "1" + "::2",
            createAvroData("b", 40, ImmutableList.of("*****@*****.**", "*****@*****.**"))));

    Thread th = createThreadForAddingBatch(sourceRunner, list);
    try {
      StageRunner.Output output = sourceRunner.runProduce(null, 5);
      String newOffset = output.getNewOffset();
      Assert.assertEquals("path::" + "1::2", newOffset);
      List<Record> records = output.getRecords().get("lane");
      Assert.assertEquals(2, records.size());

      Record record = records.get(0);
      Assert.assertTrue(record.has("/name"));
      Assert.assertEquals("a", record.get("/name").getValueAsString());
      Assert.assertTrue(record.has("/age"));
      Assert.assertEquals(30, record.get("/age").getValueAsInteger());
      Assert.assertTrue(record.has("/emails"));
      Assert.assertTrue(record.get("/emails").getValueAsList() instanceof List);
      List<Field> emails = record.get("/emails").getValueAsList();
      Assert.assertEquals(2, emails.size());
      Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
      Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());

      record = records.get(1);
      Assert.assertTrue(record.has("/name"));
      Assert.assertEquals("b", record.get("/name").getValueAsString());
      Assert.assertTrue(record.has("/age"));
      Assert.assertEquals(40, record.get("/age").getValueAsInteger());
      Assert.assertTrue(record.has("/emails"));
      Assert.assertTrue(record.get("/emails").getValueAsList() instanceof List);
      emails = record.get("/emails").getValueAsList();
      Assert.assertEquals(2, emails.size());
      Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
      Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());

    } finally {
      th.interrupt();
    }
  }
  @Test(timeout = 30000)
  public void testProduceDelimitedWithHeader() throws Exception {
    SourceRunner sourceRunner =
        new SourceRunner.Builder(ClusterHdfsDSource.class)
            .addOutputLane("lane")
            .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
            .addConfiguration("hdfsUri", miniDFS.getURI().toString())
            .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
            .addConfiguration("recursive", false)
            .addConfiguration("hdfsConfigs", new HashMap<String, String>())
            .addConfiguration("dataFormat", DataFormat.DELIMITED)
            .addConfiguration("csvFileFormat", CsvMode.CSV)
            .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
            .addConfiguration("csvMaxObjectLen", 4096)
            .addConfiguration("csvRecordType", CsvRecordType.LIST)
            .addConfiguration("textMaxLineLen", 1024)
            .addConfiguration("produceSingleRecordPerMessage", false)
            .addConfiguration("regex", null)
            .addConfiguration("grokPatternDefinition", null)
            .addConfiguration("enableLog4jCustomLogFormat", false)
            .addConfiguration("customLogFormat", null)
            .addConfiguration("fieldPathsToGroupName", null)
            .addConfiguration("log4jCustomLogFormat", null)
            .addConfiguration("grokPattern", null)
            .addConfiguration("hdfsKerberos", false)
            .addConfiguration("hdfsConfDir", hadoopConfDir)
            .setResourcesDir(resourcesDir)
            .build();
    sourceRunner.runInit();

    List<Map.Entry> list = new ArrayList<>();
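    // the first entry supplies the CSV header row; with CsvHeader.WITH_HEADER each record
    // below pairs its values with these column names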
    list.add(new Pair("HEADER_COL_1,HEADER_COL_2", null));
    list.add(new Pair("path::" + "1", "a,b\nC,D\nc,d"));

    Thread th = createThreadForAddingBatch(sourceRunner, list);
    try {
      StageRunner.Output output = sourceRunner.runProduce(null, 5);

      String newOffset = output.getNewOffset();
      Assert.assertEquals("path::" + "1", newOffset);
      List<Record> records = output.getRecords().get("lane");
      Assert.assertEquals(3, records.size());
      Record record = records.get(0);
      Assert.assertEquals(
          "a",
          record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_1",
          record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
      Assert.assertEquals(
          "b",
          record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_2",
          record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());
      record = records.get(1);
      Assert.assertEquals(
          "C",
          record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_1",
          record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
      Assert.assertEquals(
          "D",
          record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_2",
          record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());
      record = records.get(2);
      Assert.assertEquals(
          "c",
          record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_1",
          record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
      Assert.assertEquals(
          "d",
          record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals(
          "HEADER_COL_2",
          record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());
      sourceRunner.runDestroy();
    } finally {
      th.interrupt();
    }
  }
  @Test
  public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsDSource dSource = new ForTestClusterHdfsDSource();
    configure(dSource, dir.toUri().getPath());
    dSource.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    try {
      List<ConfigIssue> issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

      dSource.hdfsUri = "file://localhost:8020/";
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

      dSource.hdfsUri = "hdfs:///noauthority";
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

      dSource.hdfsUri = "hdfs://localhost:8020";
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

      dSource.hdfsUri = miniDFS.getURI().toString();
      dSource.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

      dSource.hdfsUri = miniDFS.getURI().toString();
      dSource.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
      FileSystem fs = miniDFS.getFileSystem();
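      // with a valid URI and an existing, non-empty directory the source should initialize cleanly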
      Path someFile = new Path(new Path(dir.toUri()), "/someFile");
      fs.create(someFile).close();
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 0, issues.size());

      dSource.hdfsUri = null;
      dSource.hdfsConfigs.put(
          CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
      someFile = new Path(new Path(dir.toUri()), "/someFile2");
      fs.create(someFile).close();
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 0, issues.size());

      Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
      fs.create(dummyFile).close();
      dSource.hdfsUri = miniDFS.getURI().toString();
      dSource.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

      Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
      fs.mkdirs(emptyDir);
      dSource.hdfsUri = miniDFS.getURI().toString();
      dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 1, issues.size());
      assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

      Path path1 = new Path(emptyDir, "path1");
      fs.create(path1).close();
      dSource.hdfsUri = miniDFS.getURI().toString();
      dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
      clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
      issues =
          clusterHdfsSource.init(
              null,
              ContextInfoCreator.createSourceContext(
                  "myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
      assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
      clusterHdfsSource.destroy();
    }
  }