/**
 * Run a distributed job and verify that the TokenCache is available.
 *
 * @throws IOException
 */
@Test
public void testTokenCache() throws IOException {
  System.out.println("running dist job");

  // make sure the JT starts
  jConf = mrCluster.createJobConf();

  // provide namenode names for the job to get the delegation tokens for
  String nnUri = dfsCluster.getURI(0).toString();
  jConf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);
  // job tracker principal id
  jConf.set(JTConfig.JT_USER_NAME, "jt_id/foo@BAR");

  // pass the token cache file name as an argument
  String[] args = {
    "-tokenCacheFile", tokenFileName.toString(),
    "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
  };

  int res = -1;
  try {
    res = ToolRunner.run(jConf, new MySleepJob(), args);
  } catch (Exception e) {
    System.out.println("Job failed with " + e.getLocalizedMessage());
    e.printStackTrace(System.out);
    fail("Job failed");
  }
  assertEquals("dist job res is not 0", 0, res);
}
@Test(timeout = 60000)
public void testSymlinkHdfsDisable() throws Exception {
  Configuration conf = new HdfsConfiguration();
  // disable symlink resolution
  conf.setBoolean(CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, false);
  // spin up minicluster, get dfs and filecontext
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  DistributedFileSystem dfs = cluster.getFileSystem();
  FileContext fc = FileContext.getFileContext(cluster.getURI(0), conf);
  // create test files/links
  FileContextTestHelper helper = new FileContextTestHelper("/tmp/TestSymlinkHdfsDisable");
  Path root = helper.getTestRootPath(fc);
  Path target = new Path(root, "target");
  Path link = new Path(root, "link");
  DFSTestUtil.createFile(dfs, target, 4096, (short) 1, 0xDEADDEAD);
  fc.createSymlink(target, link, false);
  // try to resolve links with FileSystem and FileContext
  try {
    fc.open(link);
    fail("Expected error when attempting to resolve link");
  } catch (IOException e) {
    GenericTestUtils.assertExceptionContains("resolution is disabled", e);
  }
  try {
    dfs.open(link);
    fail("Expected error when attempting to resolve link");
  } catch (IOException e) {
    GenericTestUtils.assertExceptionContains("resolution is disabled", e);
  }
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  File minidfsDir = new File("target/minidfs-" + UUID.randomUUID()).getAbsoluteFile();
  minidfsDir.mkdirs();
  Assert.assertTrue(minidfsDir.exists());
  System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
  Configuration conf = new HdfsConfiguration();
  conf.set("dfs.namenode.fs-limits.min-block-size", String.valueOf(32));
  EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
  miniDFS = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  dir = new Path(miniDFS.getURI() + "/dir");
  FileSystem fs = miniDFS.getFileSystem();
  fs.mkdirs(dir);
  writeFile(fs, new Path(dir + "/forAllTests/" + "path"), 1000);
  dummyEtc = new File(minidfsDir, "dummy-etc");
  dummyEtc.mkdirs();
  Assert.assertTrue(dummyEtc.exists());
  Configuration dummyConf = new Configuration(false);
  for (String file : new String[] {"core", "hdfs", "mapred", "yarn"}) {
    File siteXml = new File(dummyEtc, file + "-site.xml");
    FileOutputStream out = new FileOutputStream(siteXml);
    dummyConf.writeXml(out);
    out.close();
  }
  resourcesDir = minidfsDir.getAbsolutePath();
  hadoopConfDir = dummyEtc.getName();
  System.setProperty("sdc.resources.dir", resourcesDir);
}
@Before
public void createHDFS() {
  try {
    Configuration hdConf = new Configuration();
    File baseDir = new File("./target/hdfs/hdfsTest").getAbsoluteFile();
    FileUtil.fullyDelete(baseDir);
    hdConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath());
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(hdConf);
    hdfsCluster = builder.build();
    hdfsURI = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/";
    hdPath = new org.apache.hadoop.fs.Path("/test");
    hdfs = hdPath.getFileSystem(hdConf);
    FSDataOutputStream stream = hdfs.create(hdPath);
    for (int i = 0; i < 10; i++) {
      stream.write("Hello HDFS\n".getBytes());
    }
    stream.close();
  } catch (Throwable e) {
    e.printStackTrace();
    Assert.fail("Test failed " + e.getMessage());
  }
}
private void configure(ClusterHdfsDSource hdfsClusterSource, String dirLocation) {
  hdfsClusterSource.hdfsUri = miniDFS.getURI().toString();
  hdfsClusterSource.hdfsDirLocations = Arrays.asList(dirLocation);
  hdfsClusterSource.hdfsConfigs = new HashMap<String, String>();
  hdfsClusterSource.hdfsConfigs.put("x", "X");
  hdfsClusterSource.dataFormat = DataFormat.TEXT;
  hdfsClusterSource.textMaxLineLen = 1024;
}
@Test(timeout = 30000)
public void testProduce() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.TEXT)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();
  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair(new LongWritable(1), new Text("aaa")));
  list.add(new Pair(new LongWritable(2), new Text("bbb")));
  list.add(new Pair(new LongWritable(3), new Text("ccc")));
  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    String newOffset = output.getNewOffset();
    Assert.assertEquals("3", newOffset);
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(3, records.size());
    for (int i = 0; i < records.size(); i++) {
      Assert.assertNotNull(records.get(i).get("/text"));
      LOG.info("Header " + records.get(i).getHeader().getSourceId());
      Assert.assertTrue(!records.get(i).get("/text").getValueAsString().isEmpty());
      Assert.assertEquals(list.get(i).getValue().toString(),
          records.get(i).get("/text").getValueAsString());
    }
    if (sourceRunner != null) {
      sourceRunner.runDestroy();
    }
  } finally {
    th.interrupt();
  }
}
@BeforeClass
public static void createHDFS() throws IOException {
  Configuration conf = new Configuration();
  File dataDir = tempFolder.newFolder();
  conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dataDir.getAbsolutePath());
  MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
  hdfsCluster = builder.build();
  dfs = hdfsCluster.getFileSystem();
  hdfsURI = "hdfs://"
      + NetUtils.hostAndPortToUrlString(hdfsCluster.getURI().getHost(), hdfsCluster.getNameNodePort())
      + "/";
}
@Test(timeout = 30000)
public void testProduceAvroData() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.AVRO)
      .addConfiguration("csvFileFormat", CsvMode.CSV)
      .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
      .addConfiguration("csvMaxObjectLen", 4096)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();
  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair("path::" + "1" + "::1",
      createAvroData("a", 30, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
  list.add(new Pair("path::" + "1" + "::2",
      createAvroData("b", 40, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    String newOffset = output.getNewOffset();
    Assert.assertEquals("path::" + "1::2", newOffset);
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(2, records.size());

    Record record = records.get(0);
    Assert.assertTrue(record.has("/name"));
    Assert.assertEquals("a", record.get("/name").getValueAsString());
    Assert.assertTrue(record.has("/age"));
    Assert.assertEquals(30, record.get("/age").getValueAsInteger());
    Assert.assertTrue(record.has("/emails"));
    Assert.assertTrue(record.get("/emails").getValueAsList() instanceof List);
    List<Field> emails = record.get("/emails").getValueAsList();
    Assert.assertEquals(2, emails.size());
    Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());

    record = records.get(1);
    Assert.assertTrue(record.has("/name"));
    Assert.assertEquals("b", record.get("/name").getValueAsString());
    Assert.assertTrue(record.has("/age"));
    Assert.assertEquals(40, record.get("/age").getValueAsInteger());
    Assert.assertTrue(record.has("/emails"));
    Assert.assertTrue(record.get("/emails").getValueAsList() instanceof List);
    emails = record.get("/emails").getValueAsList();
    Assert.assertEquals(2, emails.size());
    Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());
  } finally {
    th.interrupt();
  }
}
@Test(timeout = 30000)
public void testProduceDelimitedWithHeader() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.DELIMITED)
      .addConfiguration("csvFileFormat", CsvMode.CSV)
      .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
      .addConfiguration("csvMaxObjectLen", 4096)
      .addConfiguration("csvRecordType", CsvRecordType.LIST)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();
  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair("HEADER_COL_1,HEADER_COL_2", null));
  list.add(new Pair("path::" + "1", "a,b\nC,D\nc,d"));
  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    String newOffset = output.getNewOffset();
    Assert.assertEquals("path::" + "1", newOffset);
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(3, records.size());

    Record record = records.get(0);
    Assert.assertEquals("a",
        record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_1",
        record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("b",
        record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_2",
        record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());

    record = records.get(1);
    Assert.assertEquals("C",
        record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_1",
        record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("D",
        record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_2",
        record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());

    record = records.get(2);
    Assert.assertEquals("c",
        record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_1",
        record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("d",
        record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("HEADER_COL_2",
        record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());

    if (sourceRunner != null) {
      sourceRunner.runDestroy();
    }
  } finally {
    th.interrupt();
  }
}
@Test
public void testWrongHDFSDirLocation() throws Exception {
  ClusterHdfsDSource dSource = new ForTestClusterHdfsDSource();
  configure(dSource, dir.toUri().getPath());
  dSource.hdfsUri = "/pathwithnoschemeorauthority";
  ClusterHdfsSource clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
  try {
    List<ConfigIssue> issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

    dSource.hdfsUri = "file://localhost:8020/";
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

    dSource.hdfsUri = "hdfs:///noauthority";
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

    dSource.hdfsUri = "hdfs://localhost:8020";
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

    dSource.hdfsUri = miniDFS.getURI().toString();
    dSource.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

    dSource.hdfsUri = miniDFS.getURI().toString();
    dSource.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
    FileSystem fs = miniDFS.getFileSystem();
    Path someFile = new Path(new Path(dir.toUri()), "/someFile");
    fs.create(someFile).close();
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 0, issues.size());

    dSource.hdfsUri = null;
    dSource.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
    someFile = new Path(new Path(dir.toUri()), "/someFile2");
    fs.create(someFile).close();
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 0, issues.size());

    Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
    fs.create(dummyFile).close();
    dSource.hdfsUri = miniDFS.getURI().toString();
    dSource.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

    Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
    fs.mkdirs(emptyDir);
    dSource.hdfsUri = miniDFS.getURI().toString();
    dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 1, issues.size());
    assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

    Path path1 = new Path(emptyDir, "path1");
    fs.create(path1).close();
    dSource.hdfsUri = miniDFS.getURI().toString();
    dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
    clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    issues = clusterHdfsSource.init(null,
        ContextInfoCreator.createSourceContext("myInstance", false, OnRecordError.TO_ERROR,
            ImmutableList.of("lane")));
    assertEquals(String.valueOf(issues), 0, issues.size());
  } finally {
    clusterHdfsSource.destroy();
  }
}