/**
 * Run a local map reduce job to read records from an HCatalog table and verify that the count is
 * as expected.
 *
 * @param readCount expected number of records to be read
 * @param filter partition filter passed to HCatInputFormat
 * @return the records read by the map tasks
 * @throws Exception
 */
List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
  MapRead.readCount = 0;
  readRecords.clear();

  Configuration conf = new Configuration();
  conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
  Job job = new Job(conf, "hcat mapreduce read test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapRead.class);

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName, filter);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.setNumReduceTasks(0);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
  if (fs.exists(path)) {
    fs.delete(path, true);
  }

  TextOutputFormat.setOutputPath(job, path);

  job.waitForCompletion(true);

  Assert.assertEquals(readCount, MapRead.readCount);

  return readRecords;
}
@Test
public void testVanishingTaskZNode() throws Exception {
  LOG.info("testVanishingTaskZNode");
  conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);
  slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
  slm.finishInitialization();
  FileSystem fs = TEST_UTIL.getTestFileSystem();
  final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  fs.mkdirs(logDir);
  Path logFile = new Path(logDir, UUID.randomUUID().toString());
  fs.createNewFile(logFile);
  new Thread() {
    public void run() {
      try {
        // this call will block because there are no SplitLogWorkers
        slm.splitLogDistributed(logDir);
      } catch (Exception e) {
        LOG.warn("splitLogDistributed failed", e);
        fail();
      }
    }
  }.start();
  waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
  String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
  // remove the task znode
  ZKUtil.deleteNode(zkw, znode);
  waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
  waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
  assertTrue(fs.exists(logFile));
  fs.delete(logDir, true);
}
/*
 * (non-Javadoc)
 *
 * @see org.anon.smart.d2cache.store.StoreTransaction#commit()
 */
@Override
public void commit() throws CtxException {
  FileSystem hdfs = ((HadoopFileStoreConnection) _connection).getHadoopFS();
  assertion().assertNotNull(hdfs, "Hadoop FileSystem is null");
  String repo = hdfs.getWorkingDirectory().toUri().toString();
  for (Object fi : files.keySet()) {
    try {
      String[] params = (String[]) fi;
      Path destination = new Path(repo + "/" + params[1]);
      Path fldr = destination.getParent();
      if (!hdfs.exists(fldr)) {
        hdfs.mkdirs(fldr);
      }
      hdfs.copyFromLocalFile(true, new Path(params[0]), destination);
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
}
@Test
public void testTableWithCFNameStartWithUnderScore() throws Exception {
  Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
  FileSystem fs = util.getTestFileSystem();
  dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  String family = "_cf";
  Path familyDir = new Path(dir, family);

  byte[] from = Bytes.toBytes("begin");
  byte[] to = Bytes.toBytes("end");
  Configuration conf = util.getConfiguration();
  String tableName = "mytable_cfNameStartWithUnderScore";
  Table table = util.createTable(TableName.valueOf(tableName), family);
  HFileTestUtil.createHFile(
      conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family), QUALIFIER, from, to, 1000);

  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
  String[] args = {dir.toString(), tableName};
  try {
    loader.run(args);
    assertEquals(1000, util.countRows(table));
  } finally {
    if (null != table) {
      table.close();
    }
  }
}
/**
 * Return the current file system location (working directory).
 *
 * @return !null string
 */
@Override
public String pwd() {
  if (isRunningAsUser()) {
    return super.pwd();
  }
  final FileSystem fs = getDFS();
  return absolutePath(fs.getWorkingDirectory());
}
/** Test compressible {@link GridmixRecord}. */
@Test
public void testCompressibleGridmixRecord() throws IOException {
  JobConf conf = new JobConf();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

  FileSystem lfs = FileSystem.getLocal(conf);
  int dataSize = 1024 * 1024 * 10; // 10 MB
  float ratio = 0.357F;

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
  lfs.delete(tempDir, true);

  // define a compressible GridmixRecord
  GridmixRecord record = new GridmixRecord(dataSize, 0);
  record.setCompressibility(true, ratio); // enable compression

  conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
  org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

  // write the record to a file
  Path recordFile = new Path(tempDir, "record");
  OutputStream outStream =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
  DataOutputStream out = new DataOutputStream(outStream);
  record.write(out);
  out.close();
  outStream.close();

  // open the compressed stream for reading
  Path actualRecordFile = recordFile.suffix(".gz");
  InputStream in =
      CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

  // get the compressed file size
  long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

  GridmixRecord recordRead = new GridmixRecord();
  recordRead.readFields(new DataInputStream(in));

  assertEquals(
      "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
  assertTrue(
      "Failed to generate a compressible GridmixRecord",
      recordRead.getSize() > compressedFileSize);

  // check if the record can generate data with the desired compression ratio
  float seenRatio = ((float) compressedFileSize) / dataSize;
  assertEquals(
      CompressionEmulationUtil.standardizeCompressionRatio(ratio),
      CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
      1.0D);
}
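Several of the snippets above and below build a qualified temp directory with the same idiom: a relative path is made fully qualified against the local file system's URI and working directory. A minimal standalone sketch of just that pattern (the class name and the "SketchTempDir" subdirectory are illustrative, not from the source):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedTempDirSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // local file system, as in the tests above
    FileSystem lfs = FileSystem.getLocal(conf);
    // "test.build.data" falls back to /tmp when the property is not set
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    // hypothetical subdirectory for this sketch
    Path tempDir = new Path(rootTempDir, "SketchTempDir");
    System.out.println(tempDir); // e.g. file:/tmp/SketchTempDir
  }
}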
public void execute(Environment env, CommandLine cmd, ConsoleReader reader) {
  FileSystem hdfs = (FileSystem) env.getValue(Constants.HDFS);
  String wd = hdfs.getWorkingDirectory().toString();
  if (cmd.hasOption("l")) {
    log(cmd, wd);
  } else {
    log(cmd, wd.substring(env.getProperty(Constants.HDFS_URL).length()));
  }
  FSUtil.prompt(env);
}
// jobSubmitDir is the staging dir under /tmp. Copy libs, files, and so on into jobSubmitDir.
private void copyAndConfigureFiles(Job job, Path jobSubmitDir) throws IOException {
  Configuration conf = job.getConfiguration();
  short replication = (short) conf.getInt(Job.SUBMIT_REPLICATION, 10);
  copyAndConfigureFiles(job, jobSubmitDir, replication);

  // Set the working directory
  if (job.getWorkingDirectory() == null) {
    job.setWorkingDirectory(jtFs.getWorkingDirectory());
  }
}
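For context, a hedged sketch (not from the source) of the client-side counterpart: if the submitter sets the job's working directory explicitly, the fallback to jtFs.getWorkingDirectory() above never applies. The job name and path below are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class WorkingDirExample {
  public static void main(String[] args) throws Exception {
    // hypothetical job name
    Job job = Job.getInstance(new Configuration(), "working-dir-example");
    // Explicitly set the working directory; relative job paths resolve against it.
    job.setWorkingDirectory(new Path("/user/example/workdir")); // hypothetical path
    System.out.println(job.getWorkingDirectory());
  }
}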
/** Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}. */
@Test
public void testRandomCompressedTextDataGenerator() throws Exception {
  int wordSize = 10;
  int listSize = 20;
  long dataSize = 10 * 1024 * 1024;

  Configuration conf = new Configuration();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

  // configure the RandomTextDataGenerator to generate desired sized data
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
  conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

  FileSystem lfs = FileSystem.getLocal(conf);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr");
  lfs.delete(tempDir, true);

  runDataGenJob(conf, tempDir);

  // validate the output data
  FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
  long size = 0;
  long maxLineSize = 0;

  for (FileStatus status : files) {
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(status.getPath(), conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = reader.readLine();
    if (line != null) {
      long lineSize = line.getBytes().length;
      if (lineSize > maxLineSize) {
        maxLineSize = lineSize;
      }
      while (line != null) {
        for (String word : line.split("\\s")) {
          size += word.getBytes().length;
        }
        line = reader.readLine();
      }
    }
    reader.close();
  }

  assertTrue(size >= dataSize);
  assertTrue(size <= dataSize + maxLineSize);
}
@Test
public void testEmptyLogDir() throws Exception {
  LOG.info("testEmptyLogDir");
  slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
  slm.finishInitialization();
  FileSystem fs = TEST_UTIL.getTestFileSystem();
  Path emptyLogDirPath = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  fs.mkdirs(emptyLogDirPath);
  slm.splitLogDistributed(emptyLogDirPath);
  assertFalse(fs.exists(emptyLogDirPath));
}
/**
 * Test if the {@link JobConfigurationParser} can correctly extract out key-value pairs from the
 * job configuration.
 */
@Test
public void testJobConfigurationParsing() throws Exception {
  final FileSystem lfs = FileSystem.getLocal(new Configuration());

  final Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  final Path tempDir = new Path(rootTempDir, "TestJobConfigurationParser");
  lfs.delete(tempDir, true);

  // Add some configuration parameters to the conf
  JobConf jConf = new JobConf(false);
  String key = "test.data";
  String value = "hello world";
  jConf.set(key, value);

  // create the job conf file
  Path jobConfPath = new Path(tempDir.toString(), "job.xml");
  lfs.delete(jobConfPath, false);
  DataOutputStream jobConfStream = lfs.create(jobConfPath);
  jConf.writeXml(jobConfStream);
  jobConfStream.close();

  // now read the job conf file using the job configuration parser
  Properties properties = JobConfigurationParser.parse(lfs.open(jobConfPath));

  // check if the required parameter is loaded
  assertEquals(
      "Total number of extracted properties ("
          + properties.size()
          + ") doesn't match the expected size of 1 ["
          + "JobConfigurationParser]",
      1,
      properties.size());
  // check if the key is present in the extracted configuration
  assertTrue(
      "Key " + key + " is missing in the configuration extracted " + "[JobConfigurationParser]",
      properties.keySet().contains(key));
  // check if the desired property has the correct value
  assertEquals(
      "JobConfigurationParser couldn't recover the parameters" + " correctly",
      value,
      properties.get(key));

  // Test ZombieJob
  LoggedJob job = new LoggedJob();
  job.setJobProperties(properties);

  ZombieJob zjob = new ZombieJob(job, null);
  Configuration zconf = zjob.getJobConf();
  // check if the required parameter is loaded
  assertEquals("ZombieJob couldn't recover the parameters correctly", value, zconf.get(key));
}
@BeforeClass
public static void setUpOneTime() throws Exception {
  fs = new LocalFileSystem();
  fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());

  HiveConf hiveConf = new HiveConf();
  hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0);
  // Hack to initialize the cache with a 0 expiry time, causing it to return a new hive client
  // every time. Otherwise the cache doesn't play well with the second test method, because the
  // client gets closed() in the tearDown() of the previous test.
  HCatUtil.getHiveMetastoreClient(hiveConf);

  MapCreate.writeCount = 0;
  MapRead.readCount = 0;
}
@Test(timeout = 45000)
public void testVanishingTaskZNode() throws Exception {
  LOG.info("testVanishingTaskZNode");
  conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);
  slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
  slm.finishInitialization();
  FileSystem fs = TEST_UTIL.getTestFileSystem();
  final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  fs.mkdirs(logDir);
  Thread thread = null;
  try {
    Path logFile = new Path(logDir, UUID.randomUUID().toString());
    fs.createNewFile(logFile);
    thread =
        new Thread() {
          public void run() {
            try {
              // this call will block because there are no SplitLogWorkers,
              // until the task znode is deleted below. Then the call will
              // complete successfully, assuming the log is split.
              slm.splitLogDistributed(logDir);
            } catch (Exception e) {
              LOG.warn("splitLogDistributed failed", e);
            }
          }
        };
    thread.start();
    waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
    String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
    // remove the task znode, to finish the distributed log splitting
    ZKUtil.deleteNode(zkw, znode);
    waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
    waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
    assertTrue(fs.exists(logFile));
  } finally {
    if (thread != null) {
      // interrupt the thread in case the test fails in the middle.
      // it has no effect if the thread is already terminated.
      thread.interrupt();
    }
    fs.delete(logDir, true);
  }
}
/**
 * Test if {@link RandomTextDataGenerator} can generate random text data with the desired
 * compression ratio. This involves
 *   - using {@link CompressionEmulationUtil} to configure the MR job for generating the random
 *     text data with the desired compression ratio,
 *   - running the MR job, and
 *   - testing {@link RandomTextDataGenerator}'s output by matching the (compressed) output size
 *     against the expected compression ratio.
 */
private void testCompressionRatioConfigure(float ratio) throws Exception {
  long dataSize = 10 * 1024 * 1024;

  Configuration conf = new Configuration();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

  conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

  float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
  if (ratio > 0) {
    // set the compression ratio in the conf
    CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio);
    expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio);
  }

  // invoke the utility to map from ratio to word-size
  CompressionEmulationUtil.setupDataGeneratorConfig(conf);

  FileSystem lfs = FileSystem.getLocal(conf);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr");
  lfs.delete(tempDir, true);

  runDataGenJob(conf, tempDir);

  // validate the output data
  FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
  long size = 0;

  for (FileStatus status : files) {
    size += status.getLen();
  }

  float compressionRatio = ((float) size) / dataSize;
  float stdRatio = CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio);

  assertEquals(expectedRatio, stdRatio, 0.0D);
}
/**
 * Test whether {@link FileQueue} can identify a compressed file and provide readers to extract
 * uncompressed data only if input-compression is enabled.
 */
@Test
public void testFileQueueDecompression() throws IOException {
  JobConf conf = new JobConf();
  FileSystem lfs = FileSystem.getLocal(conf);
  String inputLine = "Hi Hello!";

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
  lfs.delete(tempDir, true);

  // create a compressed file
  Path compressedFile = new Path(tempDir, "test");
  OutputStream out =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
  BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
  writer.write(inputLine);
  writer.close();

  compressedFile = compressedFile.suffix(".gz");

  // now read back the data from the compressed stream using FileQueue
  long fileSize = lfs.listStatus(compressedFile)[0].getLen();
  CombineFileSplit split =
      new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
  FileQueue queue = new FileQueue(split, conf);
  byte[] bytes = new byte[inputLine.getBytes().length];
  queue.read(bytes);
  queue.close();
  String readLine = new String(bytes);
  assertEquals("Compression/Decompression error", inputLine, readLine);
}
/**
 * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration,
 * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path,
 * Configuration)}.
 */
@Test
public void testPossiblyCompressedDecompressedStreams() throws IOException {
  JobConf conf = new JobConf();
  FileSystem lfs = FileSystem.getLocal(conf);
  String inputLine = "Hi Hello!";

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams");
  lfs.delete(tempDir, true);

  // create a compressed file
  Path compressedFile = new Path(tempDir, "test");
  OutputStream out =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
  BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
  writer.write(inputLine);
  writer.close();

  // now read back the data from the compressed stream
  compressedFile = compressedFile.suffix(".gz");
  InputStream in =
      CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0);
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  String readLine = reader.readLine();
  assertEquals("Compression/Decompression error", inputLine, readLine);
  reader.close();
}
/**
 * Check if processing of input arguments is as expected by passing globbed input path
 * <li>without -recursive option and
 * <li>with -recursive option.
 */
@Test
public void testProcessInputArgument() throws Exception {
  final Configuration conf = new Configuration();
  final FileSystem lfs = FileSystem.getLocal(conf);

  // define the test's root temporary directory
  final Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
  // define the test's root input directory
  Path testRootInputDir = new Path(rootTempDir, "TestProcessInputArgument");
  // define the nested input directory
  Path nestedInputDir = new Path(testRootInputDir, "1/2/3/4");
  // define the globbed version of the nested input directory
  Path globbedInputNestedDir = lfs.makeQualified(new Path(testRootInputDir, "*/*/*/*/*"));
  try {
    lfs.delete(nestedInputDir, true);

    List<String> recursiveInputPaths = new ArrayList<String>();
    List<String> nonRecursiveInputPaths = new ArrayList<String>();
    // Create input files under the given path with multiple levels of
    // sub directories
    createHistoryLogsHierarchy(nestedInputDir, lfs, recursiveInputPaths, nonRecursiveInputPaths);

    // Check the case of globbed input path and without -recursive option
    List<Path> inputs =
        MyOptions.processInputArgument(globbedInputNestedDir.toString(), conf, false);
    validateHistoryLogPaths(inputs, nonRecursiveInputPaths);

    // Check the case of globbed input path and with -recursive option
    inputs = MyOptions.processInputArgument(globbedInputNestedDir.toString(), conf, true);
    validateHistoryLogPaths(inputs, recursiveInputPaths);
  } finally {
    lfs.delete(testRootInputDir, true);
  }
}
@Test
public void testPathParts() throws Exception {
  // see PathParts
  FileSystem fs = dfsCluster.getFileSystem();
  int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
  assertTrue(dfsClusterPort > 0);
  JobConf jobConf = getJobConf();
  Configuration simpleConf = new Configuration();

  for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
    for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
      for (String up : Arrays.asList("", "../")) {
        String down = up.length() == 0 ? "foo/" : "";
        String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
        PathParts parts = new PathParts(uploadURL, conf);
        assertEquals(uploadURL, parts.getUploadURL());
        assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
        assertEquals("bar.txt", parts.getName());
        assertEquals("hdfs", parts.getScheme());
        assertEquals("localhost", parts.getHost());
        assertEquals(12345, parts.getPort());
        assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
        assertEquals(parts.getId(), parts.getDownloadURL());
        assertFileNotFound(parts);

        uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
        parts = new PathParts(uploadURL, conf);
        assertEquals(uploadURL, parts.getUploadURL());
        assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
        assertEquals("bar.txt", parts.getName());
        assertEquals("hdfs", parts.getScheme());
        assertEquals("localhost", parts.getHost());
        assertEquals(8020, parts.getPort());
        assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
        assertEquals(parts.getId(), parts.getDownloadURL());
        assertFileNotFound(parts);
      }
    }
  }

  for (Configuration conf : Arrays.asList(jobConf)) {
    for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
      for (String up : Arrays.asList("", "../")) {
        // verify using absolute path
        String down = up.length() == 0 ? "foo/" : "";
        String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
        PathParts parts = new PathParts(uploadURL, conf);
        assertEquals(uploadURL, parts.getUploadURL());
        assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
        assertEquals("bar.txt", parts.getName());
        assertEquals("hdfs", parts.getScheme());
        assertTrue(
            "localhost".equals(parts.getHost())
                || "localhost.localdomain".equals(parts.getHost()));
        assertEquals(dfsClusterPort, parts.getPort());
        assertTrue(
            parts
                    .getId()
                    .equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
                || parts
                    .getId()
                    .equals(
                        "hdfs://localhost.localdomain:"
                            + dfsClusterPort
                            + "/user/"
                            + down
                            + "bar.txt"));
        assertFileNotFound(parts);

        // verify relative path is interpreted to be relative to user's home dir and resolved to
        // an absolute path
        uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
        parts = new PathParts(uploadURL, conf);
        assertEquals(uploadURL, parts.getUploadURL());
        String homeDir = "/user/" + System.getProperty("user.name");
        assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
        assertEquals("bar.txt", parts.getName());
        assertEquals("hdfs", parts.getScheme());
        assertTrue(
            "localhost".equals(parts.getHost())
                || "localhost.localdomain".equals(parts.getHost()));
        assertEquals(dfsClusterPort, parts.getPort());
        assertTrue(
            parts
                    .getId()
                    .equals(
                        "hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down
                            + "bar.txt")
                || parts
                    .getId()
                    .equals(
                        "hdfs://localhost.localdomain:"
                            + dfsClusterPort
                            + homeDir
                            + "/xuser/"
                            + down
                            + "bar.txt"));
        assertFileNotFound(parts);
      }
    }
  }

  try {
    new PathParts("/user/foo/bar.txt", simpleConf);
    fail("host/port resolution requires minimr conf, not a simple conf");
  } catch (IllegalArgumentException e) {
    // expected
  }
}
// TODO - Move this to MR!
// Use TaskDistributedCacheManager.CacheFiles.makeCacheFiles(URI[],
// long[], boolean[], Path[], FileType)
private static void parseDistributedCacheArtifacts(
    Configuration conf,
    Map<String, LocalResource> localResources,
    LocalResourceType type,
    URI[] uris,
    long[] timestamps,
    long[] sizes,
    boolean visibilities[],
    Path[] pathsToPutOnClasspath)
    throws IOException {

  if (uris != null) {
    // Sanity check
    if ((uris.length != timestamps.length)
        || (uris.length != sizes.length)
        || (uris.length != visibilities.length)) {
      throw new IllegalArgumentException(
          "Invalid specification for "
              + "distributed-cache artifacts of type "
              + type
              + " :"
              + " #uris="
              + uris.length
              + " #timestamps="
              + timestamps.length
              + " #visibilities="
              + visibilities.length);
    }

    Map<String, Path> classPaths = new HashMap<String, Path>();
    if (pathsToPutOnClasspath != null) {
      for (Path p : pathsToPutOnClasspath) {
        FileSystem remoteFS = p.getFileSystem(conf);
        p =
            remoteFS.resolvePath(
                p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
        classPaths.put(p.toUri().getPath().toString(), p);
      }
    }
    for (int i = 0; i < uris.length; ++i) {
      URI u = uris[i];
      Path p = new Path(u);
      FileSystem remoteFS = p.getFileSystem(conf);
      p =
          remoteFS.resolvePath(
              p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
      // Add URI fragment or just the filename
      Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
      if (name.isAbsolute()) {
        throw new IllegalArgumentException("Resource name must be relative");
      }
      String linkName = name.toUri().getPath();
      localResources.put(
          linkName,
          BuilderUtils.newLocalResource(
              p.toUri(),
              type,
              visibilities[i] ? LocalResourceVisibility.PUBLIC : LocalResourceVisibility.PRIVATE,
              sizes[i],
              timestamps[i]));
    }
  }
}
/** Command-line interface */
public static void main(final String[] args) throws Exception {
  final Configuration conf = new HdfsConfiguration();
  Options fetcherOptions = new Options();
  fetcherOptions.addOption(WEBSERVICE, true, "HTTPS url to reach the NameNode at");
  fetcherOptions.addOption(RENEWER, true, "Name of the delegation token renewer");
  fetcherOptions.addOption(CANCEL, false, "cancel the token");
  fetcherOptions.addOption(RENEW, false, "renew the token");
  fetcherOptions.addOption(PRINT, false, "print the token");
  GenericOptionsParser parser = new GenericOptionsParser(conf, fetcherOptions, args);
  CommandLine cmd = parser.getCommandLine();

  // get options
  final String webUrl = cmd.hasOption(WEBSERVICE) ? cmd.getOptionValue(WEBSERVICE) : null;
  final String renewer = cmd.hasOption(RENEWER) ? cmd.getOptionValue(RENEWER) : null;
  final boolean cancel = cmd.hasOption(CANCEL);
  final boolean renew = cmd.hasOption(RENEW);
  final boolean print = cmd.hasOption(PRINT);
  String[] remaining = parser.getRemainingArgs();

  // check option validity
  if (cancel && renew || cancel && print || renew && print || cancel && renew && print) {
    System.err.println("ERROR: Only specify cancel, renew or print.");
    printUsage(System.err);
  }
  if (remaining.length != 1 || remaining[0].charAt(0) == '-') {
    System.err.println("ERROR: Must specify exactly one token file");
    printUsage(System.err);
  }
  // default to using the local file system
  FileSystem local = FileSystem.getLocal(conf);
  final Path tokenFile = new Path(local.getWorkingDirectory(), remaining[0]);

  // Login the current user
  UserGroupInformation.getCurrentUser()
      .doAs(
          new PrivilegedExceptionAction<Object>() {
            @SuppressWarnings("unchecked")
            @Override
            public Object run() throws Exception {
              if (print) {
                DelegationTokenIdentifier id =
                    new DelegationTokenSecretManager(0, 0, 0, 0, null).createIdentifier();
                for (Token<?> token : readTokens(tokenFile, conf)) {
                  DataInputStream in =
                      new DataInputStream(new ByteArrayInputStream(token.getIdentifier()));
                  id.readFields(in);
                  System.out.println("Token (" + id + ") for " + token.getService());
                }
              } else if (cancel) {
                for (Token<?> token : readTokens(tokenFile, conf)) {
                  if (token.isManaged()) {
                    token.cancel(conf);
                    if (LOG.isDebugEnabled()) {
                      LOG.debug("Cancelled token for " + token.getService());
                    }
                  }
                }
              } else if (renew) {
                for (Token<?> token : readTokens(tokenFile, conf)) {
                  if (token.isManaged()) {
                    long result = token.renew(conf);
                    if (LOG.isDebugEnabled()) {
                      LOG.debug(
                          "Renewed token for "
                              + token.getService()
                              + " until: "
                              + new Date(result));
                    }
                  }
                }
              } else {
                // otherwise we are fetching
                if (webUrl != null) {
                  Credentials creds = getDTfromRemote(webUrl, renewer);
                  creds.writeTokenStorageFile(tokenFile, conf);
                  for (Token<?> token : creds.getAllTokens()) {
                    if (LOG.isDebugEnabled()) {
                      LOG.debug(
                          "Fetched token via "
                              + webUrl
                              + " for "
                              + token.getService()
                              + " into "
                              + tokenFile);
                    }
                  }
                } else {
                  FileSystem fs = FileSystem.get(conf);
                  Token<?> token = fs.getDelegationToken(renewer);
                  Credentials cred = new Credentials();
                  cred.addToken(token.getService(), token);
                  cred.writeTokenStorageFile(tokenFile, conf);
                  if (LOG.isDebugEnabled()) {
                    LOG.debug("Fetched token for " + token.getService() + " into " + tokenFile);
                  }
                }
              }
              return null;
            }
          });
}
/**
 * Run a local map reduce job to load data from in-memory records into an HCatalog table.
 *
 * @param partitionValues partition values for the output partition
 * @param partitionColumns schema of the columns being written
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records to write
 * @param assertWrite whether to assert the number of records actually written
 * @param asSingleMapTask whether to run the job as a single map task
 * @param customDynamicPathPattern custom dynamic-partitioning path pattern, or null for the default
 * @return the Job that was run
 * @throws Exception
 */
Job runMRCreate(
    Map<String, String> partitionValues,
    List<HCatFieldSchema> partitionColumns,
    List<HCatRecord> records,
    int writeCount,
    boolean assertWrite,
    boolean asSingleMapTask,
    String customDynamicPathPattern)
    throws Exception {
  writeRecords = records;
  MapCreate.writeCount = 0;

  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);

    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));

    TextInputFormat.setInputPaths(job, path, path2);
  }

  job.setOutputFormatClass(HCatOutputFormat.class);

  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  if (customDynamicPathPattern != null) {
    job.getConfiguration()
        .set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
  }
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));

  boolean success = job.waitForCompletion(true);

  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(
        job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue()
            > 0);
  }

  if (!HCatUtil.isHadoop23()) {
    // Local mode outputcommitter hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }
  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }

  if (isTableExternal()) {
    externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
  }

  return job;
}