/**
 * Driver to copy srcPath to destPath depending on required protocol.
 *
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  LOG.info("destPath=" + args.dst);
  checkSrcPath(conf, args.srcs);

  JobConf job = createJobConf(conf);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  // Initialize the mapper
  try {
    setup(conf, job, args);
    JobClient.runJob(job);
    finalize(conf, job, args.dst, args.preservedAttributes);
  } finally {
    // delete tmp
    fullyDelete(job.get(TMP_DIR_LABEL), job);
    // delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
public void configure(JobConf conf) {
  /*
   * Reads the configuration and the distributed cache set up by the driver.
   */
  // Read the number of nodes in the input layer and output layer from the configuration
  inputNumdims = conf.get("numdims");
  inputNumhid = conf.get("numhid");

  // Read the weights from the distributed cache
  Path[] pathwaysFiles = new Path[0];
  try {
    pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
    for (Path path : pathwaysFiles) {
      /*
       * This loop reads all the distributed cache files;
       * in fact, the driver program ensures that there is only one.
       */
      BufferedReader fis = new BufferedReader(new FileReader(path.toString()));
      try {
        weightline = fis.readLine();
      } finally {
        fis.close(); // avoid leaking the file handle
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
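// A hedged driver-side sketch of the setup the configure() above expects: the layer sizes
// go into the JobConf and a single weights file is placed on the distributed cache. The
// property values and the HDFS path below are illustrative assumptions, not from the source.
JobConf conf = new JobConf();
conf.set("numdims", "784"); // assumed layer size
conf.set("numhid", "500");  // assumed layer size
// Add exactly one cache file, as the comment above says the driver guarantees.
DistributedCache.addCacheFile(URI.create("/user/hduser/weights/weights.txt"), conf);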
public void configure(JobConf conf) {
  numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
  centersDirectory = conf.get("centersReadDirectory");

  try {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);

    for (int index = 0; index < numberOfCenters; ++index) {
      SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

      LongWritable key = new LongWritable();
      Point value = new Point();
      reader.next(key, value);
      Point center = (Point) value;
      centers.add(center);

      reader.close();
    }
  } catch (IOException e) {
    // Not expected to happen; log and continue.
    System.out.println("well, damn.");
    e.printStackTrace();
  }
}
// Mostly for setting up the symlinks. Note that when we setup the distributed
// cache, we didn't create the symlinks. This is done on a per task basis
// by the currently executing task.
public static void setupWorkDir(JobConf conf) throws IOException {
  File workDir = new File(".").getAbsoluteFile();
  FileUtil.fullyDelete(workDir);

  if (DistributedCache.getSymlink(conf)) {
    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);

    if (archives != null) {
      for (int i = 0; i < archives.length; i++) {
        String link = archives[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localArchives[i].toString(), link);
          }
        }
      }
    }
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        String link = files[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localFiles[i].toString(), link);
          }
        }
      }
    }
  }

  File jobCacheDir = null;
  if (conf.getJar() != null) {
    jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
  }

  // create symlinks for all the files in job cache dir in current
  // working dir for streaming
  try {
    DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
  } catch (IOException ie) {
    // Do not exit even if symlinks have not been created.
    LOG.warn(StringUtils.stringifyException(ie));
  }

  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);

  // if temp directory path is not absolute
  // prepend it with workDir.
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
private static void finalize(
    Configuration conf, JobConf jobconf, final Path destPath, String preservedAttributes)
    throws IOException {
  if (preservedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
  if (!preserved.contains(FileAttribute.USER)
      && !preserved.contains(FileAttribute.GROUP)
      && !preserved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    for (; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updatePermissions(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
private static IntWritable deduceInputFile(JobConf job) {
  Path[] inputPaths = FileInputFormat.getInputPaths(job);
  Path inputFile = new Path(job.get("map.input.file"));

  // value == one for sort-input; value == two for sort-output
  return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
/**
 * Mapper configuration. Extracts source and destination file system, as well as top-level paths
 * on source and destination directories. Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job) {
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}
/**
 * @param split
 * @param jobConf
 * @throws IOException
 */
public XMLRecordReader(FileSplit split, JobConf jobConf) throws IOException {
  if (jobConf.get(START_TAG_KEY) == null || jobConf.get(END_TAG_KEY) == null) {
    throw new RuntimeException("Error! XML start and end tags unspecified!");
  }

  startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
  endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");

  start = split.getStart();
  Path file = split.getPath();

  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(jobConf);
  CompressionCodec codec = compressionCodecs.getCodec(file);

  FileSystem fs = file.getFileSystem(jobConf);
  if (codec != null) {
    LOG.info("Reading compressed file...");
    fsin = new DataInputStream(codec.createInputStream(fs.open(file)));
    end = Long.MAX_VALUE;
  } else {
    LOG.info("Reading uncompressed file...");
    FSDataInputStream fileIn = fs.open(file);
    fileIn.seek(start);
    fsin = fileIn;
    end = start + split.getLength();
  }

  recordStartPos = start;

  // Because input streams of gzipped files are not seekable (specifically,
  // do not support getPos), we need to keep track of bytes consumed ourselves.
  pos = start;
}
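// A minimal driver-side sketch of how this reader might be configured. The literal key
// names "xmlinput.start" and "xmlinput.end" are assumptions standing in for the
// START_TAG_KEY / END_TAG_KEY constants, which are not shown in the snippet above.
JobConf conf = new JobConf();
conf.set("xmlinput.start", "<page>"); // assumed value of START_TAG_KEY
conf.set("xmlinput.end", "</page>");  // assumed value of END_TAG_KEY
// With this configuration the reader emits everything between each <page>...</page> pair
// as a single record.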
/** @see org.apache.hadoop.mapred.JobSubmissionProtocol#getStagingAreaDir() */
public String getStagingAreaDir() throws IOException {
  Path stagingRootDir =
      new Path(conf.get("mapreduce.jobtracker.staging.root.dir", "/tmp/hadoop/mapred/staging"));
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  String user;
  randid = rand.nextInt(Integer.MAX_VALUE);
  if (ugi != null) {
    user = ugi.getShortUserName() + randid;
  } else {
    user = "******" + randid;
  }
  return fs.makeQualified(new Path(stagingRootDir, user + "/.staging")).toString();
}
/**
 * Produce splits such that each is no greater than the quotient of the total size and the
 * number of splits requested.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException(
        "Invalid metadata: #files("
            + cnfiles
            + ") total_size("
            + cbsize
            + ") listuri("
            + srcfilelist
            + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  SequenceFile.Reader sl = null;
  try {
    sl = new SequenceFile.Reader(fs, src, job);
    for (; sl.next(key, value); last = sl.getPosition()) {
      // If adding this file would push the current split past the target size,
      // cut the split here and put this file into the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  } finally {
    checkAndClose(sl);
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
  }

  return splits.toArray(new FileSplit[splits.size()]);
}
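// An illustrative sketch of the sizing rule above; the numbers are made up, not from the
// source. With a 10 GB total and 20 requested splits, the target split size works out to
// 512 MB, so files are accumulated until adding the next one would cross that threshold.
long cbsize = 10L * 1024 * 1024 * 1024; // total bytes recorded under TOTAL_SIZE_LABEL (assumed)
int numSplits = 20;                      // splits requested (assumed)
long targetsize = cbsize / numSplits;    // 536870912 bytes, i.e. 512 MB per split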
@Override
public void configure(JobConf job) {
  super.configure(job);
  this.localDir = job.get("mapred.child.tmp");
  try {
    FileAndWriter faw = openFileWriter(localDir, 1);
    s1File = faw.file;
    s1FileWriter = faw.writer;
    faw = openFileWriter(localDir, 2);
    s2File = faw.file;
    s2FileWriter = faw.writer;
  } catch (IOException e) {
    e.printStackTrace();
  }
}
TaskTrackerRunner(int trackerId, int numDir, String hostname, JobConf cfg) throws IOException {
  this.trackerId = trackerId;
  this.numDir = numDir;
  localDirs = new String[numDir];
  final JobConf conf;
  if (cfg == null) {
    conf = createJobConf();
  } else {
    conf = createJobConf(cfg);
  }
  if (hostname != null) {
    conf.set("slave.host.name", hostname);
  }
  conf.set("mapred.task.tracker.http.address", "0.0.0.0:0");
  conf.set("mapred.task.tracker.report.address", "127.0.0.1:" + taskTrackerPort);
  File localDirBase = new File(conf.get("mapred.local.dir")).getAbsoluteFile();
  localDirBase.mkdirs();
  StringBuffer localPath = new StringBuffer();
  for (int i = 0; i < numDir; ++i) {
    File ttDir = new File(localDirBase, Integer.toString(trackerId) + "_" + i);
    if (!ttDir.mkdirs()) {
      if (!ttDir.isDirectory()) {
        throw new IOException("Mkdirs failed to create " + ttDir);
      }
    }
    localDirs[i] = ttDir.toString();
    if (i != 0) {
      localPath.append(",");
    }
    localPath.append(localDirs[i]);
  }
  conf.set("mapred.local.dir", localPath.toString());
  LOG.info("mapred.local.dir is " + localPath);
  try {
    tt =
        ugi.doAs(
            new PrivilegedExceptionAction<TaskTracker>() {
              public TaskTracker run() throws InterruptedException, IOException {
                return createTaskTracker(conf);
              }
            });
    isInitialized = true;
  } catch (Throwable e) {
    isDead = true;
    tt = null;
    LOG.error("task tracker " + trackerId + " crashed", e);
  }
}
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  Path src =
      new Path(
          job.get(
              org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(job);

  ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job); sl.next(key, value); ) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits.toArray(new IndirectSplit[splits.size()]);
}
/**
 * @param workDir the task working directory
 * @param conf the job configuration
 * @return the temporary directory for the child task
 * @throws IOException
 */
static Path createChildTmpDir(File workDir, JobConf conf) throws IOException {
  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);

  // if temp directory path is not absolute, prepend it with workDir.
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
  return tmpDir;
}
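// A minimal sketch of how the helper above might be used when assembling child-JVM
// arguments; the workDir path and the vargs list are hypothetical, for illustration only.
JobConf conf = new JobConf();
File workDir = new File("/tmp/mapred/local/jobcache/job_x/attempt_x/work"); // hypothetical
Path tmpDir = createChildTmpDir(workDir, conf);
List<String> vargs = new ArrayList<String>();
vargs.add("-Djava.io.tmpdir=" + tmpDir.toString()); // pass the resolved tmp dir to the child JVM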
/**
 * Creates the configuration for the mumak simulation. This is kept modular, mostly for testing
 * purposes, so that the standard configuration can be modified before passing it to the init()
 * function.
 *
 * @return JobConf: the configuration for the SimulatorJobTracker
 */
JobConf createMumakConf() {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setClass(
      "topology.node.switch.mapping.impl", StaticMapping.class, DNSToSwitchMapping.class);
  jobConf.set("fs.default.name", "file:///");
  jobConf.set("mapred.job.tracker", "localhost:8012");
  jobConf.setInt("mapred.jobtracker.job.history.block.size", 512);
  jobConf.setInt("mapred.jobtracker.job.history.buffer.size", 512);
  jobConf.setLong("mapred.tasktracker.expiry.interval", 5000);
  jobConf.setInt("mapred.reduce.copy.backoff", 4);
  jobConf.setLong("mapred.job.reuse.jvm.num.tasks", -1);
  jobConf.setUser("mumak");
  jobConf.set(
      "mapred.system.dir",
      jobConf.get("hadoop.log.dir", "/tmp/hadoop-" + jobConf.getUser()) + "/mapred/system");
  return jobConf;
}
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition =
          Integer.valueOf(inputFile.substring(inputFile.lastIndexOf("part") + 5)).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
/**
 * Validates map phase progress after each record is processed by map task using custom task
 * reporter.
 */
public void testMapProgress() throws Exception {
  JobConf job = new JobConf();
  fs = FileSystem.getLocal(job);
  Path rootDir = new Path(TEST_ROOT_DIR);
  createInputFile(rootDir);

  job.setNumReduceTasks(0);
  TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
  job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
  job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
  jobId = taskId.getJobID();

  JobContext jContext = new JobContextImpl(job, jobId);
  InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

  List<InputSplit> splits = input.getSplits(jContext);
  JobSplitWriter.createSplitFiles(
      new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
  TaskSplitMetaInfo[] splitMetaInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
  job.setUseNewMapper(true); // use new api

  for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
    map =
        new TestMapTask(
            job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
            taskId,
            i,
            splitMetaInfo[i].getSplitIndex(),
            1);

    JobConf localConf = new JobConf(job);
    map.localizeConfiguration(localConf);
    map.setConf(localConf);
    map.run(localConf, fakeUmbilical);
  }

  // clean up
  fs.delete(rootDir, true);
}
/**
 * Map method. Copies one file from source file system to destination.
 *
 * @param key src len
 * @param value FilePair (FileStatus src, Path dst)
 * @param out Log of failed copies
 * @param reporter
 */
public void map(
    LongWritable key,
    FilePair value,
    OutputCollector<WritableComparable<?>, Text> out,
    Reporter reporter)
    throws IOException {
  final FileStatus srcstat = value.input;
  final Path relativedst = new Path(value.output);
  try {
    copy(srcstat, relativedst, out, reporter);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FAIL, 1);
    updateStatus(reporter);
    final String sfailure = "FAIL " + relativedst + " : " + StringUtils.stringifyException(e);
    out.collect(null, new Text(sfailure));
    LOG.info(sfailure);
    try {
      for (int i = 0; i < 3; ++i) {
        try {
          final Path tmp = new Path(job.get(TMP_DIR_LABEL), relativedst);
          if (destFileSys.delete(tmp, true)) break;
        } catch (Throwable ex) {
          // ignore, we are just cleaning up
          LOG.debug("Ignoring cleanup exception", ex);
        }
        // update status, so we don't get timed out
        updateStatus(reporter);
        Thread.sleep(3 * 1000);
      }
    } catch (InterruptedException inte) {
      throw (IOException) new IOException().initCause(inte);
    }
  } finally {
    updateStatus(reporter);
  }
}
@Override
public final void run() {
  try {
    // before preparing the job, localize
    // all the archives
    TaskAttemptID taskid = t.getTaskID();
    LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    }
    File workDir =
        new File(
            lDirAlloc
                .getLocalPathToRead(
                    TaskTracker.getJobCacheSubdir()
                        + Path.SEPARATOR
                        + t.getJobID()
                        + Path.SEPARATOR
                        + t.getTaskID()
                        + Path.SEPARATOR
                        + MRConstants.WORKDIR,
                    conf)
                .toString());

    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    FileStatus fileStatus;
    FileSystem fileSystem;
    Path localPath;
    String baseDir;

    if ((archives != null) || (files != null)) {
      if (archives != null) {
        String[] archivesTimestamps = DistributedCache.getArchiveTimestamps(conf);
        Path[] p = new Path[archives.length];
        for (int i = 0; i < archives.length; i++) {
          fileSystem = FileSystem.get(archives[i], conf);
          fileStatus = fileSystem.getFileStatus(new Path(archives[i].getPath()));
          String cacheId = DistributedCache.makeRelative(archives[i], conf);
          String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
          if (lDirAlloc.ifExists(cachePath, conf)) {
            localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
          } else {
            localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
          }
          baseDir = localPath.toString().replace(cacheId, "");
          p[i] =
              DistributedCache.getLocalCache(
                  archives[i],
                  conf,
                  new Path(baseDir),
                  fileStatus,
                  true,
                  Long.parseLong(archivesTimestamps[i]),
                  new Path(workDir.getAbsolutePath()),
                  false);
        }
        DistributedCache.setLocalArchives(conf, stringifyPathArray(p));
      }
      if ((files != null)) {
        String[] fileTimestamps = DistributedCache.getFileTimestamps(conf);
        Path[] p = new Path[files.length];
        for (int i = 0; i < files.length; i++) {
          fileSystem = FileSystem.get(files[i], conf);
          fileStatus = fileSystem.getFileStatus(new Path(files[i].getPath()));
          String cacheId = DistributedCache.makeRelative(files[i], conf);
          String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
          if (lDirAlloc.ifExists(cachePath, conf)) {
            localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
          } else {
            localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
          }
          baseDir = localPath.toString().replace(cacheId, "");
          p[i] =
              DistributedCache.getLocalCache(
                  files[i],
                  conf,
                  new Path(baseDir),
                  fileStatus,
                  false,
                  Long.parseLong(fileTimestamps[i]),
                  new Path(workDir.getAbsolutePath()),
                  false);
        }
        DistributedCache.setLocalFiles(conf, stringifyPathArray(p));
      }
      Path localTaskFile = new Path(t.getJobFile());
      FileSystem localFs = FileSystem.getLocal(conf);
      localFs.delete(localTaskFile, true);
      OutputStream out = localFs.create(localTaskFile);
      try {
        conf.writeXml(out);
      } finally {
        out.close();
      }
    }

    if (!prepare()) {
      return;
    }

    String sep = System.getProperty("path.separator");
    StringBuffer classPath = new StringBuffer();
    // start with same classpath as parent process
    classPath.append(System.getProperty("java.class.path"));
    classPath.append(sep);
    if (!workDir.mkdirs()) {
      if (!workDir.isDirectory()) {
        LOG.fatal("Mkdirs failed to create " + workDir.toString());
      }
    }

    String jar = conf.getJar();
    if (jar != null) {
      // if the job jar exists, add its lib/, classes/ and root dir to the classpath
      File[] libs = new File(jobCacheDir, "lib").listFiles();
      if (libs != null) {
        for (int i = 0; i < libs.length; i++) {
          classPath.append(sep); // add libs from jar to classpath
          classPath.append(libs[i]);
        }
      }
      classPath.append(sep);
      classPath.append(new File(jobCacheDir, "classes"));
      classPath.append(sep);
      classPath.append(jobCacheDir);
    }

    // include the user specified classpath

    // archive paths
    Path[] archiveClasspaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClasspaths != null && archives != null) {
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      if (localArchives != null) {
        for (int i = 0; i < archives.length; i++) {
          for (int j = 0; j < archiveClasspaths.length; j++) {
            if (archives[i].getPath().equals(archiveClasspaths[j].toString())) {
              classPath.append(sep);
              classPath.append(localArchives[i].toString());
            }
          }
        }
      }
    }
    // file paths
    Path[] fileClasspaths = DistributedCache.getFileClassPaths(conf);
    if (fileClasspaths != null && files != null) {
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (localFiles != null) {
        for (int i = 0; i < files.length; i++) {
          for (int j = 0; j < fileClasspaths.length; j++) {
            if (files[i].getPath().equals(fileClasspaths[j].toString())) {
              classPath.append(sep);
              classPath.append(localFiles[i].toString());
            }
          }
        }
      }
    }

    classPath.append(sep);
    classPath.append(workDir);

    // Build exec child JVM args.
    Vector<String> vargs = new Vector<String>(8);
    File jvm = // use same jvm as parent
        new File(new File(System.getProperty("java.home"), "bin"), "java");
    vargs.add(jvm.toString());

    // Add child (task) java-vm options.
    //
    // The following symbols, if present in the mapred.child.java.opts value, are
    // replaced:
    // + @taskid@ is interpolated with the value of the TaskID.
    // Other occurrences of @ will not be altered.
    //
    // Example with multiple arguments and substitutions, showing
    // jvm GC logging, and start of a passwordless JVM JMX agent so that one can
    // connect with jconsole and the like to watch child memory, threads
    // and get thread dumps.
    //
    // <property>
    //   <name>mapred.child.java.opts</name>
    //   <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \
    //          -Dcom.sun.management.jmxremote.authenticate=false \
    //          -Dcom.sun.management.jmxremote.ssl=false \
    //   </value>
    // </property>
    //
    String javaOpts = conf.get("mapred.child.java.opts", "-Xmx200m");
    javaOpts = javaOpts.replace("@taskid@", taskid.toString());
    String[] javaOptsSplit = javaOpts.split(" ");

    // Add java.library.path; necessary for loading native libraries.
    //
    // 1. To support the native-hadoop library, i.e. libhadoop.so, we add the
    //    parent process's java.library.path to the child.
    // 2. We also add the 'cwd' of the task to its java.library.path to help
    //    users distribute native libraries via the DistributedCache.
    // 3. The user can also specify extra paths to be added to the
    //    java.library.path via mapred.child.java.opts.
    //
    String libraryPath = System.getProperty("java.library.path");
    if (libraryPath == null) {
      libraryPath = workDir.getAbsolutePath();
    } else {
      libraryPath += sep + workDir;
    }
    boolean hasUserLDPath = false;
    for (int i = 0; i < javaOptsSplit.length; i++) {
      if (javaOptsSplit[i].startsWith("-Djava.library.path=")) {
        javaOptsSplit[i] += sep + libraryPath;
        hasUserLDPath = true;
        break;
      }
    }
    if (!hasUserLDPath) {
      vargs.add("-Djava.library.path=" + libraryPath);
    }
    for (int i = 0; i < javaOptsSplit.length; i++) {
      vargs.add(javaOptsSplit[i]);
    }

    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
    }
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
    vargs.add("-Djava.io.tmpdir=" + tmpDir.toString());

    // Add classpath.
    vargs.add("-classpath");
    vargs.add(classPath.toString());

    // Setup the log4j prop
    long logSize = TaskLog.getTaskLogLength(conf);
    vargs.add(
        "-Dhadoop.log.dir=" + new File(System.getProperty("hadoop.log.dir")).getAbsolutePath());
    vargs.add("-Dhadoop.root.logger=INFO,TLA");
    vargs.add("-Dhadoop.tasklog.taskid=" + taskid);
    vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);

    if (conf.getProfileEnabled()) {
      if (conf.getProfileTaskRange(t.isMapTask()).isIncluded(t.getPartition())) {
        File prof = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.PROFILE);
        vargs.add(String.format(conf.getProfileParams(), prof.toString()));
      }
    }

    // Add main class and its arguments
    vargs.add(Child.class.getName()); // main of Child
    // pass umbilical address
    InetSocketAddress address = tracker.getTaskTrackerReportAddress();
    vargs.add(address.getAddress().getHostAddress());
    vargs.add(Integer.toString(address.getPort()));
    vargs.add(taskid.toString()); // pass task identifier

    String pidFile = null;
    if (tracker.isTaskMemoryManagerEnabled()) {
      pidFile =
          lDirAlloc
              .getLocalPathForWrite(
                  (TaskTracker.getPidFilesSubdir() + Path.SEPARATOR + taskid), this.conf)
              .toString();
    }

    // set memory limit using ulimit if feasible and necessary ...
    String[] ulimitCmd = Shell.getUlimitMemoryCommand(conf);
    List<String> setup = null;
    if (ulimitCmd != null) {
      setup = new ArrayList<String>();
      for (String arg : ulimitCmd) {
        setup.add(arg);
      }
    }

    // Set up the redirection of the task's stdout and stderr streams
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    stdout.getParentFile().mkdirs();
    tracker.getTaskTrackerInstrumentation().reportTaskLaunch(taskid, stdout, stderr);

    Map<String, String> env = new HashMap<String, String>();
    StringBuffer ldLibraryPath = new StringBuffer();
    ldLibraryPath.append(workDir.toString());
    String oldLdLibraryPath = null;
    oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
    if (oldLdLibraryPath != null) {
      ldLibraryPath.append(sep);
      ldLibraryPath.append(oldLdLibraryPath);
    }
    env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
    jvmManager.launchJvm(
        this,
        jvmManager.constructJvmEnv(
            setup, vargs, stdout, stderr, logSize, workDir, env, pidFile, conf));
    synchronized (lock) {
      while (!done) {
        lock.wait();
      }
    }
    tracker.getTaskTrackerInstrumentation().reportTaskEnd(t.getTaskID());
    if (exitCodeSet) {
      if (!killed && exitCode != 0) {
        if (exitCode == 65) {
          tracker.getTaskTrackerInstrumentation().taskFailedPing(t.getTaskID());
        }
        throw new IOException("Task process exit with nonzero status of " + exitCode + ".");
      }
    }
  } catch (FSError e) {
    LOG.fatal("FSError", e);
    try {
      tracker.fsError(t.getTaskID(), e.getMessage());
    } catch (IOException ie) {
      LOG.fatal(t.getTaskID() + " reporting FSError", ie);
    }
  } catch (Throwable throwable) {
    LOG.warn(t.getTaskID() + " Child Error", throwable);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    throwable.printStackTrace(new PrintStream(baos));
    try {
      tracker.reportDiagnosticInfo(t.getTaskID(), baos.toString());
    } catch (IOException e) {
      LOG.warn(t.getTaskID() + " Reporting Diagnostics", e);
    }
  } finally {
    try {
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      if (archives != null) {
        for (int i = 0; i < archives.length; i++) {
          DistributedCache.releaseCache(archives[i], conf);
        }
      }
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          DistributedCache.releaseCache(files[i], conf);
        }
      }
    } catch (IOException ie) {
      LOG.warn("Error releasing caches : Cache files might not have been cleaned up");
    }
    tracker.reportTaskFinished(t.getTaskID(), false);
    if (t.isMapTask()) {
      tracker.addFreeMapSlot();
    } else {
      tracker.addFreeReduceSlot();
    }
  }
}
private String getCJTJavaOpts(JobConf conf) {
  return conf.get(
      "mapred.corona.standalonecjt.java.opts", JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS);
}
@Override
public void configure(JobConf conf) {
  TaskAttemptID id = TaskAttemptID.forName(conf.get("mapred.task.id"));
  shouldFail = id.getId() == 0 && id.getTaskID().getId() == 0;
}
/**
 * Get the environment variables for the child map/reduce tasks.
 *
 * @param jobConf job configuration
 * @return the environment variables for the child map/reduce tasks or <code>null</code> if
 *     unspecified
 * @deprecated Use environment variables specific to the map or reduce tasks set via {@link
 *     JobConf#MAPRED_MAP_TASK_ENV} or {@link JobConf#MAPRED_REDUCE_TASK_ENV}
 */
@Deprecated
public String getChildEnv(JobConf jobConf) {
  return jobConf.get(JobConf.MAPRED_TASK_ENV);
}
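// A brief sketch of the non-deprecated alternative referenced in the javadoc above:
// setting the environment separately for map and reduce tasks. The variable names and
// values below are illustrative assumptions, not taken from the source.
JobConf jobConf = new JobConf();
jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "LD_LIBRARY_PATH=/opt/native/lib");
jobConf.set(JobConf.MAPRED_REDUCE_TASK_ENV, "TMPDIR=/data/tmp");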
private String getVMEnvironment(
    String errorInfo,
    File workDir,
    JobConf conf,
    Map<String, String> env,
    TaskAttemptID taskid,
    long logSize)
    throws Throwable {
  StringBuffer ldLibraryPath = new StringBuffer();
  ldLibraryPath.append(workDir.toString());
  String oldLdLibraryPath = null;
  oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
  if (oldLdLibraryPath != null) {
    ldLibraryPath.append(SYSTEM_PATH_SEPARATOR);
    ldLibraryPath.append(oldLdLibraryPath);
  }
  env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
  env.put(HADOOP_WORK_DIR, workDir.toString());

  // put jobTokenFile name into env
  String jobTokenFile = conf.get(TokenCache.JOB_TOKENS_FILENAME);
  LOG.debug("putting jobToken file name into environment " + jobTokenFile);
  env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, jobTokenFile);

  // for the child of the task jvm, set hadoop.root.logger
  env.put("HADOOP_ROOT_LOGGER", "INFO,TLA");
  String hadoopClientOpts = System.getenv("HADOOP_CLIENT_OPTS");
  if (hadoopClientOpts == null) {
    hadoopClientOpts = "";
  } else {
    hadoopClientOpts = hadoopClientOpts + " ";
  }
  hadoopClientOpts =
      hadoopClientOpts
          + "-Dhadoop.tasklog.taskid="
          + taskid
          + " -Dhadoop.tasklog.iscleanup="
          + t.isTaskCleanupTask()
          + " -Dhadoop.tasklog.totalLogFileSize="
          + logSize;
  // following line is a backport from jira MAPREDUCE-1286
  env.put("HADOOP_CLIENT_OPTS", hadoopClientOpts);

  // add the env variables passed by the user
  String mapredChildEnv = getChildEnv(conf);
  if (mapredChildEnv != null && mapredChildEnv.length() > 0) {
    String childEnvs[] = mapredChildEnv.split(",");
    for (String cEnv : childEnvs) {
      try {
        String[] parts = cEnv.split("="); // split on '='
        String value = env.get(parts[0]);
        if (value != null) {
          // replace $env with the child's env constructed by the tasktracker
          // example LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp
          value = parts[1].replace("$" + parts[0], value);
        } else {
          // this key is not configured by the tasktracker for the child;
          // get it from the tasktracker's env
          // example PATH=$PATH:/tmp
          value = System.getenv(parts[0]);
          if (value != null) {
            // the env key is present in the tasktracker's env
            value = parts[1].replace("$" + parts[0], value);
          } else {
            // the env key is not present anywhere; simply set it
            // example X=$X:/tmp or X=/tmp
            value = parts[1].replace("$" + parts[0], "");
          }
        }
        env.put(parts[0], value);
      } catch (Throwable t) {
        // set the error msg
        errorInfo =
            "Invalid User environment settings : "
                + mapredChildEnv
                + ". Failed to parse user-passed environment param."
                + " Expecting : env1=value1,env2=value2...";
        LOG.warn(errorInfo);
        throw t;
      }
    }
  }
  return errorInfo;
}
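// A standalone sketch of the "$VAR" expansion rule implemented above, run against an
// illustrative mapred.child.env value; the names and values are assumptions for this
// example, and the System.getenv() fallback branch is omitted for brevity.
Map<String, String> env = new HashMap<String, String>();
env.put("LD_LIBRARY_PATH", "/work/dir"); // pretend the tasktracker already set this
String mapredChildEnv = "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,MY_FLAG=1";
for (String cEnv : mapredChildEnv.split(",")) {
  String[] parts = cEnv.split("=");
  String existing = env.get(parts[0]);
  String value =
      (existing != null)
          ? parts[1].replace("$" + parts[0], existing) // expand $VAR with the known value
          : parts[1].replace("$" + parts[0], "");      // unknown $VAR collapses to empty
  env.put(parts[0], value);
}
// env now maps LD_LIBRARY_PATH -> "/work/dir:/tmp" and MY_FLAG -> "1"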
/**
 * Copy a file to a destination.
 *
 * @param srcstat src path and metadata
 * @param relativedst dst path
 * @param reporter
 */
private void copy(
    FileStatus srcstat,
    Path relativedst,
    OutputCollector<WritableComparable<?>, Text> outc,
    Reporter reporter)
    throws IOException {
  Path absdst = new Path(destPath, relativedst);
  int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
  assert totfiles >= 0 : "Invalid file count " + totfiles;

  // if a directory, ensure created even if empty
  if (srcstat.isDir()) {
    if (destFileSys.exists(absdst)) {
      if (!destFileSys.getFileStatus(absdst).isDir()) {
        throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
      }
    } else if (!destFileSys.mkdirs(absdst)) {
      throw new IOException("Failed to mkdirs " + absdst);
    }
    // TODO: when modification times can be set, directories should be
    // emitted to reducers so they might be preserved. Also, mkdirs does
    // not currently return an error when the directory already exists;
    // if this changes, all directory work might as well be done in reduce
    return;
  }

  if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
    outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
    ++skipcount;
    reporter.incrCounter(Counter.SKIP, 1);
    updateStatus(reporter);
    return;
  }

  Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
  long cbcopied = 0L;
  FSDataInputStream in = null;
  FSDataOutputStream out = null;
  try {
    // open src file
    try {
      in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath());
    } catch (IOException e) {
      LOG.error("Failed to open src file " + srcstat.getPath() + ", ignore and return");
      in = null;
      return;
    }
    reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
    // open tmp file
    out = create(tmpfile, reporter, srcstat);
    // copy file
    for (int cbread; (cbread = in.read(buffer)) >= 0; ) {
      out.write(buffer, 0, cbread);
      cbcopied += cbread;
      reporter.setStatus(
          String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen())
              + absdst
              + " [ "
              + StringUtils.humanReadableInt(cbcopied)
              + " / "
              + StringUtils.humanReadableInt(srcstat.getLen())
              + " ]");
    }
  } finally {
    checkAndClose(in);
    checkAndClose(out);
  }

  if (cbcopied != srcstat.getLen()) {
    if (srcstat.getLen() == 0 && cbcopied > 0) {
      LOG.info("most likely see a WAL file corruption: " + srcstat.getPath());
    } else {
      throw new IOException(
          "File size not matched: copied "
              + bytesString(cbcopied)
              + " to tmpfile (="
              + tmpfile
              + ") but expected "
              + bytesString(srcstat.getLen())
              + " from "
              + srcstat.getPath());
    }
  } else {
    if (totfiles == 1) {
      // Copying a single file; use dst path provided by user as destination
      // rather than destination directory, if a file
      Path dstparent = absdst.getParent();
      if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDir())) {
        absdst = dstparent;
      }
    }
    if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) {
      throw new IOException(absdst + " is a directory");
    }
    if (!destFileSys.mkdirs(absdst.getParent())) {
      throw new IOException("Failed to create parent dir: " + absdst.getParent());
    }
    rename(tmpfile, absdst);

    FileStatus dststat = destFileSys.getFileStatus(absdst);
    if (dststat.getLen() != srcstat.getLen()) {
      destFileSys.delete(absdst, false);
      throw new IOException(
          "File size not matched: copied "
              + bytesString(dststat.getLen())
              + " to dst (="
              + absdst
              + ") but expected "
              + bytesString(srcstat.getLen())
              + " from "
              + srcstat.getPath());
    }
    updatePermissions(srcstat, dststat);
  }

  // report at least once for each file
  ++copycount;
  reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
  reporter.incrCounter(Counter.COPY, 1);
  updateStatus(reporter);
}
/**
 * Initiate components in the simulation. The JobConf is created separately and passed to
 * init().
 *
 * @param jobConf the configuration for the jobtracker.
 * @throws InterruptedException
 * @throws IOException if trace or topology files cannot be opened.
 */
@SuppressWarnings("deprecation")
void init(JobConf jobConf) throws InterruptedException, IOException {
  FileSystem lfs = FileSystem.getLocal(getConf());
  Path logPath = new Path(System.getProperty("hadoop.log.dir")).makeQualified(lfs);
  jobConf.set("mapred.system.dir", logPath.toString());
  jobConf.set("hadoop.job.history.location", (new Path(logPath, "history").toString()));

  // start time for virtual clock
  // possible improvement: set default value to something more meaningful based on
  // the 1st job
  long now = getTimeProperty(jobConf, "mumak.start.time", System.currentTimeMillis());

  jt = SimulatorJobTracker.startTracker(jobConf, now, this);
  jt.offerService();

  masterRandomSeed = jobConf.getLong("mumak.random.seed", System.nanoTime());

  // max Map/Reduce tasks per node
  int maxMaps =
      getConf()
          .getInt(
              "mapred.tasktracker.map.tasks.maximum", SimulatorTaskTracker.DEFAULT_MAP_SLOTS);
  int maxReduces =
      getConf()
          .getInt(
              "mapred.tasktracker.reduce.tasks.maximum",
              SimulatorTaskTracker.DEFAULT_REDUCE_SLOTS);

  MachineNode defaultNode =
      new MachineNode.Builder("default", 2)
          .setMapSlots(maxMaps)
          .setReduceSlots(maxReduces)
          .build();

  LoggedNetworkTopology topology =
      new ClusterTopologyReader(new Path(topologyFile), jobConf).get();
  // Setting the static mapping before removing numeric IP hosts.
  setStaticMapping(topology);
  if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
    removeIpHosts(topology);
  }
  ZombieCluster cluster = new ZombieCluster(topology, defaultNode);

  // create TTs based on topology.json
  long firstJobStartTime = startTaskTrackers(cluster, jobConf, now);

  long subRandomSeed =
      RandomSeedGenerator.getSeed("forSimulatorJobStoryProducer", masterRandomSeed);
  JobStoryProducer jobStoryProducer =
      new SimulatorJobStoryProducer(
          new Path(traceFile), cluster, firstJobStartTime, jobConf, subRandomSeed);

  final SimulatorJobSubmissionPolicy submissionPolicy =
      SimulatorJobSubmissionPolicy.getPolicy(jobConf);

  jc = new SimulatorJobClient(jt, jobStoryProducer, submissionPolicy);
  queue.addAll(jc.init(firstJobStartTime));

  // if the taskScheduler is CapacityTaskScheduler, start the JobInitialization
  // threads too
  if (jobConf
      .get("mapred.jobtracker.taskScheduler")
      .equals(CapacityTaskScheduler.class.getName())) {
    LOG.info("CapacityScheduler used: starting simulatorThreads");
    startSimulatorThreadsCapSched(now);
  }
  terminateTime = getTimeProperty(jobConf, "mumak.terminate.time", Long.MAX_VALUE);
}
/**
 * Get the java command line options for the child map/reduce tasks.
 *
 * @param jobConf job configuration
 * @param defaultValue default value
 * @return the java command line options for child map/reduce tasks
 * @deprecated Use command line options specific to map or reduce tasks set via {@link
 *     JobConf#MAPRED_MAP_TASK_JAVA_OPTS} or {@link JobConf#MAPRED_REDUCE_TASK_JAVA_OPTS}
 */
@Deprecated
public String getChildJavaOpts(JobConf jobConf, String defaultValue) {
  return jobConf.get(JobConf.MAPRED_TASK_JAVA_OPTS, defaultValue);
}
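// A brief sketch of the non-deprecated alternative referenced in the javadoc above:
// setting JVM options separately for map and reduce tasks. The heap sizes are
// illustrative assumptions, not values from the source.
JobConf jobConf = new JobConf();
jobConf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, "-Xmx512m");
jobConf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx1024m -verbose:gc");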
static void checkRecords(
    Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput)
    throws IOException {
  JobConf jobConf = new JobConf(defaults, RecordChecker.class);
  jobConf.setJobName("sortvalidate-record-checker");

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(IntWritable.class);

  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  JobClient client = new JobClient(jobConf);
  ClusterStatus cluster = client.getClusterStatus();
  if (noMaps == -1) {
    noMaps = cluster.getTaskTrackers() * jobConf.getInt("test.sortvalidate.maps_per_host", 10);
  }
  if (noReduces == -1) {
    noReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sortReduces = jobConf.get("test.sortvalidate.reduces_per_host");
    if (sortReduces != null) {
      noReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
    }
  }
  jobConf.setNumMapTasks(noMaps);
  jobConf.setNumReduceTasks(noReduces);

  FileInputFormat.setInputPaths(jobConf, sortInput);
  FileInputFormat.addInputPath(jobConf, sortOutput);
  Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
  FileSystem fs = FileSystem.get(defaults);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Uncomment to run locally in a single process
  // jobConf.set("mapred.job.tracker", "local");
  Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
  System.out.println(
      "\nSortValidator.RecordChecker: Running on "
          + cluster.getTaskTrackers()
          + " nodes to validate sort from "
          + inputPaths[0]
          + ", "
          + inputPaths[1]
          + " into "
          + FileOutputFormat.getOutputPath(jobConf)
          + " with "
          + noReduces
          + " reduces.");
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(jobConf);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println(
      "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
}
public void configure(JobConf job) {
  this.inputArgs = Utils.asArray(job.get("user.args"));
}
/** @see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir() */
public String getSystemDir() {
  Path sysDir = new Path(conf.get("mapred.system.dir", "/tmp/hadoop/mapred/system"));
  return fs.makeQualified(sysDir).toString();
}
@Override
public void configure(JobConf config) {
  splitChar = (char) NumberUtils.toInt(config.get("user.split"));
}
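// A minimal driver-side sketch of how "user.split" might be set for the snippet above:
// the delimiter is passed as its numeric character code, which NumberUtils.toInt parses
// back into a char in configure(). The tab delimiter here is an illustrative assumption.
JobConf config = new JobConf();
config.set("user.split", String.valueOf((int) '\t')); // "9" -> tab character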