// Mostly for setting up the symlinks. Note that when we setup the distributed
// cache, we didn't create the symlinks. This is done on a per task basis
// by the currently executing task.
/**
 * Prepares the task's current working directory: wipes it, materializes
 * symlinks for distributed-cache entries whose URIs carry a fragment
 * (the fragment is the link name), links job-cache files for streaming,
 * and creates the task-local tmp directory.
 *
 * @param conf job configuration consulted for cache entries, the job jar
 *             location, and "mapred.child.tmp"
 * @throws IOException if the tmp directory cannot be created
 */
public static void setupWorkDir(JobConf conf) throws IOException {
  File workDir = new File(".").getAbsoluteFile();
  // Start from a clean working directory; stale links from a previous
  // attempt would otherwise survive.
  FileUtil.fullyDelete(workDir);
  if (DistributedCache.getSymlink(conf)) {
    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
    // NOTE(review): archives[i] is assumed to correspond index-for-index
    // with localArchives[i] (same for files/localFiles) — not verifiable
    // from this method alone.
    if (archives != null) {
      for (int i = 0; i < archives.length; i++) {
        // The URI fragment, when present, names the symlink to create.
        String link = archives[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localArchives[i].toString(), link);
          }
        }
      }
    }
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        String link = files[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localFiles[i].toString(), link);
          }
        }
      }
    }
  }
  File jobCacheDir = null;
  if (conf.getJar() != null) {
    // The job-cache dir is the directory containing the localized job jar.
    jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
  }
  // create symlinks for all the files in job cache dir in current
  // workingdir for streaming
  try {
    DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
  } catch (IOException ie) {
    // Do not exit even if symlinks have not been created.
    LOG.warn(StringUtils.stringifyException(ie));
  }
  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);
  // if temp directory path is not absolute
  // prepend it with workDir.
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    // mkdirs may return false if the dir already exists; only fail when it
    // neither was created nor already exists as a directory.
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
/**
 * Parses the job-setup XML file and extracts the "example.count" and
 * "batch.size" properties.
 *
 * @param jobFile path to the XML configuration file (Hadoop-style
 *                &lt;property&gt;&lt;name&gt;/&lt;value&gt; layout)
 * @return a two-element array: [0] = example.count, [1] = batch.size
 *         (0 for any property not present); {@code null} if the file
 *         could not be parsed
 */
private static int[] parseJobSetup(Path jobFile) {
  int[] result = new int[2];
  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  try {
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document doc = db.parse(jobFile.toString());
    Element configElement = doc.getDocumentElement();
    NodeList nodes = configElement.getElementsByTagName("property");
    if (nodes != null && nodes.getLength() > 0) {
      for (int i = 0; i < nodes.getLength(); i++) {
        Element property = (Element) nodes.item(i);
        String elName = xmlGetSingleValue(property, "name");
        // BUG FIX: the original compared Strings with ==, which tests
        // reference identity, so these branches almost never matched.
        // Literal-first equals() is also null-safe if the <name> element
        // is missing.
        if ("example.count".equals(elName)) {
          result[0] = Integer.parseInt(xmlGetSingleValue(property, "value"));
        } else if ("batch.size".equals(elName)) {
          result[1] = Integer.parseInt(xmlGetSingleValue(property, "value"));
        }
      }
    }
  } catch (ParserConfigurationException pce) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(pce));
    return null;
  } catch (SAXException se) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(se));
    return null;
  } catch (IOException ioe) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(ioe));
    return null;
  }
  return result;
}
/** * This method gets called everytime before any read/write to make sure that any change to * localDirs is reflected immediately. */ private synchronized void confChanged(Configuration conf) throws IOException { String newLocalDirs = conf.get(contextCfgItemName); if (!newLocalDirs.equals(savedLocalDirs)) { String[] localDirs = conf.getStrings(contextCfgItemName); localFS = FileSystem.getLocal(conf); int numDirs = localDirs.length; ArrayList<String> dirs = new ArrayList<String>(numDirs); ArrayList<DF> dfList = new ArrayList<DF>(numDirs); for (int i = 0; i < numDirs; i++) { try { // filter problematic directories Path tmpDir = new Path(localDirs[i]); if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) { try { DiskChecker.checkDir(new File(localDirs[i])); dirs.add(localDirs[i]); dfList.add(new DF(new File(localDirs[i]), 30000)); } catch (DiskErrorException de) { LOG.warn(localDirs[i] + "is not writable\n" + StringUtils.stringifyException(de)); } } else { LOG.warn("Failed to create " + localDirs[i]); } } catch (IOException ie) { LOG.warn( "Failed to create " + localDirs[i] + ": " + ie.getMessage() + "\n" + StringUtils.stringifyException(ie)); } // ignore } localDirsPath = new Path[dirs.size()]; for (int i = 0; i < localDirsPath.length; i++) { localDirsPath[i] = new Path(dirs.get(i)); } dirDF = dfList.toArray(new DF[dirs.size()]); savedLocalDirs = newLocalDirs; // randomize the first disk picked in the round-robin selection dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size()); } }
/**
 * Reads RBM hyper-parameters from the given job-setup XML file and stores
 * them on this instance: gibbs.steps, learning.rate, weight.cost and
 * class.count. Parse failures are reported to stderr and otherwise ignored,
 * leaving the current field values untouched.
 *
 * @param jobFile path to the Hadoop-style XML configuration file
 */
private void parseJobSetup(Path jobFile) {
  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  try {
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document document = builder.parse(jobFile.toString());
    Element root = document.getDocumentElement();
    NodeList propertyNodes = root.getElementsByTagName("property");
    if (propertyNodes == null) {
      return;
    }
    int count = propertyNodes.getLength();
    for (int idx = 0; idx < count; idx++) {
      Element propertyEl = (Element) propertyNodes.item(idx);
      String propertyName = xmlGetSingleValue(propertyEl, "name");
      String rawValue = null;
      // Case-insensitive match on the property name, as before.
      if (propertyName.equalsIgnoreCase("gibbs.steps")) {
        rawValue = xmlGetSingleValue(propertyEl, "value");
        this.gibbsSteps = Integer.parseInt(rawValue);
      } else if (propertyName.equalsIgnoreCase("learning.rate")) {
        rawValue = xmlGetSingleValue(propertyEl, "value");
        this.learningRate = Double.parseDouble(rawValue);
      } else if (propertyName.equalsIgnoreCase("weight.cost")) {
        rawValue = xmlGetSingleValue(propertyEl, "value");
        this.weightCost = Double.parseDouble(rawValue);
      } else if (propertyName.equalsIgnoreCase("class.count")) {
        rawValue = xmlGetSingleValue(propertyEl, "value");
        this.classCount = Integer.parseInt(rawValue);
      }
    }
  } catch (ParserConfigurationException pce) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(pce));
  } catch (SAXException se) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(se));
  } catch (IOException ioe) {
    System.err.println(
        "Caught exception while parsing the cached file '"
            + jobFile
            + "' : "
            + StringUtils.stringifyException(ioe));
  }
}
/**
 * Tool entry point: validates arguments and runs the injector.
 *
 * @param args expects two positional arguments: crawldb path and url dir
 * @return 0 on success, -1 on bad usage or failure
 * @throws Exception never propagated in practice; failures are logged
 *     and reported via the return code
 */
public int run(String[] args) throws Exception {
  // Guard clause: require both the crawldb and the url directory.
  if (args.length < 2) {
    System.err.println("Usage: Injector <crawldb> <url_dir>");
    return -1;
  }
  Path crawlDb = new Path(args[0]);
  Path urlDir = new Path(args[1]);
  try {
    inject(crawlDb, urlDir);
  } catch (Exception failure) {
    LOG.error("Injector: " + StringUtils.stringifyException(failure));
    return -1;
  }
  return 0;
}
/**
 * If "minibatch.job.setup" is enabled, locates the job-setup files in the
 * distributed cache and parses each one. A failure to list the cache files
 * is reported to stderr and treated as "no files".
 *
 * @param conf job configuration to inspect
 */
public void configure(Configuration conf) {
  // Nothing to do unless minibatch setup was requested.
  if (!conf.getBoolean("minibatch.job.setup", false)) {
    return;
  }
  Path[] cachedSetupFiles;
  try {
    cachedSetupFiles = DistributedCache.getLocalCacheFiles(conf);
  } catch (IOException ioe) {
    System.err.println(
        "Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
    cachedSetupFiles = new Path[0];
  }
  for (Path setupFile : cachedSetupFiles) {
    parseJobSetup(setupFile);
  }
}
private Path createPath(Path path, boolean checkWrite) throws IOException { Path file = new Path(localDirsPath[dirNumLastAccessed], path); if (checkWrite) { // check whether we are able to create a directory here. If the disk // happens to be RDONLY we will fail try { DiskChecker.checkDir(new File(file.getParent().toUri().getPath())); } catch (DiskErrorException d) { LOG.warn(StringUtils.stringifyException(d)); return null; } } return file; }
/**
 * Runs the topology resolution script over {@code args}, batching at most
 * {@code maxArgs} arguments per invocation, and concatenates the outputs.
 *
 * @param args names to resolve; an empty list yields {@code null}
 * @return the space-joined script output across all batches, or
 *     {@code null} on misconfiguration or script failure
 */
private String runResolveCommand(List<String> args) {
  if (args.size() == 0) {
    return null;
  }
  // Reject a batch size below the allowed minimum before doing any work.
  if (maxArgs < MIN_ALLOWABLE_ARGS) {
    LOG.warn(
        "Invalid value "
            + Integer.toString(maxArgs)
            + " for "
            + SCRIPT_ARG_COUNT_KEY
            + "; must be >= "
            + Integer.toString(MIN_ALLOWABLE_ARGS));
    return null;
  }
  StringBuilder combinedOutput = new StringBuilder();
  int batchIndex = 0;
  int consumed = 0;
  // Keep invoking the script until every argument has been consumed.
  while (consumed != args.size()) {
    int batchStart = maxArgs * batchIndex;
    List<String> command = new ArrayList<String>();
    command.add(scriptName);
    for (consumed = batchStart;
        consumed < (batchStart + maxArgs) && consumed < args.size();
        consumed++) {
      command.add(args.get(consumed));
    }
    // Run from the current user directory when it is known.
    String userDir = System.getProperty("user.dir");
    File workingDir = (userDir != null) ? new File(userDir) : null;
    ShellCommandExecutor executor =
        new ShellCommandExecutor(command.toArray(new String[0]), workingDir);
    try {
      executor.execute();
      combinedOutput.append(executor.getOutput()).append(" ");
    } catch (Exception e) {
      LOG.warn(StringUtils.stringifyException(e));
      return null;
    }
    batchIndex++;
  }
  return combinedOutput.toString();
}
/**
 * Forcibly terminates the currently running Java virtual machine.
 *
 * @param status exit status to halt with
 * @param t throwable whose stringified stack trace becomes the halt message
 * @throws HaltException if halting is disabled (e.g. for test purposes)
 */
public static void halt(int status, Throwable t) throws HaltException {
  halt(status, StringUtils.stringifyException(t));
}
/**
 * Like {@link #terminate(int, String)} but uses the given throwable to initialize the
 * ExitException.
 *
 * @param status exit status to terminate with
 * @param t throwable used to create the ExitException
 * @throws ExitException if System.exit is disabled for test purposes
 */
public static void terminate(int status, Throwable t) throws ExitException {
  terminate(status, StringUtils.stringifyException(t));
}
/**
 * Modify configuration according user-specified generic options
 *
 * <p>Handles -fs, -jt, -conf, -libjars, -files, -archives, -D key=value
 * pairs, and -tokenCacheFile, mutating {@code conf} in place. Note the
 * libjars handling also rewires this thread's context classloader so the
 * client sees the added jars.
 *
 * @param conf Configuration to be modified
 * @param line User-specified generic options
 */
private void processGeneralOptions(Configuration conf, CommandLine line) {
  if (line.hasOption("fs")) {
    FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
  }

  if (line.hasOption("jt")) {
    conf.set("mapred.job.tracker", line.getOptionValue("jt"));
  }
  if (line.hasOption("conf")) {
    // Each -conf file is layered onto the configuration in order.
    String[] values = line.getOptionValues("conf");
    for (String value : values) {
      conf.addResource(new Path(value));
    }
  }
  try {
    if (line.hasOption("libjars")) {
      conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
      // setting libjars in client classpath
      URL[] libjars = getLibJars(conf);
      if (libjars != null && libjars.length > 0) {
        // Chain new classloaders over the existing ones so previously
        // loadable classes stay visible.
        conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
        Thread.currentThread()
            .setContextClassLoader(
                new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
      }
    }
    if (line.hasOption("files")) {
      conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
      conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
    }
  } catch (IOException ioe) {
    // NOTE(review): validation failures are only reported, not rethrown;
    // processing continues with whatever was set before the failure.
    System.err.println(StringUtils.stringifyException(ioe));
  }
  if (line.hasOption('D')) {
    String[] property = line.getOptionValues('D');
    for (String prop : property) {
      // Split on the first '=' only, so values may themselves contain '='.
      String[] keyval = prop.split("=", 2);
      if (keyval.length == 2) {
        conf.set(keyval[0], keyval[1]);
      }
    }
  }
  conf.setBoolean("mapred.used.genericoptionsparser", true);

  // tokensFile
  if (line.hasOption("tokenCacheFile")) {
    String fileName = line.getOptionValue("tokenCacheFile");
    // check if the local file exists
    try {
      FileSystem localFs = FileSystem.getLocal(conf);
      Path p = new Path(fileName);
      if (!localFs.exists(p)) {
        throw new FileNotFoundException("File " + fileName + " does not exist.");
      }
      LOG.debug("setting conf tokensFile: " + fileName);
      conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}