Ejemplo n.º 1
0
  /**
   * Prepares the current working directory of the executing task.
   *
   * <p>Symlinks for distributed-cache entries are not created when the cache itself is set up;
   * they are created here, on a per-task basis, by the task that is currently running. The method
   * then asks {@code DistributedCache} to link the job cache directory contents into the working
   * directory and finally makes sure the task's temp directory exists.
   *
   * @param conf job configuration supplying the cache entries, the job jar location and the
   *     {@code mapred.child.tmp} setting
   * @throws IOException if a cache lookup fails or the relative temp directory cannot be created
   */
  public static void setupWorkDir(JobConf conf) throws IOException {
    final File cwd = new File(".").getAbsoluteFile();
    FileUtil.fullyDelete(cwd);

    if (DistributedCache.getSymlink(conf)) {
      // Resolve all four arrays before linking anything, so a failing lookup
      // happens before the first symlink is made.
      final URI[] archiveUris = DistributedCache.getCacheArchives(conf);
      final URI[] fileUris = DistributedCache.getCacheFiles(conf);
      final Path[] archiveLocals = DistributedCache.getLocalCacheArchives(conf);
      final Path[] fileLocals = DistributedCache.getLocalCacheFiles(conf);
      linkCacheEntries(archiveUris, archiveLocals, cwd);
      linkCacheEntries(fileUris, fileLocals, cwd);
    }

    final String jar = conf.getJar();
    final File jobCacheDir =
        (jar == null) ? null : new File(new Path(jar).getParent().toString());

    // Link every file of the job cache dir into the working dir (needed for streaming).
    try {
      DistributedCache.createAllSymlink(conf, jobCacheDir, cwd);
    } catch (IOException ie) {
      // Best effort: keep going even if the symlinks could not be created.
      LOG.warn(StringUtils.stringifyException(ie));
    }

    // Temp directory comes from mapred.child.tmp; an absolute path is left alone.
    final String tmpSetting = conf.get("mapred.child.tmp", "./tmp");
    if (new Path(tmpSetting).isAbsolute()) {
      return;
    }

    // A relative path is resolved against the working directory and created if needed.
    final Path tmpDir = new Path(cwd.toString(), tmpSetting);
    final FileSystem localFs = FileSystem.getLocal(conf);
    final boolean ready = localFs.mkdirs(tmpDir) || localFs.getFileStatus(tmpDir).isDir();
    if (!ready) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }

  /**
   * Creates, inside {@code dir}, one symlink per cache URI that carries a fragment, pointing at
   * the localized path with the same index. Existing link names are left alone.
   */
  private static void linkCacheEntries(URI[] uris, Path[] localPaths, File dir)
      throws IOException {
    if (uris == null) {
      return;
    }
    for (int idx = 0; idx < uris.length; idx++) {
      final String fragment = uris[idx].getFragment();
      if (fragment == null) {
        continue;
      }
      final String linkName = dir.toString() + Path.SEPARATOR + fragment;
      if (new File(linkName).exists()) {
        continue;
      }
      FileUtil.symLink(localPaths[idx].toString(), linkName);
    }
  }
  /**
   * Reads the {@code example.count} and {@code batch.size} properties from a job-setup XML file.
   *
   * <p>Every {@code <property>} element under the document root is inspected; its name and value
   * are extracted with {@code xmlGetSingleValue}. A value that is not a valid integer propagates
   * as an unchecked {@link NumberFormatException}.
   *
   * @param jobFile location of the XML file to parse
   * @return a two-element array holding {@code example.count} at index 0 and {@code batch.size} at
   *     index 1 (each 0 when the property is absent), or {@code null} when the file cannot be
   *     read or parsed
   */
  private static int[] parseJobSetup(Path jobFile) {
    int[] result = new int[2];
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    try {
      DocumentBuilder db = dbf.newDocumentBuilder();
      Document doc = db.parse(jobFile.toString());
      Element configElement = doc.getDocumentElement();
      NodeList nodes = configElement.getElementsByTagName("property");
      if (nodes != null && nodes.getLength() > 0) {
        for (int i = 0; i < nodes.getLength(); i++) {
          Element property = (Element) nodes.item(i);
          String elName = xmlGetSingleValue(property, "name");
          // Compare by value, not by reference: a name read from the XML is not guaranteed to be
          // the same String object as the literal, so "==" would not reliably match. Putting the
          // literal first also tolerates a null name.
          if ("example.count".equals(elName)) {
            result[0] = Integer.parseInt(xmlGetSingleValue(property, "value"));
          } else if ("batch.size".equals(elName)) {
            result[1] = Integer.parseInt(xmlGetSingleValue(property, "value"));
          }
        }
      }

    } catch (ParserConfigurationException pce) {
      System.err.println(
          "Caught exception while parsing the cached file '"
              + jobFile
              + "' : "
              + StringUtils.stringifyException(pce));
      return null;
    } catch (SAXException se) {
      System.err.println(
          "Caught exception while parsing the cached file '"
              + jobFile
              + "' : "
              + StringUtils.stringifyException(se));
      return null;
    } catch (IOException ioe) {
      System.err.println(
          "Caught exception while parsing the cached file '"
              + jobFile
              + "' : "
              + StringUtils.stringifyException(ioe));
      return null;
    }
    return result;
  }
Ejemplo n.º 3
0
    /**
     * Re-reads the configured local directories when their configuration value has changed.
     *
     * <p>Called before every read/write so that a change to the local-dirs setting takes effect
     * immediately. When the value differs from the one seen last time, each listed directory is
     * created if necessary and checked with {@code DiskChecker}; directories that fail are logged
     * and left out. The surviving directories replace {@code localDirsPath} and {@code dirDF}, and
     * the round-robin start index is re-randomized.
     *
     * @param conf configuration to read the directory list from
     * @throws IOException if the configuration item is not set, or the local file system cannot be
     *     obtained
     */
    private synchronized void confChanged(Configuration conf) throws IOException {
      String newLocalDirs = conf.get(contextCfgItemName);
      if (newLocalDirs == null) {
        // Fail with a clear message instead of a NullPointerException on equals() below.
        throw new IOException(contextCfgItemName + " not configured");
      }
      if (!newLocalDirs.equals(savedLocalDirs)) {
        String[] localDirs = conf.getStrings(contextCfgItemName);
        if (localDirs == null) {
          // Defensive: treat "no entries" as an empty list rather than dereferencing null.
          localDirs = new String[0];
        }
        localFS = FileSystem.getLocal(conf);
        int numDirs = localDirs.length;
        ArrayList<String> dirs = new ArrayList<String>(numDirs);
        ArrayList<DF> dfList = new ArrayList<DF>(numDirs);
        for (int i = 0; i < numDirs; i++) {
          try {
            // filter problematic directories
            Path tmpDir = new Path(localDirs[i]);
            if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) {
              try {
                DiskChecker.checkDir(new File(localDirs[i]));
                dirs.add(localDirs[i]);
                dfList.add(new DF(new File(localDirs[i]), 30000));
              } catch (DiskErrorException de) {
                LOG.warn(localDirs[i] + " is not writable\n" + StringUtils.stringifyException(de));
              }
            } else {
              LOG.warn("Failed to create " + localDirs[i]);
            }
          } catch (IOException ie) {
            // A directory that cannot be set up is logged and skipped, not fatal.
            LOG.warn(
                "Failed to create "
                    + localDirs[i]
                    + ": "
                    + ie.getMessage()
                    + "\n"
                    + StringUtils.stringifyException(ie));
          }
        }
        localDirsPath = new Path[dirs.size()];
        for (int i = 0; i < localDirsPath.length; i++) {
          localDirsPath[i] = new Path(dirs.get(i));
        }
        dirDF = dfList.toArray(new DF[dirs.size()]);
        savedLocalDirs = newLocalDirs;

        // Randomize the first disk picked in the round-robin selection. Random.nextInt(0) throws
        // IllegalArgumentException, so fall back to index 0 when no directory survived the checks.
        dirNumLastAccessed = dirs.isEmpty() ? 0 : dirIndexRandomizer.nextInt(dirs.size());
      }
    }
    /**
     * Loads training parameters from a job-setup XML file into this instance.
     *
     * <p>Each {@code <property>} element under the document root is examined. The property names
     * {@code gibbs.steps}, {@code learning.rate}, {@code weight.cost} and {@code class.count} are
     * matched case-insensitively and their values stored in the corresponding fields; any other
     * property is ignored. Parser, SAX and I/O failures are reported on {@code System.err} and
     * otherwise ignored.
     *
     * @param jobFile location of the XML file to parse
     */
    private void parseJobSetup(Path jobFile) {
      final String failurePrefix =
          "Caught exception while parsing the cached file '" + jobFile + "' : ";
      final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      try {
        final Document document = factory.newDocumentBuilder().parse(jobFile.toString());
        final NodeList properties =
            document.getDocumentElement().getElementsByTagName("property");
        final int count = (properties == null) ? 0 : properties.getLength();

        for (int idx = 0; idx < count; idx++) {
          final Element entry = (Element) properties.item(idx);
          final String key = xmlGetSingleValue(entry, "name");
          // The value is only looked up for a recognised key.
          if (key.equalsIgnoreCase("gibbs.steps")) {
            this.gibbsSteps = Integer.parseInt(xmlGetSingleValue(entry, "value"));
          } else if (key.equalsIgnoreCase("learning.rate")) {
            this.learningRate = Double.parseDouble(xmlGetSingleValue(entry, "value"));
          } else if (key.equalsIgnoreCase("weight.cost")) {
            this.weightCost = Double.parseDouble(xmlGetSingleValue(entry, "value"));
          } else if (key.equalsIgnoreCase("class.count")) {
            this.classCount = Integer.parseInt(xmlGetSingleValue(entry, "value"));
          }
        }
      } catch (ParserConfigurationException pce) {
        System.err.println(failurePrefix + StringUtils.stringifyException(pce));
      } catch (SAXException se) {
        System.err.println(failurePrefix + StringUtils.stringifyException(se));
      } catch (IOException ioe) {
        System.err.println(failurePrefix + StringUtils.stringifyException(ioe));
      }
    }
Ejemplo n.º 5
0
 /**
  * Command-line entry point: injects the URLs found under a directory into a crawl db.
  *
  * @param args expects at least two entries: the crawl db path and the URL directory
  * @return 0 when {@code inject} completes, -1 on a usage error or when injection fails
  * @throws Exception declared by the signature; failures of {@code inject} are caught and logged
  */
 public int run(String[] args) throws Exception {
   int exitCode = -1;
   if (args.length < 2) {
     System.err.println("Usage: Injector <crawldb> <url_dir>");
     return exitCode;
   }

   try {
     final Path crawlDb = new Path(args[0]);
     final Path urlDir = new Path(args[1]);
     inject(crawlDb, urlDir);
     exitCode = 0;
   } catch (Exception e) {
     // Any failure is logged and reported through the exit code instead of propagating.
     LOG.error("Injector: " + StringUtils.stringifyException(e));
   }
   return exitCode;
 }
 /**
  * Applies job-setup files from the distributed cache when {@code minibatch.job.setup} is enabled.
  *
  * <p>Every locally cached file is handed to {@code parseJobSetup}. A failure to list the cached
  * files is reported on {@code System.err} and treated as "no files".
  *
  * @param conf configuration carrying the {@code minibatch.job.setup} flag and the cache entries
  */
 public void configure(Configuration conf) {
   if (conf.getBoolean("minibatch.job.setup", false)) {
     Path[] jobSetupFiles = new Path[0];
     try {
       jobSetupFiles = DistributedCache.getLocalCacheFiles(conf);
     } catch (IOException ioe) {
       System.err.println(
           "Caught exception while getting cached files: "
               + StringUtils.stringifyException(ioe));
     }
     // The lookup may hand back null instead of an empty array when no cache files are
     // registered (assumption about DistributedCache - confirm for the Hadoop version in use);
     // iterating over null would throw a NullPointerException.
     if (jobSetupFiles == null) {
       return;
     }
     for (Path jobSetup : jobSetupFiles) {
       parseJobSetup(jobSetup);
     }
   }
 }
Ejemplo n.º 7
0
    /**
     * Resolves {@code path} against the local directory selected by {@code dirNumLastAccessed}.
     *
     * @param path path to resolve under the selected local directory
     * @param checkWrite when true, the parent directory of the result is verified with
     *     {@code DiskChecker.checkDir} first
     * @return the resolved path, or {@code null} when the check was requested and failed
     * @throws IOException declared by the signature; a {@code DiskErrorException} from the check
     *     is caught and logged here
     */
    private Path createPath(Path path, boolean checkWrite) throws IOException {
      final Path candidate = new Path(localDirsPath[dirNumLastAccessed], path);
      if (!checkWrite) {
        return candidate;
      }

      // Make sure a directory can be created here; on a read-only disk the check fails.
      final File parentDir = new File(candidate.getParent().toUri().getPath());
      try {
        DiskChecker.checkDir(parentDir);
      } catch (DiskErrorException d) {
        LOG.warn(StringUtils.stringifyException(d));
        return null;
      }
      return candidate;
    }
Ejemplo n.º 8
0
    /**
     * Runs the configured script over {@code args} in batches of at most {@code maxArgs}
     * arguments and concatenates the outputs.
     *
     * <p>Each batch is executed as {@code scriptName} followed by its slice of arguments, in the
     * directory named by the {@code user.dir} system property when that is set. The output of each
     * run is appended to the result followed by a single space.
     *
     * @param args arguments to pass to the script
     * @return the concatenated outputs, or {@code null} when {@code args} is empty,
     *     {@code maxArgs} is below {@code MIN_ALLOWABLE_ARGS}, or any run fails
     */
    private String runResolveCommand(List<String> args) {
      final int total = args.size();
      if (total == 0) {
        return null;
      }
      final StringBuilder collected = new StringBuilder();
      if (maxArgs < MIN_ALLOWABLE_ARGS) {
        LOG.warn(
            "Invalid value "
                + maxArgs
                + " for "
                + SCRIPT_ARG_COUNT_KEY
                + "; must be >= "
                + MIN_ALLOWABLE_ARGS);
        return null;
      }

      int offset = 0;
      while (offset != total) {
        // Next slice: up to maxArgs arguments, clipped at the end of the list.
        final int end = Math.min(offset + maxArgs, total);
        final List<String> command = new ArrayList<String>();
        command.add(scriptName);
        command.addAll(args.subList(offset, end));

        final String userDir = System.getProperty("user.dir");
        final File workingDir = (userDir != null) ? new File(userDir) : null;

        final ShellCommandExecutor executor =
            new ShellCommandExecutor(command.toArray(new String[command.size()]), workingDir);
        try {
          executor.execute();
          collected.append(executor.getOutput()).append(" ");
        } catch (Exception e) {
          // One failed batch invalidates the whole result.
          LOG.warn(StringUtils.stringifyException(e));
          return null;
        }
        offset = end;
      }
      return collected.toString();
    }
Ejemplo n.º 9
0
 /**
  * Halts with the given status, using the stringified throwable as the message.
  *
  * <p>Delegates to {@code halt(int, String)}.
  *
  * @param status exit status passed on to the delegate
  * @param t throwable whose stringified form becomes the message
  * @throws HaltException as declared; raised by the delegated overload (conditions are not
  *     visible in this method)
  */
 public static void halt(int status, Throwable t) throws HaltException {
   final String message = StringUtils.stringifyException(t);
   halt(status, message);
 }
Ejemplo n.º 10
0
 /**
  * Like {@link #terminate(int, String)}, but takes the message from the given throwable.
  *
  * <p>The throwable is converted with {@code StringUtils.stringifyException} and the result is
  * passed to the String overload.
  *
  * @param status exit status passed on to the delegate
  * @param t throwable whose stringified form becomes the message
  * @throws ExitException as declared; raised by the delegated overload (conditions are not
  *     visible in this method)
  */
 public static void terminate(int status, Throwable t) throws ExitException {
   final String message = StringUtils.stringifyException(t);
   terminate(status, message);
 }
Ejemplo n.º 11
0
  /**
   * Applies the user-specified generic command-line options to a configuration.
   *
   * <p>Options are handled in this order: {@code fs}, {@code jt}, {@code conf}, then
   * {@code libjars} / {@code files} / {@code archives}, then {@code -D} properties, and finally
   * {@code tokenCacheFile}. An {@link IOException} while handling the jar/file/archive options is
   * printed to {@code System.err} and processing continues.
   *
   * @param conf Configuration to be modified
   * @param line User-specified generic options
   * @throws RuntimeException wrapping the {@link IOException} raised while checking the token
   *     cache file, including the case where that file does not exist
   */
  private void processGeneralOptions(Configuration conf, CommandLine line) {
    if (line.hasOption("fs")) {
      final String defaultFs = line.getOptionValue("fs");
      FileSystem.setDefaultUri(conf, defaultFs);
    }

    if (line.hasOption("jt")) {
      final String jobTracker = line.getOptionValue("jt");
      conf.set("mapred.job.tracker", jobTracker);
    }

    if (line.hasOption("conf")) {
      for (String resource : line.getOptionValues("conf")) {
        conf.addResource(new Path(resource));
      }
    }

    try {
      if (line.hasOption("libjars")) {
        final String validatedJars = validateFiles(line.getOptionValue("libjars"), conf);
        conf.set("tmpjars", validatedJars);

        // Expose the jars on the client side: through the configuration's class loader and
        // through the current thread's context class loader.
        final URL[] jarUrls = getLibJars(conf);
        if (jarUrls != null && jarUrls.length > 0) {
          final ClassLoader confLoader = new URLClassLoader(jarUrls, conf.getClassLoader());
          conf.setClassLoader(confLoader);

          final Thread self = Thread.currentThread();
          final ClassLoader threadLoader =
              new URLClassLoader(jarUrls, self.getContextClassLoader());
          self.setContextClassLoader(threadLoader);
        }
      }
      if (line.hasOption("files")) {
        final String validatedFiles = validateFiles(line.getOptionValue("files"), conf);
        conf.set("tmpfiles", validatedFiles);
      }
      if (line.hasOption("archives")) {
        final String validatedArchives = validateFiles(line.getOptionValue("archives"), conf);
        conf.set("tmparchives", validatedArchives);
      }
    } catch (IOException ioe) {
      System.err.println(StringUtils.stringifyException(ioe));
    }

    if (line.hasOption('D')) {
      for (String definition : line.getOptionValues('D')) {
        // Split on the first '=' only; entries without '=' are skipped.
        final String[] pair = definition.split("=", 2);
        if (pair.length != 2) {
          continue;
        }
        conf.set(pair[0], pair[1]);
      }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // Token cache file: nothing more to do when the option is absent.
    if (!line.hasOption("tokenCacheFile")) {
      return;
    }
    final String tokenFile = line.getOptionValue("tokenCacheFile");
    try {
      // The file must exist on the local file system.
      final FileSystem localFs = FileSystem.getLocal(conf);
      final Path tokenPath = new Path(tokenFile);
      if (!localFs.exists(tokenPath)) {
        throw new FileNotFoundException("File " + tokenFile + " does not exist.");
      }

      LOG.debug("setting conf tokensFile: " + tokenFile);
      final String qualified = localFs.makeQualified(tokenPath).toString();
      conf.set("mapreduce.job.credentials.json", qualified);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }