/*
   * (non-Javadoc)
   *
   * @see org.anon.smart.d2cache.store.StoreTransaction#commit()
   */
  @Override
  public void commit() throws CtxException {

    FileSystem hdfs = ((HadoopFileStoreConnection) _connection).getHadoopFS();

    assertion().assertNotNull(hdfs, "Hadoop FileSystem is null");

    String repo = hdfs.getWorkingDirectory().toUri().toString();

    for (Object fi : files.keySet()) {
      try {

        String[] params = (String[]) fi;

        Path destination = new Path(repo + "/" + params[1]);
        Path fldr = destination.getParent();
        if (!hdfs.exists(fldr)) hdfs.mkdirs(fldr);
        hdfs.copyFromLocalFile(true, new Path(params[0]), destination);

      } catch (Exception e) {
        // Log the failure and continue with the remaining files; one failed copy does not abort the commit.
        e.printStackTrace();
      }
    }
  }
Example No. 2
 private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException {
   StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16));
   for (String localFile : files) {
     Path src = new Path(localFile);
     String filename = src.getName();
     Path dst = new Path(basePath, filename);
     URI localFileURI = null;
     try {
       localFileURI = new URI(localFile);
     } catch (URISyntaxException e) {
       throw new IOException(e);
     }
     if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) {
       LOG.info("Copy {} from local filesystem to {}", localFile, dst);
       fs.copyFromLocalFile(false, true, src, dst);
     } else {
       LOG.info("Copy {} from DFS to {}", localFile, dst);
       FileUtil.copy(fs, src, fs, dst, false, true, conf);
     }
     if (csv.length() > 0) {
       csv.append(LIB_JARS_SEP);
     }
     csv.append(dst.toString());
   }
   return csv.toString();
 }
Example No. 3
 @Test
 public void copyFromLocal() throws Exception {
   fs.copyFromLocalFile(
       new Path(
           "/Volumes/Goer/code/bigdata/moodle-archi/moodle-test/src/main/resources/input/devairlinedataset/txt/1987"),
       new Path("/bigdata/sampledata"));
 }
Example No. 4
 public void copyFile(String local, String remote) throws IOException {
   FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
    // remote --- /<user>/<file or folder under that user's directory>
   fs.copyFromLocalFile(new Path(local), new Path(remote));
   log.debug("copy from: " + local + " to " + remote);
   fs.close();
 }
Example No. 5
 // TODO#: assumes throw
 private void localizeJarForClass(FileSystem lfs, Path libDir, String className, boolean doThrow)
     throws IOException {
   String jarPath = null;
   boolean hasException = false;
   try {
     Class<?> auxClass = Class.forName(className);
     jarPath = Utilities.jarFinderGetJar(auxClass);
   } catch (Throwable t) {
     if (doThrow) {
       throw (t instanceof IOException) ? (IOException) t : new IOException(t);
     }
     hasException = true;
     String err =
         "Cannot find a jar for ["
             + className
             + "] due to an exception ("
             + t.getMessage()
             + "); not packaging the jar";
     LOG.error(err, t);
     System.err.println(err);
   }
   if (jarPath != null) {
     lfs.copyFromLocalFile(new Path(jarPath), libDir);
   } else if (!hasException) {
     String err = "Cannot find a jar for [" + className + "]; not packaging the jar";
     if (doThrow) {
       throw new IOException(err);
     }
     LOG.error(err);
     System.err.println(err);
   }
 }
Example No. 6
  /**
   * Copies image files from the local file system to HDFS.
   *
   * @param conf job configuration used to resolve the target FileSystem
   * @param sourcePath local source path
   * @param destinationPath HDFS destination directory (created if it does not exist)
   * @throws Exception
   */
 public static void copyFromLocal(JobConf conf, String sourcePath, String destinationPath)
     throws Exception {
   FileSystem fs = FileSystem.get(conf);
   Path interPath = new Path(destinationPath);
   fs.mkdirs(interPath);
   fs.copyFromLocalFile(new Path(sourcePath), interPath);
 }
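A minimal usage sketch for the helper above; the class name, configuration, and paths below are illustrative assumptions, not part of the original project:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class CopyImagesToHdfsSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();                  // picks up *-site.xml resources from the classpath
    FileSystem fs = FileSystem.get(conf);          // default FileSystem (HDFS when fs.defaultFS points there)
    Path dest = new Path("/user/hadoop/images");   // hypothetical destination directory
    fs.mkdirs(dest);                               // same mkdirs-then-copy pattern as copyFromLocal above
    fs.copyFromLocalFile(new Path("/tmp/images/img001.png"), dest);
  }
}

JobConf extends Configuration, so FileSystem.get(conf) resolves the same target filesystem the helper uses.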
Example No. 7
     public void close(Reporter reporter) throws IOException {
      for (Integer shard : lps.keySet()) {
        String lpDir = localManager.localTmpDir("" + shard);
        LOG.info("Closing LP for shard " + shard + " at " + lpDir);
        lps.get(shard).close();
        LOG.info("Closed LP for shard " + shard + " at " + lpDir);
        progress();
        String remoteDir = args.outputDirHdfs + "/" + shard;

        // Do all this stuff to ensure that S3 actually does delete
        int deleteAttempt = 4;
        while (fileSystem.exists(new Path(remoteDir)) && deleteAttempt > 0) {
          LOG.info("Deleting existing shard " + shard + " at " + remoteDir);
          fileSystem.delete(new Path(remoteDir), true);
          --deleteAttempt;
        }
        if (fileSystem.exists(new Path(remoteDir)) && deleteAttempt == 0) {
          throw new IOException(
              "Failed to delete shard "
                  + shard
                  + " at "
                  + remoteDir
                  + " after "
                  + deleteAttempt
                  + " attempts!");
        } else {
          LOG.info("Deleted existing shard " + shard + " at " + remoteDir);
        }
        LOG.info("Copying " + lpDir + " to " + remoteDir);
        fileSystem.copyFromLocalFile(new Path(lpDir), new Path(remoteDir));
        LOG.info("Copied " + lpDir + " to " + remoteDir);
        progress();
      }
      localManager.cleanup();
    }
Example No. 8
  /**
   * @param fs File system.
   * @param src Source path.
   * @param dst Destination path.
   * @return Path to the file on the HDFS file system.
   */
  public static Path copyLocalToHdfs(FileSystem fs, String src, String dst) throws Exception {
    Path dstPath = new Path(dst);

    // The local file isn't removed; an existing destination file is overwritten.
    fs.copyFromLocalFile(false, true, new Path(src), dstPath);

    return dstPath;
  }
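A small, hedged sketch of what the two boolean flags used above do; the paths are made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyLocalToHdfsSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path src = new Path("/tmp/data.csv");           // local source (hypothetical)
    Path dst = new Path("/user/hadoop/data.csv");   // destination on the target filesystem (hypothetical)
    // delSrc = false keeps the local file; overwrite = true replaces an existing destination,
    // matching the "local file isn't removed, destination overwritten" note above.
    fs.copyFromLocalFile(false, true, src, dst);
    System.out.println("Copied: " + fs.exists(dst));
  }
}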
Example No. 9
  private RunningJob _test(String... arg) throws Exception {
    Path actionDir = getFsTestCaseDir();

    File jar =
        IOUtils.createJar(
            new File(getTestCaseDir()),
            "launcher.jar",
            LauncherMapper.class,
            LauncherSecurityManager.class,
            LauncherException.class,
            LauncherMainTester.class);

    FileSystem fs = getFileSystem();

    Path launcherJar = new Path(actionDir, "launcher.jar");
    fs.copyFromLocalFile(new Path(jar.toString()), launcherJar);

    JobConf jobConf = new JobConf();
    jobConf.set("user.name", getTestUser());
    jobConf.set("group.name", getTestGroup());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);

    jobConf.set("mapred.job.tracker", getJobTrackerUri());
    jobConf.set("fs.default.name", getNameNodeUri());
    injectKerberosInfo(jobConf);

    LauncherMapper lm = new LauncherMapper();
    lm.setupMainClass(jobConf, LauncherMainTester.class.getName());
    lm.setupMainArguments(jobConf, arg);

    Configuration actionConf = new XConfiguration();
    lm.setupLauncherInfo(jobConf, "1", "1@a", actionDir, "1@a-0", actionConf);

    assertEquals("1", actionConf.get("oozie.job.id"));
    assertEquals("1@a", actionConf.get("oozie.action.id"));

    DistributedCache.addFileToClassPath(new Path(launcherJar.toUri().getPath()), jobConf);

    JobClient jobClient = createJobClient();

    final RunningJob runningJob = jobClient.submitJob(jobConf);

    System.out.println("Action Dir: " + actionDir);
    System.out.println("LauncherMapper ID: " + runningJob.getJobID().toString());

    waitFor(
        180 * 1000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return runningJob.isComplete();
          }
        });
    return runningJob;
  }
Example No. 10
 @Test
 public void test() throws Exception {
   String outputPath = "/tmp/output-plume-singleflattenchanneltest";
   String inputPath = "/tmp/input-wordcount.txt";
   String inputPath2 = "/tmp/input-moretext.txt";
   // Prepare input for test
   FileSystem system = FileSystem.getLocal(new Configuration());
   system.copyFromLocalFile(
       new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath));
   system.copyFromLocalFile(
       new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath2));
   // Prepare output for test
   system.delete(new Path(outputPath), true);
   // Prepare workflow
   MapRedSingleFlattenChannelTestWorkflow workFlow = new MapRedSingleFlattenChannelTestWorkflow();
   // Execute it
   MapRedExecutor executor = new MapRedExecutor();
   executor.execute(workFlow, outputPath);
   /** TODO add test validation */
 }
Example No. 11
 public boolean copy(String source, Path dest) {
   try {
     if (!fileSystem.exists(dest)) {
       logger.warn("File " + dest + " does not exists");
       return false;
     }
     fileSystem.copyFromLocalFile(new Path(source), dest);
     return true;
   } catch (IOException e) {
     logger.error(e.getMessage(), e);
   }
   return false;
 }
Example No. 12
  public static void main(String[] args) throws Exception {
    String hadoopConfPath = args[0];
    String localBackupPath = args[1];
    String backupUri = args[2];

    Configuration hadoopConf = new Configuration();
    try (DataInputStream in = new DataInputStream(new FileInputStream(hadoopConfPath))) {
      hadoopConf.readFields(in);
    }

    FileSystem fs = FileSystem.get(new URI(backupUri), hadoopConf);

    fs.copyFromLocalFile(new Path(localBackupPath), new Path(backupUri));
  }
Example No. 13
  /**
   * @return Path to remote file (usually hdfs)
   * @throws IOException
   */
  public static Path setupLocalResource(
      FileSystem fs, String appId, Path localRsrcPath, LocalResource appMasterJar, Path homedir)
      throws IOException {

    // copy resource to HDFS
    String suffix = ".flink/" + appId + "/" + localRsrcPath.getName();

    Path dst = new Path(homedir, suffix);

    LOG.info("Copying from " + localRsrcPath + " to " + dst);
    fs.copyFromLocalFile(localRsrcPath, dst);
    registerLocalResource(fs, dst, appMasterJar);
    return dst;
  }
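registerLocalResource is project code not shown in this listing; a plausible sketch of that registration step, following the same YARN LocalResource pattern that appears later in Example No. 28 (the log4j resource), could look like this:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class RegisterLocalResourceSketch {
  // Point an existing LocalResource record at the file that was just copied to the cluster filesystem.
  static void registerLocalResource(FileSystem fs, Path dst, LocalResource resource) throws IOException {
    FileStatus status = fs.getFileStatus(dst);
    resource.setResource(ConverterUtils.getYarnUrlFromURI(dst.toUri()));
    resource.setSize(status.getLen());
    resource.setTimestamp(status.getModificationTime());
    resource.setType(LocalResourceType.FILE);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);
  }
}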
Example No. 14
  /**
   * @param src path to the source for the resource
   * @param dest path in hdfs for the resource
   * @param conf
   * @return localresource from tez localization.
   * @throws IOException when any file system related calls fails.
   */
  public LocalResource localizeResource(Path src, Path dest, Configuration conf)
      throws IOException {
    FileSystem destFS = dest.getFileSystem(conf);
    if (!(destFS instanceof DistributedFileSystem)) {
      throw new IOException(ErrorMsg.INVALID_HDFS_URI.format(dest.toString()));
    }

    if (src != null) {
      // copy the src to the destination and create local resource.
      // do not overwrite.
      LOG.info("Localizing resource because it does not exist: " + src + " to dest: " + dest);
      try {
        destFS.copyFromLocalFile(false, false, src, dest);
      } catch (IOException e) {
        LOG.info("Looks like another thread is writing the same file will wait.");
        int waitAttempts =
            conf.getInt(
                HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_NUM_WAIT_ATTEMPTS.varname,
                HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_NUM_WAIT_ATTEMPTS.defaultIntVal);
        long sleepInterval =
            conf.getLong(
                HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_WAIT_INTERVAL.varname,
                HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_WAIT_INTERVAL.defaultLongVal);
        LOG.info("Number of wait attempts: " + waitAttempts + ". Wait interval: " + sleepInterval);
        boolean found = false;
        for (int i = 0; i < waitAttempts; i++) {
          if (!checkPreExisting(src, dest, conf)) {
            try {
              Thread.sleep(sleepInterval);
            } catch (InterruptedException interruptedException) {
              throw new IOException(interruptedException);
            }
          } else {
            found = true;
            break;
          }
        }
        if (!found) {
          LOG.error("Could not find the jar that was being uploaded");
          throw new IOException(
              "Previous writer likely failed to write "
                  + dest
                  + ". Failing because I am unlikely to write too.");
        }
      }
    }

    return createLocalResource(
        destFS, dest, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION);
  }
Example No. 15
 public static void copyFromLocal(String localPath, String destination) {
   try {
     JobConf hadoopConfig = HdpBootstrap.hadoopConfig();
     FileSystem fs = FileSystem.get(hadoopConfig);
     if (!(fs instanceof LocalFileSystem)) {
       Path src = new Path(localPath);
       Path dst = new Path(destination);
       fs.copyFromLocalFile(false, true, src, dst);
       System.out.println(String.format("Copying [%s] to [%s]", src, dst));
     }
   } catch (IOException ex) {
     throw new RuntimeException(ex);
   }
 }
Example No. 16
 /**
  * Copies a single local file to HDFS.
  *
  * @param srcFilePath source file including its path, e.g. /abc.txt
  * @param destFilePath destination directory, created automatically if it does not exist, e.g. /dest
  * @return true if the copy succeeded
  */
 public static boolean copyFile(String srcFilePath, String destFilePath) {
   boolean result = false;
   File file = new File(srcFilePath);
   if (file.length() == 0) {
     return result;
   }
   try {
     fs.copyFromLocalFile(new Path(srcFilePath), new Path(destFilePath));
     result = true;
   } catch (Exception e1) {
     LOG.error("读取文件失败:" + e1.getMessage());
     e1.printStackTrace();
   }
   return result;
 }
Example No. 17
  /**
   * Submit a local file to the filesystem referenced by the instance's cluster filesystem
   *
   * @param clusterFS remote fs
   * @param localFile filename
   * @param tempPath base temporary/staging path on the cluster filesystem
   * @param subdir subdirectory (expected to end in a "/")
   * @param destFileName destination filename
   * @return the local resource ref
   * @throws IOException trouble copying to HDFS
   */
  private static LocalResource submitFile(
      FileSystem clusterFS, File localFile, Path tempPath, String subdir, String destFileName)
      throws IOException {
    Path src = new Path(localFile.toString());
    Path subdirPath = new Path(tempPath, subdir);
    clusterFS.mkdirs(subdirPath);
    Path destPath = new Path(subdirPath, destFileName);

    clusterFS.copyFromLocalFile(false, true, src, destPath);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    return createAmResource(clusterFS, destPath, LocalResourceType.FILE);
  }
Example No. 18
  public static void main(String[] args) throws Exception {

    final String sitePath = "/tmp/scale-site.conf";
    final String testPath = "/tmp/scale-test.conf";
    Opts opts = new Opts();
    opts.parseArgs(Run.class.getName(), args);

    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs;
    fs = FileSystem.get(conf);

    fs.copyToLocalFile(new Path("/accumulo-scale/conf/site.conf"), new Path(sitePath));
    fs.copyToLocalFile(
        new Path(String.format("/accumulo-scale/conf/%s.conf", opts.testId)), new Path(testPath));

    // load configuration file properties
    Properties scaleProps = new Properties();
    Properties testProps = new Properties();
    try {
      FileInputStream fis = new FileInputStream(sitePath);
      scaleProps.load(fis);
      fis.close();
      fis = new FileInputStream(testPath);
      testProps.load(fis);
    } catch (Exception e) {
      System.out.println("Problem loading config file");
      e.printStackTrace();
    }

    ScaleTest test =
        (ScaleTest)
            Class.forName(String.format("accumulo.test.scalability.%s", opts.testId)).newInstance();

    test.init(scaleProps, testProps, opts.numTabletServers);

    if (opts.action.equalsIgnoreCase("setup")) {
      test.setup();
    } else if (opts.action.equalsIgnoreCase("client")) {
      InetAddress addr = InetAddress.getLocalHost();
      String host = addr.getHostName();
      fs.createNewFile(new Path("/accumulo-scale/clients/" + host));
      test.client();
      fs.copyFromLocalFile(new Path("/tmp/scale.out"), new Path("/accumulo-scale/results/" + host));
    } else if (opts.action.equalsIgnoreCase("teardown")) {
      test.teardown();
    }
  }
Example No. 19
 public void copyFromLocalFile(boolean delSrc, boolean overwrite, String src, String dst) {
   IOException te = null;
   LOG.info("Trying to copy from " + src + " to " + dst);
   int cnt = 0;
   while (cnt < MAX_TRY) {
     try {
       mFs.copyFromLocalFile(delSrc, overwrite, new Path(src), new Path(dst));
     } catch (IOException e) {
       cnt++;
       LOG.error(cnt + " : " + e.getMessage(), e);
       te = e;
       continue;
     }
     LOG.info("Finished the copy from " + src + " to " + dst);
     return;
   }
   CommonUtils.runtimeException(te);
 }
Example No. 20
  private void copyConfig(LlapOptions options, FileSystem lfs, Path confPath, String f)
      throws IOException {
    if (f.equals("llap-daemon-site.xml")) {
      FSDataOutputStream confStream = lfs.create(new Path(confPath, f));

      Configuration copy = resolve(conf, "llap-daemon-site.xml");

      for (Entry<Object, Object> props : options.getConfig().entrySet()) {
        // overrides
        copy.set((String) props.getKey(), (String) props.getValue());
      }

      copy.writeXml(confStream);
      confStream.close();
    } else {
      // they will be file:// URLs
      lfs.copyFromLocalFile(new Path(conf.getResource(f).toString()), confPath);
    }
  }
Example No. 21
  public static void copyFilesFromFolder(JobConf conf, String path, Path outputPath)
      throws Exception {

    FileSystem fs = FileSystem.get(conf);
    File dir = new File(path);
    // System.out.println("input Path "+path.toString());
    FileFilter fileFilter =
        new FileFilter() {
          @Override
          public boolean accept(File file) {
            return !file.isDirectory();
          }
        };
    if (dir.isDirectory()) {
      File[] list = dir.listFiles(fileFilter);
      for (int i = 0; i < list.length; i++) {
        fs.copyFromLocalFile(new Path(list[i].getPath()), outputPath);
      }
    }
  }
Example No. 22
 protected void moveFile(String filePath) throws Exception {
   String remoteFilePath =
       filePath.substring(filePath.lastIndexOf("/") + 1, filePath.lastIndexOf("."));
   remoteFilePath = remoteOutputDir + remoteFilePath;
   try {
     Path pLocalPath = new Path(filePath);
     Path pRemoteFilePath = new Path(remoteFilePath + ".chukwa");
     remoteFs.copyFromLocalFile(false, true, pLocalPath, pRemoteFilePath);
     Path pFinalRemoteFilePath = new Path(remoteFilePath + ".done");
     if (remoteFs.rename(pRemoteFilePath, pFinalRemoteFilePath)) {
       localFs.delete(pLocalPath, false);
       log.info("move done deleting from local: " + pLocalPath);
     } else {
       throw new RuntimeException(
           "Cannot rename remote file, " + pRemoteFilePath + " to " + pFinalRemoteFilePath);
     }
   } catch (FileNotFoundException ex) {
     // do nothing since if the file is no longer there it's
     // because it has already been moved over by the cleanup task.
   } catch (Exception e) {
     log.warn("Cannot copy to the remote HDFS", e);
     throw e;
   }
 }
Example No. 23
  @Test
  public void runBuildIndex() throws Exception {
    Configuration conf = IntegrationUtils.getBespinConfiguration();
    FileSystem fs = FileSystem.get(conf);

    assertTrue(fs.exists(collectionPath));

    fs.delete(new Path(index), true);

    List<String> jars = Lists.newArrayList();
    jars.add(IntegrationUtils.getJar("lib", "cloud9"));
    jars.add(IntegrationUtils.getJar("lib", "guava"));
    jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
    jars.add(IntegrationUtils.getJar("lib", "fastutil"));
    jars.add(IntegrationUtils.getJar("lib", "jsap"));
    jars.add(IntegrationUtils.getJar("lib", "sux4j"));
    jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
    jars.add(IntegrationUtils.getJar("lib", "lucene-analyzers"));
    jars.add(IntegrationUtils.getJar("lib", "lucene-core"));
    jars.add(IntegrationUtils.getJar("lib", "tools"));
    jars.add(IntegrationUtils.getJar("lib", "maxent"));
    jars.add(IntegrationUtils.getJar("lib", "commons-lang"));
    jars.add(IntegrationUtils.getJar("lib", "commons-cli"));
    jars.add(IntegrationUtils.getJar("lib", "bliki-core"));
    jars.add(IntegrationUtils.getJar("lib", "lintools-datatypes-1.0.0"));
    jars.add(IntegrationUtils.getJar("dist", "ivory"));

    String libjars = String.format("-libjars=%s", Joiner.on(",").join(jars));

    // Copy the vocabularies, translation tables, tokenizer models, stopword lists, grammars, and
    // translated queries needed for the retrieval run.
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/vocab/vocab.en-" + LANGUAGE + ".en"),
        new Path(index + "/vocab.en-" + LANGUAGE + ".en"));
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/vocab/vocab.en-" + LANGUAGE + "." + LANGUAGE + ""),
        new Path(index + "/vocab.en-" + LANGUAGE + "." + LANGUAGE + ""));
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/vocab/ttable.en-" + LANGUAGE + ""),
        new Path(index + "/ttable.en-" + LANGUAGE + ""));
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/tokenizer/" + LANGUAGE + "-token.bin"),
        new Path(index + "/" + LANGUAGE + "-token.bin"));
    fs.copyFromLocalFile(
        false, true, new Path("data/tokenizer/en-token.bin"), new Path(index + "/en-token.bin"));
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/tokenizer/" + LANGUAGE + ".stop.stemmed"),
        new Path(index + "/" + LANGUAGE + ".stop.stemmed"));
    fs.copyFromLocalFile(
        false,
        true,
        new Path("data/tokenizer/en.stop.stemmed"),
        new Path(index + "/en.stop.stemmed"));
    for (int i = 0; i < numTopics; i++) {
      fs.copyFromLocalFile(
          false,
          true,
          new Path("data/" + PATH + "/" + MTMODEL + ".grammar/grammar." + i),
          new Path(index + "/grammar." + i));
    }
    fs.copyFromLocalFile(
        false,
        true,
        new Path(
            "data/"
                + PATH
                + "/"
                + MTMODEL
                + "/title_en-"
                + LANGUAGE
                + "-trans10-filtered-integration.xml"),
        new Path(index + "/title_en-" + LANGUAGE + "-trans10-filtered.xml"));

    String[] args =
        new String[] {
          "hadoop jar",
          IntegrationUtils.getJar("dist", "ivory"),
          ivory.app.PreprocessTrecForeign.class.getCanonicalName(),
          libjars,
          "-input=" + collectionPath.toString(),
          "-index=" + index,
          "-lang=" + LANGUAGE,
          "-tokenizerclass=" + LuceneArabicAnalyzer.class.getCanonicalName(),
          "-tokenizermodel=" + index + "/" + LANGUAGE + "-token.bin",
          "-name=" + VerifyTrecArabicPositionalIndexIP.class.getCanonicalName()
        };

    IntegrationUtils.exec(Joiner.on(" ").join(args));

    args =
        new String[] {
          "hadoop jar",
          IntegrationUtils.getJar("dist", "ivory"),
          ivory.app.BuildIndex.class.getCanonicalName(),
          libjars,
          "-index=" + index,
          "-indexPartitions=10",
          "-positionalIndexIP"
        };

    IntegrationUtils.exec(Joiner.on(" ").join(args));

    QueryEngine qr = new QueryEngine();

    for (int heuristic = 0; heuristic <= 2; heuristic++) {
      conf =
          RunQueryEngine.parseArgs(
              new String[] {
                "-index=" + index,
                "-queries_path=" + index + "/title_en-" + LANGUAGE + "-trans10-filtered.xml",
                "-run=en-" + LANGUAGE + ".interp",
                "-query_type=mtN",
                "-doc_lang=" + LANGUAGE + "",
                "-query_lang=en",
                "-doc_tokenizer=" + index + "/" + LANGUAGE + "-token.bin",
                "-query_tokenizer=" + index + "/en-token.bin",
                "-query_vocab=" + index + "/vocab.en-" + LANGUAGE + ".en",
                "-doc_vocab=" + index + "/vocab.en-" + LANGUAGE + "." + LANGUAGE + "",
                "-f2eProbs=" + index + "/ttable.en-" + LANGUAGE + "",
                "-LexProbThreshold=0.005",
                "-CumProbThreshold=0.95",
                "-mt_weight=0.3",
                "-grammar_weight=0.4",
                "-bitext_weight=0.3",
                "-token_weight=1",
                "-phrase_weight=0",
                "-kBest=10",
                "-doc_stemmed_stopwordlist=" + index + "/" + LANGUAGE + ".stop.stemmed",
                "-query_stemmed_stopwordlist=" + index + "/en.stop.stemmed",
                "--one2many=" + heuristic,
                "--is_stemming",
                "--is_doc_stemmed"
              },
              fs,
              conf);

      long start = System.currentTimeMillis();
      qr.init(conf, fs);
      qr.runQueries(conf);
      long end = System.currentTimeMillis();

      System.err.println(
          "Total query time for heuristic " + heuristic + ":" + (end - start) + "ms");
    }
    ivory.regression.sigir2013.cdec.EnAr_TREC02.initialize();
    ivory.regression.sigir2013.cdec.EnAr_TREC02.verifyAllResults(
        qr.getModels(),
        qr.getAllResults(),
        qr.getDocnoMapping(),
        new Qrels("data/" + PATH + "/qrels." + PATH + ".txt"));

    System.err.println("Done!");
  }
Example No. 24
 public static void main(String[] args) throws IOException {
   FileSystem fs = FileSystem.get(new Configuration());
   fs.copyFromLocalFile(new Path(args[0]), new Path(args[1]));
 }
Example No. 25
  /** Execute a query plan using Hadoop. */
  @SuppressWarnings({"deprecation", "unchecked"})
  @Override
  public int execute(DriverContext driverContext) {

    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
    ioPrepareCache.clear();

    boolean success = true;

    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    Path emptyScratchDir;

    MapWork mWork = work.getMapWork();
    ReduceWork rWork = work.getReduceWork();

    try {
      if (ctx == null) {
        ctx = new Context(job);
        ctxCreated = true;
      }

      emptyScratchDir = ctx.getMRTmpPath();
      FileSystem fs = emptyScratchDir.getFileSystem(job);
      fs.mkdirs(emptyScratchDir);
    } catch (IOException e) {
      e.printStackTrace();
      console.printError(
          "Error launching map-reduce job",
          "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 5;
    }

    HiveFileFormatUtils.prepareJobOutput(job);
    // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
    job.setOutputFormat(HiveOutputFormatImpl.class);

    job.setMapperClass(ExecMapper.class);

    job.setMapOutputKeyClass(HiveKey.class);
    job.setMapOutputValueClass(BytesWritable.class);

    try {
      String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
      job.setPartitionerClass(JavaUtils.loadClass(partitioner));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    if (mWork.getNumMapTasks() != null) {
      job.setNumMapTasks(mWork.getNumMapTasks().intValue());
    }

    if (mWork.getMaxSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
    }

    if (mWork.getMinSplitSize() != null) {
      HiveConf.setLongVar(
          job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
    }

    if (mWork.getMinSplitSizePerNode() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
          mWork.getMinSplitSizePerNode().longValue());
    }

    if (mWork.getMinSplitSizePerRack() != null) {
      HiveConf.setLongVar(
          job,
          HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
          mWork.getMinSplitSizePerRack().longValue());
    }

    job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
    job.setReducerClass(ExecReducer.class);

    // set input format information if necessary
    setInputAttributes(job);

    // Turn on speculative execution for reducers
    boolean useSpeculativeExecReducers =
        HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    HiveConf.setBoolVar(
        job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

    if (mWork.isUseBucketizedHiveInputFormat()) {
      inpFormat = BucketizedHiveInputFormat.class.getName();
    }

    LOG.info("Using " + inpFormat);

    try {
      job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    // No-Op - we don't really write anything here ..
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands
    // it
    String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
    String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
    if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
      String allJars =
          StringUtils.isNotBlank(auxJars)
              ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
              : addedJars;
      LOG.info("adding libjars: " + allJars);
      initializeFiles("tmpjars", allJars);
    }

    // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
    String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
    if (StringUtils.isNotBlank(addedFiles)) {
      initializeFiles("tmpfiles", addedFiles);
    }
    int returnVal = 0;
    boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

    if (noName) {
      // This is for a special case to ensure unit tests pass
      HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
    }
    String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
    // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
    if (StringUtils.isNotBlank(addedArchives)) {
      initializeFiles("tmparchives", addedArchives);
    }

    try {
      MapredLocalWork localwork = mWork.getMapRedLocalWork();
      if (localwork != null && localwork.hasStagedAlias()) {
        if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
          Path localPath = localwork.getTmpPath();
          Path hdfsPath = mWork.getTmpHDFSPath();

          FileSystem hdfs = hdfsPath.getFileSystem(job);
          FileSystem localFS = localPath.getFileSystem(job);
          FileStatus[] hashtableFiles = localFS.listStatus(localPath);
          int fileNumber = hashtableFiles.length;
          String[] fileNames = new String[fileNumber];

          for (int i = 0; i < fileNumber; i++) {
            fileNames[i] = hashtableFiles[i].getPath().getName();
          }

          // package and compress all the hashtable files to an archive file
          String stageId = this.getId();
          String archiveFileName = Utilities.generateTarFileName(stageId);
          localwork.setStageID(stageId);

          CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
          Path archivePath = Utilities.generateTarPath(localPath, stageId);
          LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

          // upload archive file to hdfs
          Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
          short replication = (short) job.getInt("mapred.submit.replication", 10);
          hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
          hdfs.setReplication(hdfsFilePath, replication);
          LOG.info("Upload 1 archive file  from" + archivePath + " to: " + hdfsFilePath);

          // add the archive file to distributed cache
          DistributedCache.createSymlink(job);
          DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
          LOG.info(
              "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
        }
      }
      work.configureJobConf(job);
      List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
      Utilities.setInputPaths(job, inputPaths);

      Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

      if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
        try {
          handleSampling(ctx, mWork, job);
          job.setPartitionerClass(HiveTotalOrderPartitioner.class);
        } catch (IllegalStateException e) {
          console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        } catch (Exception e) {
          LOG.error("Sampling error", e);
          console.printError(
              e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
          rWork.setNumReduceTasks(1);
          job.setNumReduceTasks(1);
        }
      }

      // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);
      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
          if (rWork != null) {
            statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
          }
          StatsCollectionContext sc = new StatsCollectionContext(job);
          sc.setStatsTmpDirs(statsTmpDir);
          if (!statsPublisher.init(sc)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw new HiveException(
                  ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
      }

      Utilities.createTmpDirs(job, mWork);
      Utilities.createTmpDirs(job, rWork);

      SessionState ss = SessionState.get();
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
          && ss != null) {
        TezSessionState session = ss.getTezSession();
        TezSessionPoolManager.getInstance().close(session, true);
      }

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);
      // replace it back
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
      }

      returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
      success = (returnVal == 0);
    } catch (Exception e) {
      e.printStackTrace();
      String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
      if (rj != null) {
        mesg = "Ended Job = " + rj.getJobID() + mesg;
      } else {
        mesg = "Job Submission failed" + mesg;
      }

      // Has to use full name to make sure it does not conflict with
      // org.apache.commons.lang.StringUtils
      console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));

      success = false;
      returnVal = 1;
    } finally {
      Utilities.clearWork(job);
      try {
        if (ctxCreated) {
          ctx.clear();
        }

        if (rj != null) {
          if (returnVal != 0) {
            rj.killJob();
          }
          jobID = rj.getID().toString();
        }
      } catch (Exception e) {
        LOG.warn("Failed while cleaning up ", e);
      } finally {
        HadoopJobExecHelper.runningJobs.remove(rj);
      }
    }

    // get the list of Dynamic partition paths
    try {
      if (rj != null) {
        if (mWork.getAliasToWork() != null) {
          for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
            op.jobClose(job, success);
          }
        }
        if (rWork != null) {
          rWork.getReducer().jobClose(job, success);
        }
      }
    } catch (Exception e) {
      // jobClose needs to execute successfully otherwise fail task
      if (success) {
        success = false;
        returnVal = 3;
        String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
    }

    return (returnVal);
  }
Example No. 26
 private void uploadToRemoteFileSystem(Path filePath) throws IOException {
   fs.copyFromLocalFile(filePath, workingPath);
 }
Example No. 27
  private void run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    LlapOptions options = optionsProcessor.processOptions(args);

    if (options == null) {
      // help
      return;
    }

    Path tmpDir = new Path(options.getDirectory());

    if (conf == null) {
      throw new Exception("Cannot load any configuration to run command");
    }

    FileSystem fs = FileSystem.get(conf);
    FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

    // needed so that the file is actually loaded into configuration.
    for (String f : NEEDED_CONFIGS) {
      conf.addResource(f);
      if (conf.getResource(f) == null) {
        throw new Exception("Unable to find required config file: " + f);
      }
    }
    for (String f : OPTIONAL_CONFIGS) {
      conf.addResource(f);
    }
    conf.reloadConfiguration();

    if (options.getName() != null) {
      // update service registry configs - caveat: this has nothing to do with the actual settings
      // as read by the AM
      // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
      // instances
      conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
    }

    if (options.getSize() != -1) {
      if (options.getCache() != -1) {
        Preconditions.checkArgument(
            options.getCache() < options.getSize(),
            "Cache has to be smaller than the container sizing");
      }
      if (options.getXmx() != -1) {
        Preconditions.checkArgument(
            options.getXmx() < options.getSize(),
            "Working memory has to be smaller than the container sizing");
      }
      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) {
        Preconditions.checkArgument(
            options.getXmx() + options.getCache() < options.getSize(),
            "Working memory + cache has to be smaller than the containing sizing ");
      }
    }

    final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
    if (options.getSize() != -1) {
      final long containerSize = options.getSize() / (1024 * 1024);
      Preconditions.checkArgument(
          containerSize >= minAlloc,
          "Container size should be greater than minimum allocation(%s)",
          minAlloc + "m");
      conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
    }

    if (options.getExecutors() != -1) {
      conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
      // TODO: vcpu settings - possibly when DRFA works right
    }

    if (options.getCache() != -1) {
      conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache());
    }

    if (options.getXmx() != -1) {
      // Note: Xmx is not the maximum heap value in JDK 8; subtract 50% of the survivor fraction
      // from it to get the memory actually usable before GC kicks in.
      conf.setLong(
          ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
          (long) (options.getXmx()) / (1024 * 1024));
    }

    for (Entry<Object, Object> props : options.getConfig().entrySet()) {
      conf.set((String) props.getKey(), (String) props.getValue());
    }

    URL logger = conf.getResource("llap-daemon-log4j2.properties");

    if (null == logger) {
      throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
    }

    Path home = new Path(System.getenv("HIVE_HOME"));
    Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin");

    if (!lfs.exists(home)) {
      throw new Exception("Unable to find HIVE_HOME:" + home);
    } else if (!lfs.exists(scripts)) {
      LOG.warn("Unable to find llap scripts:" + scripts);
    }

    Path libDir = new Path(tmpDir, "lib");

    String tezLibs = conf.get("tez.lib.uris");
    if (tezLibs == null) {
      LOG.warn("Missing tez.lib.uris in tez-site.xml");
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Copying tez libs from " + tezLibs);
    }
    lfs.mkdirs(libDir);
    fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
    CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true);
    lfs.delete(new Path(libDir, "tez.tar.gz"), false);

    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir);
    lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir);

    // copy default aux classes (json/hbase)

    for (String className : DEFAULT_AUX_CLASSES) {
      localizeJarForClass(lfs, libDir, className, false);
    }

    if (options.getIsHBase()) {
      try {
        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
        Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
        TableMapReduceUtil.addDependencyJars(fakeJob);
        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
        for (String jarPath : hbaseJars) {
          if (!jarPath.isEmpty()) {
            lfs.copyFromLocalFile(new Path(jarPath), libDir);
          }
        }
      } catch (Throwable t) {
        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
        LOG.error(err);
        System.err.println(err);
        throw new RuntimeException(t);
      }
    }

    String auxJars = options.getAuxJars();
    if (auxJars != null && !auxJars.isEmpty()) {
      // TODO: transitive dependencies warning?
      String[] jarPaths = auxJars.split(",");
      for (String jarPath : jarPaths) {
        if (!jarPath.isEmpty()) {
          lfs.copyFromLocalFile(new Path(jarPath), libDir);
        }
      }
    }

    Path confPath = new Path(tmpDir, "conf");
    lfs.mkdirs(confPath);

    for (String f : NEEDED_CONFIGS) {
      copyConfig(options, lfs, confPath, f);
    }
    for (String f : OPTIONAL_CONFIGS) {
      try {
        copyConfig(options, lfs, confPath, f);
      } catch (Throwable t) {
        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
      }
    }

    lfs.copyFromLocalFile(new Path(logger.toString()), confPath);

    // extract configs for processing by the python fragments in Slider
    JSONObject configs = new JSONObject();

    configs.put(
        ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));

    configs.put(
        HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
        HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));

    configs.put(
        HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));

    configs.put(
        ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));

    configs.put(
        ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));

    configs.put(
        ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
        HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));

    configs.put(
        YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
        conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

    FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
    OutputStreamWriter w = new OutputStreamWriter(os);
    configs.write(w);
    w.close();
    os.close();

    lfs.close();
    fs.close();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Exiting successfully");
    }
  }
Example No. 28
  /**
   * Launch application for the dag represented by this client.
   *
   * @throws YarnException
   * @throws IOException
   */
  public void startApplication() throws YarnException, IOException {
    Class<?>[] defaultClasses;

    if (applicationType.equals(YARN_APPLICATION_TYPE)) {
      // TODO restrict the security check to only check if security is enabled for webservices.
      if (UserGroupInformation.isSecurityEnabled()) {
        defaultClasses = DATATORRENT_SECURITY_CLASSES;
      } else {
        defaultClasses = DATATORRENT_CLASSES;
      }
    } else {
      throw new IllegalStateException(applicationType + " is not a valid application type.");
    }

    LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);

    if (resources != null) {
      localJarFiles.addAll(resources);
    }

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info(
        "Got Cluster metric info from ASM"
            + ", numNodeManagers="
            + clusterMetrics.getNumNodeManagers());

    // GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    // GetClusterNodesResponse clusterNodesResp =
    // rmClient.clientRM.getClusterNodes(clusterNodesReq);
    // LOG.info("Got Cluster node info from ASM");
    // for (NodeReport node : clusterNodesResp.getNodeReports()) {
    //  LOG.info("Got node report from ASM for"
    //           + ", nodeId=" + node.getNodeId()
    //           + ", nodeAddress" + node.getHttpAddress()
    //           + ", nodeRackName" + node.getRackName()
    //           + ", nodeNumContainers" + node.getNumContainers()
    //           + ", nodeHealthStatus" + node.getHealthReport());
    // }
    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info(
            "User ACL Info for Queue"
                + ", queueName="
                + aclInfo.getQueueName()
                + ", userAcl="
                + userAcl.name());
      }
    }

    // Get a new application id
    YarnClientApplication newApp = yarnClient.createApplication();
    appId = newApp.getNewApplicationResponse().getApplicationId();

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
    int amMemory = dag.getMasterMemoryMB();
    if (amMemory > maxMem) {
      LOG.info(
          "AM memory specified above max threshold of cluster. Using max value."
              + ", specified="
              + amMemory
              + ", max="
              + maxMem);
      amMemory = maxMem;
    }

    if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
      dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
    }

    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
    appContext.setApplicationType(this.applicationType);
    if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
      // appContext.setMaxAppAttempts(1); // no retries until Stram is HA
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Setup security tokens
    // If security is enabled get ResourceManager and NameNode delegation tokens.
    // Set these tokens on the container so that they are sent as part of application submission.
    // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken
    // is also used by ResourceManager to fetch the jars from HDFS and set them up for the
    // application master launch.
    if (UserGroupInformation.isSecurityEnabled()) {
      Credentials credentials = new Credentials();
      String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
      if (tokenRenewer == null || tokenRenewer.length() == 0) {
        throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
      }

      // For now, only getting tokens for the default file-system.
      FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
      try {
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
          for (Token<?> token : tokens) {
            LOG.info("Got dt for " + fs.getUri() + "; " + token);
          }
        }
      } finally {
        fs.close();
      }

      addRMDelegationToken(tokenRenewer, credentials);

      DataOutputBuffer dob = new DataOutputBuffer();
      credentials.writeTokenStorageToStream(dob);
      ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
      amContainer.setTokens(fsTokens);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    // copy required jar files to dfs, to be localized for containers
    FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
    try {
      Path appsBasePath =
          new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
      Path appPath = new Path(appsBasePath, appId.toString());

      String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));

      LOG.info("libjars: {}", libJarsCsv);
      dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv);
      LaunchContainerRunnable.addFilesToLocalResources(
          LocalResourceType.FILE, libJarsCsv, localResources, fs);

      if (archives != null) {
        String[] localFiles = archives.split(",");
        String archivesCsv = copyFromLocal(fs, appPath, localFiles);
        LOG.info("archives: {}", archivesCsv);
        dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
        LaunchContainerRunnable.addFilesToLocalResources(
            LocalResourceType.ARCHIVE, archivesCsv, localResources, fs);
      }

      if (files != null) {
        String[] localFiles = files.split(",");
        String filesCsv = copyFromLocal(fs, appPath, localFiles);
        LOG.info("files: {}", filesCsv);
        dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
        LaunchContainerRunnable.addFilesToLocalResources(
            LocalResourceType.FILE, filesCsv, localResources, fs);
      }

      dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
      if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) {
          /* which would be the most likely case */
        Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
        // use conf client side to pickup any proxy settings from dt-site.xml
        dag.setAttribute(
            OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf));
      }
      if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
        dag.setAttribute(
            LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
      }

      // Set the log4j properties if needed
      if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(appPath, "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
      }

      if (originalAppId != null) {
        Path origAppPath = new Path(appsBasePath, this.originalAppId);
        LOG.info("Restart from {}", origAppPath);
        copyInitialState(origAppPath);
      }

      // push logical plan to DFS location
      Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
      FSDataOutputStream outStream = fs.create(cfgDst, true);
      LogicalPlan.write(this.dag, outStream);
      outStream.close();

      Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
      outStream = fs.create(launchConfigDst, true);
      conf.writeXml(outStream);
      outStream.close();

      FileStatus topologyFileStatus = fs.getFileStatus(cfgDst);
      LocalResource topologyRsrc = Records.newRecord(LocalResource.class);
      topologyRsrc.setType(LocalResourceType.FILE);
      topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
      topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri()));
      topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime());
      topologyRsrc.setSize(topologyFileStatus.getLen());
      localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc);

      // Set local resource info into app master container launch context
      amContainer.setLocalResources(localResources);

      // Set the necessary security tokens as needed
      // amContainer.setContainerTokens(containerToken);
      // Set the env variables to be setup in the env where the application master will be run
      LOG.info("Set the environment for the application master");
      Map<String, String> env = new HashMap<String, String>();

      // Add application jar(s) location to classpath
      // At some point we should not be required to add
      // the hadoop specific classpaths to the env.
      // It should be provided out of the box.
      // For now setting all required classpaths including
      // the classpath to "." for the application jar(s)
      // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
      // StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
      StringBuilder classPathEnv = new StringBuilder("./*");
      String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
      for (String c :
          StringUtils.isBlank(classpath)
              ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH
              : classpath.split(",")) {
        if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
          // SPOI-2501
          continue;
        }
        classPathEnv.append(':');
        classPathEnv.append(c.trim());
      }
      env.put("CLASSPATH", classPathEnv.toString());
      // propagate to replace node managers user name (effective in non-secure mode)
      env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());

      amContainer.setEnvironment(env);

      // Set the necessary command to execute the application master
      ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30);

      // Set java executable command
      LOG.info("Setting up app master command");
      vargs.add(javaCmd);
      if (dag.isDebug()) {
        vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
      }
      // Set Xmx based on the AM memory size
      // (default heap size is 75% of the AM container memory)
      vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
      vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
      vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin");
      vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
      vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
      vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
      if (dag.isDebug()) {
        vargs.add("-Dlog4j.debug=true");
      }

      String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
      if (loggersLevel != null) {
        vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel));
      }
      vargs.add(StreamingAppMaster.class.getName());
      vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
      vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

      // Get final command
      StringBuilder command = new StringBuilder(9 * vargs.size());
      for (CharSequence str : vargs) {
        command.append(str).append(" ");
      }

      LOG.info("Completed setting up app master command " + command.toString());
      List<String> commands = new ArrayList<String>();
      commands.add(command.toString());
      amContainer.setCommands(commands);

      // Set up resource type requirements
      // For now, only memory is supported so we set memory requirements
      Resource capability = Records.newRecord(Resource.class);
      capability.setMemory(amMemory);
      appContext.setResource(capability);

      // Service data is a binary blob that can be passed to the application
      // Not needed in this scenario
      // amContainer.setServiceData(serviceData);
      appContext.setAMContainerSpec(amContainer);

      // Set the priority for the application master
      Priority pri = Records.newRecord(Priority.class);
      pri.setPriority(amPriority);
      appContext.setPriority(pri);
      // Set the queue to which this application is to be submitted in the RM
      appContext.setQueue(queueName);

      // Submit the application to the applications manager
      // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
      // Ignore the response: either a valid response object is returned on success,
      // or an exception is thrown to denote a failure.
      String specStr =
          Objects.toStringHelper("Submitting application: ")
              .add("name", appContext.getApplicationName())
              .add("queue", appContext.getQueue())
              .add("user", UserGroupInformation.getLoginUser())
              .add("resource", appContext.getResource())
              .toString();
      LOG.info(specStr);
      if (dag.isDebug()) {
        // LOG.info("Full submission context: " + appContext);
      }
      yarnClient.submitApplication(appContext);
    } finally {
      fs.close();
    }
  }
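
The local-resource setup above follows a single pattern: copy the file to the application path on DFS, then describe it to YARN by URL, size, and modification time. A minimal sketch of that pattern as a reusable helper is shown below; the helper name addFileResource and its parameters are illustrative assumptions (not part of the original client), and the types are the same Hadoop/YARN classes used above.

  private static void addFileResource(
      FileSystem fs, Path appPath, Map<String, LocalResource> localResources, String localFile)
      throws IOException {
    Path src = new Path(localFile);
    Path dst = new Path(appPath, src.getName());
    // Copy the local file into the application directory on DFS
    // (delSrc=false keeps the local copy, overwrite=true replaces any stale upload).
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus status = fs.getFileStatus(dst);
    LocalResource rsrc = Records.newRecord(LocalResource.class);
    rsrc.setType(LocalResourceType.FILE);
    rsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc.setResource(ConverterUtils.getYarnUrlFromURI(dst.toUri()));
    rsrc.setTimestamp(status.getModificationTime());
    rsrc.setSize(status.getLen());
    // The map key becomes the link name visible in the container's working directory.
    localResources.put(src.getName(), rsrc);
  }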
Exemplo n.º 29
0
  public DAG createDAG(
      FileSystem remoteFs,
      Configuration conf,
      Path remoteStagingDir,
      int numMapper,
      int numReducer,
      int iReduceStagesCount,
      int numIReducer,
      long mapSleepTime,
      int mapSleepCount,
      long reduceSleepTime,
      int reduceSleepCount,
      long iReduceSleepTime,
      int iReduceSleepCount,
      boolean writeSplitsToDFS,
      boolean generateSplitsInAM)
      throws IOException, YarnException {

    Configuration mapStageConf = new JobConf(conf);
    mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
    mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    if (numIReducer == 0 && numReducer == 0) {
      mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    }

    MRHelpers.translateVertexConfToTez(mapStageConf);

    Configuration[] intermediateReduceStageConfs = null;
    if (iReduceStagesCount > 0 && numIReducer > 0) {
      intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
      for (int i = 1; i <= iReduceStagesCount; ++i) {
        JobConf iReduceStageConf = new JobConf(conf);
        iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
        iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
        iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
        iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
        iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
        iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
        iReduceStageConf.set(
            MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

        MRHelpers.translateVertexConfToTez(iReduceStageConf);
        intermediateReduceStageConfs[i - 1] = iReduceStageConf;
      }
    }

    Configuration finalReduceConf = null;
    if (numReducer > 0) {
      finalReduceConf = new JobConf(conf);
      finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
      finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
      finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
      finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
      finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
      finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
      finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

      MRHelpers.translateVertexConfToTez(finalReduceConf);
    }

    MRHelpers.doJobClientMagic(mapStageConf);
    if (iReduceStagesCount > 0 && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {
        MRHelpers.doJobClientMagic(intermediateReduceStageConfs[i]);
      }
    }
    if (numReducer > 0) {
      MRHelpers.doJobClientMagic(finalReduceConf);
    }

    InputSplitInfo inputSplitInfo = null;
    if (!generateSplitsInAM) {
      if (writeSplitsToDFS) {
        LOG.info("Writing splits to DFS");
        try {
          inputSplitInfo = MRHelpers.generateInputSplits(mapStageConf, remoteStagingDir);
        } catch (InterruptedException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        } catch (ClassNotFoundException e) {
          throw new TezUncheckedException("Failed to generate input splits", e);
        }
      } else {
        try {
          LOG.info("Creating in-mem splits");
          inputSplitInfo = MRHelpers.generateInputSplitsToMem(mapStageConf);
        } catch (ClassNotFoundException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        } catch (InterruptedException e) {
          throw new TezUncheckedException("Could not generate input splits", e);
        }
      }
      if (inputSplitInfo.getCredentials() != null) {
        this.credentials.addAll(inputSplitInfo.getCredentials());
      }
    }

    DAG dag = new DAG("MRRSleepJob");
    String jarPath = ClassUtil.findContainingJar(getClass());
    if (jarPath == null) {
      throw new TezUncheckedException(
          "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
    }
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
    remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

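    // Obtain HDFS delegation tokens for the staged jar so containers can localize it in a secure cluster.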
    TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] {remoteJarPath}, mapStageConf);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc =
        LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION,
            jarFileStatus.getLen(),
            jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

    List<Vertex> vertices = new ArrayList<Vertex>();

    byte[] mapInputPayload = null;
    byte[] mapUserPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    if (writeSplitsToDFS || generateSplitsInAM) {
      mapInputPayload = MRHelpers.createMRInputPayload(mapUserPayload, null);
    } else {
      mapInputPayload =
          MRHelpers.createMRInputPayload(mapUserPayload, inputSplitInfo.getSplitsProto());
    }
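    // -1 leaves the map parallelism undetermined so the AM-side split generator can set it at runtime.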
    int numTasks = generateSplitsInAM ? -1 : numMapper;

    Vertex mapVertex =
        new Vertex(
            "map",
            new ProcessorDescriptor(MapProcessor.class.getName()).setUserPayload(mapUserPayload),
            numTasks,
            MRHelpers.getMapResource(mapStageConf));
    if (!generateSplitsInAM) {
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    }

    if (writeSplitsToDFS) {
      Map<String, LocalResource> mapLocalResources = new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(remoteFs, inputSplitInfo, mapLocalResources);
      mapVertex.setTaskLocalFiles(mapLocalResources);
    } else {
      mapVertex.setTaskLocalFiles(commonLocalResources);
    }

    if (generateSplitsInAM) {
      MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputAMSplitGenerator.class);
    } else {
      if (writeSplitsToDFS) {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, null);
      } else {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputSplitDistributor.class);
      }
    }
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0 && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {
        Configuration iconf = intermediateReduceStageConfs[i];
        byte[] iReduceUserPayload = MRHelpers.createUserPayloadFromConf(iconf);
        Vertex ivertex =
            new Vertex(
                "ireduce" + (i + 1),
                new ProcessorDescriptor(ReduceProcessor.class.getName())
                    .setUserPayload(iReduceUserPayload),
                numIReducer,
                MRHelpers.getReduceResource(iconf));
        ivertex.setTaskLocalFiles(commonLocalResources);
        vertices.add(ivertex);
      }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
      byte[] reducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
      finalReduceVertex =
          new Vertex(
              "reduce",
              new ProcessorDescriptor(ReduceProcessor.class.getName())
                  .setUserPayload(reducePayload),
              numReducer,
              MRHelpers.getReduceResource(finalReduceConf));
      finalReduceVertex.setTaskLocalFiles(commonLocalResources);
      MRHelpers.addMROutputLegacy(finalReduceVertex, reducePayload);
      vertices.add(finalReduceVertex);
    } else {
      // Map only job
      MRHelpers.addMROutputLegacy(mapVertex, mapUserPayload);
    }

    Configuration partitionerConf = new Configuration(false);
    partitionerConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfigurer edgeConf =
        OrderedPartitionedKVEdgeConfigurer.newBuilder(
                IntWritable.class.getName(),
                IntWritable.class.getName(),
                MRPartitioner.class.getName(),
                partitionerConf)
            .configureInput()
            .useLegacyInput()
            .done()
            .build();

    for (int i = 0; i < vertices.size(); ++i) {
      dag.addVertex(vertices.get(i));
      if (i != 0) {
        dag.addEdge(
            new Edge(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
      }
    }

    return dag;
  }
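
The dag_job.jar handling above (copy the local jar into the staging directory, qualify the path, then wrap it in a LocalResource) can be read as a small standalone step. A hedged sketch of just that step follows; the helper name stageJar and its parameters are assumptions for illustration, using the same Hadoop/YARN types as the method above.

  private static LocalResource stageJar(
      FileSystem remoteFs, Path remoteStagingDir, String localJarPath, String targetName)
      throws IOException {
    Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, targetName));
    // Upload the local jar into the job's staging directory.
    remoteFs.copyFromLocalFile(new Path(localJarPath), remoteJarPath);
    FileStatus jarStatus = remoteFs.getFileStatus(remoteJarPath);
    // The size and modification time must match the uploaded file exactly,
    // otherwise container localization will reject the resource.
    return LocalResource.newInstance(
        ConverterUtils.getYarnUrlFromPath(remoteJarPath),
        LocalResourceType.FILE,
        LocalResourceVisibility.APPLICATION,
        jarStatus.getLen(),
        jarStatus.getModificationTime());
  }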
Exemplo n.º 30
0
  @Override
  public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME);
    // blacklisting disabled to prevent scheduling issues
    conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
      conf.set(
          MRJobConfig.MR_AM_STAGING_DIR,
          new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath());
    }

    if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) {
      // nothing defined; set a quick delete value
      conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0L);
    }

    File appJarLocalFile = new File(MiniTezCluster.APPJAR);

    if (!appJarLocalFile.exists()) {
      String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting.";
      LOG.info(message);
      throw new TezUncheckedException(message);
    }

    FileSystem fs = FileSystem.get(conf);
    Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir"));
    Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar");
    // Copy AppJar and make it public.
    Path appMasterJar = new Path(MiniTezCluster.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appRemoteJar);
    fs.setPermission(appRemoteJar, new FsPermission("777"));

    conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString());
    LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS));

    // Both VMEM and PMEM monitoring are disabled for the mini cluster.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
      Path stagingPath =
          FileContext.getFileContext(conf)
              .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
      /*
       * Re-configure the staging path on Windows if the file system is localFs.
       * We need to use an absolute path that contains the drive letter. The unit
       * test could run on a different drive than the AM. We can run into the
       * issue that job files are localized to the drive the test runs on,
       * while the AM starts on a different drive and fails to find the job
       * metafiles. Using an absolute path avoids this ambiguity.
       */
      if (Path.WINDOWS) {
        if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
          conf.set(
              MRJobConfig.MR_AM_STAGING_DIR,
              new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
        }
      }
      FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
      if (fc.util().exists(stagingPath)) {
        LOG.info(stagingPath + " exists! deleting...");
        fc.delete(stagingPath, true);
      }
      LOG.info("mkdir: " + stagingPath);
      fc.mkdir(stagingPath, null, true);

      // create the job history "done" directory as well
      String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
      Path doneDirPath = fc.makeQualified(new Path(doneDir));
      fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
      throw new TezUncheckedException("Could not create staging directory. ", e);
    }
    conf.set(MRConfig.MASTER_ADDRESS, "test");

    // configure the shuffle service in NM
    conf.setStrings(
        YarnConfiguration.NM_AUX_SERVICES,
        new String[] {ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID});
    conf.setClass(
        String.format(
            YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID),
        ShuffleHandler.class,
        Service.class);

    // Use a non-standard shuffle port (0 lets the NM pick a free ephemeral port)
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(
        YarnConfiguration.NM_CONTAINER_EXECUTOR,
        DefaultContainerExecutor.class,
        ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    super.serviceInit(conf);
  }
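
For context, a minimal usage sketch of this mini cluster is shown below. It assumes the standard YARN Service lifecycle (init/start/stop), that MiniTezCluster.APPJAR points at an existing Tez application jar, and uses placeholder paths; it is illustrative, not taken from the original tests.

  MiniTezCluster miniCluster = new MiniTezCluster("example-cluster");
  Configuration conf = new Configuration();
  miniCluster.init(conf);   // triggers serviceInit(conf) above
  miniCluster.start();
  try {
    // The cluster's effective configuration carries the TEZ_LIB_URIS and
    // staging directory set up in serviceInit.
    Configuration clusterConf = miniCluster.getConfig();
    FileSystem fs = FileSystem.get(clusterConf);
    // Stage a local input file into the cluster's file system.
    fs.copyFromLocalFile(new Path("/tmp/local-input.txt"), new Path("/tmp/remote-input.txt"));
  } finally {
    miniCluster.stop();
  }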