/**
 * Creates the JAR file with the global files on the driver and then uploads it to the job folder
 * on (H)DFS.
 *
 * @return the map to be used as the "global" resources when submitting Evaluators.
 * @throws IOException if the creation of the JAR or the upload fails
 */
@Override
public synchronized Map<String, LocalResource> call() throws IOException {
  final Map<String, LocalResource> globalResources = new HashMap<>(1);
  if (!this.isUploaded) {
    this.pathToGlobalJar = this.uploader.uploadToJobFolder(makeGlobalJar());
    this.isUploaded = true;
  }

  final LocalResource updatedGlobalJarResource =
      this.uploader.makeLocalResourceForJarFile(this.pathToGlobalJar);

  if (this.globalJarResource != null
      && this.globalJarResource.getTimestamp() != updatedGlobalJarResource.getTimestamp()) {
    LOG.log(Level.WARNING,
        "The global JAR LocalResource timestamp has been changed from "
            + this.globalJarResource.getTimestamp()
            + " to " + updatedGlobalJarResource.getTimestamp());
  }
  this.globalJarResource = updatedGlobalJarResource;

  // For now, always rewrite the information due to REEF-348
  globalResources.put(this.fileNames.getGlobalFolderPath(), updatedGlobalJarResource);

  return globalResources;
}
private void setupAppMasterJar(Path jarPath, LocalResource appMasterJar) throws IOException {
  FileStatus jarStat = FileSystem.get(conf).getFileStatus(jarPath);
  appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
  appMasterJar.setSize(jarStat.getLen());
  appMasterJar.setTimestamp(jarStat.getModificationTime());
  appMasterJar.setType(LocalResourceType.FILE);
  appMasterJar.setVisibility(LocalResourceVisibility.PUBLIC);
}
public static void registerLocalResource(
    FileSystem fs, Path remoteRsrcPath, LocalResource localResource) throws IOException {
  FileStatus jarStat = fs.getFileStatus(remoteRsrcPath);
  localResource.setResource(ConverterUtils.getYarnUrlFromURI(remoteRsrcPath.toUri()));
  localResource.setSize(jarStat.getLen());
  localResource.setTimestamp(jarStat.getModificationTime());
  localResource.setType(LocalResourceType.FILE);
  localResource.setVisibility(LocalResourceVisibility.PUBLIC);
}
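A helper like registerLocalResource is normally paired with a freshly created LocalResource record, and the resulting map is handed to the ContainerLaunchContext. The following is a minimal usage sketch, not taken from the code above; the Configuration named conf, the HDFS path, the "app.jar" link name, and the surrounding method (which would declare throws IOException) are illustrative assumptions.

// Minimal usage sketch (assumptions: conf is a Hadoop Configuration, the JAR has
// already been copied to the hypothetical path below, and this runs inside a
// method that declares throws IOException).
FileSystem fs = FileSystem.get(conf);
Path remoteJar = new Path("/apps/demo/app.jar"); // hypothetical staged location
LocalResource jarResource = Records.newRecord(LocalResource.class);
registerLocalResource(fs, remoteJar, jarResource);

Map<String, LocalResource> localResources = new HashMap<>();
localResources.put("app.jar", jarResource); // the key becomes the link name in the container
ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
ctx.setLocalResources(localResources);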
private LocalResource createLocalResource(String name) {
  LocalResource lr = LocalResource.newInstance(
      URL.newInstance(null, "localhost", 2321, name),
      LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION,
      1, 1000000);
  return lr;
}
public static LocalResource newLocalResource(
    URL url,
    LocalResourceType type,
    LocalResourceVisibility visibility,
    long size,
    long timestamp,
    boolean shouldBeUploadedToSharedCache) {
  LocalResource resource = recordFactory.newRecordInstance(LocalResource.class);
  resource.setResource(url);
  resource.setType(type);
  resource.setVisibility(visibility);
  resource.setSize(size);
  resource.setTimestamp(timestamp);
  resource.setShouldBeUploadedToSharedCache(shouldBeUploadedToSharedCache);
  return resource;
}
static LocalResource newYarnAppResource(
    FileSystem fs, Path path, LocalResourceType type, LocalResourceVisibility vis)
    throws IOException {
  Path qualified = fs.makeQualified(path);
  FileStatus status = fs.getFileStatus(qualified);
  LocalResource resource = Records.newRecord(LocalResource.class);
  resource.setType(type);
  resource.setVisibility(vis);
  resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
  resource.setTimestamp(status.getModificationTime());
  resource.setSize(status.getLen());
  return resource;
}
public static LocalResource newLocalResource(
    URL url, LocalResourceType type, LocalResourceVisibility visibility, long size,
    long timestamp) {
  LocalResource resource = recordFactory.newRecordInstance(LocalResource.class);
  resource.setResource(url);
  resource.setType(type);
  resource.setVisibility(visibility);
  resource.setSize(size);
  resource.setTimestamp(timestamp);
  return resource;
}
/** Creates a LocalResource instance for the JAR file referenced by the given Path. */
public LocalResource getLocalResourceForPath(final Path jarPath, final LocalResourceType type)
    throws IOException {
  final FileStatus status =
      FileContext.getFileContext(fileSystem.getUri()).getFileStatus(jarPath);
  final LocalResource localResource = Records.newRecord(LocalResource.class);
  localResource.setType(type);
  localResource.setVisibility(LocalResourceVisibility.APPLICATION);
  localResource.setResource(ConverterUtils.getYarnUrlFromPath(status.getPath()));
  localResource.setTimestamp(status.getModificationTime());
  localResource.setSize(status.getLen());
  return localResource;
}
/**
 * @param file Path.
 * @param fs File system.
 * @param type Local resource type.
 * @return Local resource describing the file.
 * @throws Exception If failed.
 */
public static LocalResource setupFile(Path file, FileSystem fs, LocalResourceType type)
    throws Exception {
  LocalResource resource = Records.newRecord(LocalResource.class);

  file = fs.makeQualified(file);
  FileStatus stat = fs.getFileStatus(file);

  resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
  resource.setSize(stat.getLen());
  resource.setTimestamp(stat.getModificationTime());
  resource.setType(type);
  resource.setVisibility(LocalResourceVisibility.APPLICATION);

  return resource;
}
/**
 * Create an AM resource from the file at the given destination path.
 *
 * @param hdfs HDFS or other filesystem in use
 * @param destPath dest path in filesystem
 * @param resourceType resource type
 * @return the resource set up with application-level visibility and the timestamp & size set
 *     from the file stats.
 * @throws IOException if the file status cannot be read
 */
public static LocalResource createAmResource(
    FileSystem hdfs, Path destPath, LocalResourceType resourceType) throws IOException {
  FileStatus destStatus = hdfs.getFileStatus(destPath);
  LocalResource amResource = Records.newRecord(LocalResource.class);
  amResource.setType(resourceType);
  // Set visibility of the resource
  // Setting to most private option
  amResource.setVisibility(LocalResourceVisibility.APPLICATION);
  // Set the resource to be copied over
  amResource.setResource(ConverterUtils.getYarnUrlFromPath(destPath));
  // Set timestamp and length of file so that the framework
  // can do basic sanity checks for the local resource
  // after it has been copied over to ensure it is the same
  // resource the client intended to use with the application
  amResource.setTimestamp(destStatus.getModificationTime());
  amResource.setSize(destStatus.getLen());
  return amResource;
}
/*
 * Helper method to create a yarn local resource.
 */
private LocalResource createLocalResource(
    FileSystem remoteFs, Path file, LocalResourceType type, LocalResourceVisibility visibility) {
  FileStatus fstat;
  try {
    fstat = remoteFs.getFileStatus(file);
  } catch (IOException e) {
    // Propagate the failure instead of swallowing it: continuing with a null
    // FileStatus would only fail later with a NullPointerException.
    throw new RuntimeException("Unable to get file status for " + file, e);
  }

  URL resourceURL = ConverterUtils.getYarnUrlFromPath(file);
  long resourceSize = fstat.getLen();
  long resourceModificationTime = fstat.getModificationTime();
  LOG.info("Resource modification time: " + resourceModificationTime);

  LocalResource lr = Records.newRecord(LocalResource.class);
  lr.setResource(resourceURL);
  lr.setType(type);
  lr.setSize(resourceSize);
  lr.setVisibility(visibility);
  lr.setTimestamp(resourceModificationTime);
  return lr;
}
/*
 * Helper function to retrieve the basename of a local resource
 */
public String getBaseName(LocalResource lr) {
  return FilenameUtils.getName(lr.getResource().getFile());
}
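Helpers such as createAmResource and getBaseName above are typically combined so that each staged file is registered under its own file name, which becomes the link name inside the container. A minimal sketch, assuming both helpers are visible in the calling class, fs is an initialized FileSystem, the surrounding method declares throws IOException, and the HDFS paths are hypothetical:

// Minimal sketch: register several staged files under their basenames.
Map<String, LocalResource> localResources = new HashMap<>();
for (Path staged : Arrays.asList(
    new Path("/apps/demo/lib/app.jar"),           // hypothetical staged JAR
    new Path("/apps/demo/conf/app-site.xml"))) {  // hypothetical staged config file
  LocalResource rsrc = createAmResource(fs, staged, LocalResourceType.FILE);
  // Key the resource by its basename so it is linked under that name in the container.
  localResources.put(getBaseName(rsrc), rsrc);
}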
/**
 * Launch application for the dag represented by this client.
 *
 * @throws YarnException
 * @throws IOException
 */
public void startApplication() throws YarnException, IOException {
  Class<?>[] defaultClasses;

  if (applicationType.equals(YARN_APPLICATION_TYPE)) {
    // TODO restrict the security check to only check if security is enabled for webservices.
    if (UserGroupInformation.isSecurityEnabled()) {
      defaultClasses = DATATORRENT_SECURITY_CLASSES;
    } else {
      defaultClasses = DATATORRENT_CLASSES;
    }
  } else {
    throw new IllegalStateException(applicationType + " is not a valid application type.");
  }

  LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);

  if (resources != null) {
    localJarFiles.addAll(resources);
  }

  YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
  LOG.info("Got Cluster metric info from ASM"
      + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

  // GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
  // GetClusterNodesResponse clusterNodesResp =
  //     rmClient.clientRM.getClusterNodes(clusterNodesReq);
  // LOG.info("Got Cluster node info from ASM");
  // for (NodeReport node : clusterNodesResp.getNodeReports()) {
  //   LOG.info("Got node report from ASM for"
  //       + ", nodeId=" + node.getNodeId()
  //       + ", nodeAddress" + node.getHttpAddress()
  //       + ", nodeRackName" + node.getRackName()
  //       + ", nodeNumContainers" + node.getNumContainers()
  //       + ", nodeHealthStatus" + node.getHealthReport());
  // }

  List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
  for (QueueUserACLInfo aclInfo : listAclInfo) {
    for (QueueACL userAcl : aclInfo.getUserAcls()) {
      LOG.info("User ACL Info for Queue"
          + ", queueName=" + aclInfo.getQueueName()
          + ", userAcl=" + userAcl.name());
    }
  }

  // Get a new application id
  YarnClientApplication newApp = yarnClient.createApplication();
  appId = newApp.getNewApplicationResponse().getApplicationId();

  // Dump out information about cluster capability as seen by the resource manager
  int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
  LOG.info("Max mem capability of resources in this cluster " + maxMem);
  int amMemory = dag.getMasterMemoryMB();
  if (amMemory > maxMem) {
    LOG.info("AM memory specified above max threshold of cluster. Using max value."
        + ", specified=" + amMemory
        + ", max=" + maxMem);
    amMemory = maxMem;
  }

  if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
    dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
  }

  // Create launch context for app master
  LOG.info("Setting up application submission context for ASM");
  ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

  // set the application id
  appContext.setApplicationId(appId);
  // set the application name
  appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
  appContext.setApplicationType(this.applicationType);
  if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
    // appContext.setMaxAppAttempts(1); // no retries until Stram is HA
  }

  // Set up the container launch context for the application master
  ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

  // Setup security tokens
  // If security is enabled get ResourceManager and NameNode delegation tokens.
  // Set these tokens on the container so that they are sent as part of application submission.
  // This also sets them up for renewal by ResourceManager. The NameNode delegation token
  // is also used by ResourceManager to fetch the jars from HDFS and set them up for the
  // application master launch.
  if (UserGroupInformation.isSecurityEnabled()) {
    Credentials credentials = new Credentials();
    String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
    if (tokenRenewer == null || tokenRenewer.length() == 0) {
      throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
    }

    // For now, only getting tokens for the default file-system.
    FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
    try {
      final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
      if (tokens != null) {
        for (Token<?> token : tokens) {
          LOG.info("Got dt for " + fs.getUri() + "; " + token);
        }
      }
    } finally {
      fs.close();
    }

    addRMDelegationToken(tokenRenewer, credentials);

    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    amContainer.setTokens(fsTokens);
  }

  // set local resources for the application master
  // local files or archives as needed
  // In this scenario, the jar file for the application master is part of the local resources
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

  // copy required jar files to dfs, to be localized for containers
  FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
  try {
    Path appsBasePath =
        new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
    Path appPath = new Path(appsBasePath, appId.toString());

    String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));

    LOG.info("libjars: {}", libJarsCsv);
    dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv);
    LaunchContainerRunnable.addFilesToLocalResources(
        LocalResourceType.FILE, libJarsCsv, localResources, fs);

    if (archives != null) {
      String[] localFiles = archives.split(",");
      String archivesCsv = copyFromLocal(fs, appPath, localFiles);
      LOG.info("archives: {}", archivesCsv);
      dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
      LaunchContainerRunnable.addFilesToLocalResources(
          LocalResourceType.ARCHIVE, archivesCsv, localResources, fs);
    }

    if (files != null) {
      String[] localFiles = files.split(",");
      String filesCsv = copyFromLocal(fs, appPath, localFiles);
      LOG.info("files: {}", filesCsv);
      dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
      LaunchContainerRunnable.addFilesToLocalResources(
          LocalResourceType.FILE, filesCsv, localResources, fs);
    }

    dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
    if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) {
      /* which would be the most likely case */
      Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
      // use conf client side to pickup any proxy settings from dt-site.xml
      dag.setAttribute(
          OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf));
    }
    if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
      dag.setAttribute(
          LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
    }

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
      Path log4jSrc = new Path(log4jPropFile);
      Path log4jDst = new Path(appPath, "log4j.props");
      fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
      FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
      LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
      log4jRsrc.setType(LocalResourceType.FILE);
      log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
      log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
      log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
      log4jRsrc.setSize(log4jFileStatus.getLen());
      localResources.put("log4j.properties", log4jRsrc);
    }

    if (originalAppId != null) {
      Path origAppPath = new Path(appsBasePath, this.originalAppId);
      LOG.info("Restart from {}", origAppPath);
      copyInitialState(origAppPath);
    }

    // push logical plan to DFS location
    Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
    FSDataOutputStream outStream = fs.create(cfgDst, true);
    LogicalPlan.write(this.dag, outStream);
    outStream.close();

    Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
    outStream = fs.create(launchConfigDst, true);
    conf.writeXml(outStream);
    outStream.close();

    FileStatus topologyFileStatus = fs.getFileStatus(cfgDst);
    LocalResource topologyRsrc = Records.newRecord(LocalResource.class);
    topologyRsrc.setType(LocalResourceType.FILE);
    topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri()));
    topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime());
    topologyRsrc.setSize(topologyFileStatus.getLen());
    localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc);

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // Add application jar(s) location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar(s)
    // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
    // StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
    StringBuilder classPathEnv = new StringBuilder("./*");
    String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
    for (String c : StringUtils.isBlank(classpath)
        ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH
        : classpath.split(",")) {
      if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
        // SPOI-2501
        continue;
      }
      classPathEnv.append(':');
      classPathEnv.append(c.trim());
    }
    env.put("CLASSPATH", classPathEnv.toString());
    // propagate to replace node managers user name (effective in non-secure mode)
    env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(javaCmd);
    if (dag.isDebug()) {
      vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
    }
    // Set Xmx based on am memory size
    // default heap size 75% of total memory
    vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
    vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
    vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin");
    vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
    vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
    vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
    if (dag.isDebug()) {
      vargs.add("-Dlog4j.debug=true");
    }

    String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
    if (loggersLevel != null) {
      vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel));
    }
    vargs.add(StreamingAppMaster.class.getName());
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder(9 * vargs.size());
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);
    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(queueName);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    String specStr = Objects.toStringHelper("Submitting application: ")
        .add("name", appContext.getApplicationName())
        .add("queue", appContext.getQueue())
        .add("user", UserGroupInformation.getLoginUser())
        .add("resource", appContext.getResource())
        .toString();
    LOG.info(specStr);
    if (dag.isDebug()) {
      // LOG.info("Full submission context: " + appContext);
    }
    yarnClient.submitApplication(appContext);
  } finally {
    fs.close();
  }
}
public DAG createDAG(
    FileSystem remoteFs,
    Configuration conf,
    Path remoteStagingDir,
    int numMapper,
    int numReducer,
    int iReduceStagesCount,
    int numIReducer,
    long mapSleepTime,
    int mapSleepCount,
    long reduceSleepTime,
    int reduceSleepCount,
    long iReduceSleepTime,
    int iReduceSleepCount,
    boolean writeSplitsToDFS,
    boolean generateSplitsInAM) throws IOException, YarnException {

  Configuration mapStageConf = new JobConf(conf);
  mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
  mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
  mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
  mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
  mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
  mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
  if (numIReducer == 0 && numReducer == 0) {
    mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
  }

  MRHelpers.translateVertexConfToTez(mapStageConf);

  Configuration[] intermediateReduceStageConfs = null;
  if (iReduceStagesCount > 0 && numIReducer > 0) {
    intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
    for (int i = 1; i <= iReduceStagesCount; ++i) {
      JobConf iReduceStageConf = new JobConf(conf);
      iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
      iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
      iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
      iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
      iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
      iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
      iReduceStageConf.set(
          MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
      MRHelpers.translateVertexConfToTez(iReduceStageConf);
      intermediateReduceStageConfs[i - 1] = iReduceStageConf;
    }
  }

  Configuration finalReduceConf = null;
  if (numReducer > 0) {
    finalReduceConf = new JobConf(conf);
    finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
    finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
    finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
    finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
    finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
    finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
    MRHelpers.translateVertexConfToTez(finalReduceConf);
  }

  MRHelpers.doJobClientMagic(mapStageConf);
  if (iReduceStagesCount > 0 && numIReducer > 0) {
    for (int i = 0; i < iReduceStagesCount; ++i) {
      MRHelpers.doJobClientMagic(intermediateReduceStageConfs[i]);
    }
  }
  if (numReducer > 0) {
    MRHelpers.doJobClientMagic(finalReduceConf);
  }

  InputSplitInfo inputSplitInfo = null;
  if (!generateSplitsInAM) {
    if (writeSplitsToDFS) {
      LOG.info("Writing splits to DFS");
      try {
        inputSplitInfo = MRHelpers.generateInputSplits(mapStageConf, remoteStagingDir);
      } catch (InterruptedException e) {
        throw new TezUncheckedException("Could not generate input splits", e);
      } catch (ClassNotFoundException e) {
        throw new TezUncheckedException("Failed to generate input splits", e);
      }
    } else {
      try {
        LOG.info("Creating in-mem splits");
        inputSplitInfo = MRHelpers.generateInputSplitsToMem(mapStageConf);
      } catch (ClassNotFoundException e) {
        throw new TezUncheckedException("Could not generate input splits", e);
      } catch (InterruptedException e) {
        throw new TezUncheckedException("Could not generate input splits", e);
      }
    }
    if (inputSplitInfo.getCredentials() != null) {
      this.credentials.addAll(inputSplitInfo.getCredentials());
    }
  }

  DAG dag = new DAG("MRRSleepJob");

  String jarPath = ClassUtil.findContainingJar(getClass());
  if (jarPath == null) {
    throw new TezUncheckedException(
        "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
  }

  Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
  remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
  FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

  TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] {remoteJarPath}, mapStageConf);

  Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
  LocalResource dagJarLocalRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromPath(remoteJarPath),
      LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION,
      jarFileStatus.getLen(),
      jarFileStatus.getModificationTime());
  commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

  List<Vertex> vertices = new ArrayList<Vertex>();

  byte[] mapInputPayload = null;
  byte[] mapUserPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
  if (writeSplitsToDFS || generateSplitsInAM) {
    mapInputPayload = MRHelpers.createMRInputPayload(mapUserPayload, null);
  } else {
    mapInputPayload =
        MRHelpers.createMRInputPayload(mapUserPayload, inputSplitInfo.getSplitsProto());
  }
  int numTasks = generateSplitsInAM ? -1 : numMapper;

  Vertex mapVertex = new Vertex(
      "map",
      new ProcessorDescriptor(MapProcessor.class.getName()).setUserPayload(mapUserPayload),
      numTasks,
      MRHelpers.getMapResource(mapStageConf));
  if (!generateSplitsInAM) {
    mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
  }

  if (writeSplitsToDFS) {
    Map<String, LocalResource> mapLocalResources = new HashMap<String, LocalResource>();
    mapLocalResources.putAll(commonLocalResources);
    MRHelpers.updateLocalResourcesForInputSplits(remoteFs, inputSplitInfo, mapLocalResources);
    mapVertex.setTaskLocalFiles(mapLocalResources);
  } else {
    mapVertex.setTaskLocalFiles(commonLocalResources);
  }

  if (generateSplitsInAM) {
    MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputAMSplitGenerator.class);
  } else {
    if (writeSplitsToDFS) {
      MRHelpers.addMRInput(mapVertex, mapInputPayload, null);
    } else {
      MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputSplitDistributor.class);
    }
  }
  vertices.add(mapVertex);

  if (iReduceStagesCount > 0 && numIReducer > 0) {
    for (int i = 0; i < iReduceStagesCount; ++i) {
      Configuration iconf = intermediateReduceStageConfs[i];
      byte[] iReduceUserPayload = MRHelpers.createUserPayloadFromConf(iconf);
      Vertex ivertex = new Vertex(
          "ireduce" + (i + 1),
          new ProcessorDescriptor(ReduceProcessor.class.getName())
              .setUserPayload(iReduceUserPayload),
          numIReducer,
          MRHelpers.getReduceResource(iconf));
      ivertex.setTaskLocalFiles(commonLocalResources);
      vertices.add(ivertex);
    }
  }

  Vertex finalReduceVertex = null;
  if (numReducer > 0) {
    byte[] reducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    finalReduceVertex = new Vertex(
        "reduce",
        new ProcessorDescriptor(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
        numReducer,
        MRHelpers.getReduceResource(finalReduceConf));
    finalReduceVertex.setTaskLocalFiles(commonLocalResources);
    MRHelpers.addMROutputLegacy(finalReduceVertex, reducePayload);
    vertices.add(finalReduceVertex);
  } else {
    // Map only job
    MRHelpers.addMROutputLegacy(mapVertex, mapUserPayload);
  }

  Configuration partitionerConf = new Configuration(false);
  partitionerConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(
          IntWritable.class.getName(),
          IntWritable.class.getName(),
          MRPartitioner.class.getName(),
          partitionerConf)
      .configureInput()
      .useLegacyInput()
      .done()
      .build();

  for (int i = 0; i < vertices.size(); ++i) {
    dag.addVertex(vertices.get(i));
    if (i != 0) {
      dag.addEdge(
          new Edge(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
    }
  }

  return dag;
}
/**
 * Connects to CM, sets up container launch context for shell command and eventually dispatches
 * the container start request to the CM.
 */
@Override
public void run() {
  LOG.info("Setting up container launch container for containerid=" + container.getId());
  ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

  // Set the environment
  ctx.setEnvironment(shellEnv);

  // Set the local resources
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

  // The container for the eventual shell commands needs its own local
  // resources too.
  // In this scenario, if a shell script is specified, we need to have it
  // copied and made available to the container.
  if (!scriptPath.isEmpty()) {
    Path renamedScriptPath = null;
    if (Shell.WINDOWS) {
      renamedScriptPath = new Path(scriptPath + ".bat");
    } else {
      renamedScriptPath = new Path(scriptPath + ".sh");
    }

    try {
      // rename the script file based on the underlying OS syntax.
      renameScriptFile(renamedScriptPath);
    } catch (Exception e) {
      LOG.error("Not able to add suffix (.bat/.sh) to the shell script filename", e);
      // We know we cannot continue launching the container
      // so we should release it.
      numCompletedContainers.incrementAndGet();
      numFailedContainers.incrementAndGet();
      return;
    }

    LocalResource shellRsrc = Records.newRecord(LocalResource.class);
    shellRsrc.setType(LocalResourceType.FILE);
    shellRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    try {
      shellRsrc.setResource(
          ConverterUtils.getYarnUrlFromURI(new URI(renamedScriptPath.toString())));
    } catch (URISyntaxException e) {
      LOG.error("Error when trying to use shell script path specified"
          + " in env, path=" + renamedScriptPath, e);
      // A failure scenario on bad input such as invalid shell script path
      // We know we cannot continue launching the container
      // so we should release it.
      // TODO
      numCompletedContainers.incrementAndGet();
      numFailedContainers.incrementAndGet();
      return;
    }
    shellRsrc.setTimestamp(shellScriptPathTimestamp);
    shellRsrc.setSize(shellScriptPathLen);
    localResources.put(
        Shell.WINDOWS ? ExecBatScripStringtPath : ExecShellStringPath, shellRsrc);
    shellCommand = Shell.WINDOWS ? windows_command : linux_bash_command;
  }
  ctx.setLocalResources(localResources);

  // Set the necessary command to execute on the allocated container
  Vector<CharSequence> vargs = new Vector<CharSequence>(5);

  // Set executable command
  vargs.add(shellCommand);
  // Set shell script path
  if (!scriptPath.isEmpty()) {
    vargs.add(Shell.WINDOWS ? ExecBatScripStringtPath : ExecShellStringPath);
  }

  // Set args for the shell command if any
  vargs.add(shellArgs);
  // Add log redirect params
  vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout");
  vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr");

  // Get final command
  StringBuilder command = new StringBuilder();
  for (CharSequence str : vargs) {
    command.append(str).append(" ");
  }

  List<String> commands = new ArrayList<String>();
  commands.add(command.toString());
  ctx.setCommands(commands);

  // Set up tokens for the container too. Today, for normal shell commands,
  // the container in distribute-shell doesn't need any tokens. We are
  // populating them mainly for NodeManagers to be able to download any
  // files in the distributed file-system. The tokens are otherwise also
  // useful in cases, for e.g., when one is running a "hadoop dfs" command
  // inside the distributed shell.
  ctx.setTokens(allTokens.duplicate());

  containerListener.addContainer(container.getId(), container);
  nmClientAsync.startContainerAsync(container, ctx);
}