private TezJob getJob(TezPlanContainerNode tezPlanNode, TezPlanContainer planContainer) throws JobCreationException { try { Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); localResources.putAll(planContainer.getLocalResources()); TezOperPlan tezPlan = tezPlanNode.getTezOperPlan(); localResources.putAll(tezPlan.getExtraResources()); String shipFiles = pigContext.getProperties().getProperty("pig.streaming.ship.files"); if (shipFiles != null) { for (String file : shipFiles.split(",")) { TezResourceManager.getInstance().addTezResource(new File(file.trim()).toURI()); } } String cacheFiles = pigContext.getProperties().getProperty("pig.streaming.cache.files"); if (cacheFiles != null) { addCacheResources(cacheFiles.split(",")); } for (Map.Entry<String, LocalResource> entry : localResources.entrySet()) { log.info("Local resource: " + entry.getKey()); } DAG tezDag = buildDAG(tezPlanNode, localResources); tezDag.setDAGInfo(createDagInfo(TezScriptState.get().getScript())); // set Tez caller context // Reflection for the following code since it is only available since tez 0.8.1: // CallerContext context = CallerContext.create(ATSService.CallerContext, // ATSService.getPigAuditId(pigContext), // ATSService.EntityType, ""); // tezDag.setCallerContext(context); Class callerContextClass = null; try { callerContextClass = Class.forName("org.apache.tez.client.CallerContext"); } catch (ClassNotFoundException e) { // If pre-Tez 0.8.1, skip setting CallerContext } if (callerContextClass != null) { Method builderBuildMethod = callerContextClass.getMethod( "create", String.class, String.class, String.class, String.class); Object context = builderBuildMethod.invoke( null, PigATSClient.CALLER_CONTEXT, PigATSClient.getPigAuditId(pigContext), PigATSClient.ENTITY_TYPE, ""); Method dagSetCallerContext = tezDag.getClass().getMethod("setCallerContext", context.getClass()); dagSetCallerContext.invoke(tezDag, context); } log.info("Total estimated parallelism is " + 
tezPlan.getEstimatedTotalParallelism()); return new TezJob(tezConf, tezDag, localResources, tezPlan); } catch (Exception e) { int errCode = 2017; String msg = "Internal error creating job configuration."; throw new JobCreationException(msg, errCode, PigException.BUG, e); } }
/**
 * Builds the Tez DAG for a plan node.
 *
 * <p>The DAG is named after the node's operator key and carries the credentials of the node's
 * Tez plan. A {@link TezDagBuilder} then visits the plan to populate the DAG.
 *
 * @param tezPlanNode node whose Tez operator plan is converted
 * @param localResources local resources handed to the DAG builder
 * @return the populated DAG
 * @throws IOException declared by the signature
 * @throws YarnException declared by the signature
 */
public DAG buildDAG(TezPlanContainerNode tezPlanNode, Map<String, LocalResource> localResources)
    throws IOException, YarnException {
  DAG dag = DAG.create(tezPlanNode.getOperatorKey().toString());

  TezOperPlan operPlan = tezPlanNode.getTezOperPlan();
  dag.setCredentials(operPlan.getCredentials());

  TezDagBuilder builder = new TezDagBuilder(pigContext, operPlan, dag, localResources);
  builder.visit();
  builder.avoidContainerReuseIfInputSplitInDisk();
  return dag;
}
/**
 * Checks that speculation can be disabled for a single vertex: vertex A has
 * {@code TEZ_AM_SPECULATION_ENABLED} set to {@code "false"}, vertex B does not, and after the DAG
 * succeeds only B is expected to report speculative attempts.
 */
@Test(timeout = 10000)
public void testBasicSpeculationPerVertexConf() throws Exception {
  DAG dag = DAG.create("test");
  String vNameNoSpec = "A";
  String vNameSpec = "B";
  Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5);
  Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5);
  // Speculation is switched off on A only; B gets no per-vertex override.
  vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
  dag.addVertex(vA);
  dag.addVertex(vB);
  // min/max src fraction is set to 1 (configured outside this method), so the vertices will
  // run sequentially.
  dag.addEdge(
      Edge.create(
          vA,
          vB,
          EdgeProperty.create(
              DataMovementType.SCATTER_GATHER,
              DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL,
              OutputDescriptor.create("O"),
              InputDescriptor.create("I"))));

  MockTezClient tezClient = createTezSession();
  try {
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId();
    TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId();
    // On B the original attempt is killed and the speculative one is successful.
    TezTaskAttemptID killedTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID noSpecTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0);

    // Cause the speculation trigger for both vertices.
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
    mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId);
    org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec);
    // Speculation for vB (enabled) but not for vA (disabled).
    Assert.assertTrue(
        vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue() > 0);
    Assert.assertEquals(
        0, vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
  } finally {
    // Stop the session even when an assertion fails so it does not outlive this test.
    tezClient.stop();
  }
}
/**
 * Builds a DAG from {@code work} and checks that every unit of work has a vertex of the same
 * name, and that each of its children appears among that vertex's output vertices.
 */
@Test
public void testBuildDag() throws IllegalArgumentException, IOException, Exception {
  DAG dag = task.build(conf, work, path, appLr, null, new Context(conf));

  for (BaseWork parent : work.getAllWork()) {
    Vertex parentVertex = dag.getVertex(parent.getName());
    assertNotNull(parentVertex);

    List<Vertex> outputs = parentVertex.getOutputVertices();
    for (BaseWork child : work.getChildren(parent)) {
      String childName = child.getName();
      // Scan the outputs until a vertex named like the child turns up.
      boolean found = false;
      for (int i = 0; i < outputs.size() && !found; i++) {
        found = outputs.get(i).getName().equals(childName);
      }
      assertTrue(found);
    }
  }
}
public void testBasicSpeculation(boolean withProgress) throws Exception { DAG dag = DAG.create("test"); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); MockTezClient tezClient = createTezSession(); DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); // original attempt is killed and speculative one is successful TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); mockLauncher.updateProgress(withProgress); // cause speculation trigger mockLauncher.setStatusUpdatesForTask(killedTaId, 100); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); Task task = dagImpl.getTask(killedTaId.getTaskID()); Assert.assertEquals(2, task.getAttempts().size()); Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID()); TaskAttempt killedAttempt = task.getAttempt(killedTaId); Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt"); Assert.assertEquals( TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, killedAttempt.getTerminationCause()); if (withProgress) { // without progress updates occasionally more than 1 task speculates Assert.assertEquals( 1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue()); Assert.assertEquals( 1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue()); org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID()); Assert.assertEquals( 1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue()); } tezClient.stop(); }
@Test public void testSubmit() throws Exception { DAG dag = DAG.create("test"); task.submit( conf, dag, path, appLr, sessionState, Collections.<LocalResource>emptyList(), new String[0], Collections.<String, LocalResource>emptyMap()); // validate close/reopen verify(sessionState, times(1)).open(any(HiveConf.class), any(String[].class)); verify(sessionState, times(1)).close(eq(true)); // now uses pool after HIVE-7043 verify(session, times(2)).submitDAG(any(DAG.class)); }
private DAGPlan createDAG() { // Create a plan with 3 vertices: A, B, C. Group(A,B)->C Configuration conf = new Configuration(false); int dummyTaskCount = 1; Resource dummyTaskResource = Resource.newInstance(1, 1); org.apache.tez.dag.api.Vertex v1 = new org.apache.tez.dag.api.Vertex( "vertex1", new ProcessorDescriptor("Processor").setHistoryText("vertex1 Processor HistoryText"), dummyTaskCount, dummyTaskResource); v1.addInput( "input1", new InputDescriptor("input.class").setHistoryText("input HistoryText"), null); org.apache.tez.dag.api.Vertex v2 = new org.apache.tez.dag.api.Vertex( "vertex2", new ProcessorDescriptor("Processor").setHistoryText("vertex2 Processor HistoryText"), dummyTaskCount, dummyTaskResource); org.apache.tez.dag.api.Vertex v3 = new org.apache.tez.dag.api.Vertex( "vertex3", new ProcessorDescriptor("Processor").setHistoryText("vertex3 Processor HistoryText"), dummyTaskCount, dummyTaskResource); DAG dag = new DAG("testDag"); String groupName1 = "uv12"; org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1, v1, v2); OutputDescriptor outDesc = new OutputDescriptor("output.class").setHistoryText("uvOut HistoryText"); uv12.addOutput("uvOut", outDesc, OutputCommitter.class); v3.addOutput("uvOut", outDesc, OutputCommitter.class); GroupInputEdge e1 = new GroupInputEdge( uv12, v3, new EdgeProperty( DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, new OutputDescriptor("dummy output class").setHistoryText("Dummy History Text"), new InputDescriptor("dummy input class").setHistoryText("Dummy History Text")), new InputDescriptor("merge.class").setHistoryText("Merge HistoryText")); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); dag.addEdge(e1); return dag.createDag(conf); }
/**
 * Registers every input path of the map work with the DAG as a URI that needs credentials.
 *
 * <p>Does nothing when the map work has no paths.
 *
 * @param mapWork work whose path-to-aliases keys are the paths to register
 * @param dag DAG that receives the URIs via {@code addURIsForCredentials}
 */
private void addCredentials(MapWork mapWork, DAG dag) {
  Set<String> paths = mapWork.getPathToAliases().keySet();
  if (paths == null || paths.isEmpty()) {
    return;
  }

  // Convert each path string to a URI; the set removes duplicates.
  Set<URI> uris = new HashSet<URI>();
  for (String path : paths) {
    uris.add(new Path(path).toUri());
  }

  if (LOG.isDebugEnabled()) {
    for (URI uri : uris) {
      LOG.debug("Marking URI as needing credentials: " + uri);
    }
  }
  dag.addURIsForCredentials(uris);
}
/** Building a DAG from an empty {@code TezWork} must yield a DAG without vertices. */
@Test
public void testEmptyWork() throws IllegalArgumentException, IOException, Exception {
  DAG dag = task.build(conf, new TezWork(""), path, appLr, null, new Context(conf));
  // Expected value goes first so that a failure message reads "expected:<0> but was:<n>".
  assertEquals(0, dag.getVertices().size());
}
public DAG createDAG( FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount, boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException { Configuration mapStageConf = new JobConf(conf); mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper); mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime); mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime); mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime); mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount); mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount); mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount); mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount); mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer); mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName()); mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName()); if (numIReducer == 0 && numReducer == 0) { mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName()); } MRHelpers.translateVertexConfToTez(mapStageConf); Configuration[] intermediateReduceStageConfs = null; if (iReduceStagesCount > 0 && numIReducer > 0) { intermediateReduceStageConfs = new JobConf[iReduceStagesCount]; for (int i = 1; i <= iReduceStagesCount; ++i) { JobConf iReduceStageConf = new JobConf(conf); iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime); iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount); iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer); iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName()); iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName()); iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, 
IntWritable.class.getName()); iReduceStageConf.set( MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName()); MRHelpers.translateVertexConfToTez(iReduceStageConf); intermediateReduceStageConfs[i - 1] = iReduceStageConf; } } Configuration finalReduceConf = null; if (numReducer > 0) { finalReduceConf = new JobConf(conf); finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime); finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount); finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer); finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName()); finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName()); finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName()); finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName()); MRHelpers.translateVertexConfToTez(finalReduceConf); } MRHelpers.doJobClientMagic(mapStageConf); if (iReduceStagesCount > 0 && numIReducer > 0) { for (int i = 0; i < iReduceStagesCount; ++i) { MRHelpers.doJobClientMagic(intermediateReduceStageConfs[i]); } } if (numReducer > 0) { MRHelpers.doJobClientMagic(finalReduceConf); } InputSplitInfo inputSplitInfo = null; if (!generateSplitsInAM) { if (writeSplitsToDFS) { LOG.info("Writing splits to DFS"); try { inputSplitInfo = MRHelpers.generateInputSplits(mapStageConf, remoteStagingDir); } catch (InterruptedException e) { throw new TezUncheckedException("Could not generate input splits", e); } catch (ClassNotFoundException e) { throw new TezUncheckedException("Failed to generate input splits", e); } } else { try { LOG.info("Creating in-mem splits"); inputSplitInfo = MRHelpers.generateInputSplitsToMem(mapStageConf); } catch (ClassNotFoundException e) { throw new TezUncheckedException("Could not generate input splits", e); } catch (InterruptedException e) { throw new TezUncheckedException("Could not generate input splits", e); } } if 
(inputSplitInfo.getCredentials() != null) { this.credentials.addAll(inputSplitInfo.getCredentials()); } } DAG dag = new DAG("MRRSleepJob"); String jarPath = ClassUtil.findContainingJar(getClass()); if (jarPath == null) { throw new TezUncheckedException( "Could not find any jar containing" + " MRRSleepJob.class in the classpath"); } Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar")); remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath); FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath); TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] {remoteJarPath}, mapStageConf); Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>(); LocalResource dagJarLocalRsrc = LocalResource.newInstance( ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(), jarFileStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); List<Vertex> vertices = new ArrayList<Vertex>(); byte[] mapInputPayload = null; byte[] mapUserPayload = MRHelpers.createUserPayloadFromConf(mapStageConf); if (writeSplitsToDFS || generateSplitsInAM) { mapInputPayload = MRHelpers.createMRInputPayload(mapUserPayload, null); } else { mapInputPayload = MRHelpers.createMRInputPayload(mapUserPayload, inputSplitInfo.getSplitsProto()); } int numTasks = generateSplitsInAM ? 
-1 : numMapper; Vertex mapVertex = new Vertex( "map", new ProcessorDescriptor(MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks, MRHelpers.getMapResource(mapStageConf)); if (!generateSplitsInAM) { mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints()); } if (writeSplitsToDFS) { Map<String, LocalResource> mapLocalResources = new HashMap<String, LocalResource>(); mapLocalResources.putAll(commonLocalResources); MRHelpers.updateLocalResourcesForInputSplits(remoteFs, inputSplitInfo, mapLocalResources); mapVertex.setTaskLocalFiles(mapLocalResources); } else { mapVertex.setTaskLocalFiles(commonLocalResources); } if (generateSplitsInAM) { MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputAMSplitGenerator.class); } else { if (writeSplitsToDFS) { MRHelpers.addMRInput(mapVertex, mapInputPayload, null); } else { MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputSplitDistributor.class); } } vertices.add(mapVertex); if (iReduceStagesCount > 0 && numIReducer > 0) { for (int i = 0; i < iReduceStagesCount; ++i) { Configuration iconf = intermediateReduceStageConfs[i]; byte[] iReduceUserPayload = MRHelpers.createUserPayloadFromConf(iconf); Vertex ivertex = new Vertex( "ireduce" + (i + 1), new ProcessorDescriptor(ReduceProcessor.class.getName()) .setUserPayload(iReduceUserPayload), numIReducer, MRHelpers.getReduceResource(iconf)); ivertex.setTaskLocalFiles(commonLocalResources); vertices.add(ivertex); } } Vertex finalReduceVertex = null; if (numReducer > 0) { byte[] reducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf); finalReduceVertex = new Vertex( "reduce", new ProcessorDescriptor(ReduceProcessor.class.getName()) .setUserPayload(reducePayload), numReducer, MRHelpers.getReduceResource(finalReduceConf)); finalReduceVertex.setTaskLocalFiles(commonLocalResources); MRHelpers.addMROutputLegacy(finalReduceVertex, reducePayload); vertices.add(finalReduceVertex); } else { // Map only job MRHelpers.addMROutputLegacy(mapVertex, 
mapUserPayload); } Configuration partitionerConf = new Configuration(false); partitionerConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName()); OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer.newBuilder( IntWritable.class.getName(), IntWritable.class.getName(), MRPartitioner.class.getName(), partitionerConf) .configureInput() .useLegacyInput() .done() .build(); for (int i = 0; i < vertices.size(); ++i) { dag.addVertex(vertices.get(i)); if (i != 0) { dag.addEdge( new Edge(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty())); } } return dag; }
/** * Submit a DAG to a Tez Session. Blocks until either the DAG is submitted to the session or * configured timeout period expires. Cleans up session if the submission timed out. * * @param dag DAG to be submitted to Session * @return DAGClient to monitor the DAG * @throws TezException * @throws IOException * @throws SessionNotRunning if session is not alive * @throws DAGSubmissionTimedOut if submission timed out */ public synchronized DAGClient submitDAG(DAG dag) throws TezException, IOException { if (!sessionStarted) { throw new TezUncheckedException("Session not started"); } else if (sessionStopped) { throw new TezUncheckedException("Session stopped"); } String dagId = null; LOG.info( "Submitting dag to TezSession" + ", sessionName=" + sessionName + ", applicationId=" + applicationId); // Add tez jars to vertices too for (Vertex v : dag.getVertices()) { v.getTaskLocalResources().putAll(tezJarResources); if (null != tezConfPBLRsrc) { v.getTaskLocalResources().put(TezConfiguration.TEZ_PB_BINARY_CONF_NAME, tezConfPBLRsrc); } } DAGPlan dagPlan = dag.createDag(sessionConfig.getTezConfiguration()); SubmitDAGRequestProto requestProto = SubmitDAGRequestProto.newBuilder().setDAGPlan(dagPlan).build(); DAGClientAMProtocolBlockingPB proxy; long startTime = System.currentTimeMillis(); int timeout = sessionConfig .getTezConfiguration() .getInt( TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS, TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS_DEFAULT); long endTime = startTime + (timeout * 1000); while (true) { // FIXME implement a max time to wait for submit proxy = TezClientUtils.getSessionAMProxy( yarnClient, sessionConfig.getYarnConfiguration(), applicationId); if (proxy != null) { break; } try { Thread.sleep(100l); } catch (InterruptedException e) { // Ignore } if (System.currentTimeMillis() > endTime) { try { LOG.warn("DAG submission to session timed out, stopping session"); stop(); } catch (Throwable t) { LOG.info("Got an exception when trying to stop session", t); } 
throw new DAGSubmissionTimedOut( "Could not submit DAG to Tez Session" + ", timed out after " + timeout + " seconds"); } } try { dagId = proxy.submitDAG(null, requestProto).getDagId(); } catch (ServiceException e) { throw new TezException(e); } LOG.info( "Submitted dag to TezSession" + ", sessionName=" + sessionName + ", applicationId=" + applicationId + ", dagId=" + dagId); return new DAGClientRPCImpl(applicationId, dagId, sessionConfig.getTezConfiguration()); }