/**
 * Returns the step registered under the given sink name, creating and registering a new one
 * when the map has no entry for that name.
 *
 * <p>A newly created step is named via {@code makeStepName(steps, numJobs, sinkName)}, has its
 * parent flow name set to {@code flowName}, and is stored in {@code steps} under
 * {@code sinkName} before being returned.
 *
 * @param flowName name assigned as the parent flow name of a newly created step
 * @param steps    steps created so far, keyed by sink name; updated when a step is created
 * @param sinkName key used to look up, and if necessary register, the step
 * @param numJobs  value forwarded to {@code makeStepName} when a step is created
 * @return the existing step mapped to {@code sinkName}, or the newly created one
 */
private FlowStep getCreateFlowStep(String flowName, Map<String, FlowStep> steps, String sinkName, int numJobs) {
  if (!steps.containsKey(sinkName)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("creating step: " + sinkName);
    }

    FlowStep created = new FlowStep(makeStepName(steps, numJobs, sinkName));
    created.setParentFlowName(flowName);
    steps.put(sinkName, created);

    return created;
  }

  // an entry already exists for this sink, reuse it
  return steps.get(sinkName);
}
/**
 * Connects an {@code Hfs} source to an {@code Lfs} sink through a single pipe and checks that
 * the resulting flow has exactly one step whose job configuration reports
 * {@code mapred.job.tracker} as {@code "local"} (compared case-insensitively).
 *
 * @throws Exception if building the flow or its job configuration fails
 */
public void testLocalModeSink() throws Exception {
  Tap source = new Hfs(new TextLine(), "input/path");
  Tap sink = new Lfs(new TextLine(), "output/path", true);

  Pipe pipe = new Pipe("test");

  Map<Object, Object> props = getProperties();
  Flow flow = new FlowConnector(props).connect(source, sink, pipe);

  List<FlowStep> steps = flow.getSteps();

  assertEquals("wrong size", 1, steps.size());

  FlowStep step = steps.get(0);

  String tracker = step.getJobConf(MultiMapReducePlanner.getJobConf(props)).get("mapred.job.tracker");

  // compare from the literal so an unset property fails the assertion below
  // instead of aborting the test with a NullPointerException
  boolean isLocal = "local".equalsIgnoreCase(tracker);

  assertTrue("is not local", isLocal);
}
/** * Creates the map reduce step graph. * * @param flowName * @param elementGraph * @param traps */ private void makeStepGraph(String flowName, ElementGraph elementGraph, Map<String, Tap> traps) { SimpleDirectedGraph<Tap, Integer> tapGraph = elementGraph.makeTapGraph(); int numJobs = countNumJobs(tapGraph); Map<String, FlowStep> steps = new LinkedHashMap<String, FlowStep>(); TopologicalOrderIterator<Tap, Integer> topoIterator = new TopologicalOrderIterator<Tap, Integer>(tapGraph); int count = 0; while (topoIterator.hasNext()) { Tap source = topoIterator.next(); if (LOG.isDebugEnabled()) LOG.debug("handling source: " + source); List<Tap> sinks = Graphs.successorListOf(tapGraph, source); for (Tap sink : sinks) { if (LOG.isDebugEnabled()) LOG.debug("handling path: " + source + " -> " + sink); FlowStep step = getCreateFlowStep(flowName, steps, sink.toString(), numJobs); addVertex(step); if (steps.containsKey(source.toString())) addEdge(steps.get(source.toString()), step, count++); // support multiple paths from source to sink // this allows for self joins on groups, even with different operation stacks between them // note we must ignore paths with intermediate taps List<GraphPath<FlowElement, Scope>> paths = elementGraph.getAllShortestPathsBetween(source, sink); for (GraphPath<FlowElement, Scope> path : paths) { if (pathContainsTap(path)) continue; List<Scope> scopes = path.getEdgeList(); String sourceName = scopes.get(0).getName(); // root node of the shortest path step.sources.put((Tap) source, sourceName); step.sink = sink; if (step.sink.isWriteDirect()) step.tempSink = new TempHfs(sink.getPath().toUri().getPath()); FlowElement lhs = source; step.graph.addVertex(lhs); boolean onMapSide = true; for (Scope scope : scopes) { FlowElement rhs = elementGraph.getEdgeTarget(scope); step.graph.addVertex(rhs); step.graph.addEdge(lhs, rhs, scope); if (rhs instanceof Group) { step.group = (Group) rhs; onMapSide = false; } else if (rhs instanceof Pipe) // add relevant traps to 
step { String name = ((Pipe) rhs).getName(); if (traps.containsKey(name)) { if (onMapSide) step.mapperTraps.put(name, traps.get(name)); else step.reducerTraps.put(name, traps.get(name)); } } lhs = rhs; } } } } }