/**
 * Checks if the instance dependency chain created with the
 * <code>setVertexToShareInstancesWith</code> method is acyclic.
 *
 * @return <code>true</code> if the dependency chain is acyclic, <code>false</code> otherwise
 */
public boolean isInstanceDependencyChainAcyclic() {

  final AbstractJobVertex[] allVertices = this.getAllJobVertices();
  final Set<AbstractJobVertex> alreadyVisited = new HashSet<AbstractJobVertex>();

  for (AbstractJobVertex vertex : allVertices) {

    if (alreadyVisited.contains(vertex)) {
      // This vertex was already covered while following another sharing chain.
      continue;
    }

    // Walk the sharing chain starting from this vertex. Seeing any vertex a
    // second time on the same walk means the chain loops back on itself.
    final Set<AbstractJobVertex> chain = new HashSet<AbstractJobVertex>();
    // Include the start vertex so direct self-references are caught immediately,
    // and mark it visited so later iterations do not re-walk its chain.
    chain.add(vertex);
    alreadyVisited.add(vertex);

    AbstractJobVertex next = vertex.getVertexToShareInstancesWith();
    while (next != null) {
      if (chain.contains(next)) {
        return false;
      }
      alreadyVisited.add(next);
      chain.add(next);
      next = next.getVertexToShareInstancesWith();
    }
  }

  return true;
}
/**
 * Auxiliary method for cycle detection. Performs a depth-first traversal with vertex markings to
 * detect a cycle. If a node with a temporary marking is found, then there is a cycle. Once all
 * children of a vertex have been traversed the parent node cannot be part of another cycle and is
 * thus permanently marked.
 *
 * @param jv current job vertex to check
 * @param temporarilyMarked set of temporarily marked nodes
 * @param permanentlyMarked set of permanently marked nodes
 * @return <code>true</code> if there is a cycle, <code>false</code> otherwise
 */
private boolean detectCycle(
    final AbstractJobVertex jv,
    final HashSet<JobVertexID> temporarilyMarked,
    final HashSet<JobVertexID> permanentlyMarked) {

  final JobVertexID id = jv.getID();

  // A permanently marked vertex has been fully explored; no cycle can run through it.
  if (permanentlyMarked.contains(id)) {
    return false;
  }

  // Reaching a vertex that is still on the current DFS path means we closed a loop.
  if (temporarilyMarked.contains(id)) {
    return true;
  }

  temporarilyMarked.add(id);

  final int forwardConnections = jv.getNumberOfForwardConnections();
  for (int i = 0; i < forwardConnections; i++) {
    final AbstractJobVertex successor = jv.getForwardConnection(i).getConnectedVertex();
    if (detectCycle(successor, temporarilyMarked, permanentlyMarked)) {
      return true;
    }
  }

  // All successors explored without finding a cycle through this vertex.
  permanentlyMarked.add(id);
  return false;
}
@Test
public void testTopologicalSort1() {
  try {
    final AbstractJobVertex source1 = new AbstractJobVertex("source1");
    final AbstractJobVertex source2 = new AbstractJobVertex("source2");
    final AbstractJobVertex target1 = new AbstractJobVertex("target1");
    final AbstractJobVertex target2 = new AbstractJobVertex("target2");
    final AbstractJobVertex intermediate1 = new AbstractJobVertex("intermediate1");
    final AbstractJobVertex intermediate2 = new AbstractJobVertex("intermediate2");

    // Wire the graph: source1 feeds both targets directly; source2 reaches
    // target2 through the two intermediate vertices.
    target1.connectNewDataSetAsInput(source1, DistributionPattern.POINTWISE);
    target2.connectNewDataSetAsInput(source1, DistributionPattern.POINTWISE);
    target2.connectNewDataSetAsInput(intermediate2, DistributionPattern.POINTWISE);
    intermediate2.connectNewDataSetAsInput(intermediate1, DistributionPattern.POINTWISE);
    intermediate1.connectNewDataSetAsInput(source2, DistributionPattern.POINTWISE);

    final JobGraph graph =
        new JobGraph(
            "TestGraph", source1, source2, intermediate1, intermediate2, target1, target2);
    final List<AbstractJobVertex> sorted = graph.getVerticesSortedTopologicallyFromSources();

    // All six vertices must appear, and every edge must be respected by the order.
    assertEquals(6, sorted.size());
    assertBefore(source1, target1, sorted);
    assertBefore(source1, target2, sorted);
    assertBefore(source2, target2, sorted);
    assertBefore(source2, intermediate1, sorted);
    assertBefore(source2, intermediate2, sorted);
    assertBefore(intermediate1, target2, sorted);
    assertBefore(intermediate2, target2, sorted);
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test public void testTopoSortCyclicGraphNoSources() { try { AbstractJobVertex v1 = new AbstractJobVertex("1"); AbstractJobVertex v2 = new AbstractJobVertex("2"); AbstractJobVertex v3 = new AbstractJobVertex("3"); AbstractJobVertex v4 = new AbstractJobVertex("4"); v1.connectNewDataSetAsInput(v4, DistributionPattern.POINTWISE); v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE); v3.connectNewDataSetAsInput(v2, DistributionPattern.POINTWISE); v4.connectNewDataSetAsInput(v3, DistributionPattern.POINTWISE); JobGraph jg = new JobGraph("Cyclic Graph", v1, v2, v3, v4); try { jg.getVerticesSortedTopologicallyFromSources(); fail("Failed to raise error on topologically sorting cyclic graph."); } catch (InvalidProgramException e) { // that what we wanted } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Auxiliary method to collect all vertices which are reachable from the input vertices.
 * Performs a recursive depth-first traversal along forward connections.
 *
 * @param jv the currently considered job vertex
 * @param visited the set of IDs of vertices that have already been visited, used to avoid
 *     traversing any vertex more than once
 * @param collector the list collecting, in depth-first order, every vertex reached so far
 */
private void collectVertices(
    final AbstractJobVertex jv,
    final HashSet<JobVertexID> visited,
    final List<AbstractJobVertex> collector) {

  // Mark and record the current vertex before descending into its successors.
  visited.add(jv.getID());
  collector.add(jv);

  for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) {
    AbstractJobVertex vertex = jv.getForwardConnection(i).getConnectedVertex();
    // Only recurse into successors we have not seen yet.
    if (!visited.contains(vertex.getID())) {
      collectVertices(vertex, visited, collector);
    }
  }
}
@Test public void testTopologicalSort3() { // --> op1 -- // / \ // (source) - +-> op2 -> op3 // \ / // --------- try { AbstractJobVertex source = new AbstractJobVertex("source"); AbstractJobVertex op1 = new AbstractJobVertex("op4"); AbstractJobVertex op2 = new AbstractJobVertex("op2"); AbstractJobVertex op3 = new AbstractJobVertex("op3"); op1.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE); op2.connectNewDataSetAsInput(op1, DistributionPattern.POINTWISE); op2.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE); op3.connectNewDataSetAsInput(op2, DistributionPattern.POINTWISE); JobGraph graph = new JobGraph("TestGraph", source, op1, op2, op3); List<AbstractJobVertex> sorted = graph.getVerticesSortedTopologicallyFromSources(); assertEquals(4, sorted.size()); assertBefore(source, op1, sorted); assertBefore(source, op2, sorted); assertBefore(op1, op2, sorted); assertBefore(op2, op3, sorted); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Returns an array of all job vertices that can be reached when traversing the job graph from
 * the input vertices. Each job vertex is contained only one time.
 *
 * <p>The result is computed lazily and cached on first call.
 *
 * @return an array of all job vertices that can be reached when traversing the job graph from
 *     the input vertices
 */
public AbstractJobVertex[] getAllReachableJobVertices() {

  if (this.bufferedAllReachableJobVertices != null) {
    return this.bufferedAllReachableJobVertices;
  }

  final List<AbstractJobVertex> reachable = new ArrayList<AbstractJobVertex>();
  final HashSet<JobVertexID> seen = new HashSet<JobVertexID>();

  // Start a depth-first collection from every input vertex not yet reached.
  final Iterator<AbstractJobInputVertex> inputIterator = getInputVertices();
  while (inputIterator.hasNext()) {
    final AbstractJobVertex inputVertex = inputIterator.next();
    if (!seen.contains(inputVertex.getID())) {
      collectVertices(inputVertex, seen, reachable);
    }
  }

  this.bufferedAllReachableJobVertices =
      reachable.toArray(new AbstractJobVertex[reachable.size()]);
  return this.bufferedAllReachableJobVertices;
}
/** * Checks for all registered job vertices if their in-/out-degree is correct. * * @return <code>null</code> if the in-/out-degree of all vertices is correct or the first job * vertex whose in-/out-degree is incorrect. */ public AbstractJobVertex areVertexDegreesCorrect() { // Check input vertices final Iterator<AbstractJobInputVertex> iter = getInputVertices(); while (iter.hasNext()) { final AbstractJobVertex jv = iter.next(); if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() > 0) { return jv; } } // Check task vertices final Iterator<JobTaskVertex> iter2 = getTaskVertices(); while (iter2.hasNext()) { final AbstractJobVertex jv = iter2.next(); if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() < 1) { return jv; } } // Check output vertices final Iterator<AbstractJobOutputVertex> iter3 = getOutputVertices(); while (iter3.hasNext()) { final AbstractJobVertex jv = iter3.next(); if (jv.getNumberOfForwardConnections() > 0 || jv.getNumberOfBackwardConnections() < 1) { return jv; } } return null; }
@Test public void testSerialization() { try { JobGraph jg = new JobGraph("The graph"); // add some configuration values { jg.getJobConfiguration().setString("some key", "some value"); jg.getJobConfiguration().setDouble("Life of ", Math.PI); } // add some vertices { AbstractJobVertex source1 = new AbstractJobVertex("source1"); AbstractJobVertex source2 = new AbstractJobVertex("source2"); AbstractJobVertex target = new AbstractJobVertex("target"); target.connectNewDataSetAsInput(source1, DistributionPattern.POINTWISE); target.connectNewDataSetAsInput(source2, DistributionPattern.ALL_TO_ALL); jg.addVertex(source1); jg.addVertex(source2); jg.addVertex(target); } // de-/serialize and compare JobGraph copy = CommonTestUtils.createCopySerializable(jg); assertEquals(jg.getName(), copy.getName()); assertEquals(jg.getJobID(), copy.getJobID()); assertEquals(jg.getJobConfiguration(), copy.getJobConfiguration()); assertEquals(jg.getNumberOfVertices(), copy.getNumberOfVertices()); for (AbstractJobVertex vertex : copy.getVertices()) { AbstractJobVertex original = jg.findVertexByID(vertex.getID()); assertNotNull(original); assertEquals(original.getName(), vertex.getName()); assertEquals(original.getNumberOfInputs(), vertex.getNumberOfInputs()); assertEquals( original.getNumberOfProducedIntermediateDataSets(), vertex.getNumberOfProducedIntermediateDataSets()); } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test
public void testTopologicalSort2() {
  try {
    AbstractJobVertex source1 = new AbstractJobVertex("source1");
    AbstractJobVertex source2 = new AbstractJobVertex("source2");
    AbstractJobVertex root = new AbstractJobVertex("root");
    AbstractJobVertex l11 = new AbstractJobVertex("layer 1 - 1");
    AbstractJobVertex l12 = new AbstractJobVertex("layer 1 - 2");
    AbstractJobVertex l13 = new AbstractJobVertex("layer 1 - 3");
    AbstractJobVertex l2 = new AbstractJobVertex("layer 2");

    // The root consumes from layer 1-3, source2 and layer 2; layer 2 merges
    // layers 1-1 and 1-2, which in turn consume from the two sources.
    root.connectNewDataSetAsInput(l13, DistributionPattern.POINTWISE);
    root.connectNewDataSetAsInput(source2, DistributionPattern.POINTWISE);
    root.connectNewDataSetAsInput(l2, DistributionPattern.POINTWISE);

    l2.connectNewDataSetAsInput(l11, DistributionPattern.POINTWISE);
    l2.connectNewDataSetAsInput(l12, DistributionPattern.POINTWISE);

    l11.connectNewDataSetAsInput(source1, DistributionPattern.POINTWISE);

    l12.connectNewDataSetAsInput(source1, DistributionPattern.POINTWISE);
    l12.connectNewDataSetAsInput(source2, DistributionPattern.POINTWISE);

    l13.connectNewDataSetAsInput(source2, DistributionPattern.POINTWISE);

    JobGraph graph = new JobGraph("TestGraph", source1, source2, root, l11, l13, l12, l2);
    List<AbstractJobVertex> sorted = graph.getVerticesSortedTopologicallyFromSources();

    assertEquals(7, sorted.size());

    assertBefore(source1, root, sorted);
    assertBefore(source2, root, sorted);
    assertBefore(l11, root, sorted);
    assertBefore(l12, root, sorted);
    assertBefore(l13, root, sorted);
    // Fixed: this assertion was previously duplicated.
    assertBefore(l2, root, sorted);

    assertBefore(l11, l2, sorted);
    assertBefore(l12, l2, sorted);

    assertBefore(source1, l2, sorted);
    assertBefore(source2, l2, sorted);

    assertBefore(source2, l13, sorted);
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
/**
 * Deserializes this job graph from the given input: the job ID, the job name, the required
 * jar files, then every vertex (first instantiated reflectively by class name, then populated
 * with its serialized data), and finally the job configuration.
 *
 * @param in the input to read the serialized job graph from
 * @throws IOException if a vertex class cannot be resolved or instantiated, a vertex ID is
 *     unknown, or no class loader is registered for this job
 */
@Override
public void read(final DataInput in) throws IOException {

  // Read job id
  this.jobID.read(in);

  // Read the job name
  this.jobName = StringRecord.readString(in);

  // Read required jar files
  readRequiredJarFiles(in);

  // First read total number of vertices;
  final int numVertices = in.readInt();

  // First, recreate each vertex and add it to reconstructionMap
  for (int i = 0; i < numVertices; i++) {
    final String className = StringRecord.readString(in);
    final JobVertexID id = new JobVertexID();
    id.read(in);
    final String vertexName = StringRecord.readString(in);

    // Resolve the vertex class by its serialized name.
    Class<? extends IOReadableWritable> c;
    try {
      c = ClassUtils.getRecordByName(className);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe.toString());
    }

    // Find constructor
    Constructor<? extends IOReadableWritable> cst;
    try {
      cst = c.getConstructor(String.class, JobVertexID.class, JobGraph.class);
    } catch (SecurityException e1) {
      throw new IOException(e1.toString());
    } catch (NoSuchMethodException e1) {
      throw new IOException(e1.toString());
    }

    try {
      // NOTE(review): the created instance is discarded here — presumably the
      // constructor registers the vertex with this graph via the JobGraph
      // argument (the vertex maps below are populated afterwards); confirm.
      cst.newInstance(vertexName, id, this);
    } catch (IllegalArgumentException e) {
      throw new IOException(e.toString());
    } catch (InstantiationException e) {
      throw new IOException(e.toString());
    } catch (IllegalAccessException e) {
      throw new IOException(e.toString());
    } catch (InvocationTargetException e) {
      throw new IOException(e.toString());
    }
  }

  // Second pass: look each vertex up by its re-read ID and let it deserialize
  // its own payload in the same order it was written.
  final JobVertexID tmpID = new JobVertexID();
  for (int i = 0; i < numVertices; i++) {
    AbstractJobVertex jv;

    tmpID.read(in);
    if (inputVertices.containsKey(tmpID)) {
      jv = inputVertices.get(tmpID);
    } else {
      if (outputVertices.containsKey(tmpID)) {
        jv = outputVertices.get(tmpID);
      } else {
        if (taskVertices.containsKey(tmpID)) {
          jv = taskVertices.get(tmpID);
        } else {
          throw new IOException("Cannot find vertex with ID " + tmpID + " in any vertex map.");
        }
      }
    }

    // Read the vertex data
    jv.read(in);
  }

  // Find the class loader for the job
  final ClassLoader cl = LibraryCacheManager.getClassLoader(this.jobID);

  if (cl == null) {
    throw new IOException("Cannot find class loader for job graph " + this.jobID);
  }

  // Re-instantiate the job configuration object and read the configuration
  this.jobConfiguration = new Configuration(cl);
  this.jobConfiguration.read(in);
}