Code example #1
File: HBaseWriter.java (Project: AnikaGross/gradoop)
  /**
   * Converts runtime edge data to persistent edge data (includes source/target vertex data) and
   * writes it to HBase.
   *
   * @param epgmDatabase EPGM database instance
   * @param edgeDataHandler edge data handler
   * @param persistentEdgeDataFactory persistent edge data factory
   * @param edgeDataTableName HBase edge data table name
   * @param <PED> persistent edge data type
   * @throws IOException if the Hadoop job for the HBase output cannot be created
   */
  public <PED extends PersistentEdgeData<VD>> void writeEdgeData(
      final EPGMDatabase<VD, ED, GD> epgmDatabase,
      final EdgeDataHandler<ED, VD> edgeDataHandler,
      final PersistentEdgeDataFactory<ED, VD, PED> persistentEdgeDataFactory,
      final String edgeDataTableName)
      throws IOException {

    Graph<Long, VD, ED> graph = epgmDatabase.getDatabaseGraph().getGellyGraph();

    DataSet<PersistentEdgeData<VD>> persistentEdgeDataSet =
        graph
            .getVertices()
            // join each vertex with the edges whose source id matches the vertex id
            .join(graph.getEdges())
            .where(0)
            .equalTo(0)
            // join result with vertices on edge target vertex id
            .join(graph.getVertices())
            .where("1.1")
            .equalTo(0)
            // ((source-vertex-data, edge-data), target-vertex-data)
            .with(new PersistentEdgeDataJoinFunction<>(persistentEdgeDataFactory));

    // write (persistent-edge-data) to HBase table
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, edgeDataTableName);

    persistentEdgeDataSet
        .map(new HBaseWriter.EdgeDataToHBaseMapper<>(edgeDataHandler))
        .output(new HadoopOutputFormat<>(new TableOutputFormat<LongWritable>(), job));
  }
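
A minimal sketch of a call site for this method. Only the signature above is given, so the writer instance, its no-arg constructor, and the handler/factory setup are assumptions:

    // hypothetical call site; epgmDatabase, edgeDataHandler and the factory
    // are assumed to be configured elsewhere
    HBaseWriter<VD, ED, GD> hbaseWriter = new HBaseWriter<>();
    hbaseWriter.writeEdgeData(
        epgmDatabase, edgeDataHandler, persistentEdgeDataFactory, "edge_data");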
Code example #2
  @SuppressWarnings("serial")
  public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

    // each vertex value is initialized with the vertex's own id
    Graph<Long, Long, NullValue> graph =
        Graph.fromDataSet(
            edges,
            new MapFunction<Long, Long>() {
              @Override
              public Long map(Long value) throws Exception {
                return value;
              }
            },
            env);

    DataSet<Vertex<Long, Long>> verticesWithMinIds =
        graph.run(new GSAConnectedComponents<Long, Long, NullValue>(maxIterations));

    // emit result
    if (fileOutput) {
      verticesWithMinIds.writeAsCsv(outputPath, "\n", ",");

      // since file sinks are lazy, we trigger the execution explicitly
      env.execute("Connected Components Example");
    } else {
      verticesWithMinIds.print();
    }
  }
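
The getEdgesDataSet helper is referenced above but not shown. A minimal sketch of what it might look like, assuming a tab-separated (source, target) CSV input and a hypothetical edgesInputPath field alongside fileOutput and outputPath:

  @SuppressWarnings("serial")
  private static DataSet<Edge<Long, NullValue>> getEdgesDataSet(ExecutionEnvironment env) {
    // read (source, target) id pairs and attach a NullValue edge value
    return env.readCsvFile(edgesInputPath)
        .fieldDelimiter("\t")
        .types(Long.class, Long.class)
        .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
          @Override
          public Edge<Long, NullValue> map(Tuple2<Long, Long> t) {
            return new Edge<>(t.f0, t.f1, NullValue.getInstance());
          }
        });
  }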
Code example #3
 /**
  * Writes a graph to the file system as two CSV files (edges, vertices).
  * Note that the CSV sinks are lazy: the files are only written once the
  * surrounding program is executed.
  *
  * @param g The graph.
  * @param filePathEdges File path for the edges file.
  * @param filePathVertices File path for the vertices file.
  */
 @SuppressWarnings("rawtypes")
 public static void printGraph(Graph g, String filePathEdges, String filePathVertices) {
   try {
     g.getEdgesAsTuple3().writeAsCsv(filePathEdges, "\n", "\t", WriteMode.OVERWRITE);
     g.getVerticesAsTuple2().writeAsCsv(filePathVertices, "\n", "\t", WriteMode.OVERWRITE);
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
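
As the Javadoc notes, the CSV sinks are lazy; a caller must execute the program afterwards. A short usage sketch (the graph, env and paths are placeholders):

 printGraph(graph, "/tmp/edges.csv", "/tmp/vertices.csv");
 // file sinks are lazy, so trigger the execution explicitly
 env.execute("write graph to CSV");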
Code example #4
 /**
  * Builds a printable listing of the edges and vertices of a graph <code>g</code>.
  *
  * @param g A graph.
  * @return A string listing all edges and vertices of <code>g</code>.
  */
 @SuppressWarnings("rawtypes")
 public static String showEdgesVertices(Graph g) {
   StringBuilder str = new StringBuilder();
   try {
     str.append("Edges:\n");
     for (Object edge : g.getEdges().collect()) str.append(" ").append(edge).append("\n");
     str.append("\nVertices:\n");
     for (Object vertex : g.getVertices().collect()) str.append(" ").append(vertex).append("\n");
   } catch (Exception e1) {
     e1.printStackTrace();
   }
   return str.toString();
 }
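
A one-line usage sketch (the graph variable is a placeholder). Since collect() pulls both datasets to the client, this is only suitable for small graphs:

 System.out.println(showEdgesVertices(graph));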
Code example #5
File: EdgeTargetDegree.java (Project: yjshen/flink)
  @Override
  public DataSet<Edge<K, Tuple2<EV, LongValue>>> run(Graph<K, VV, EV> input) throws Exception {
    // t, d(t)
    DataSet<Vertex<K, LongValue>> vertexDegrees =
        input.run(
            new VertexDegree<K, VV, EV>()
                .setReduceOnTargetId(!reduceOnSourceId)
                .setParallelism(parallelism));

    // s, t, d(t)
    return input
        .getEdges()
        .join(vertexDegrees, JoinHint.REPARTITION_HASH_SECOND)
        .where(1)
        .equalTo(0)
        .with(new JoinEdgeWithVertexDegree<K, EV>())
        .setParallelism(parallelism)
        .name("Edge target degree");
  }
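
For reference, a minimal sketch of running this algorithm; the concrete type parameters and the graph variable are assumptions:

    // annotate every edge (s, t) with the degree d(t) of its target vertex
    DataSet<Edge<LongValue, Tuple2<NullValue, LongValue>>> edgesWithTargetDegree =
        graph.run(new EdgeTargetDegree<LongValue, NullValue, NullValue>());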
Code example #6
  @Test
  public void testWithSimpleGraph() throws Exception {
    Graph<IntValue, NullValue, NullValue> graph =
        undirectedSimpleGraph.run(new MaximumDegree<IntValue, NullValue, NullValue>(3));

    String expectedVerticesResult =
        "(0,(null))\n" + "(1,(null))\n" + "(2,(null))\n" + "(4,(null))\n" + "(5,(null))";

    TestBaseUtils.compareResultAsText(graph.getVertices().collect(), expectedVerticesResult);

    String expectedEdgesResult =
        "(0,1,(null))\n"
            + "(0,2,(null))\n"
            + "(1,0,(null))\n"
            + "(1,2,(null))\n"
            + "(2,0,(null))\n"
            + "(2,1,(null))";

    TestBaseUtils.compareResultAsText(graph.getEdges().collect(), expectedEdgesResult);
  }
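
MaximumDegree(3) removes every vertex whose degree exceeds 3 together with its incident edges, which is why vertex 3 is missing from the expected results above. A hedged variant of the same call, assuming the setBroadcastHighDegreeVertices option documented for this class is available in the Flink version in use:

    // broadcast the (typically small) set of high-degree vertices to the join
    Graph<IntValue, NullValue, NullValue> filtered =
        undirectedSimpleGraph.run(
            new MaximumDegree<IntValue, NullValue, NullValue>(3)
                .setBroadcastHighDegreeVertices(true));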
Code example #7
File: VertexMetrics.java (Project: George5814/flink)
  @Override
  public VertexMetrics<K, VV, EV> run(Graph<K, VV, EV> input) throws Exception {
    super.run(input);

    DataSet<Vertex<K, Degrees>> vertexDegree =
        input.run(
            new VertexDegrees<K, VV, EV>()
                .setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
                .setParallelism(parallelism));

    vertexDegree.output(new VertexMetricsHelper<K>(id)).name("Vertex metrics");

    return this;
  }
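
This run method only registers a sink, so nothing is computed until the environment executes. A minimal usage sketch under the GraphAnalytic contract, with concrete type parameters as assumptions:

    // execute() runs the program and returns the computed statistics
    VertexMetrics<LongValue, NullValue, NullValue> vertexMetrics =
        new VertexMetrics<LongValue, NullValue, NullValue>().run(graph);
    System.out.println(vertexMetrics.execute());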
Code example #8
File: Exclusion.java (Project: AnikaGross/gradoop)
  /** {@inheritDoc} */
  @Override
  protected LogicalGraph<VD, ED, GD> executeInternal(
      LogicalGraph<VD, ED, GD> firstGraph, LogicalGraph<VD, ED, GD> secondGraph) {
    final Long newGraphID = FlinkConstants.EXCLUDE_GRAPH_ID;

    Graph<Long, VD, ED> graph1 = firstGraph.getGellyGraph();
    Graph<Long, VD, ED> graph2 = secondGraph.getGellyGraph();

    // union both vertex sets, group by vertex id, and keep only those groups
    // that contain exactly one vertex belonging to the graph the operator is
    // called on
    DataSet<Vertex<Long, VD>> newVertexSet =
        graph1
            .getVertices()
            .union(graph2.getVertices())
            .groupBy(new KeySelectors.VertexKeySelector<VD>())
            .reduceGroup(new VertexGroupReducer<VD>(1L, firstGraph.getId(), secondGraph.getId()))
            .map(new VertexToGraphUpdater<VD>(newGraphID));

    JoinFunction<Edge<Long, ED>, Vertex<Long, VD>, Edge<Long, ED>> joinFunc =
        new JoinFunction<Edge<Long, ED>, Vertex<Long, VD>, Edge<Long, ED>>() {
          @Override
          public Edge<Long, ED> join(Edge<Long, ED> leftTuple, Vertex<Long, VD> rightTuple)
              throws Exception {
            return leftTuple;
          }
        };

    // In exclude(), we are only interested in edges whose source and target
    // vertices both survive the exclusion. Thus, we join the edges of the
    // first graph with the new vertex set on source and target ids.
    DataSet<Edge<Long, ED>> newEdgeSet =
        graph1
            .getEdges()
            .join(newVertexSet)
            .where(new KeySelectors.EdgeSourceVertexKeySelector<ED>())
            .equalTo(new KeySelectors.VertexKeySelector<VD>())
            .with(joinFunc)
            .join(newVertexSet)
            .where(new KeySelectors.EdgeTargetVertexKeySelector<ED>())
            .equalTo(new KeySelectors.VertexKeySelector<VD>())
            .with(joinFunc)
            .map(new EdgeToGraphUpdater<ED>(newGraphID));

    return LogicalGraph.fromGraph(
        Graph.fromDataSet(newVertexSet, newEdgeSet, graph1.getContext()),
        firstGraph.getGraphDataFactory().createGraphData(newGraphID),
        firstGraph.getVertexDataFactory(),
        firstGraph.getEdgeDataFactory(),
        firstGraph.getGraphDataFactory());
  }
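
A hedged sketch of a call site. The binary operator's public entry point is not shown above; the method name exclude on LogicalGraph is an assumption suggested by the class name:

    // keep the parts of g1 that do not also belong to g2
    LogicalGraph<VD, ED, GD> difference = g1.exclude(g2);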
Code example #9
  @Test
  public void testTranslation() {
    try {
      final String ITERATION_NAME = "Test Name";

      final String AGGREGATOR_NAME = "AggregatorName";

      final String BC_SET_GATHER_NAME = "gather messages";

      final String BC_SET_SUM_NAME = "sum updates";

      final String BC_SET_APPLY_NAME = "apply updates";

      final int NUM_ITERATIONS = 13;

      final int ITERATION_PARALLELISM = 77;

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Long> bcGather = env.fromElements(1L);
      DataSet<Long> bcSum = env.fromElements(1L);
      DataSet<Long> bcApply = env.fromElements(1L);

      DataSet<Vertex<Long, Long>> result;

      // ------------ construct the test program ------------------
      {
        DataSet<Edge<Long, NullValue>> edges =
            env.fromElements(new Tuple3<Long, Long, NullValue>(1L, 2L, NullValue.getInstance()))
                .map(new Tuple3ToEdgeMap<Long, NullValue>());

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

        GSAConfiguration parameters = new GSAConfiguration();

        parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
        parameters.setName(ITERATION_NAME);
        parameters.setParallelism(ITERATION_PARALLELISM);
        parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
        parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
        parameters.addBroadcastSetForApplyFunction(BC_SET_APPLY_NAME, bcApply);

        result =
            graph
                .runGatherSumApplyIteration(
                    new GatherNeighborIds(),
                    new SelectMinId(),
                    new UpdateComponentId(),
                    NUM_ITERATIONS,
                    parameters)
                .getVertices();

        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
      }

      // ------------- validate the java program ----------------

      assertTrue(result instanceof DeltaIterationResultSet);

      DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
      DeltaIteration<?, ?> iteration = (DeltaIteration<?, ?>) resultSet.getIterationHead();

      // check the basic iteration properties
      assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
      assertArrayEquals(new int[] {0}, resultSet.getKeyPositions());
      assertEquals(ITERATION_PARALLELISM, iteration.getParallelism());
      assertEquals(ITERATION_NAME, iteration.getName());

      assertEquals(
          AGGREGATOR_NAME,
          iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

      // validate that the semantic properties are set as they should
      TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
          (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
      assertTrue(
          solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
      assertTrue(
          solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

      SingleInputUdfOperator<?, ?, ?> sumReduce =
          (SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
      SingleInputUdfOperator<?, ?, ?> gatherMap =
          (SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

      // validate that the broadcast sets are forwarded
      assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
      assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
      assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APPLY_NAME));
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
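
The InitVertices, GatherNeighborIds, SelectMinId and UpdateComponentId helpers are referenced above but not shown. InitVertices must be a MapFunction<Long, Long> to satisfy Graph.fromDataSet; a minimal sketch consistent with connected components, where each vertex starts as its own component:

  @SuppressWarnings("serial")
  private static final class InitVertices implements MapFunction<Long, Long> {
    @Override
    public Long map(Long vertexId) {
      // every vertex initially forms its own component
      return vertexId;
    }
  }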
Code example #10
File: HBaseWriter.java (Project: AnikaGross/gradoop)
  /**
   * Converts runtime vertex data to persistent vertex data (includes incoming and outgoing edge
   * data) and writes it to HBase.
   *
   * @param epgmDatabase EPGM database instance
   * @param vertexDataHandler vertex data handler
   * @param persistentVertexDataFactory persistent vertex data factory
   * @param vertexDataTableName HBase vertex data table name
   * @param <PVD> persistent vertex data type
   * @throws Exception if the conversion program cannot be built or the HBase output job cannot be created
   */
  public <PVD extends PersistentVertexData<ED>> void writeVertexData(
      final EPGMDatabase<VD, ED, GD> epgmDatabase,
      final VertexDataHandler<VD, ED> vertexDataHandler,
      final PersistentVertexDataFactory<VD, ED, PVD> persistentVertexDataFactory,
      final String vertexDataTableName)
      throws Exception {

    final Graph<Long, VD, ED> graph = epgmDatabase.getDatabaseGraph().getGellyGraph();

    // group edges by source vertex id (vertex-id, [out-edge-data])
    GroupReduceOperator<Edge<Long, ED>, Tuple2<Long, Set<ED>>> vertexToOutgoingEdges =
        graph
            .getEdges()
            .groupBy(0) // group by source vertex id
            .reduceGroup(
                new GroupReduceFunction<Edge<Long, ED>, Tuple2<Long, Set<ED>>>() {
                  @Override
                  public void reduce(
                      Iterable<Edge<Long, ED>> edgeIterable,
                      Collector<Tuple2<Long, Set<ED>>> collector)
                      throws Exception {
                    Set<ED> outgoingEdgeData = Sets.newHashSet();
                    Long vertexId = null;
                    boolean initialized = false;
                    for (Edge<Long, ED> edgeData : edgeIterable) {
                      if (!initialized) {
                        vertexId = edgeData.getSource();
                        initialized = true;
                      }
                      outgoingEdgeData.add(edgeData.getValue());
                    }
                    collector.collect(new Tuple2<>(vertexId, outgoingEdgeData));
                  }
                });

    // group edges by target vertex id (vertex-id, [in-edge-data])
    GroupReduceOperator<Edge<Long, ED>, Tuple2<Long, Set<ED>>> vertexToIncomingEdges =
        graph
            .getEdges()
            .groupBy(1) // group by target vertex id
            .reduceGroup(
                new GroupReduceFunction<Edge<Long, ED>, Tuple2<Long, Set<ED>>>() {
                  @Override
                  public void reduce(
                      Iterable<Edge<Long, ED>> edgeIterable,
                      Collector<Tuple2<Long, Set<ED>>> collector)
                      throws Exception {
                    Set<ED> incomingEdgeData = Sets.newHashSet();
                    Long vertexId = null;
                    boolean initialized = false;
                    for (Edge<Long, ED> edgeData : edgeIterable) {
                      if (!initialized) {
                        vertexId = edgeData.getTarget();
                        initialized = true;
                      }
                      incomingEdgeData.add(edgeData.getValue());
                    }
                    collector.collect(new Tuple2<>(vertexId, incomingEdgeData));
                  }
                });

    // co-group (vertex-data) with (vertex-id, [out-edge-data]) to simulate left
    // outer join
    DataSet<Tuple2<Vertex<Long, VD>, Set<ED>>> vertexDataWithOutgoingEdges =
        graph
            .getVertices()
            .coGroup(vertexToOutgoingEdges)
            .where(0)
            .equalTo(0)
            .with(
                new CoGroupFunction<
                    Vertex<Long, VD>, Tuple2<Long, Set<ED>>, Tuple2<Vertex<Long, VD>, Set<ED>>>() {
                  @Override
                  public void coGroup(
                      Iterable<Vertex<Long, VD>> vertexIterable,
                      Iterable<Tuple2<Long, Set<ED>>> outEdgesIterable,
                      Collector<Tuple2<Vertex<Long, VD>, Set<ED>>> collector)
                      throws Exception {
                    Vertex<Long, VD> vertex = null;
                    Set<ED> outgoingEdgeData = null;
                    // read vertex data from left group
                    for (Vertex<Long, VD> v : vertexIterable) {
                      vertex = v;
                    }
                    // read outgoing edge data from the right group (may be empty)
                    for (Tuple2<Long, Set<ED>> oEdges : outEdgesIterable) {
                      outgoingEdgeData = oEdges.f1;
                    }
                    collector.collect(new Tuple2<>(vertex, outgoingEdgeData));
                  }
                });

    // co-group (vertex-data, (vertex-id, [out-edge-data])) with (vertex-id,
    // [in-edge-data]) to simulate left outer join
    DataSet<PersistentVertexData<ED>> persistentVertexDataSet =
        vertexDataWithOutgoingEdges
            .coGroup(vertexToIncomingEdges)
            .where("0.0")
            .equalTo(0)
            .with(new PersistentVertexDataCoGroupFunction<>(persistentVertexDataFactory));

    // write (persistent-vertex-data) to HBase table
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, vertexDataTableName);

    persistentVertexDataSet
        .map(new HBaseWriter.VertexDataToHBaseMapper<>(vertexDataHandler))
        .output(new HadoopOutputFormat<>(new TableOutputFormat<LongWritable>(), job));
  }
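
The coGroup in both steps above simulates a left outer join (a plain join would silently drop vertices without incoming or outgoing edges). A stripped-down sketch of the same pattern on hypothetical (id, name) and (id, score) tuple datasets:

    // emit every left element, paired with null where no right partner exists
    DataSet<Tuple2<String, Integer>> leftOuterJoined =
        left.coGroup(right)
            .where(0)
            .equalTo(0)
            .with(
                new CoGroupFunction<
                    Tuple2<Long, String>, Tuple2<Long, Integer>, Tuple2<String, Integer>>() {
                  @Override
                  public void coGroup(
                      Iterable<Tuple2<Long, String>> lefts,
                      Iterable<Tuple2<Long, Integer>> rights,
                      Collector<Tuple2<String, Integer>> out) throws Exception {
                    Integer score = null;
                    for (Tuple2<Long, Integer> r : rights) {
                      score = r.f1;
                    }
                    for (Tuple2<Long, String> l : lefts) {
                      out.collect(new Tuple2<>(l.f1, score));
                    }
                  }
                });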
Code example #11
File: HBaseWriter.java (Project: AnikaGross/gradoop)
  /**
   * Converts runtime graph data to persistent graph data (including vertex and edge identifiers)
   * and writes it to HBase.
   *
   * @param epgmDatabase EPGM database instance
   * @param graphDataHandler graph data handler
   * @param persistentGraphDataFactory persistent graph data factory
   * @param graphDataTableName HBase graph data table name
   * @param <PGD> persistent graph data type
   * @throws IOException if the Hadoop job for the HBase output cannot be created
   */
  public <PGD extends PersistentGraphData> void writeGraphData(
      final EPGMDatabase<VD, ED, GD> epgmDatabase,
      final GraphDataHandler<GD> graphDataHandler,
      final PersistentGraphDataFactory<GD, PGD> persistentGraphDataFactory,
      final String graphDataTableName)
      throws IOException {
    final Graph<Long, VD, ED> graph = epgmDatabase.getDatabaseGraph().getGellyGraph();

    // build (graph-id, vertex-id) tuples from vertices
    FlatMapOperator<Vertex<Long, VD>, Tuple2<Long, Long>> graphIdToVertexId =
        graph
            .getVertices()
            .flatMap(
                new FlatMapFunction<Vertex<Long, VD>, Tuple2<Long, Long>>() {
                  @Override
                  public void flatMap(
                      Vertex<Long, VD> vertex, Collector<Tuple2<Long, Long>> collector)
                      throws Exception {
                    if (vertex.getValue().getGraphCount() > 0) {
                      for (Long graphID : vertex.getValue().getGraphs()) {
                        collector.collect(new Tuple2<>(graphID, vertex.f0));
                      }
                    }
                  }
                });

    // build (graph-id, edge-id) tuples from edges
    FlatMapOperator<Edge<Long, ED>, Tuple2<Long, Long>> graphIdToEdgeId =
        graph
            .getEdges()
            .flatMap(
                new FlatMapFunction<Edge<Long, ED>, Tuple2<Long, Long>>() {
                  @Override
                  public void flatMap(Edge<Long, ED> edge, Collector<Tuple2<Long, Long>> collector)
                      throws Exception {
                    if (edge.getValue().getGraphCount() > 0) {
                      for (Long graphId : edge.getValue().getGraphs()) {
                        collector.collect(new Tuple2<>(graphId, edge.getValue().getId()));
                      }
                    }
                  }
                });

    // co-group (graph-id, vertex-id) and (graph-id, edge-id) tuples to
    // (graph-id, {vertex-id}, {edge-id}) triples
    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple3<Long, Set<Long>, Set<Long>>>
        graphToVertexIdsAndEdgeIds =
            graphIdToVertexId
                .coGroup(graphIdToEdgeId)
                .where(0)
                .equalTo(0)
                .with(
                    new CoGroupFunction<
                        Tuple2<Long, Long>,
                        Tuple2<Long, Long>,
                        Tuple3<Long, Set<Long>, Set<Long>>>() {

                      @Override
                      public void coGroup(
                          Iterable<Tuple2<Long, Long>> graphToVertexIds,
                          Iterable<Tuple2<Long, Long>> graphToEdgeIds,
                          Collector<Tuple3<Long, Set<Long>, Set<Long>>> collector)
                          throws Exception {
                        Set<Long> vertexIds = Sets.newHashSet();
                        Set<Long> edgeIds = Sets.newHashSet();
                        Long graphId = null;
                        boolean initialized = false;
                        for (Tuple2<Long, Long> graphToVertexTuple : graphToVertexIds) {
                          if (!initialized) {
                            graphId = graphToVertexTuple.f0;
                            initialized = true;
                          }
                          vertexIds.add(graphToVertexTuple.f1);
                        }
                        for (Tuple2<Long, Long> graphToEdgeTuple : graphToEdgeIds) {
                          edgeIds.add(graphToEdgeTuple.f1);
                        }
                        collector.collect(new Tuple3<>(graphId, vertexIds, edgeIds));
                      }
                    });

    // join (graph-id, {vertex-id}, {edge-id}) triples with
    // (graph-id, graph-data) and build (persistent-graph-data)
    JoinOperator.EquiJoin<
            Tuple3<Long, Set<Long>, Set<Long>>, Subgraph<Long, GD>, PersistentGraphData>
        persistentGraphDataSet =
            graphToVertexIdsAndEdgeIds
                .join(epgmDatabase.getCollection().getSubgraphs())
                .where(0)
                .equalTo(0)
                .with(new PersistentGraphDataJoinFunction<>(persistentGraphDataFactory));

    // write (persistent-graph-data) to HBase table
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, graphDataTableName);

    persistentGraphDataSet
        .map(new HBaseWriter.GraphDataToHBaseMapper<>(graphDataHandler))
        .output(new HadoopOutputFormat<>(new TableOutputFormat<LongWritable>(), job));
  }
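
One last hedged note that applies to all three write methods: Job.getInstance() starts from a default Hadoop configuration, which does not necessarily carry the HBase connection settings (ZooKeeper quorum and friends). A common way to supply them, assuming hbase-site.xml is on the classpath:

    // seed the job with the HBase client configuration (hbase-site.xml)
    Job job = Job.getInstance(HBaseConfiguration.create());
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, graphDataTableName);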