@SuppressWarnings("serial") @Test public void testRun() throws Exception { int verticesCount = 5000; int edgesCount = verticesCount * 2; ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment(); environment.getConfig().disableSysoutLogging(); Graph<Long, Long, Long> graph = GraphGenerator.generateGraph(verticesCount, edgesCount, environment); PCConnectedComponents<Long, Long> algo = new PCConnectedComponents<>(verticesCount); List<Tuple2<Long, Long>> result = algo.run(graph) .map( new RichMapFunction<Vertex<Long, Long>, Tuple2<Long, Long>>() { @Override public Tuple2<Long, Long> map(Vertex<Long, Long> value) throws Exception { return new Tuple2<>(value.getId(), value.getValue()); } }) .collect(); ConnectedComponentsData.checkOddEvenResult(result); }
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println(
            "Usage: KMeans --points <path> --centroids <path> --output <path> --iterations <n>");

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig()
            .setGlobalJobParameters(params); // make parameters available in the web interface

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids =
            points
                    // compute closest centroid for each point
                    .map(new SelectNearestCenter())
                    .withBroadcastSet(loop, "centroids")
                    // count and sum point coordinates for each centroid
                    .map(new CountAppender())
                    .groupBy(0)
                    .reduce(new CentroidAccumulator())
                    // compute new centroids from point counts and coordinate sums
                    .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints =
            points
                    // assign points to final clusters
                    .map(new SelectNearestCenter())
                    .withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
        clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("KMeans Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        clusteredPoints.print();
    }
}
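// The broadcast-set pattern above hinges on SelectNearestCenter: it receives the current
// centroids as a broadcast variable and tags each point with the id of its nearest center.
// A sketch along the lines of the standard Flink KMeans example follows; Point, Centroid,
// and euclideanDistance are the example's own data types, assumed to be in scope.

import java.util.Collection;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;

/** Determines the closest cluster center for a data point. */
public static final class SelectNearestCenter
        extends RichMapFunction<Point, Tuple2<Integer, Point>> {

    private Collection<Centroid> centroids;

    /** Reads the centroid values from the broadcast variable into a collection. */
    @Override
    public void open(Configuration parameters) throws Exception {
        this.centroids = getRuntimeContext().getBroadcastVariable("centroids");
    }

    @Override
    public Tuple2<Integer, Point> map(Point p) throws Exception {
        double minDistance = Double.MAX_VALUE;
        int closestCentroidId = -1;

        // check all cluster centers
        for (Centroid centroid : centroids) {
            double distance = p.euclideanDistance(centroid);
            if (distance < minDistance) {
                minDistance = distance;
                closestCentroidId = centroid.id;
            }
        }

        // emit a new record with the center id and the data point
        return new Tuple2<>(closestCentroidId, p);
    }
}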
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    System.out.println(
            "Usage: WebLogAnalysis --documents <path> --ranks <path> --visits <path> --output <path>");

    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents.filter(new FilterDocByKeyWords()).project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks.filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits.filter(new FilterVisitsByDate()).project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks).where(0).equalTo(1).projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a
    // certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits).where(1).equalTo(0).with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
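// The final anti-join works because coGroup hands each group of rank records to the function
// together with the (possibly empty) group of matching visits; ranks are emitted only when no
// visit matched. A sketch of AntiJoinVisits along the lines of the standard Flink
// WebLogAnalysis example:

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

/** Emits a rank record only if the co-grouped visits side is empty. */
public static class AntiJoinVisits
        implements CoGroupFunction<
                Tuple3<Integer, String, Integer>, Tuple1<String>, Tuple3<Integer, String, Integer>> {

    @Override
    public void coGroup(
            Iterable<Tuple3<Integer, String, Integer>> ranks,
            Iterable<Tuple1<String>> visits,
            Collector<Tuple3<Integer, String, Integer>> out) {

        // if there is no entry in the visits relation, the URL was not visited
        if (!visits.iterator().hasNext()) {
            for (Tuple3<Integer, String, Integer> next : ranks) {
                out.collect(next);
            }
        }
    }
}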
protected void internalNonBlockingStart() throws IOException {

    // translate the program into an optimized Flink job graph
    Plan plan = env.createProgramPlan();
    Optimizer optimizer =
            new Optimizer(new DataStatistics(), new org.apache.flink.configuration.Configuration());
    OptimizedPlan optimizedPlan = optimizer.compile(plan);

    final JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);
    for (String jarPath : classPath) {
        jobGraph.addJar(new Path(jarPath));
    }

    jobID = jobGraph.getJobID();
    accumulatorCache.setJobID(jobID);

    if (isLocalExecution()) {
        flowStep.logInfo("Executing in local mode.");

        startLocalCluster();

        org.apache.flink.configuration.Configuration config =
                new org.apache.flink.configuration.Configuration();
        config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, localCluster.hostname());
        client = new Client(config);
        client.setPrintStatusDuringExecution(env.getConfig().isSysoutLoggingEnabled());
    } else {
        flowStep.logInfo("Executing in cluster mode.");

        // ship the JAR that contains this class along with the job
        try {
            String path =
                    this.getClass().getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
            jobGraph.addJar(new Path(path));
            classPath.add(path);
        } catch (URISyntaxException e) {
            throw new IOException("Could not add the submission JAR as a dependency.", e);
        }

        client = ((ContextEnvironment) env).getClient();
    }

    // build a user-code class loader from all class path entries
    List<URL> fileList = new ArrayList<URL>(classPath.size());
    for (String path : classPath) {
        URL url;
        try {
            url = new URL(path);
        } catch (MalformedURLException e) {
            url = new URL("file://" + path);
        }
        fileList.add(url);
    }

    final ClassLoader loader =
            JobWithJars.buildUserCodeClassLoader(
                    fileList, Collections.<URL>emptyList(), getClass().getClassLoader());

    accumulatorCache.setClient(client);

    // submit the job asynchronously; the future completes when the job finishes
    final Callable<JobSubmissionResult> callable =
            new Callable<JobSubmissionResult>() {
                @Override
                public JobSubmissionResult call() throws Exception {
                    return client.runBlocking(jobGraph, loader);
                }
            };

    jobSubmission = executorService.submit(callable);

    flowStep.logInfo("submitted Flink job: " + jobID);
}
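// Since internalNonBlockingStart() only submits the job, completion has to be observed through
// the jobSubmission future. A minimal hypothetical companion method (the name awaitJobCompletion
// is mine, not from the source) sketching how a caller in the same class could wait for the
// result:

import java.io.IOException;
import java.util.concurrent.ExecutionException;

import org.apache.flink.api.common.JobSubmissionResult;

/** Hypothetical helper: blocks until the asynchronously submitted job finishes. */
private JobSubmissionResult awaitJobCompletion() throws IOException {
    try {
        // completes when client.runBlocking(...) returns in the submission thread
        return jobSubmission.get();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for Flink job " + jobID + ".", e);
    } catch (ExecutionException e) {
        throw new IOException("Flink job " + jobID + " failed.", e.getCause());
    }
}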