/**
 * Returns a qualified path object, i.e. a path whose missing scheme and authority
 * are filled in from the given file system.
 *
 * @param fs the FileSystem that should be used to obtain the current working directory
 * @return the qualified path object
 */
public Path makeQualified(FileSystem fs) {
    Path path = this;
    if (!isAbsolute()) {
        path = new Path(fs.getWorkingDirectory(), this);
    }

    final URI pathUri = path.toUri();
    final URI fsUri = fs.getUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();

    // Already qualified, or the file system has no authority to contribute.
    if (scheme != null && (authority != null || fsUri.getAuthority() == null)) {
        return path;
    }

    if (scheme == null) {
        scheme = fsUri.getScheme();
    }

    if (authority == null) {
        authority = fsUri.getAuthority();
        if (authority == null) {
            authority = "";
        }
    }

    return new Path(scheme + "://" + authority + pathUri.getPath());
}
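As a standalone illustration of the qualification rule above (not part of the original class), the following sketch reproduces the same scheme/authority fill-in logic with plain java.net.URI; the file system URI "hdfs://namenode:9000/" and the input paths are made-up example values.

import java.net.URI;

// Standalone sketch: a path lacking a scheme and authority borrows both from the
// file system URI, mirroring the behavior of makeQualified() above.
public class QualifyPathExample {

    static String qualify(URI pathUri, URI fsUri) {
        String scheme = pathUri.getScheme();
        String authority = pathUri.getAuthority();

        // Already qualified, or the file system has no authority to contribute.
        if (scheme != null && (authority != null || fsUri.getAuthority() == null)) {
            return pathUri.toString();
        }
        if (scheme == null) {
            scheme = fsUri.getScheme();
        }
        if (authority == null) {
            authority = fsUri.getAuthority() != null ? fsUri.getAuthority() : "";
        }
        return scheme + "://" + authority + pathUri.getPath();
    }

    public static void main(String[] args) {
        URI fsUri = URI.create("hdfs://namenode:9000/"); // example file system URI
        // prints "hdfs://namenode:9000/user/data/input.txt"
        System.out.println(qualify(URI.create("/user/data/input.txt"), fsUri));
        // already fully qualified, returned unchanged
        System.out.println(qualify(URI.create("hdfs://other:8020/data/in.txt"), fsUri));
    }
}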
@Test
public void testHDFS() {
    Path file = new Path(hdfsURI + hdPath);
    org.apache.hadoop.fs.Path result = new org.apache.hadoop.fs.Path(hdfsURI + "/result");

    try {
        FileSystem fs = file.getFileSystem();
        Assert.assertTrue("Must be HadoopFileSystem", fs instanceof HadoopFileSystem);

        DopOneTestEnvironment.setAsContext();
        try {
            WordCount.main(new String[] {file.toString(), result.toString()});
        } catch (Throwable t) {
            t.printStackTrace();
            Assert.fail("Test failed with " + t.getMessage());
        } finally {
            DopOneTestEnvironment.unsetAsContext();
        }

        Assert.assertTrue("No result file present", hdfs.exists(result));

        // validate output:
        org.apache.hadoop.fs.FSDataInputStream inStream = hdfs.open(result);
        StringWriter writer = new StringWriter();
        IOUtils.copy(inStream, writer);
        String resultString = writer.toString();

        Assert.assertEquals("hdfs 10\n" + "hello 10\n", resultString);
        inStream.close();
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Error in test: " + e.getMessage());
    }
}
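As a side note to the validation step above, a small HDFS file can also be read into a String with try-with-resources, so the stream is closed even if an assertion fails mid-read. This is a minimal standalone sketch (not part of the test suite), assuming hadoop-common on the classpath and an example address of hdfs://localhost:9000.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: read the "/result" file from an example HDFS cluster into a String.
public class ReadHdfsResultSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
        StringBuilder content = new StringBuilder();
        try (FSDataInputStream in = fs.open(new Path("/result"));
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        System.out.println(content);
    }
}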
public CompletedFuture(Path entry) {
    try {
        LocalFileSystem fs = (LocalFileSystem) entry.getFileSystem();
        result = entry.isAbsolute()
                ? new Path(entry.toUri().getPath())
                : new Path(fs.getWorkingDirectory(), entry);
    } catch (Exception e) {
        throw new RuntimeException(
                "DistributedCache supports only local files for Collection Environments");
    }
}
@Override
public FSDataInputStream open(final Path f, final int bufferSize) throws IOException {
    final org.apache.hadoop.fs.FSDataInputStream fdis =
            this.fs.open(new org.apache.hadoop.fs.Path(f.toString()), bufferSize);
    return new DistributedDataInputStream(fdis);
}
private static void ensureLocalFileDeleted(Path path) {
    URI uri = path.toUri();
    if ("file".equals(uri.getScheme())) {
        File file = new File(uri.getPath());
        assertFalse("file not properly deleted", file.exists());
    } else {
        throw new IllegalArgumentException("not a local path");
    }
}
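The "file" scheme check above relies on java.net.URI exposing the local path via getPath(). A minimal standalone sketch of that mapping, using only the JDK and a made-up temp file, follows.

import java.io.File;
import java.io.IOException;
import java.net.URI;

// Standalone sketch: a "file" scheme URI maps back to a local java.io.File via getPath().
public class LocalUriSketch {
    public static void main(String[] args) throws IOException {
        File tmp = File.createTempFile("flink-test", ".tmp"); // example file
        URI uri = tmp.toURI();                                // e.g. file:/tmp/flink-test123.tmp
        System.out.println(uri.getScheme());                  // prints "file"
        System.out.println(new File(uri.getPath()).exists()); // true: resolves to the same file
        tmp.delete();
        System.out.println(new File(uri.getPath()).exists()); // false after deletion
    }
}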
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
    final org.apache.hadoop.fs.FileStatus[] hadoopFiles =
            this.fs.listStatus(new org.apache.hadoop.fs.Path(f.toString()));
    final FileStatus[] files = new FileStatus[hadoopFiles.length];

    // Convert types
    for (int i = 0; i < files.length; i++) {
        files[i] = new DistributedFileStatus(hadoopFiles[i]);
    }

    return files;
}
@Override
public FSDataOutputStream create(
        final Path f,
        final boolean overwrite,
        final int bufferSize,
        final short replication,
        final long blockSize)
        throws IOException {
    final org.apache.hadoop.fs.FSDataOutputStream fdos =
            this.fs.create(
                    new org.apache.hadoop.fs.Path(f.toString()),
                    overwrite,
                    bufferSize,
                    replication,
                    blockSize);
    return new DistributedDataOutputStream(fdos);
}
@Override
public boolean rename(final Path src, final Path dst) throws IOException {
    return this.fs.rename(
            new org.apache.hadoop.fs.Path(src.toString()),
            new org.apache.hadoop.fs.Path(dst.toString()));
}
@Override
public boolean mkdirs(final Path f) throws IOException {
    return this.fs.mkdirs(new org.apache.hadoop.fs.Path(f.toString()));
}
@Override
public boolean delete(final Path f, final boolean recursive) throws IOException {
    return this.fs.delete(new org.apache.hadoop.fs.Path(f.toString()), recursive);
}
@Override
public FSDataOutputStream create(final Path f, final boolean overwrite) throws IOException {
    final org.apache.hadoop.fs.FSDataOutputStream fdos =
            this.fs.create(new org.apache.hadoop.fs.Path(f.toString()), overwrite);
    return new DistributedDataOutputStream(fdos);
}
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
    org.apache.hadoop.fs.FileStatus status =
            this.fs.getFileStatus(new org.apache.hadoop.fs.Path(f.toString()));
    return new DistributedFileStatus(status);
}
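Every wrapper method above converts a Flink path to a Hadoop path by rendering it to a String and re-parsing it. The following standalone sketch shows that this round trip preserves scheme, authority, and path; it assumes hadoop-common on the classpath, and the HDFS URI is an example value.

import java.net.URI;
import org.apache.hadoop.fs.Path;

// Standalone sketch of the String-based conversion used by the wrapper methods above.
public class PathConversionSketch {
    public static void main(String[] args) {
        String flinkPath = "hdfs://namenode:9000/user/data/input.txt"; // what f.toString() yields
        Path hadoopPath = new Path(flinkPath);

        URI uri = hadoopPath.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:9000
        System.out.println(uri.getPath());      // /user/data/input.txt
    }
}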
public static JobGraph getJobGraph(String[] args) throws Exception {

    int degreeOfParallelism = 2;
    String pageWithRankInputPath = ""; // "file://" + PlayConstants.PLAY_DIR + "test-inputs/danglingpagerank/pageWithRank";
    String adjacencyListInputPath = ""; // "file://" + PlayConstants.PLAY_DIR +
    // "test-inputs/danglingpagerank/adjacencylists";
    String outputPath =
            Path.constructTestURI(
                    CustomCompensatableDanglingPageRankWithCombiner.class, "flink_iterations");
    int minorConsumer = 2;
    int matchMemory = 5;
    int coGroupSortMemory = 5;
    int numIterations = 25;
    long numVertices = 5;
    long numDanglingVertices = 1;

    String failingWorkers = "1";
    int failingIteration = 2;
    double messageLoss = 0.75;

    if (args.length >= 14) {
        degreeOfParallelism = Integer.parseInt(args[0]);
        pageWithRankInputPath = args[1];
        adjacencyListInputPath = args[2];
        outputPath = args[3];
        // [4] is config path
        minorConsumer = Integer.parseInt(args[5]);
        matchMemory = Integer.parseInt(args[6]);
        coGroupSortMemory = Integer.parseInt(args[7]);
        numIterations = Integer.parseInt(args[8]);
        numVertices = Long.parseLong(args[9]);
        numDanglingVertices = Long.parseLong(args[10]);
        failingWorkers = args[11];
        failingIteration = Integer.parseInt(args[12]);
        messageLoss = Double.parseDouble(args[13]);
    }

    int totalMemoryConsumption = 3 * minorConsumer + 2 * coGroupSortMemory + matchMemory;

    JobGraph jobGraph = new JobGraph("CompensatableDanglingPageRank");

    // --------------- the inputs ---------------------

    // page rank input
    JobInputVertex pageWithRankInput =
            JobGraphUtils.createInput(
                    new CustomImprovedDanglingPageRankInputFormat(),
                    pageWithRankInputPath,
                    "DanglingPageWithRankInput",
                    jobGraph,
                    degreeOfParallelism);
    TaskConfig pageWithRankInputConfig = new TaskConfig(pageWithRankInput.getConfiguration());
    pageWithRankInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    pageWithRankInputConfig.setOutputComparator(vertexWithRankAndDanglingComparator, 0);
    pageWithRankInputConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    pageWithRankInputConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));

    // edges as adjacency list
    JobInputVertex adjacencyListInput =
            JobGraphUtils.createInput(
                    new CustomImprovedAdjacencyListInputFormat(),
                    adjacencyListInputPath,
                    "AdjacencyListInput",
                    jobGraph,
                    degreeOfParallelism);
    TaskConfig adjacencyListInputConfig = new TaskConfig(adjacencyListInput.getConfiguration());
    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(vertexWithAdjacencyListSerializer);
    adjacencyListInputConfig.setOutputComparator(vertexWithAdjacencyListComparator, 0);

    // --------------- the head ---------------------
    JobTaskVertex head =
            JobGraphUtils.createTask(
                    IterationHeadPactTask.class, "IterationHead", jobGraph, degreeOfParallelism);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);

    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    headConfig.setInputComparator(vertexWithRankAndDanglingComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setRelativeMemoryInput(0, (double) minorConsumer / totalMemoryConsumption);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);

    // back channel / iterations
    headConfig.setRelativeBackChannelMemory((double) minorConsumer / totalMemoryConsumption);

    // output into iteration
    headConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);

    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);

    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);

    // the driver
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(
            new UserCodeClassWrapper<CustomCompensatingMap>(CustomCompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    headConfig.addIterationAggregator(
            CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // --------------- the join ---------------------
    JobTaskVertex intermediate =
            JobGraphUtils.createTask(
                    IterationIntermediatePactTask.class,
                    "IterationIntermediate",
                    jobGraph,
                    degreeOfParallelism);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
    // intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setRelativeMemoryDriver((double) matchMemory / totalMemoryConsumption);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    intermediateConfig.setInputSerializer(vertexWithAdjacencyListSerializer, 1);
    intermediateConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    intermediateConfig.setDriverComparator(vertexWithAdjacencyListComparator, 1);
    intermediateConfig.setDriverPairComparator(matchComparator);
    intermediateConfig.setOutputSerializer(vertexWithRankSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    intermediateConfig.setStubWrapper(
            new UserCodeClassWrapper<CustomCompensatableDotProductMatch>(
                    CustomCompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter(
            "compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // the combiner and the output
    TaskConfig combinerConfig = new TaskConfig(new Configuration());
    combinerConfig.addInputToGroup(0);
    combinerConfig.setInputSerializer(vertexWithRankSerializer, 0);
    combinerConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP_COMBINE);
    combinerConfig.setDriverComparator(vertexWithRankComparator, 0);
    combinerConfig.setRelativeMemoryDriver((double) coGroupSortMemory / totalMemoryConsumption);
    combinerConfig.setOutputSerializer(vertexWithRankSerializer);
    combinerConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    combinerConfig.setOutputComparator(vertexWithRankComparator, 0);
    combinerConfig.setStubWrapper(
            new UserCodeClassWrapper<CustomRankCombiner>(CustomRankCombiner.class));
    intermediateConfig.addChainedTask(
            SynchronousChainedCombineDriver.class, combinerConfig, "Combiner");

    // ---------------- the tail (co group) --------------------
    JobTaskVertex tail =
            JobGraphUtils.createTask(
                    IterationTailPactTask.class, "IterationTail", jobGraph, degreeOfParallelism);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();

    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    tailConfig.setInputSerializer(vertexWithRankSerializer, 1);
    tailConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    tailConfig.setDriverComparator(vertexWithRankComparator, 1);
    tailConfig.setDriverPairComparator(coGroupComparator);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setRelativeInputMaterializationMemory(
            0, (double) minorConsumer / totalMemoryConsumption);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(vertexWithRankComparator, 1);
    tailConfig.setRelativeMemoryInput(1, (double) coGroupSortMemory / totalMemoryConsumption);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);
    tailConfig.addIterationAggregator(
            CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // output
    tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    tailConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);

    // the stub
    tailConfig.setStubWrapper(
            new UserCodeClassWrapper<CustomCompensatableDotProductCoGroup>(
                    CustomCompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter(
            "pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // --------------- the output ---------------------
    JobOutputVertex output =
            JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", degreeOfParallelism);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    outputConfig.setStubWrapper(
            new UserCodeClassWrapper<CustomPageWithRankOutFormat>(CustomPageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);

    // --------------- the auxiliaries ---------------------
    JobOutputVertex fakeTailOutput =
            JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput", degreeOfParallelism);

    JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, degreeOfParallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(
            CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());
    syncConfig.setConvergenceCriterion(
            CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME,
            new DiffL1NormConvergenceCriterion());
    syncConfig.setIterationId(ITERATION_ID);

    // --------------- the wiring ---------------------
    JobGraphUtils.connect(
            pageWithRankInput, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, intermediate, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    JobGraphUtils.connect(
            adjacencyListInput, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, tail, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, degreeOfParallelism);

    JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(
            tail, fakeTailOutput, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);

    fakeTailOutput.setVertexToShareInstancesWith(tail);
    tail.setVertexToShareInstancesWith(head);
    pageWithRankInput.setVertexToShareInstancesWith(head);
    adjacencyListInput.setVertexToShareInstancesWith(head);
    intermediate.setVertexToShareInstancesWith(head);
    output.setVertexToShareInstancesWith(head);
    sync.setVertexToShareInstancesWith(head);

    return jobGraph;
}
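For reference, the 14-element argument layout parsed at the top of getJobGraph can be written out explicitly. The sketch below is illustrative only; every value is an arbitrary example (the defaults from the method are reused where they exist), and index [4] is a configuration path that the parsing code skips.

// Illustrative only: the argument vector expected by getJobGraph(String[]).
public class PageRankArgsExample {
    public static void main(String[] unused) {
        String[] exampleArgs = {
            "2",                               // [0]  degreeOfParallelism
            "hdfs:///pagerank/pageWithRank",   // [1]  pageWithRankInputPath (example)
            "hdfs:///pagerank/adjacencylists", // [2]  adjacencyListInputPath (example)
            "hdfs:///pagerank/output",         // [3]  outputPath (example)
            "/path/to/config",                 // [4]  config path, not read by getJobGraph
            "2",                               // [5]  minorConsumer
            "5",                               // [6]  matchMemory
            "5",                               // [7]  coGroupSortMemory
            "25",                              // [8]  numIterations
            "5",                               // [9]  numVertices
            "1",                               // [10] numDanglingVertices
            "1",                               // [11] failingWorkers
            "2",                               // [12] failingIteration
            "0.75"                             // [13] messageLoss
        };
        // Would be consumed as:
        // JobGraph jobGraph = CustomCompensatableDanglingPageRankWithCombiner.getJobGraph(exampleArgs);
        System.out.println(String.join(" ", exampleArgs));
    }
}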
@Override
public FSDataInputStream open(final Path f) throws IOException {
    final org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(f.toString());
    final org.apache.hadoop.fs.FSDataInputStream fdis = fs.open(path);
    return new HadoopDataInputStream(fdis);
}