Example #1
  /**
   * Returns a qualified path object.
   *
   * @param fs the FileSystem that should be used to obtain the current working directory
   * @return the qualified path object
   */
  public Path makeQualified(FileSystem fs) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(fs.getWorkingDirectory(), this);
    }

    final URI pathUri = path.toUri();
    final URI fsUri = fs.getUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();

    if (scheme != null && (authority != null || fsUri.getAuthority() == null)) {
      return path;
    }

    if (scheme == null) {
      scheme = fsUri.getScheme();
    }

    if (authority == null) {
      authority = fsUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    return new Path(scheme + ":" + "//" + authority + pathUri.getPath());
  }
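A minimal usage sketch of makeQualified, assuming a FileSystem obtained via FileSystem.get(URI); the URI and path strings here are illustrative, not taken from the listing above:

  // Hypothetical usage (URI and path strings are illustrative): a relative
  // path picks up the scheme, authority and working directory of the file
  // system, e.g. hdfs://namenode:9000/user/flink/data/input.txt.
  public Path qualifyExample() throws IOException {
    FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:9000/"));
    Path relative = new Path("data/input.txt");
    return relative.makeQualified(fs);
  }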
Example #2
  @Test
  public void testHDFS() {

    Path file = new Path(hdfsURI + hdPath);
    org.apache.hadoop.fs.Path result = new org.apache.hadoop.fs.Path(hdfsURI + "/result");
    try {
      FileSystem fs = file.getFileSystem();
      Assert.assertTrue("Must be HadoopFileSystem", fs instanceof HadoopFileSystem);

      DopOneTestEnvironment.setAsContext();
      try {
        WordCount.main(new String[] {file.toString(), result.toString()});
      } catch (Throwable t) {
        t.printStackTrace();
        Assert.fail("Test failed with " + t.getMessage());
      } finally {
        DopOneTestEnvironment.unsetAsContext();
      }

      Assert.assertTrue("No result file present", hdfs.exists(result));

      // validate output:
      org.apache.hadoop.fs.FSDataInputStream inStream = hdfs.open(result);
      StringWriter writer = new StringWriter();
      IOUtils.copy(inStream, writer);
      String resultString = writer.toString();

      Assert.assertEquals("hdfs 10\n" + "hello 10\n", resultString);
      inStream.close();

    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail("Error in test: " + e.getMessage());
    }
  }
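A hedged variant of the validation step in the test above: the same calls, but with try-with-resources so the input stream is closed even when the assertion fails (hdfs and result as in the test):

  // Sketch only: identical calls to the test above, stream closed automatically.
  try (org.apache.hadoop.fs.FSDataInputStream in = hdfs.open(result)) {
    StringWriter writer = new StringWriter();
    IOUtils.copy(in, writer);
    Assert.assertEquals("hdfs 10\n" + "hello 10\n", writer.toString());
  }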
Example #3
 public CompletedFuture(Path entry) {
   try {
     LocalFileSystem fs = (LocalFileSystem) entry.getFileSystem();
     result =
         entry.isAbsolute()
             ? new Path(entry.toUri().getPath())
             : new Path(fs.getWorkingDirectory(), entry);
   } catch (Exception e) {
     // Preserve the original cause so the failure is diagnosable.
     throw new RuntimeException(
         "DistributedCache supports only local files for Collection Environments", e);
   }
 }
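A small usage sketch for the constructor above; the cache entry path is illustrative and assumes a local (LocalFileSystem) environment:

 // Hypothetical usage: a relative entry is resolved against the local working
 // directory; a non-local file system would trigger the RuntimeException above.
 CompletedFuture cached = new CompletedFuture(new Path("cache/words.txt"));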
  @Override
  public FSDataInputStream open(final Path f, final int bufferSize) throws IOException {

    final org.apache.hadoop.fs.FSDataInputStream fdis =
        this.fs.open(new org.apache.hadoop.fs.Path(f.toString()), bufferSize);

    return new DistributedDataInputStream(fdis);
  }
Example #5
 private static void ensureLocalFileDeleted(Path path) {
   URI uri = path.toUri();
   if ("file".equals(uri.getScheme())) {
     File file = new File(uri.getPath());
     assertFalse("file not properly deleted", file.exists());
   } else {
     throw new IllegalArgumentException("not a local path");
   }
 }
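A usage sketch for ensureLocalFileDeleted; the path below is illustrative and must use the file scheme, otherwise the IllegalArgumentException above is thrown:

 // Hypothetical usage: assert that a staged local copy was removed after cleanup.
 ensureLocalFileDeleted(new Path("file:///tmp/flink-staging/part-0"));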
  @Override
  public FileStatus[] listStatus(final Path f) throws IOException {
    final org.apache.hadoop.fs.FileStatus[] hadoopFiles =
        this.fs.listStatus(new org.apache.hadoop.fs.Path(f.toString()));
    final FileStatus[] files = new FileStatus[hadoopFiles.length];

    // Convert types
    for (int i = 0; i < files.length; i++) {
      files[i] = new DistributedFileStatus(hadoopFiles[i]);
    }

    return files;
  }
 @Override
 public FSDataOutputStream create(
     final Path f,
     final boolean overwrite,
     final int bufferSize,
     final short replication,
     final long blockSize)
     throws IOException {
   final org.apache.hadoop.fs.FSDataOutputStream fdos =
       this.fs.create(
           new org.apache.hadoop.fs.Path(f.toString()),
           overwrite,
           bufferSize,
           replication,
           blockSize);
   return new DistributedDataOutputStream(fdos);
 }
 @Override
 public boolean rename(final Path src, final Path dst) throws IOException {
   return this.fs.rename(
       new org.apache.hadoop.fs.Path(src.toString()),
       new org.apache.hadoop.fs.Path(dst.toString()));
 }
 @Override
 public boolean mkdirs(final Path f) throws IOException {
   return this.fs.mkdirs(new org.apache.hadoop.fs.Path(f.toString()));
 }
 @Override
 public boolean delete(final Path f, final boolean recursive) throws IOException {
   return this.fs.delete(new org.apache.hadoop.fs.Path(f.toString()), recursive);
 }
 @Override
 public FSDataOutputStream create(final Path f, final boolean overwrite) throws IOException {
   final org.apache.hadoop.fs.FSDataOutputStream fdos =
       this.fs.create(new org.apache.hadoop.fs.Path(f.toString()), overwrite);
   return new DistributedDataOutputStream(fdos);
 }
 @Override
 public FileStatus getFileStatus(final Path f) throws IOException {
   org.apache.hadoop.fs.FileStatus status =
       this.fs.getFileStatus(new org.apache.hadoop.fs.Path(f.toString()));
   return new DistributedFileStatus(status);
 }
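The methods above all follow the same delegation pattern: convert the Flink Path to an org.apache.hadoop.fs.Path, call the wrapped Hadoop FileSystem, and wrap the result where needed. A sketch of how a further method could look under that pattern; an exists(Path) override on this wrapper is an assumption, not part of the listing above:

 // Sketch only: a hypothetical exists() following the same delegation pattern.
 public boolean exists(final Path f) throws IOException {
   return this.fs.exists(new org.apache.hadoop.fs.Path(f.toString()));
 }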
  public static JobGraph getJobGraph(String[] args) throws Exception {

    int degreeOfParallelism = 2;
    String pageWithRankInputPath =
        ""; // "file://" + PlayConstants.PLAY_DIR + "test-inputs/danglingpagerank/pageWithRank";
    String adjacencyListInputPath =
        ""; // "file://" + PlayConstants.PLAY_DIR + "test-inputs/danglingpagerank/adjacencylists";
    String outputPath =
        Path.constructTestURI(
            CustomCompensatableDanglingPageRankWithCombiner.class, "flink_iterations");
    int minorConsumer = 2;
    int matchMemory = 5;
    int coGroupSortMemory = 5;
    int numIterations = 25;
    long numVertices = 5;
    long numDanglingVertices = 1;

    String failingWorkers = "1";
    int failingIteration = 2;
    double messageLoss = 0.75;

    if (args.length >= 14) {
      degreeOfParallelism = Integer.parseInt(args[0]);
      pageWithRankInputPath = args[1];
      adjacencyListInputPath = args[2];
      outputPath = args[3];
      // [4] is config path
      minorConsumer = Integer.parseInt(args[5]);
      matchMemory = Integer.parseInt(args[6]);
      coGroupSortMemory = Integer.parseInt(args[7]);
      numIterations = Integer.parseInt(args[8]);
      numVertices = Long.parseLong(args[9]);
      numDanglingVertices = Long.parseLong(args[10]);
      failingWorkers = args[11];
      failingIteration = Integer.parseInt(args[12]);
      messageLoss = Double.parseDouble(args[13]);
    }

    int totalMemoryConsumption = 3 * minorConsumer + 2 * coGroupSortMemory + matchMemory;

    JobGraph jobGraph = new JobGraph("CompensatableDanglingPageRank");

    // --------------- the inputs ---------------------

    // page rank input
    JobInputVertex pageWithRankInput =
        JobGraphUtils.createInput(
            new CustomImprovedDanglingPageRankInputFormat(),
            pageWithRankInputPath,
            "DanglingPageWithRankInput",
            jobGraph,
            degreeOfParallelism);
    TaskConfig pageWithRankInputConfig = new TaskConfig(pageWithRankInput.getConfiguration());
    pageWithRankInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    pageWithRankInputConfig.setOutputComparator(vertexWithRankAndDanglingComparator, 0);
    pageWithRankInputConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    pageWithRankInputConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));

    // edges as adjacency list
    JobInputVertex adjacencyListInput =
        JobGraphUtils.createInput(
            new CustomImprovedAdjacencyListInputFormat(),
            adjacencyListInputPath,
            "AdjancencyListInput",
            jobGraph,
            degreeOfParallelism);
    TaskConfig adjacencyListInputConfig = new TaskConfig(adjacencyListInput.getConfiguration());
    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(vertexWithAdjacencyListSerializer);
    adjacencyListInputConfig.setOutputComparator(vertexWithAdjacencyListComparator, 0);

    // --------------- the head ---------------------
    JobTaskVertex head =
        JobGraphUtils.createTask(
            IterationHeadPactTask.class, "IterationHead", jobGraph, degreeOfParallelism);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);

    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    headConfig.setInputComparator(vertexWithRankAndDanglingComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setRelativeMemoryInput(0, (double) minorConsumer / totalMemoryConsumption);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);

    // back channel / iterations
    headConfig.setRelativeBackChannelMemory((double) minorConsumer / totalMemoryConsumption);

    // output into iteration
    headConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);

    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);

    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);

    // the driver
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(
        new UserCodeClassWrapper<CustomCompensatingMap>(CustomCompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    headConfig.addIterationAggregator(
        CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // --------------- the join ---------------------

    JobTaskVertex intermediate =
        JobGraphUtils.createTask(
            IterationIntermediatePactTask.class,
            "IterationIntermediate",
            jobGraph,
            degreeOfParallelism);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
    //		intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setRelativeMemoryDriver((double) matchMemory / totalMemoryConsumption);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    intermediateConfig.setInputSerializer(vertexWithAdjacencyListSerializer, 1);
    intermediateConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    intermediateConfig.setDriverComparator(vertexWithAdjacencyListComparator, 1);
    intermediateConfig.setDriverPairComparator(matchComparator);

    intermediateConfig.setOutputSerializer(vertexWithRankSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);

    intermediateConfig.setStubWrapper(
        new UserCodeClassWrapper<CustomCompensatableDotProductMatch>(
            CustomCompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter(
        "compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // the combiner and the output
    TaskConfig combinerConfig = new TaskConfig(new Configuration());
    combinerConfig.addInputToGroup(0);
    combinerConfig.setInputSerializer(vertexWithRankSerializer, 0);
    combinerConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP_COMBINE);
    combinerConfig.setDriverComparator(vertexWithRankComparator, 0);
    combinerConfig.setRelativeMemoryDriver((double) coGroupSortMemory / totalMemoryConsumption);
    combinerConfig.setOutputSerializer(vertexWithRankSerializer);
    combinerConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    combinerConfig.setOutputComparator(vertexWithRankComparator, 0);
    combinerConfig.setStubWrapper(
        new UserCodeClassWrapper<CustomRankCombiner>(CustomRankCombiner.class));
    intermediateConfig.addChainedTask(
        SynchronousChainedCombineDriver.class, combinerConfig, "Combiner");

    // ---------------- the tail (co group) --------------------

    JobTaskVertex tail =
        JobGraphUtils.createTask(
            IterationTailPactTask.class, "IterationTail", jobGraph, degreeOfParallelism);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();

    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    tailConfig.setInputSerializer(vertexWithRankSerializer, 1);
    tailConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    tailConfig.setDriverComparator(vertexWithRankComparator, 1);
    tailConfig.setDriverPairComparator(coGroupComparator);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setRelativeInputMaterializationMemory(
        0, (double) minorConsumer / totalMemoryConsumption);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(vertexWithRankComparator, 1);
    tailConfig.setRelativeMemoryInput(1, (double) coGroupSortMemory / totalMemoryConsumption);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);
    tailConfig.addIterationAggregator(
        CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());

    // output
    tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    tailConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);

    // the stub
    tailConfig.setStubWrapper(
        new UserCodeClassWrapper<CustomCompensatableDotProductCoGroup>(
            CustomCompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter(
        "pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // --------------- the output ---------------------

    JobOutputVertex output =
        JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", degreeOfParallelism);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    outputConfig.setStubWrapper(
        new UserCodeClassWrapper<CustomPageWithRankOutFormat>(CustomPageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);

    // --------------- the auxiliaries ---------------------

    JobOutputVertex fakeTailOutput =
        JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput", degreeOfParallelism);

    JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, degreeOfParallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(
        CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new PageRankStatsAggregator());
    syncConfig.setConvergenceCriterion(
        CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, new DiffL1NormConvergenceCriterion());
    syncConfig.setIterationId(ITERATION_ID);

    // --------------- the wiring ---------------------

    JobGraphUtils.connect(
        pageWithRankInput, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, intermediate, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(
        adjacencyListInput, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, tail, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, degreeOfParallelism);

    JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(
        tail, fakeTailOutput, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);

    fakeTailOutput.setVertexToShareInstancesWith(tail);
    tail.setVertexToShareInstancesWith(head);
    pageWithRankInput.setVertexToShareInstancesWith(head);
    adjacencyListInput.setVertexToShareInstancesWith(head);
    intermediate.setVertexToShareInstancesWith(head);
    output.setVertexToShareInstancesWith(head);
    sync.setVertexToShareInstancesWith(head);

    return jobGraph;
  }
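A minimal usage sketch: with fewer than 14 arguments the defaults defined at the top of the method are used, so the graph can be built directly. It assumes getJobGraph is a static member of CustomCompensatableDanglingPageRankWithCombiner, as the outputPath line suggests; submission of the graph is outside the scope of this listing:

  // Sketch only: build the job graph with the built-in default parameters.
  JobGraph graph = CustomCompensatableDanglingPageRankWithCombiner.getJobGraph(new String[0]);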
Example #14
 @Override
 public FSDataInputStream open(final Path f) throws IOException {
   final org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(f.toString());
   final org.apache.hadoop.fs.FSDataInputStream fdis = fs.open(path);
   return new HadoopDataInputStream(fdis);
 }