Exemplo n.º 1
0
  public BaseDataPublisher(State state) throws IOException {
    super(state);
    this.closer = Closer.create();
    Configuration conf = new Configuration();

    // Add all job configuration properties so they are picked up by Hadoop
    for (String key : this.getState().getPropertyNames()) {
      conf.set(key, this.getState().getProp(key));
    }

    this.numBranches = this.getState().getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);

    this.fileSystemByBranches = Lists.newArrayListWithCapacity(this.numBranches);
    this.publisherFinalDirOwnerGroupsByBranches = Lists.newArrayListWithCapacity(this.numBranches);
    this.permissions = Lists.newArrayListWithCapacity(this.numBranches);

    // Get a FileSystem instance for each branch
    for (int i = 0; i < this.numBranches; i++) {
      URI uri =
          URI.create(
              this.getState()
                  .getProp(
                      ForkOperatorUtils.getPropertyNameForBranch(
                          ConfigurationKeys.WRITER_FILE_SYSTEM_URI, this.numBranches, i),
                      ConfigurationKeys.LOCAL_FS_URI));
      this.fileSystemByBranches.add(FileSystem.get(uri, conf));

      // The group(s) will be applied to the final publisher output directory(ies)
      this.publisherFinalDirOwnerGroupsByBranches.add(
          Optional.fromNullable(
              this.getState()
                  .getProp(
                      ForkOperatorUtils.getPropertyNameForBranch(
                          ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR_GROUP, this.numBranches, i))));

      // The permission(s) will be applied to all directories created by the publisher,
      // which do NOT include directories created by the writer and moved by the publisher.
      // The permissions of those directories are controlled by writer.file.permissions and
      // writer.dir.permissions.
      this.permissions.add(
          new FsPermission(
              state.getPropAsShortWithRadix(
                  ForkOperatorUtils.getPropertyNameForBranch(
                      ConfigurationKeys.DATA_PUBLISHER_PERMISSIONS, numBranches, i),
                  FsPermission.getDefault().toShort(),
                  ConfigurationKeys.PERMISSION_PARSING_RADIX)));
    }

    this.parallelRunnerThreads =
        state.getPropAsInt(
            ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
            ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS);
  }