Exemplo n.º 1
0
  public static DataSegment serializeOutIndex(
      final DataSegment segmentTemplate,
      final Configuration configuration,
      final Progressable progressable,
      final TaskAttemptID taskAttemptID,
      final File mergedBase,
      final Path segmentBasePath)
      throws IOException {
    final FileSystem outputFS = FileSystem.get(segmentBasePath.toUri(), configuration);
    final Path tmpPath =
        new Path(segmentBasePath, String.format("index.zip.%d", taskAttemptID.getId()));
    final AtomicLong size = new AtomicLong(0L);
    final DataPusher zipPusher =
        (DataPusher)
            RetryProxy.create(
                DataPusher.class,
                new DataPusher() {
                  @Override
                  public long push() throws IOException {
                    try (OutputStream outputStream =
                        outputFS.create(tmpPath, true, DEFAULT_FS_BUFFER_SIZE, progressable)) {
                      size.set(zipAndCopyDir(mergedBase, outputStream, progressable));
                      outputStream.flush();
                    } catch (IOException | RuntimeException exception) {
                      log.error(exception, "Exception in retry loop");
                      throw exception;
                    }
                    return -1;
                  }
                },
                RetryPolicies.exponentialBackoffRetry(
                    NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS));
    zipPusher.push();
    log.info("Zipped %,d bytes to [%s]", size.get(), tmpPath.toUri());

    final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip");
    final URI indexOutURI = finalIndexZipFilePath.toUri();
    final ImmutableMap<String, Object> loadSpec;
    // TODO: Make this a part of Pushers or Pullers
    switch (outputFS.getScheme()) {
      case "hdfs":
        loadSpec = ImmutableMap.<String, Object>of("type", "hdfs", "path", indexOutURI.toString());
        break;
      case "s3":
      case "s3n":
        loadSpec =
            ImmutableMap.<String, Object>of(
                "type", "s3_zip",
                "bucket", indexOutURI.getHost(),
                "key", indexOutURI.getPath().substring(1) // remove the leading "/"
                );
        break;
      case "file":
        loadSpec = ImmutableMap.<String, Object>of("type", "local", "path", indexOutURI.getPath());
        break;
      default:
        throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme());
    }
    final DataSegment finalSegment =
        segmentTemplate
            .withLoadSpec(loadSpec)
            .withSize(size.get())
            .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase));

    if (!renameIndexFiles(outputFS, tmpPath, finalIndexZipFilePath)) {
      throw new IOException(
          String.format(
              "Unable to rename [%s] to [%s]",
              tmpPath.toUri().toString(), finalIndexZipFilePath.toUri().toString()));
    }
    writeSegmentDescriptor(
        outputFS, finalSegment, new Path(segmentBasePath, "descriptor.json"), progressable);
    return finalSegment;
  }