Exemple #1
0
 public static Path makeSegmentOutputPath(
     Path basePath,
     FileSystem fileSystem,
     String dataSource,
     String version,
     Interval interval,
     int partitionNum) {
   Path outputPath = new Path(prependFSIfNullScheme(fileSystem, basePath), "./" + dataSource);
   if ("hdfs".equals(fileSystem.getScheme())) {
     outputPath =
         new Path(
             outputPath,
             String.format(
                 "./%s_%s",
                 interval.getStart().toString(ISODateTimeFormat.basicDateTime()),
                 interval.getEnd().toString(ISODateTimeFormat.basicDateTime())));
     outputPath = new Path(outputPath, version.replace(":", "_"));
   } else {
     outputPath =
         new Path(
             outputPath,
             String.format(
                 "./%s_%s", interval.getStart().toString(), interval.getEnd().toString()));
     outputPath = new Path(outputPath, String.format("./%s", version));
   }
   outputPath = new Path(outputPath, Integer.toString(partitionNum));
   return outputPath;
 }
 private void setUnreadableBySuperuserXattrib(Path p) throws IOException {
   if (fs.getScheme().toLowerCase().contains("hdfs")
       && intermediateEncryptionEnabled
       && !fs.getXAttrs(p).containsKey(UNREADABLE_BY_SUPERUSER_XATTRIB)) {
     fs.setXAttr(p, UNREADABLE_BY_SUPERUSER_XATTRIB, null, EnumSet.of(XAttrSetFlag.CREATE));
   }
 }
Exemple #3
0
  public static DataSegment serializeOutIndex(
      final DataSegment segmentTemplate,
      final Configuration configuration,
      final Progressable progressable,
      final TaskAttemptID taskAttemptID,
      final File mergedBase,
      final Path segmentBasePath)
      throws IOException {
    final FileSystem outputFS = FileSystem.get(segmentBasePath.toUri(), configuration);
    final Path tmpPath =
        new Path(segmentBasePath, String.format("index.zip.%d", taskAttemptID.getId()));
    final AtomicLong size = new AtomicLong(0L);
    final DataPusher zipPusher =
        (DataPusher)
            RetryProxy.create(
                DataPusher.class,
                new DataPusher() {
                  @Override
                  public long push() throws IOException {
                    try (OutputStream outputStream =
                        outputFS.create(tmpPath, true, DEFAULT_FS_BUFFER_SIZE, progressable)) {
                      size.set(zipAndCopyDir(mergedBase, outputStream, progressable));
                      outputStream.flush();
                    } catch (IOException | RuntimeException exception) {
                      log.error(exception, "Exception in retry loop");
                      throw exception;
                    }
                    return -1;
                  }
                },
                RetryPolicies.exponentialBackoffRetry(
                    NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS));
    zipPusher.push();
    log.info("Zipped %,d bytes to [%s]", size.get(), tmpPath.toUri());

    final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip");
    final URI indexOutURI = finalIndexZipFilePath.toUri();
    final ImmutableMap<String, Object> loadSpec;
    // TODO: Make this a part of Pushers or Pullers
    switch (outputFS.getScheme()) {
      case "hdfs":
        loadSpec = ImmutableMap.<String, Object>of("type", "hdfs", "path", indexOutURI.toString());
        break;
      case "s3":
      case "s3n":
        loadSpec =
            ImmutableMap.<String, Object>of(
                "type", "s3_zip",
                "bucket", indexOutURI.getHost(),
                "key", indexOutURI.getPath().substring(1) // remove the leading "/"
                );
        break;
      case "file":
        loadSpec = ImmutableMap.<String, Object>of("type", "local", "path", indexOutURI.getPath());
        break;
      default:
        throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme());
    }
    final DataSegment finalSegment =
        segmentTemplate
            .withLoadSpec(loadSpec)
            .withSize(size.get())
            .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase));

    if (!renameIndexFiles(outputFS, tmpPath, finalIndexZipFilePath)) {
      throw new IOException(
          String.format(
              "Unable to rename [%s] to [%s]",
              tmpPath.toUri().toString(), finalIndexZipFilePath.toUri().toString()));
    }
    writeSegmentDescriptor(
        outputFS, finalSegment, new Path(segmentBasePath, "descriptor.json"), progressable);
    return finalSegment;
  }