public static Path makeSegmentOutputPath(
    Path basePath,
    FileSystem fileSystem,
    String dataSource,
    String version,
    Interval interval,
    int partitionNum
)
{
  Path outputPath = new Path(prependFSIfNullScheme(fileSystem, basePath), "./" + dataSource);
  if ("hdfs".equals(fileSystem.getScheme())) {
    // HDFS does not allow ':' in path segments, so render the interval with the colon-free
    // basicDateTime() format and replace ':' in the version string.
    outputPath = new Path(
        outputPath,
        String.format(
            "./%s_%s",
            interval.getStart().toString(ISODateTimeFormat.basicDateTime()),
            interval.getEnd().toString(ISODateTimeFormat.basicDateTime())
        )
    );
    outputPath = new Path(outputPath, version.replace(":", "_"));
  } else {
    outputPath = new Path(
        outputPath,
        String.format("./%s_%s", interval.getStart().toString(), interval.getEnd().toString())
    );
    outputPath = new Path(outputPath, String.format("./%s", version));
  }
  outputPath = new Path(outputPath, Integer.toString(partitionNum));
  return outputPath;
}
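/*
 * Usage sketch (hypothetical helper, not part of the original class): illustrates the two
 * layouts produced above. On "hdfs" the interval bounds use the colon-free
 * ISODateTimeFormat.basicDateTime() rendering and ':' in the version is replaced with '_';
 * every other filesystem keeps the default Interval and version strings.
 */
private static Path exampleSegmentOutputPath(FileSystem fs)
{
  // For an HDFS filesystem this yields something like:
  //   /druid/segments/wikipedia/20140101T000000.000Z_20140102T000000.000Z/<version with ':' -> '_'>/0
  return makeSegmentOutputPath(
      new Path("/druid/segments"),
      fs,
      "wikipedia",
      new DateTime("2014-01-01T00:00:00.000Z").toString(),
      new Interval(new DateTime("2014-01-01T00:00:00.000Z"), new DateTime("2014-01-02T00:00:00.000Z")),
      0
  );
}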
private void setUnreadableBySuperuserXattrib(Path p) throws IOException
{
  // Only applies on HDFS, only when intermediate encryption is enabled, and only if the xattr
  // is not already present (setXAttr with CREATE fails if the attribute already exists).
  if (fs.getScheme().toLowerCase().contains("hdfs")
      && intermediateEncryptionEnabled
      && !fs.getXAttrs(p).containsKey(UNREADABLE_BY_SUPERUSER_XATTRIB)) {
    fs.setXAttr(p, UNREADABLE_BY_SUPERUSER_XATTRIB, null, EnumSet.of(XAttrSetFlag.CREATE));
  }
}
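/*
 * Usage sketch (hypothetical helper; assumes the same "fs" field used above). Because the guard
 * checks getXAttrs(p) before setting the attribute with XAttrSetFlag.CREATE, repeating the call
 * on the same path is a no-op rather than an error.
 */
private void writeProtectedIntermediateFile(Path p, byte[] contents) throws IOException
{
  try (FSDataOutputStream out = fs.create(p, true)) {
    out.write(contents);
  }
  setUnreadableBySuperuserXattrib(p);
}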
public static DataSegment serializeOutIndex(
    final DataSegment segmentTemplate,
    final Configuration configuration,
    final Progressable progressable,
    final TaskAttemptID taskAttemptID,
    final File mergedBase,
    final Path segmentBasePath
) throws IOException
{
  final FileSystem outputFS = FileSystem.get(segmentBasePath.toUri(), configuration);
  final Path tmpPath = new Path(segmentBasePath, String.format("index.zip.%d", taskAttemptID.getId()));
  final AtomicLong size = new AtomicLong(0L);

  // Zip the merged segment directory to a temporary, attempt-specific path, retrying with
  // exponential backoff on failure.
  final DataPusher zipPusher = (DataPusher) RetryProxy.create(
      DataPusher.class,
      new DataPusher()
      {
        @Override
        public long push() throws IOException
        {
          try (OutputStream outputStream = outputFS.create(tmpPath, true, DEFAULT_FS_BUFFER_SIZE, progressable)) {
            size.set(zipAndCopyDir(mergedBase, outputStream, progressable));
            outputStream.flush();
          }
          catch (IOException | RuntimeException exception) {
            log.error(exception, "Exception in retry loop");
            throw exception;
          }
          return -1;
        }
      },
      RetryPolicies.exponentialBackoffRetry(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
  );
  zipPusher.push();
  log.info("Zipped %,d bytes to [%s]", size.get(), tmpPath.toUri());

  final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip");
  final URI indexOutURI = finalIndexZipFilePath.toUri();
  final ImmutableMap<String, Object> loadSpec;

  // Build the loadSpec from the output filesystem's scheme.
  // TODO: Make this a part of Pushers or Pullers
  switch (outputFS.getScheme()) {
    case "hdfs":
      loadSpec = ImmutableMap.<String, Object>of("type", "hdfs", "path", indexOutURI.toString());
      break;
    case "s3":
    case "s3n":
      loadSpec = ImmutableMap.<String, Object>of(
          "type", "s3_zip",
          "bucket", indexOutURI.getHost(),
          "key", indexOutURI.getPath().substring(1) // remove the leading "/"
      );
      break;
    case "file":
      loadSpec = ImmutableMap.<String, Object>of("type", "local", "path", indexOutURI.getPath());
      break;
    default:
      throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme());
  }

  final DataSegment finalSegment = segmentTemplate
      .withLoadSpec(loadSpec)
      .withSize(size.get())
      .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase));

  // Move the temporary zip into its final location, then write the segment descriptor beside it.
  if (!renameIndexFiles(outputFS, tmpPath, finalIndexZipFilePath)) {
    throw new IOException(
        String.format(
            "Unable to rename [%s] to [%s]",
            tmpPath.toUri().toString(),
            finalIndexZipFilePath.toUri().toString()
        )
    );
  }
  writeSegmentDescriptor(outputFS, finalSegment, new Path(segmentBasePath, "descriptor.json"), progressable);
  return finalSegment;
}
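/*
 * Usage sketch (hypothetical wrapper; assumes it is called from a Hadoop reduce task whose
 * TaskAttemptContext supplies the Configuration, TaskAttemptID and Progressable). The returned
 * segment's loadSpec depends on the scheme of segmentBasePath's filesystem:
 *   hdfs      -> {"type": "hdfs",   "path": ".../index.zip"}
 *   s3 / s3n  -> {"type": "s3_zip", "bucket": <host>, "key": <path without leading '/'>}
 *   file      -> {"type": "local",  "path": ".../index.zip"}
 */
private static DataSegment pushMergedSegment(
    final TaskAttemptContext context,
    final DataSegment segmentTemplate,
    final File mergedSegmentDir,
    final Path segmentBasePath
) throws IOException
{
  return serializeOutIndex(
      segmentTemplate,
      context.getConfiguration(),
      context,                      // TaskAttemptContext is a Progressable
      context.getTaskAttemptID(),
      mergedSegmentDir,
      segmentBasePath
  );
}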