Example #1
 // TODO: Replace this whenever hadoop gets their act together and stops breaking with more recent
 // versions of Guava
 public static long unzipNoGuava(
     final Path zip,
     final Configuration configuration,
     final File outDir,
     final Progressable progressable)
     throws IOException {
   final DataPusher zipPusher =
       (DataPusher)
           RetryProxy.create(
               DataPusher.class,
               new DataPusher() {
                 @Override
                 public long push() throws IOException {
                   try {
                     final FileSystem fileSystem = zip.getFileSystem(configuration);
                     long size = 0L;
                     final byte[] buffer = new byte[1 << 13];
                     progressable.progress();
                     try (ZipInputStream in = new ZipInputStream(fileSystem.open(zip, 1 << 13))) {
                       for (ZipEntry entry = in.getNextEntry();
                           entry != null;
                           entry = in.getNextEntry()) {
                         final String fileName = entry.getName();
                         try (final OutputStream out =
                             new BufferedOutputStream(
                                 new FileOutputStream(
                                     outDir.getAbsolutePath() + File.separator + fileName),
                                 1 << 13)) {
                           for (int len = in.read(buffer); len >= 0; len = in.read(buffer)) {
                             progressable.progress();
                             if (len == 0) {
                               continue;
                             }
                             size += len;
                             out.write(buffer, 0, len);
                           }
                           out.flush();
                         }
                       }
                     }
                     progressable.progress();
                     return size;
                   } catch (IOException | RuntimeException exception) {
                     log.error(exception, "Exception in unzip retry loop");
                     throw exception;
                   }
                 }
               },
               RetryPolicies.exponentialBackoffRetry(
                   NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS));
   return zipPusher.push();
 }
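
Example #1 wraps the actual unzip work in Hadoop's RetryProxy so that transient filesystem failures are retried with exponential backoff; the same pattern reappears in Examples #3 and #4 below. The following sketch distills that pattern on its own, outside the Druid code. The Pusher interface and pushWithRetries helper are illustrative names, not part of the original classes.

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryProxy;

public class RetryPushSketch {
  // Stand-in for the DataPusher interface used in the examples: one method, may throw IOException.
  public interface Pusher {
    long push() throws IOException;
  }

  // Wrap a Pusher in a dynamic proxy that retries push() with exponential backoff,
  // mirroring the RetryProxy.create(...) calls in Examples #1, #3 and #4.
  public static long pushWithRetries(
      final Pusher delegate, final int numRetries, final long secondsBetweenRetries)
      throws IOException {
    final Pusher retryingPusher =
        (Pusher)
            RetryProxy.create(
                Pusher.class,
                delegate,
                RetryPolicies.exponentialBackoffRetry(
                    numRetries, secondsBetweenRetries, TimeUnit.SECONDS));
    return retryingPusher.push();
  }
}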
Example #2
     public boolean execute(
        CommandInterpreter interpreter, CommandLine commandLine, boolean batchMode) {
      final String localPathToDataDir = commandLine.getOptionValue('l');
      final String groupName = commandLine.getOptionValue('g');
      final String destDataDirId = commandLine.getOptionValue('d');
      final String jobIdString = commandLine.getOptionValue('j');
      final String partitionPatternString = commandLine.getOptionValue('p');
      final int partitionGroupNum = Integer.parseInt(commandLine.getOptionValue('n'));

      final String message =
          "Sending data to nodes (backgrounded):\n"
              + "\tlocalPathToDataDir="
              + localPathToDataDir
              + "\n"
              + "\tgroupName="
              + groupName
              + "\n"
              + "\tdestDataDirId="
              + destDataDirId
              + "\n"
              + "\tjobIdString="
              + jobIdString
              + "\n"
              + "\tpartitionPatternString="
              + partitionPatternString
              + "\n"
              + "\tpartitionGroupNum="
              + partitionGroupNum
              + "\n";
      interpreter.showMessage(message, batchMode);

      final String jobDirPostfix = DataPusher.getJobDirPostfix(jobIdString, destDataDirId);
      DataPusher.sendDataToNodes(
          console.getClusterDefinition(),
          groupName,
          jobDirPostfix,
          localPathToDataDir,
          Pattern.compile(partitionPatternString),
          partitionGroupNum,
          3,
          1);

      return true;
    }
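
Example #2 is a console command that reads its arguments through getOptionValue(char), which matches Apache Commons CLI's CommandLine API. Assuming that is the library in use (the CommandInterpreter and console types are specific to the original project and not shown here), the option letters it reads could be declared roughly as in this sketch; the descriptions are inferred from the variable names.

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class SendDataOptionsSketch {
  // Declares the single-letter options that execute() reads; all take an argument.
  // The descriptions are guesses, not taken from the original project.
  public static CommandLine parse(final String[] args) throws ParseException {
    final Options options = new Options();
    options.addOption("l", true, "local path to the data directory");
    options.addOption("g", true, "node group name");
    options.addOption("d", true, "destination data directory id");
    options.addOption("j", true, "job id");
    options.addOption("p", true, "partition pattern (a regular expression)");
    options.addOption("n", true, "partition group number (an integer)");
    return new DefaultParser().parse(options, args);
  }
}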
Example #3
 public static void writeSegmentDescriptor(
     final FileSystem outputFS,
     final DataSegment segment,
     final Path descriptorPath,
     final Progressable progressable)
     throws IOException {
   final DataPusher descriptorPusher =
       (DataPusher)
           RetryProxy.create(
               DataPusher.class,
               new DataPusher() {
                 @Override
                 public long push() throws IOException {
                   try {
                     progressable.progress();
                     if (outputFS.exists(descriptorPath)) {
                       if (!outputFS.delete(descriptorPath, false)) {
                         throw new IOException(
                             String.format("Failed to delete descriptor at [%s]", descriptorPath));
                       }
                     }
                     try (final OutputStream descriptorOut =
                         outputFS.create(
                             descriptorPath, true, DEFAULT_FS_BUFFER_SIZE, progressable)) {
                       HadoopDruidIndexerConfig.jsonMapper.writeValue(descriptorOut, segment);
                       descriptorOut.flush();
                     }
                   } catch (RuntimeException | IOException ex) {
                     log.info(ex, "Exception in descriptor pusher retry loop");
                     throw ex;
                   }
                   return -1;
                 }
               },
               RetryPolicies.exponentialBackoffRetry(
                   NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS));
   descriptorPusher.push();
 }
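
Example #3 reuses the DataPusher interface purely for its retry semantics: the pushed byte count is irrelevant here, so push() returns -1 and the caller ignores it. Stripped of the retry proxy and Progressable reporting, the core move is simply "serialize an object as JSON to a Hadoop path, overwriting whatever is there". A minimal sketch of that, using a plain Jackson ObjectMapper in place of Druid's HadoopDruidIndexerConfig.jsonMapper:

import java.io.IOException;
import java.io.OutputStream;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class JsonDescriptorSketch {
  private static final ObjectMapper MAPPER = new ObjectMapper();

  // Serialize any value as JSON to a Hadoop path, overwriting an existing file,
  // as writeSegmentDescriptor does for the descriptor (minus retries and progress reporting).
  public static void writeJson(final FileSystem fs, final Path path, final Object value)
      throws IOException {
    try (OutputStream out = fs.create(path, true)) {
      MAPPER.writeValue(out, value);
    }
  }
}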
Example #4
  public static DataSegment serializeOutIndex(
      final DataSegment segmentTemplate,
      final Configuration configuration,
      final Progressable progressable,
      final TaskAttemptID taskAttemptID,
      final File mergedBase,
      final Path segmentBasePath)
      throws IOException {
    final FileSystem outputFS = FileSystem.get(segmentBasePath.toUri(), configuration);
    final Path tmpPath =
        new Path(segmentBasePath, String.format("index.zip.%d", taskAttemptID.getId()));
    final AtomicLong size = new AtomicLong(0L);
    final DataPusher zipPusher =
        (DataPusher)
            RetryProxy.create(
                DataPusher.class,
                new DataPusher() {
                  @Override
                  public long push() throws IOException {
                    try (OutputStream outputStream =
                        outputFS.create(tmpPath, true, DEFAULT_FS_BUFFER_SIZE, progressable)) {
                      size.set(zipAndCopyDir(mergedBase, outputStream, progressable));
                      outputStream.flush();
                    } catch (IOException | RuntimeException exception) {
                      log.error(exception, "Exception in retry loop");
                      throw exception;
                    }
                    return -1;
                  }
                },
                RetryPolicies.exponentialBackoffRetry(
                    NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS));
    zipPusher.push();
    log.info("Zipped %,d bytes to [%s]", size.get(), tmpPath.toUri());

    final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip");
    final URI indexOutURI = finalIndexZipFilePath.toUri();
    final ImmutableMap<String, Object> loadSpec;
    // TODO: Make this a part of Pushers or Pullers
    switch (outputFS.getScheme()) {
      case "hdfs":
        loadSpec = ImmutableMap.<String, Object>of("type", "hdfs", "path", indexOutURI.toString());
        break;
      case "s3":
      case "s3n":
        loadSpec =
            ImmutableMap.<String, Object>of(
                "type", "s3_zip",
                "bucket", indexOutURI.getHost(),
                "key", indexOutURI.getPath().substring(1) // remove the leading "/"
                );
        break;
      case "file":
        loadSpec = ImmutableMap.<String, Object>of("type", "local", "path", indexOutURI.getPath());
        break;
      default:
        throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme());
    }
    final DataSegment finalSegment =
        segmentTemplate
            .withLoadSpec(loadSpec)
            .withSize(size.get())
            .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase));

    if (!renameIndexFiles(outputFS, tmpPath, finalIndexZipFilePath)) {
      throw new IOException(
          String.format(
              "Unable to rename [%s] to [%s]",
              tmpPath.toUri().toString(), finalIndexZipFilePath.toUri().toString()));
    }
    writeSegmentDescriptor(
        outputFS, finalSegment, new Path(segmentBasePath, "descriptor.json"), progressable);
    return finalSegment;
  }
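
Example #4 writes the zipped segment to an attempt-specific temporary path (index.zip.<attempt id>) and only then moves it over the final index.zip, so a failed or duplicate task attempt never leaves a partially written file under the final name; the descriptor from Example #3 is written last, after the rename succeeds. The body of renameIndexFiles is not shown on this page; a generic sketch of such a delete-then-rename step against the Hadoop FileSystem API might look like this:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameOverSketch {
  // Move an attempt-specific temp file over the final path, replacing any previous copy.
  // This is a guess at the shape of renameIndexFiles(), whose implementation is not shown above.
  public static void renameOver(final FileSystem fs, final Path tmp, final Path dest)
      throws IOException {
    if (fs.exists(dest) && !fs.delete(dest, false)) {
      throw new IOException(String.format("Failed to delete existing [%s]", dest));
    }
    if (!fs.rename(tmp, dest)) {
      throw new IOException(String.format("Unable to rename [%s] to [%s]", tmp, dest));
    }
  }
}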