Esempio n. 1
0
  /*
   * Fetch a file that is in a Hadoop file system. Return a local File.
   * Interruptible.
   */
  private File hdfsFetch(Path fromPath, Reporter reporter)
      throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
      for (FileStatus fStatus : fS.globStatus(fromPath)) {
        log.info("Copying " + fStatus.getPath() + " to " + toPath);
        long bytesSoFar = 0;

        FSDataInputStream iS = fS.open(fStatus.getPath());
        FSDataOutputStream oS = tofS.create(toPath);

        byte[] buffer = new byte[downloadBufferSize];

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to being able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            oS.close();
            cleanDirNoExceptions(toDir);
            throw new InterruptedException();
          }
          bytesSoFar += nRead;
          oS.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0l;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        oS.close();
        iS.close();
      }

      return toDir;
    } catch (ClosedByInterruptException e) {
      // This can be thrown by the method read.
      cleanDirNoExceptions(toDir);
      throw new InterruptedIOException();
    }
  }
Esempio n. 2
0
 protected void waitForOpenSlot(int maxProcessesOnNode, Reporter reporter)
     throws IOException, InterruptedException {
   while (true) {
     // sleep for a random length of time between 0 and 60 seconds
     long sleepTime = (long) (Math.random() * 1000 * 60);
     logger.info("sleeping for " + sleepTime);
     Thread.sleep(sleepTime);
     int numRunningMappers = getNumRunningMappers();
     logger.info("num running mappers: " + numRunningMappers);
     if (numRunningMappers < maxProcessesOnNode) return;
     reporter.progress();
   }
 }
Esempio n. 3
0
  public void map(
      LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    if (this.output == null) {
      this.output = output;
    }
    if (this.reporter == null) {
      this.reporter = reporter;
    }

    String line = value.toString();
    String[] reads = line.split("\n");
    splitRead(key, reads[0], s1FileWriter);
    splitRead(key, reads[1], getRead2FileWriter());

    reporter.progress();
  }
Esempio n. 4
0
  /** In case of interrupted, written file is not deleted. */
  private void copyFile(File sourceFile, File destFile, Reporter reporter)
      throws IOException, InterruptedException {
    if (!destFile.exists()) {
      destFile.createNewFile();
    }
    FileChannel source = null;
    FileChannel destination = null;

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    FileInputStream iS = null;
    FileOutputStream oS = null;

    try {
      iS = new FileInputStream(sourceFile);
      oS = new FileOutputStream(destFile);
      source = iS.getChannel();
      destination = oS.getChannel();
      long bytesSoFar = 0;
      long reportingBytesSoFar = 0;
      long size = source.size();

      int transferred = 0;

      while (bytesSoFar < size) {
        // Needed to being able to be interrupted at any moment.
        if (Thread.interrupted()) {
          throw new InterruptedException();
        }

        // Casting to int here is safe since we will transfer at most "downloadBufferSize" bytes.
        // This is done on purpose for being able to implement Throttling.
        transferred = (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize);
        bytesSoFar += transferred;
        reportingBytesSoFar += transferred;
        throttler.incrementAndThrottle(transferred);
        if (reportingBytesSoFar >= bytesToReportProgress) {
          reporter.progress(reportingBytesSoFar);
          reportingBytesSoFar = 0l;
        }
      }

      if (reporter != null) {
        reporter.progress(reportingBytesSoFar);
      }

    } catch (InterruptedException e) {
      e.printStackTrace();
    } finally {
      if (iS != null) {
        iS.close();
      }
      if (oS != null) {
        oS.close();
      }
      if (source != null) {
        source.close();
      }
      if (destination != null) {
        destination.close();
      }
    }
  }
Esempio n. 5
0
  /*
   * Fetch a file that is in a S3 file system. Return a local File. It accepts "s3://" and "s3n://" prefixes.
   * Interruptible.
   */
  private File s3Fetch(URI uri, Reporter reporter) throws IOException, InterruptedException {
    String bucketName = uri.getHost();
    String path = uri.getPath();
    UUID uniqueId = UUID.randomUUID();
    File destFolder = new File(tempDir, uniqueId.toString() + "/" + path);
    if (destFolder.exists()) {
      FileUtils.deleteDirectory(destFolder);
    }
    destFolder.mkdirs();

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    boolean done = false;
    try {
      s3Service = new RestS3Service(getCredentials());
      if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) {
        throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName);
      }

      if (path.startsWith("/")) {
        path = path.substring(1, path.length());
      }

      for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) {
        long bytesSoFar = 0;

        String fileName = path;
        if (path.contains("/")) {
          fileName = path.substring(path.lastIndexOf("/") + 1, path.length());
        }
        File fileDest = new File(destFolder, fileName);
        log.info("Downloading " + object.getKey() + " to " + fileDest + " ...");

        if (fileDest.exists()) {
          fileDest.delete();
        }

        object = s3Service.getObject(new S3Bucket(bucketName), object.getKey());
        InputStream iS = object.getDataInputStream();
        FileOutputStream writer = new FileOutputStream(fileDest);
        byte[] buffer = new byte[downloadBufferSize];

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          // Needed to being able to be interrupted at any moment.
          if (Thread.interrupted()) {
            iS.close();
            writer.close();
            cleanDirNoExceptions(destFolder);
            throw new InterruptedException();
          }

          bytesSoFar += nRead;
          writer.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            reporter.progress(bytesSoFar);
            bytesSoFar = 0l;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        writer.close();
        iS.close();
        done = true;
      }

      if (!done) {
        throw new IOException("Bucket is empty! " + bucketName + " path: " + path);
      }
    } catch (S3ServiceException e) {
      throw new IOException(e);
    }

    return destFolder;
  }