示例#1
0
  /**
   * Fetches a file that is in a Hadoop file system into a new, uniquely named directory under
   * {@code tempDir} and returns that local directory. Interruptible.
   *
   * <p>NOTE(review): every status matched by {@code fromPath} is written to the same local file
   * (named after {@code fromPath.getName()}), so a glob matching several files keeps only the
   * last one. This looks intended for single-file fetches; confirm against callers.
   *
   * @param fromPath path (or glob) of the file to fetch
   * @param reporter progress sink, notified with the number of bytes copied since the previous
   *     notification; may be {@code null}, in which case no progress is reported
   * @return the local directory that contains the fetched file
   * @throws IOException if {@code fromPath} cannot be resolved or the copy fails; an
   *     {@link InterruptedIOException} is thrown when a blocking read is aborted by an interrupt
   * @throws InterruptedException if the thread is found interrupted between two reads
   */
  private File hdfsFetch(Path fromPath, Reporter reporter)
      throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
      // globStatus() may return null (non-glob path that does not exist); fail with a clear
      // message instead of a NullPointerException in the for-each below.
      FileStatus[] matches = fS.globStatus(fromPath);
      if (matches == null) {
        throw new IOException("Path doesn't exist: " + fromPath);
      }

      // The buffer is reused for every matched file.
      byte[] buffer = new byte[downloadBufferSize];

      for (FileStatus fStatus : matches) {
        log.info("Copying " + fStatus.getPath() + " to " + toPath);
        long bytesSoFar = 0;

        FSDataInputStream iS = null;
        FSDataOutputStream oS = null;
        try {
          iS = fS.open(fStatus.getPath());
          oS = tofS.create(toPath);

          int nRead;
          while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
            // Needed to being able to be interrupted at any moment.
            // The streams are released by the finally block and the directory is cleaned by the
            // catch clause at the bottom of the method.
            if (Thread.interrupted()) {
              throw new InterruptedException();
            }
            bytesSoFar += nRead;
            oS.write(buffer, 0, nRead);
            throttler.incrementAndThrottle(nRead);
            if (bytesSoFar >= bytesToReportProgress) {
              if (reporter != null) {
                reporter.progress(bytesSoFar);
              }
              bytesSoFar = 0L;
            }
          }

          if (reporter != null) {
            reporter.progress(bytesSoFar);
          }

          // Close explicitly on the success path so that a failing flush/close is reported.
          oS.close();
          oS = null;
          iS.close();
          iS = null;
        } finally {
          // A stream is still non-null here only if something above failed: release it without
          // masking the exception that is already propagating.
          if (oS != null) {
            try {
              oS.close();
            } catch (IOException ignored) {
              // best effort, the original failure is the one worth reporting
            }
          }
          if (iS != null) {
            try {
              iS.close();
            } catch (IOException ignored) {
              // best effort, the original failure is the one worth reporting
            }
          }
        }
      }

      return toDir;
    } catch (ClosedByInterruptException e) {
      // This can be thrown by the method read.
      cleanDirNoExceptions(toDir);
      throw new InterruptedIOException();
    } catch (InterruptedException e) {
      // Interrupted between two reads: do not leave a partial download behind.
      cleanDirNoExceptions(toDir);
      throw e;
    }
  }
示例#2
0
  /**
   * Copies a local file to another local file in chunks of at most {@code downloadBufferSize}
   * bytes, throttling the transfer and reporting progress along the way.
   *
   * @param sourceFile file to read from
   * @param destFile file to write to; created if it does not exist
   * @param reporter progress sink, notified with the number of bytes copied since the previous
   *     notification; may be {@code null}, in which case no progress is reported
   * @throws IOException if reading or writing fails, or if a transfer makes no progress before
   *     the size observed at the start of the copy has been reached
   */
  private void copyFile(File sourceFile, File destFile, Reporter reporter) throws IOException {
    if (!destFile.exists()) {
      destFile.createNewFile();
    }

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    FileInputStream iS = null;
    FileOutputStream oS = null;

    try {
      iS = new FileInputStream(sourceFile);
      oS = new FileOutputStream(destFile);
      FileChannel source = iS.getChannel();
      FileChannel destination = oS.getChannel();
      long bytesSoFar = 0;
      long reportingBytesSoFar = 0;
      long size = source.size();

      while (bytesSoFar < size) {
        // Casting to int here is safe since we will transfer at most "downloadBufferSize" bytes.
        // This is done on purpose for being able to implement Throttling.
        int transferred =
            (int) destination.transferFrom(source, bytesSoFar, downloadBufferSize);
        if (transferred <= 0) {
          // Without this guard the loop would spin forever, e.g. when the source shrinks while
          // it is being copied.
          throw new IOException(
              "No bytes transferred from "
                  + sourceFile
                  + " at offset "
                  + bytesSoFar
                  + " (expected size "
                  + size
                  + ")");
        }
        bytesSoFar += transferred;
        reportingBytesSoFar += transferred;
        throttler.incrementAndThrottle(transferred);
        if (reportingBytesSoFar >= bytesToReportProgress) {
          if (reporter != null) {
            reporter.progress(reportingBytesSoFar);
          }
          reportingBytesSoFar = 0L;
        }
      }

      if (reporter != null) {
        reporter.progress(reportingBytesSoFar);
      }

    } finally {
      // Closing a file stream also closes the channel obtained from it. The nested finally
      // makes sure the input stream is closed even when closing the output stream fails.
      try {
        if (oS != null) {
          oS.close();
        }
      } finally {
        if (iS != null) {
          iS.close();
        }
      }
    }
  }
示例#3
0
 /**
  * Flushes the buffered map output: waits until {@code kvstart == kvend}, spills whatever is
  * still buffered, drops the reference to the sort buffer and finally calls
  * {@code mergeParts()}.
  *
  * @throws IOException if the wait is interrupted, if {@code sortSpillException} has been set,
  *     or if spilling/merging fails
  */
 public synchronized void flush() throws IOException {
   LOG.info("Starting flush of map output");
   synchronized (spillLock) {
     // Block on spillLock while kvstart != kvend, reporting progress before every wait.
     // NOTE(review): presumably kvstart != kvend means a spill is still in flight and the
     // spilling side notifies spillLock when done -- confirm against the spill thread.
     while (kvstart != kvend) {
       try {
         reporter.progress();
         spillLock.wait();
       } catch (InterruptedException e) {
         // Surface the interruption as an IOException, keeping the original as the cause.
         throw (IOException)
             new IOException("Buffer interrupted while waiting for the writer").initCause(e);
       }
     }
   }
   // Propagate a recorded spill failure, if any.
   if (sortSpillException != null) {
     throw (IOException) new IOException("Spill failed").initCause(sortSpillException);
   }
   // Records remain between kvend and kvindex: spill them synchronously.
   if (kvend != kvindex) {
     // The values logged as "bufend" and "kvend" are bufmark and kvindex, i.e. the values
     // assigned to those fields just below.
     LOG.info("bufstart = " + bufstart + "; bufend = " + bufmark + "; bufvoid = " + bufvoid);
     LOG.info(
         "kvstart = " + kvstart + "; kvend = " + kvindex + "; length = " + kvoffsets.length);
     kvend = kvindex;
     bufend = bufmark;
     sortAndSpill();
   }
   // release sort buffer before the merge
   kvbuffer = null;
   mergeParts();
 }
示例#4
0
  /**
   * Fetches a file that is in a Hadoop file system and returns the local directory it was
   * copied into. The local location mirrors the path component of {@code fromPath} under
   * {@code tempDir}; the directory is deleted and re-created if it already exists.
   *
   * <p>NOTE(review): every status matched by {@code fromPath} is written to the same local
   * file, so a glob matching several files keeps only the last one. This looks intended for
   * single-file fetches; confirm against callers.
   *
   * @param fromPath path (or glob) of the file to fetch
   * @param reporter progress sink, notified with the number of bytes copied since the previous
   *     notification; may be {@code null}, in which case no progress is reported
   * @return the local directory that contains the fetched file
   * @throws IOException if {@code fromPath} cannot be resolved or the copy fails
   */
  private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException {
    File toFile = new File(tempDir, fromPath.toUri().getPath());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
      FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    // globStatus() may return null (non-glob path that does not exist); fail with a clear
    // message instead of a NullPointerException in the for-each below.
    FileStatus[] matches = fS.globStatus(fromPath);
    if (matches == null) {
      throw new IOException("Path doesn't exist: " + fromPath);
    }

    // The buffer is reused for every matched file.
    byte[] buffer = new byte[downloadBufferSize];

    for (FileStatus fStatus : matches) {
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;

      FSDataInputStream iS = null;
      FSDataOutputStream oS = null;
      try {
        iS = fS.open(fStatus.getPath());
        oS = tofS.create(toPath);

        int nRead;
        while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
          bytesSoFar += nRead;
          oS.write(buffer, 0, nRead);
          throttler.incrementAndThrottle(nRead);
          if (bytesSoFar >= bytesToReportProgress) {
            if (reporter != null) {
              reporter.progress(bytesSoFar);
            }
            bytesSoFar = 0L;
          }
        }

        if (reporter != null) {
          reporter.progress(bytesSoFar);
        }

        // Close explicitly on the success path so that a failing flush/close is reported.
        oS.close();
        oS = null;
        iS.close();
        iS = null;
      } finally {
        // A stream is still non-null here only if something above failed: release it without
        // masking the exception that is already propagating.
        if (oS != null) {
          try {
            oS.close();
          } catch (IOException ignored) {
            // best effort, the original failure is the one worth reporting
          }
        }
        if (iS != null) {
          try {
            iS.close();
          } catch (IOException ignored) {
            // best effort, the original failure is the one worth reporting
          }
        }
      }
    }

    return toDir;
  }
示例#5
0
 /**
  * Blocks until fewer than {@code maxProcessesOnNode} mappers are running. Each round sleeps
  * for a random time below one minute, re-reads the number of running mappers and, if there is
  * still no free slot, reports progress before trying again.
  *
  * @param maxProcessesOnNode the method returns once the running-mapper count is below this
  * @param reporter receives a progress notification after every unsuccessful round
  * @throws IOException declared for callers; not thrown directly by the statements shown here
  * @throws InterruptedException if the thread is interrupted while sleeping
  */
 protected void waitForOpenSlot(int maxProcessesOnNode, Reporter reporter)
     throws IOException, InterruptedException {
   for (;;) {
     // random back-off, somewhere between 0 and 60 seconds
     final long pauseMillis = (long) (Math.random() * 1000 * 60);
     logger.info("sleeping for " + pauseMillis);
     Thread.sleep(pauseMillis);

     final int activeMappers = getNumRunningMappers();
     logger.info("num running mappers: " + activeMappers);

     if (activeMappers >= maxProcessesOnNode) {
       // still no free slot: report progress and go around again
       reporter.progress();
       continue;
     }
     return;
   }
 }
示例#6
0
  /**
   * Splits the incoming record on newlines and hands the first piece to the first read writer
   * and the second piece to the second read writer, then reports progress. The collector and
   * reporter seen on the first call are remembered in the corresponding fields.
   *
   * @param key key of the record, forwarded to {@code splitRead}
   * @param value record text; it is split on {@code "\n"} and the first two pieces are used
   * @param output collector, stored in {@code this.output} if that field is still unset
   * @param reporter progress sink, stored in {@code this.reporter} if that field is still unset
   * @throws IOException declared for callers; presumably raised while writing the reads
   */
  public void map(
      LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // Keep hold of the first collector and reporter handed to this instance.
    if (null == this.output) {
      this.output = output;
    }
    if (null == this.reporter) {
      this.reporter = reporter;
    }

    final String[] pieces = value.toString().split("\n");

    final String firstPiece = pieces[0];
    splitRead(key, firstPiece, s1FileWriter);

    // The second piece is only looked up after the first one has been handled.
    final String secondPiece = pieces[1];
    splitRead(key, secondPiece, getRead2FileWriter());

    reporter.progress();
  }
示例#7
0
 /**
  * Runs a freshly instantiated combiner over the given key/value iterator: for every key the
  * combiner's {@code reduce} is invoked with the values of that key and {@code combineCollector}
  * as output. The combiner is closed when done, whether or not an exception occurred.
  *
  * @param kvIter raw key/value iterator to combine
  * @param inCounter counter passed on to the values iterator
  * @throws IOException if combining or closing the combiner fails
  */
 @SuppressWarnings("unchecked")
 private void combineAndSpill(RawKeyValueIterator kvIter, Counters.Counter inCounter)
     throws IOException {
   final Reducer combinerInstance = (Reducer) ReflectionUtils.newInstance(combinerClass, job);
   try {
     final CombineValuesIterator grouped =
         new CombineValuesIterator(
             kvIter, comparator, keyClass, valClass, job, reporter, inCounter);
     while (grouped.more()) {
       combinerInstance.reduce(grouped.getKey(), grouped, combineCollector, reporter);
       grouped.nextKey();
       // let the framework know we are still making headway
       reporter.progress();
     }
   } finally {
     combinerInstance.close();
   }
 }
示例#8
0
    /**
     * Serializes one key/value pair into the in-memory buffer and records its accounting
     * entry (partition, key start, value start). A record that does not fit the buffer is
     * handed to {@code spillSingleRecord} instead.
     *
     * @param key map output key; its class must be exactly {@code keyClass}
     * @param value map output value; its class must be exactly {@code valClass}
     * @throws IOException on a key/value type mismatch, if {@code sortSpillException} has been
     *     set, if the partitioner returns a partition outside {@code [0, partitions)}, or if
     *     serialization fails
     */
    @SuppressWarnings("unchecked")
    public synchronized void collect(K key, V value) throws IOException {
      reporter.progress();
      // Exact class comparison: subclasses of keyClass/valClass are rejected as well.
      if (key.getClass() != keyClass) {
        throw new IOException(
            "Type mismatch in key from map: expected "
                + keyClass.getName()
                + ", recieved "
                + key.getClass().getName());
      }
      if (value.getClass() != valClass) {
        throw new IOException(
            "Type mismatch in value from map: expected "
                + valClass.getName()
                + ", recieved "
                + value.getClass().getName());
      }
      // Propagate a recorded spill failure, if any, before buffering more data.
      if (sortSpillException != null) {
        throw (IOException) new IOException("Spill failed").initCause(sortSpillException);
      }
      try {
        // serialize key bytes into buffer
        int keystart = bufindex;
        keySerializer.serialize(key);
        // bufindex ending up below its value before serialization means the write wrapped
        // around the end of the buffer.
        if (bufindex < keystart) {
          // wrapped the key; reset required
          bb.reset();
          keystart = 0;
        }
        // serialize value bytes into buffer
        int valstart = bufindex;
        valSerializer.serialize(value);
        int valend = bb.markRecord();
        // Record length in bytes; when valend < keystart the record wraps, so its length is
        // the tail up to bufvoid plus the head up to valend.
        mapOutputByteCounter.increment(
            valend >= keystart ? valend - keystart : (bufvoid - keystart) + valend);

        if (keystart == bufindex) {
          // if emitted records make no writes, it's possible to wrap
          // accounting space without notice
          bb.write(new byte[0], 0, 0);
        }

        int partition = partitioner.getPartition(key, value, partitions);
        if (partition < 0 || partition >= partitions) {
          throw new IOException("Illegal partition for " + key + " (" + partition + ")");
        }
        mapOutputRecordCounter.increment(1);

        // update accounting info
        // Each record owns ACCTSIZE consecutive slots of kvindices starting at ind;
        // kvoffsets[kvindex] stores that starting slot.
        int ind = kvindex * ACCTSIZE;
        kvoffsets[kvindex] = ind;
        kvindices[ind + PARTITION] = partition;
        kvindices[ind + KEYSTART] = keystart;
        kvindices[ind + VALSTART] = valstart;
        // kvindex advances circularly over kvoffsets.
        kvindex = (kvindex + 1) % kvoffsets.length;
      } catch (MapBufferTooSmallException e) {
        // The record did not fit the buffer: hand it to spillSingleRecord and still count it.
        LOG.info("Record too large for in-memory buffer: " + e.getMessage());
        spillSingleRecord(key, value);
        mapOutputRecordCounter.increment(1);
        return;
      }
    }
示例#9
0
 /**
  * Reports progress, writes the pair straight to {@code out} and counts it as one map output
  * record.
  *
  * @param key key to write
  * @param value value to write
  * @throws IOException if writing the pair fails
  */
 public void collect(K key, V value) throws IOException {
   reporter.progress();
   out.write(key, value);
   // Only reached when the write above did not throw.
   mapOutputRecordCounter.increment(1);
 }
示例#10
0
  /**
   * Fetches a file that is in a S3 file system and returns the local folder it was downloaded
   * into. Meant for "s3://" and "s3n://" URIs; only the host (bucket name) and the path of the
   * URI are used here.
   *
   * <p>NOTE(review): the local file name is derived from the requested path, not from each
   * listed object's key, so when the listing returns several objects they are all written to
   * the same local file and only the last one survives. This looks intended for single-object
   * fetches; confirm against callers.
   *
   * @param uri location of the object; host is the bucket, path is the key (prefix)
   * @param reporter progress sink, notified with the number of bytes downloaded since the
   *     previous notification; may be {@code null}, in which case no progress is reported
   * @return the local folder that contains the downloaded file
   * @throws IOException if the bucket check fails, nothing is listed under the path, the
   *     download fails, or the S3 service reports an error (wrapped as the cause)
   */
  private File s3Fetch(URI uri, Reporter reporter) throws IOException {
    String bucketName = uri.getHost();
    String path = uri.getPath();

    File destFolder = new File(tempDir, bucketName + "/" + path);
    if (destFolder.exists()) {
      FileUtils.deleteDirectory(destFolder);
    }
    destFolder.mkdirs();

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);

    boolean done = false;
    try {
      s3Service = new RestS3Service(getCredentials());
      if (s3Service.checkBucketStatus(bucketName) != RestS3Service.BUCKET_STATUS__MY_BUCKET) {
        throw new IOException("Bucket doesn't exist or is already claimed: " + bucketName);
      }

      // Object keys carry no leading slash.
      if (path.startsWith("/")) {
        path = path.substring(1);
      }

      // The destination depends only on the requested path, so compute it once.
      String fileName = path;
      if (path.contains("/")) {
        fileName = path.substring(path.lastIndexOf("/") + 1);
      }
      File fileDest = new File(destFolder, fileName);

      // The buffer is reused for every listed object.
      byte[] buffer = new byte[downloadBufferSize];

      for (S3Object object : s3Service.listObjects(new S3Bucket(bucketName), path, "")) {
        long bytesSoFar = 0;

        log.info("Downloading " + object.getKey() + " to " + fileDest + " ...");

        if (fileDest.exists()) {
          fileDest.delete();
        }

        // Fetch the object again by key before reading its data, as the original code did.
        object = s3Service.getObject(new S3Bucket(bucketName), object.getKey());

        InputStream iS = null;
        FileOutputStream writer = null;
        try {
          iS = object.getDataInputStream();
          writer = new FileOutputStream(fileDest);

          int nRead;
          while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
            bytesSoFar += nRead;
            writer.write(buffer, 0, nRead);
            throttler.incrementAndThrottle(nRead);
            if (bytesSoFar >= bytesToReportProgress) {
              if (reporter != null) {
                reporter.progress(bytesSoFar);
              }
              bytesSoFar = 0L;
            }
          }

          if (reporter != null) {
            reporter.progress(bytesSoFar);
          }

          // Close explicitly on the success path so that a failing close is reported.
          writer.close();
          writer = null;
          iS.close();
          iS = null;
        } finally {
          // A stream is still non-null here only if something above failed: release it
          // without masking the exception that is already propagating.
          if (writer != null) {
            try {
              writer.close();
            } catch (IOException ignored) {
              // best effort, the original failure is the one worth reporting
            }
          }
          if (iS != null) {
            try {
              iS.close();
            } catch (IOException ignored) {
              // best effort, the original failure is the one worth reporting
            }
          }
        }
        done = true;
      }

      if (!done) {
        throw new IOException("Bucket is empty! " + bucketName + " path: " + path);
      }
    } catch (S3ServiceException e) {
      throw new IOException(e);
    }

    return destFolder;
  }