コード例 #1
0
  @Override
  public Collection<Partition<AbstractFileInputOperator<T>>> definePartitions(
      Collection<Partition<AbstractFileInputOperator<T>>> partitions, PartitioningContext context) {
    lastRepartition = System.currentTimeMillis();

    int totalCount = getNewPartitionCount(partitions, context);

    LOG.debug("Computed new partitions: {}", totalCount);

    if (totalCount == partitions.size()) {
      return partitions;
    }

    AbstractFileInputOperator<T> tempOperator =
        partitions.iterator().next().getPartitionedInstance();

    MutableLong tempGlobalNumberOfRetries = tempOperator.globalNumberOfRetries;
    MutableLong tempGlobalNumberOfFailures = tempOperator.globalNumberOfRetries;

    /*
     * Build collective state from all instances of the operator.
     */
    Set<String> totalProcessedFiles = Sets.newHashSet();
    Set<FailedFile> currentFiles = Sets.newHashSet();
    List<DirectoryScanner> oldscanners = Lists.newLinkedList();
    List<FailedFile> totalFailedFiles = Lists.newLinkedList();
    List<String> totalPendingFiles = Lists.newLinkedList();
    Set<Integer> deletedOperators = Sets.newHashSet();

    for (Partition<AbstractFileInputOperator<T>> partition : partitions) {
      AbstractFileInputOperator<T> oper = partition.getPartitionedInstance();
      totalProcessedFiles.addAll(oper.processedFiles);
      totalFailedFiles.addAll(oper.failedFiles);
      totalPendingFiles.addAll(oper.pendingFiles);
      currentFiles.addAll(unfinishedFiles);
      tempGlobalNumberOfRetries.add(oper.localNumberOfRetries);
      tempGlobalNumberOfFailures.add(oper.localNumberOfFailures);
      if (oper.currentFile != null) {
        currentFiles.add(new FailedFile(oper.currentFile, oper.offset));
      }
      oldscanners.add(oper.getScanner());
      deletedOperators.add(oper.operatorId);
    }

    /*
     * Create partitions of scanners, scanner's partition method will do state
     * transfer for DirectoryScanner objects.
     */
    List<DirectoryScanner> scanners = scanner.partition(totalCount, oldscanners);

    Kryo kryo = new Kryo();
    Collection<Partition<AbstractFileInputOperator<T>>> newPartitions =
        Lists.newArrayListWithExpectedSize(totalCount);
    Collection<IdempotentStorageManager> newManagers =
        Lists.newArrayListWithExpectedSize(totalCount);

    for (int i = 0; i < scanners.size(); i++) {

      // Kryo.copy fails as it attempts to clone transient fields
      ByteArrayOutputStream bos = new ByteArrayOutputStream();
      Output loutput = new Output(bos);
      kryo.writeObject(loutput, this);
      loutput.close();
      Input lInput = new Input(bos.toByteArray());
      @SuppressWarnings("unchecked")
      AbstractFileInputOperator<T> oper = kryo.readObject(lInput, this.getClass());
      lInput.close();

      DirectoryScanner scn = scanners.get(i);
      oper.setScanner(scn);

      // Do state transfer for processed files.
      oper.processedFiles.addAll(totalProcessedFiles);
      oper.globalNumberOfFailures = tempGlobalNumberOfRetries;
      oper.localNumberOfFailures.setValue(0);
      oper.globalNumberOfRetries = tempGlobalNumberOfFailures;
      oper.localNumberOfRetries.setValue(0);

      /* redistribute unfinished files properly */
      oper.unfinishedFiles.clear();
      oper.currentFile = null;
      oper.offset = 0;
      Iterator<FailedFile> unfinishedIter = currentFiles.iterator();
      while (unfinishedIter.hasNext()) {
        FailedFile unfinishedFile = unfinishedIter.next();
        if (scn.acceptFile(unfinishedFile.path)) {
          oper.unfinishedFiles.add(unfinishedFile);
          unfinishedIter.remove();
        }
      }

      /* transfer failed files */
      oper.failedFiles.clear();
      Iterator<FailedFile> iter = totalFailedFiles.iterator();
      while (iter.hasNext()) {
        FailedFile ff = iter.next();
        if (scn.acceptFile(ff.path)) {
          oper.failedFiles.add(ff);
          iter.remove();
        }
      }

      /* redistribute pending files properly */
      oper.pendingFiles.clear();
      Iterator<String> pendingFilesIterator = totalPendingFiles.iterator();
      while (pendingFilesIterator.hasNext()) {
        String pathString = pendingFilesIterator.next();
        if (scn.acceptFile(pathString)) {
          oper.pendingFiles.add(pathString);
          pendingFilesIterator.remove();
        }
      }
      newPartitions.add(new DefaultPartition<AbstractFileInputOperator<T>>(oper));
      newManagers.add(oper.idempotentStorageManager);
    }

    idempotentStorageManager.partitioned(newManagers, deletedOperators);
    LOG.info("definePartitions called returning {} partitions", newPartitions.size());
    return newPartitions;
  }
コード例 #2
0
 @Override
 protected void closeFile(InputStream is) throws IOException {
   super.closeFile(is);
   br.close();
   br = null;
 }