/**
 * Aggregates the per-partition file counters into a single set of
 * {@link AggregatedFileCounters}.
 *
 * <p>The local counters of every element are summed into the accumulators
 * {@code totalLocalProcessedFiles}, {@code pendingFiles},
 * {@code totalLocalNumberOfFailures} and {@code totalLocalNumberOfRetries}
 * (declared outside this method and reset on every call). The global counters
 * are read from the first element of {@code countersList} only.
 *
 * <p>Reported values:
 * <ul>
 *   <li>PROCESSED_FILES = global processed + sum(local processed) - sum(pending)</li>
 *   <li>PENDING_FILES = sum(pending)</li>
 *   <li>NUMBER_OF_ERRORS = global failures + sum(local failures)</li>
 *   <li>NUMBER_OF_RETRIES = global retries + sum(local retries)</li>
 * </ul>
 *
 * @param countersList collection whose elements are cast to
 *     {@code BasicCounters<MutableLong>}
 * @return the aggregated counters, or {@code null} when {@code countersList} is empty
 */
@Override
@SuppressWarnings("unchecked")
public Object aggregate(Collection<?> countersList)
{
  if (countersList.isEmpty()) {
    return null;
  }

  BasicCounters<MutableLong> tempFileCounters = (BasicCounters<MutableLong>)countersList.iterator().next();

  // Work on copies of the global counters: adding into the objects owned by the
  // first element would double-count if the same counter objects are aggregated again.
  MutableLong globalProcessedFiles =
      new MutableLong(tempFileCounters.getCounter(FileCounters.GLOBAL_PROCESSED_FILES).longValue());
  MutableLong globalNumberOfFailures =
      new MutableLong(tempFileCounters.getCounter(FileCounters.GLOBAL_NUMBER_OF_FAILURES).longValue());
  MutableLong globalNumberOfRetries =
      new MutableLong(tempFileCounters.getCounter(FileCounters.GLOBAL_NUMBER_OF_RETRIES).longValue());

  // Reset the reusable accumulators before summing this round of counters.
  totalLocalProcessedFiles.setValue(0);
  pendingFiles.setValue(0);
  totalLocalNumberOfFailures.setValue(0);
  totalLocalNumberOfRetries.setValue(0);

  for (Object fileCounters : countersList) {
    BasicCounters<MutableLong> basicFileCounters = (BasicCounters<MutableLong>)fileCounters;
    totalLocalProcessedFiles.add(basicFileCounters.getCounter(FileCounters.LOCAL_PROCESSED_FILES));
    pendingFiles.add(basicFileCounters.getCounter(FileCounters.PENDING_FILES));
    totalLocalNumberOfFailures.add(basicFileCounters.getCounter(FileCounters.LOCAL_NUMBER_OF_FAILURES));
    totalLocalNumberOfRetries.add(basicFileCounters.getCounter(FileCounters.LOCAL_NUMBER_OF_RETRIES));
  }

  globalProcessedFiles.add(totalLocalProcessedFiles);
  globalProcessedFiles.subtract(pendingFiles);
  globalNumberOfFailures.add(totalLocalNumberOfFailures);
  globalNumberOfRetries.add(totalLocalNumberOfRetries);

  BasicCounters<MutableLong> aggregatedCounters = new BasicCounters<MutableLong>(MutableLong.class);
  aggregatedCounters.setCounter(AggregatedFileCounters.PROCESSED_FILES, globalProcessedFiles);
  aggregatedCounters.setCounter(AggregatedFileCounters.PENDING_FILES, pendingFiles);
  // Publish the global totals (global + local), consistent with PROCESSED_FILES;
  // the local sums alone omit whatever the global counters already hold.
  aggregatedCounters.setCounter(AggregatedFileCounters.NUMBER_OF_ERRORS, globalNumberOfFailures);
  aggregatedCounters.setCounter(AggregatedFileCounters.NUMBER_OF_RETRIES, globalNumberOfRetries);
  return aggregatedCounters;
}
@Override public Collection<Partition<AbstractFileInputOperator<T>>> definePartitions( Collection<Partition<AbstractFileInputOperator<T>>> partitions, PartitioningContext context) { lastRepartition = System.currentTimeMillis(); int totalCount = getNewPartitionCount(partitions, context); LOG.debug("Computed new partitions: {}", totalCount); if (totalCount == partitions.size()) { return partitions; } AbstractFileInputOperator<T> tempOperator = partitions.iterator().next().getPartitionedInstance(); MutableLong tempGlobalNumberOfRetries = tempOperator.globalNumberOfRetries; MutableLong tempGlobalNumberOfFailures = tempOperator.globalNumberOfRetries; /* * Build collective state from all instances of the operator. */ Set<String> totalProcessedFiles = Sets.newHashSet(); Set<FailedFile> currentFiles = Sets.newHashSet(); List<DirectoryScanner> oldscanners = Lists.newLinkedList(); List<FailedFile> totalFailedFiles = Lists.newLinkedList(); List<String> totalPendingFiles = Lists.newLinkedList(); Set<Integer> deletedOperators = Sets.newHashSet(); for (Partition<AbstractFileInputOperator<T>> partition : partitions) { AbstractFileInputOperator<T> oper = partition.getPartitionedInstance(); totalProcessedFiles.addAll(oper.processedFiles); totalFailedFiles.addAll(oper.failedFiles); totalPendingFiles.addAll(oper.pendingFiles); currentFiles.addAll(unfinishedFiles); tempGlobalNumberOfRetries.add(oper.localNumberOfRetries); tempGlobalNumberOfFailures.add(oper.localNumberOfFailures); if (oper.currentFile != null) { currentFiles.add(new FailedFile(oper.currentFile, oper.offset)); } oldscanners.add(oper.getScanner()); deletedOperators.add(oper.operatorId); } /* * Create partitions of scanners, scanner's partition method will do state * transfer for DirectoryScanner objects. 
*/ List<DirectoryScanner> scanners = scanner.partition(totalCount, oldscanners); Kryo kryo = new Kryo(); Collection<Partition<AbstractFileInputOperator<T>>> newPartitions = Lists.newArrayListWithExpectedSize(totalCount); Collection<IdempotentStorageManager> newManagers = Lists.newArrayListWithExpectedSize(totalCount); for (int i = 0; i < scanners.size(); i++) { // Kryo.copy fails as it attempts to clone transient fields ByteArrayOutputStream bos = new ByteArrayOutputStream(); Output loutput = new Output(bos); kryo.writeObject(loutput, this); loutput.close(); Input lInput = new Input(bos.toByteArray()); @SuppressWarnings("unchecked") AbstractFileInputOperator<T> oper = kryo.readObject(lInput, this.getClass()); lInput.close(); DirectoryScanner scn = scanners.get(i); oper.setScanner(scn); // Do state transfer for processed files. oper.processedFiles.addAll(totalProcessedFiles); oper.globalNumberOfFailures = tempGlobalNumberOfRetries; oper.localNumberOfFailures.setValue(0); oper.globalNumberOfRetries = tempGlobalNumberOfFailures; oper.localNumberOfRetries.setValue(0); /* redistribute unfinished files properly */ oper.unfinishedFiles.clear(); oper.currentFile = null; oper.offset = 0; Iterator<FailedFile> unfinishedIter = currentFiles.iterator(); while (unfinishedIter.hasNext()) { FailedFile unfinishedFile = unfinishedIter.next(); if (scn.acceptFile(unfinishedFile.path)) { oper.unfinishedFiles.add(unfinishedFile); unfinishedIter.remove(); } } /* transfer failed files */ oper.failedFiles.clear(); Iterator<FailedFile> iter = totalFailedFiles.iterator(); while (iter.hasNext()) { FailedFile ff = iter.next(); if (scn.acceptFile(ff.path)) { oper.failedFiles.add(ff); iter.remove(); } } /* redistribute pending files properly */ oper.pendingFiles.clear(); Iterator<String> pendingFilesIterator = totalPendingFiles.iterator(); while (pendingFilesIterator.hasNext()) { String pathString = pendingFilesIterator.next(); if (scn.acceptFile(pathString)) { oper.pendingFiles.add(pathString); 
pendingFilesIterator.remove(); } } newPartitions.add(new DefaultPartition<AbstractFileInputOperator<T>>(oper)); newManagers.add(oper.idempotentStorageManager); } idempotentStorageManager.partitioned(newManagers, deletedOperators); LOG.info("definePartitions called returning {} partitions", newPartitions.size()); return newPartitions; }