protected DirectoryScanner createPartition(int partitionIndex, int partitionCount) {
  DirectoryScanner that = new DirectoryScanner();
  that.filePatternRegexp = this.filePatternRegexp;
  that.regex = this.regex;
  that.partitionIndex = partitionIndex;
  that.partitionCount = partitionCount;
  return that;
}
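// Illustrative sketch, not part of the original class: one common way a partitioned
// scanner decides whether a path belongs to it is to hash the path modulo partitionCount.
// The method name acceptFile mirrors the call sites elsewhere in this file; the body
// below is an assumption for illustration, not the library's actual implementation.
public class PartitionAssignmentSketch {
  private final int partitionIndex;
  private final int partitionCount;

  public PartitionAssignmentSketch(int partitionIndex, int partitionCount) {
    this.partitionIndex = partitionIndex;
    this.partitionCount = partitionCount;
  }

  /** Returns true if this partition is responsible for the given path. */
  public boolean acceptFile(String filePath) {
    int mod = filePath.hashCode() % partitionCount;
    if (mod < 0) {
      mod += partitionCount; // hashCode() may be negative
    }
    return mod == partitionIndex;
  }

  public static void main(String[] args) {
    PartitionAssignmentSketch p0 = new PartitionAssignmentSketch(0, 3);
    PartitionAssignmentSketch p1 = new PartitionAssignmentSketch(1, 3);
    String path = "/data/input/file-42.txt";
    // Exactly one of the three partitions accepts any given path.
    System.out.println(p0.acceptFile(path) + " " + p1.acceptFile(path));
  }
}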
/**
 * Returns the list of mapped files that should be transformed. Files can be specified via
 * attributes (srcFile, srcDir) or resources (FileSet, FileList, DirSet, etc.). A mapped file
 * represents the input and output file of a transformation.
 *
 * @return list of mapped files
 */
public List<MappedFile> getMappedFiles() {
  mappedFiles.clear();
  // single source file or directory given via attributes
  if (getSrcFile() != null) {
    addMappedFile(getSrcFile());
  }
  if (getSrcDir() != null) {
    addMappedFile(getSrcDir());
  }
  Iterator element = resources.iterator();
  while (element.hasNext()) {
    ResourceCollection rc = (ResourceCollection) element.next();
    if (rc instanceof FileSet && rc.isFilesystemOnly()) {
      FileSet fs = (FileSet) rc;
      File fromDir = fs.getDir(getProject());
      DirectoryScanner ds;
      try {
        ds = fs.getDirectoryScanner(getProject());
      } catch (BuildException ex) {
        log("Could not scan directory " + fromDir, ex, Project.MSG_ERR);
        continue;
      }
      for (String f : ds.getIncludedFiles()) {
        addMappedFile(new File(fromDir, f), fromDir);
      }
    } else {
      if (!rc.isFilesystemOnly()) {
        log("Only filesystem resources are supported", Project.MSG_WARN);
        continue;
      }
      Iterator rcIt = rc.iterator();
      while (rcIt.hasNext()) {
        Resource r = (Resource) rcIt.next();
        if (!r.isExists()) {
          log("Could not find resource " + r.toLongString(), Project.MSG_VERBOSE);
          continue;
        }
        if (r instanceof FileResource) {
          FileResource fr = (FileResource) r;
          addMappedFile(fr.getFile(), fr.getBaseDir());
        } else {
          log("Only file resources are supported (" + r.getClass().getSimpleName() + " found)",
              Project.MSG_WARN);
        }
      }
    }
  }
  return mappedFiles;
}
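// Hedged usage sketch: how the FileSet branch above resolves resources with Ant's
// DirectoryScanner. The directory name "src" and the include pattern are placeholders;
// the rest is the standard org.apache.tools.ant API.
import java.io.File;
import org.apache.tools.ant.DirectoryScanner;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.types.FileSet;

public class FileSetScanSketch {
  public static void main(String[] args) {
    Project project = new Project();
    project.init();

    FileSet fs = new FileSet();
    fs.setProject(project);
    fs.setDir(new File("src"));   // placeholder base directory
    fs.setIncludes("**/*.xml");   // placeholder include pattern

    // getDirectoryScanner() performs the scan and applies the includes/excludes.
    DirectoryScanner ds = fs.getDirectoryScanner(project);
    File baseDir = fs.getDir(project);
    for (String relative : ds.getIncludedFiles()) {
      // Each entry is relative to the FileSet's base directory, which is exactly
      // how addMappedFile(new File(fromDir, f), fromDir) consumes it above.
      System.out.println(new File(baseDir, relative));
    }
  }
}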
/**
 * Protected helper for simplified adding of mapped files.
 *
 * @param file file to add
 * @param baseDir base directory for the file
 */
protected void addMappedFile(File file, File baseDir) {
  if (file.isFile()) {
    if (baseDir == null) {
      baseDir = file.getParentFile();
    }
    MappedFile mappedFile = new MappedFile();
    mappedFile.setFrom(file);
    String filename = file.getName();
    String[] names = getMapper().mapFileName(filename);
    // skip the file entirely if the mapper provides no target name
    if (names == null || names.length == 0) {
      return;
    }
    File newFile = new File(file.getParent(), names[0]);
    if (getDestDir() != null) {
      try {
        newFile = new File(
            newFile.getCanonicalPath()
                .replace(baseDir.getCanonicalPath(), getDestDir().getCanonicalPath()));
      } catch (IOException ex) {
        log("Could not map file", ex, Project.MSG_WARN);
        return;
      }
    }
    mappedFile.setTo(newFile);
    mappedFiles.add(mappedFile);
  } else if (file.isDirectory()) {
    if (baseDir == null) {
      baseDir = file;
    }
    DirectoryScanner ds = new DirectoryScanner();
    ds.setBasedir(file);
    ds.scan();
    for (String fileName : ds.getIncludedFiles()) {
      addMappedFile(new File(file, fileName), baseDir);
    }
  }
}
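// Hedged sketch of the mapper contract addMappedFile() relies on: Ant's FileNameMapper
// returns the target name(s) for a source name, or null when the name does not match.
// GlobPatternMapper is just one concrete mapper that getMapper() might return here;
// the patterns below are placeholders.
import org.apache.tools.ant.util.FileNameMapper;
import org.apache.tools.ant.util.GlobPatternMapper;

public class MapperSketch {
  public static void main(String[] args) {
    FileNameMapper mapper = new GlobPatternMapper();
    mapper.setFrom("*.xml");   // placeholder source pattern
    mapper.setTo("*.html");    // placeholder target pattern

    String[] mapped = mapper.mapFileName("page.xml");
    // mapped == {"page.html"}; a non-matching name such as "notes.txt" yields null,
    // which addMappedFile() treats as "skip this file".
    System.out.println(mapped == null ? "skipped" : mapped[0]);
  }
}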
/** Scans the directory for new files. */
protected void scanDirectory() {
  if (System.currentTimeMillis() - scanIntervalMillis >= lastScanMillis) {
    Set<Path> newPaths = scanner.scan(fs, filePath, processedFiles);

    for (Path newPath : newPaths) {
      String newPathString = newPath.toString();
      pendingFiles.add(newPathString);
      processedFiles.add(newPathString);
      localProcessedFileCount.increment();
    }
    lastScanMillis = System.currentTimeMillis();
  }
}
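// Minimal sketch of the throttling pattern used in scanDirectory(): an expensive scan
// runs at most once per interval. The class name, interval value, and Runnable are
// placeholders; the timestamp arithmetic mirrors the check above.
public class IntervalGateSketch {
  private final long intervalMillis;
  private long lastRunMillis;

  public IntervalGateSketch(long intervalMillis) {
    this.intervalMillis = intervalMillis;
  }

  /** Runs the action only if at least intervalMillis have elapsed since the last run. */
  public void maybeRun(Runnable action) {
    long now = System.currentTimeMillis();
    if (now - intervalMillis >= lastRunMillis) {
      action.run();
      lastRunMillis = now;
    }
  }

  public static void main(String[] args) {
    IntervalGateSketch gate = new IntervalGateSketch(5000);
    gate.maybeRun(() -> System.out.println("scan #1 runs immediately"));
    gate.maybeRun(() -> System.out.println("scan #2")); // not printed: still within the interval
  }
}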
public Properties(final List<String> sources, final String output, final Set<String> excludes) {
  myRoots = new HashSet<String>(sources);

  final DirectoryScanner.Result result =
      DirectoryScanner.getFiles(myRoots, excludes, ProjectWrapper.this);

  mySources = new HashMap<FileWrapper, FileWrapper>();
  for (FileWrapper fw : result.getFiles()) {
    mySources.put(fw, fw);
  }

  myOutput = output;
  updateOutputStatus();
}
@Override
public Collection<Partition<AbstractFileInputOperator<T>>> definePartitions(
    Collection<Partition<AbstractFileInputOperator<T>>> partitions, PartitioningContext context) {
  lastRepartition = System.currentTimeMillis();

  int totalCount = getNewPartitionCount(partitions, context);
  LOG.debug("Computed new partitions: {}", totalCount);

  if (totalCount == partitions.size()) {
    return partitions;
  }

  AbstractFileInputOperator<T> tempOperator = partitions.iterator().next().getPartitionedInstance();

  MutableLong tempGlobalNumberOfRetries = tempOperator.globalNumberOfRetries;
  MutableLong tempGlobalNumberOfFailures = tempOperator.globalNumberOfFailures;

  /*
   * Build collective state from all instances of the operator.
   */
  Set<String> totalProcessedFiles = Sets.newHashSet();
  Set<FailedFile> currentFiles = Sets.newHashSet();
  List<DirectoryScanner> oldscanners = Lists.newLinkedList();
  List<FailedFile> totalFailedFiles = Lists.newLinkedList();
  List<String> totalPendingFiles = Lists.newLinkedList();
  Set<Integer> deletedOperators = Sets.newHashSet();

  for (Partition<AbstractFileInputOperator<T>> partition : partitions) {
    AbstractFileInputOperator<T> oper = partition.getPartitionedInstance();
    totalProcessedFiles.addAll(oper.processedFiles);
    totalFailedFiles.addAll(oper.failedFiles);
    totalPendingFiles.addAll(oper.pendingFiles);
    currentFiles.addAll(oper.unfinishedFiles);
    tempGlobalNumberOfRetries.add(oper.localNumberOfRetries);
    tempGlobalNumberOfFailures.add(oper.localNumberOfFailures);
    if (oper.currentFile != null) {
      currentFiles.add(new FailedFile(oper.currentFile, oper.offset));
    }
    oldscanners.add(oper.getScanner());
    deletedOperators.add(oper.operatorId);
  }

  /*
   * Create partitions of scanners; the scanner's partition method performs the
   * state transfer for the DirectoryScanner objects.
   */
  List<DirectoryScanner> scanners = scanner.partition(totalCount, oldscanners);

  Kryo kryo = new Kryo();
  Collection<Partition<AbstractFileInputOperator<T>>> newPartitions =
      Lists.newArrayListWithExpectedSize(totalCount);
  Collection<IdempotentStorageManager> newManagers =
      Lists.newArrayListWithExpectedSize(totalCount);

  for (int i = 0; i < scanners.size(); i++) {
    // Kryo.copy fails as it attempts to clone transient fields,
    // so clone the operator by serializing and deserializing it instead.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Output loutput = new Output(bos);
    kryo.writeObject(loutput, this);
    loutput.close();
    Input lInput = new Input(bos.toByteArray());
    @SuppressWarnings("unchecked")
    AbstractFileInputOperator<T> oper = kryo.readObject(lInput, this.getClass());
    lInput.close();

    DirectoryScanner scn = scanners.get(i);
    oper.setScanner(scn);

    // Do state transfer for processed files.
    oper.processedFiles.addAll(totalProcessedFiles);
    oper.globalNumberOfFailures = tempGlobalNumberOfFailures;
    oper.localNumberOfFailures.setValue(0);
    oper.globalNumberOfRetries = tempGlobalNumberOfRetries;
    oper.localNumberOfRetries.setValue(0);

    /* redistribute unfinished files properly */
    oper.unfinishedFiles.clear();
    oper.currentFile = null;
    oper.offset = 0;
    Iterator<FailedFile> unfinishedIter = currentFiles.iterator();
    while (unfinishedIter.hasNext()) {
      FailedFile unfinishedFile = unfinishedIter.next();
      if (scn.acceptFile(unfinishedFile.path)) {
        oper.unfinishedFiles.add(unfinishedFile);
        unfinishedIter.remove();
      }
    }

    /* transfer failed files */
    oper.failedFiles.clear();
    Iterator<FailedFile> iter = totalFailedFiles.iterator();
    while (iter.hasNext()) {
      FailedFile ff = iter.next();
      if (scn.acceptFile(ff.path)) {
        oper.failedFiles.add(ff);
        iter.remove();
      }
    }

    /* redistribute pending files properly */
    oper.pendingFiles.clear();
    Iterator<String> pendingFilesIterator = totalPendingFiles.iterator();
    while (pendingFilesIterator.hasNext()) {
      String pathString = pendingFilesIterator.next();
      if (scn.acceptFile(pathString)) {
        oper.pendingFiles.add(pathString);
        pendingFilesIterator.remove();
      }
    }

    newPartitions.add(new DefaultPartition<AbstractFileInputOperator<T>>(oper));
    newManagers.add(oper.idempotentStorageManager);
  }

  idempotentStorageManager.partitioned(newManagers, deletedOperators);
  LOG.info("definePartitions called returning {} partitions", newPartitions.size());
  return newPartitions;
}
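// Hedged sketch of the redistribution loop repeated above for unfinished, failed and
// pending files: walk a shared backlog and move every entry the new partition's scanner
// accepts, removing it so no other partition claims it twice. Predicate stands in for
// DirectoryScanner.acceptFile(); the acceptance rule in main() is a placeholder.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Predicate;

public class RedistributeSketch {
  /** Moves accepted items from the shared backlog into the partition's own list. */
  static <E> void claim(List<E> backlog, List<E> partitionLocal, Predicate<E> accepts) {
    Iterator<E> it = backlog.iterator();
    while (it.hasNext()) {
      E item = it.next();
      if (accepts.test(item)) {
        partitionLocal.add(item);
        it.remove(); // each item ends up in exactly one partition
      }
    }
  }

  public static void main(String[] args) {
    List<String> backlog = new ArrayList<>(Arrays.asList("a.log", "b.log", "c.log"));
    List<String> mine = new ArrayList<>();
    claim(backlog, mine, path -> Math.floorMod(path.hashCode(), 2) == 0);
    System.out.println("claimed: " + mine + ", left for other partitions: " + backlog);
  }
}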
protected void replay(long windowId) {
  // This operator can partition itself dynamically. When that happens, a file can be re-hashed
  // to a different partition than before. To handle this, each partition loads all the recovery
  // data for a window and then processes only those files that hash to it in the current run.
  try {
    Map<Integer, Object> recoveryDataPerOperator = idempotentStorageManager.load(windowId);

    for (Object recovery : recoveryDataPerOperator.values()) {
      @SuppressWarnings("unchecked")
      LinkedList<RecoveryEntry> recoveryData = (LinkedList<RecoveryEntry>) recovery;

      for (RecoveryEntry recoveryEntry : recoveryData) {
        if (scanner.acceptFile(recoveryEntry.file)) {
          // The operator may have continued processing the same file in multiple windows,
          // so the recovery states of subsequent windows will have an entry for that file,
          // only with a different offset. In that case we keep reading from the already
          // open stream instead of reopening the file.
          if (currentFile == null
              || !(currentFile.equals(recoveryEntry.file) && offset == recoveryEntry.startOffset)) {
            if (inputStream != null) {
              closeFile(inputStream);
            }
            processedFiles.add(recoveryEntry.file);

            // remove the file from the failed and unfinished queues and the pending set
            Iterator<FailedFile> failedFileIterator = failedFiles.iterator();
            while (failedFileIterator.hasNext()) {
              FailedFile ff = failedFileIterator.next();
              if (ff.path.equals(recoveryEntry.file) && ff.offset == recoveryEntry.startOffset) {
                failedFileIterator.remove();
                break;
              }
            }

            Iterator<FailedFile> unfinishedFileIterator = unfinishedFiles.iterator();
            while (unfinishedFileIterator.hasNext()) {
              FailedFile ff = unfinishedFileIterator.next();
              if (ff.path.equals(recoveryEntry.file) && ff.offset == recoveryEntry.startOffset) {
                unfinishedFileIterator.remove();
                break;
              }
            }

            if (pendingFiles.contains(recoveryEntry.file)) {
              pendingFiles.remove(recoveryEntry.file);
            }

            inputStream =
                retryFailedFile(new FailedFile(recoveryEntry.file, recoveryEntry.startOffset));
          }

          // Re-emit exactly the entities that were emitted for this entry's offset range.
          while (offset < recoveryEntry.endOffset) {
            T line = readEntity();
            offset++;
            emit(line);
          }
        }
      }
    }
  } catch (IOException e) {
    throw new RuntimeException("replay", e);
  }
}
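// Illustrative sketch of the per-window recovery bookkeeping replay() relies on: each
// window records which file it read and the entity-offset range it emitted, so the same
// tuples can be re-emitted after a failure. The class and field names below are assumptions
// modelled on the RecoveryEntry used above, not the operator's actual definition.
import java.util.Arrays;
import java.util.List;

public class ReplaySketch {
  static class RecoveryEntry {
    final String file;
    final long startOffset; // first entity index emitted in the window
    final long endOffset;   // one past the last entity index emitted

    RecoveryEntry(String file, long startOffset, long endOffset) {
      this.file = file;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }
  }

  public static void main(String[] args) {
    // Pretend the file has one entity per index.
    List<String> entities = Arrays.asList("e0", "e1", "e2", "e3", "e4");
    RecoveryEntry entry = new RecoveryEntry("input.txt", 1, 4);

    // Re-emit exactly the entities the original window produced: e1, e2, e3.
    long offset = entry.startOffset;
    while (offset < entry.endOffset) {
      System.out.println("re-emit " + entities.get((int) offset));
      offset++;
    }
  }
}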