/**
 * Rebuilds {@link #lastStatus} from the current file and job indexes so that
 * callers outside this class can inspect a fresh snapshot.
 *
 * <p>Also publishes the high/low priority corrupt file counts to
 * {@link RaidNodeMetrics} and logs the resulting status.
 */
private void updateStatus() {
  List<String> highPriorityFileNames = new ArrayList<String>();
  int lowPriorityFiles = 0;
  for (Map.Entry<String, CorruptFileInfo> entry : fileIndex.entrySet()) {
    // A file counts as high priority when its highest priority is positive.
    if (entry.getValue().getHighestPriority() <= 0) {
      lowPriorityFiles++;
      continue;
    }
    highPriorityFileNames.add(entry.getKey());
  }
  // Every high priority file contributed exactly one name to the list.
  int highPriorityFiles = highPriorityFileNames.size();

  List<JobStatus> jobs = new ArrayList<JobStatus>();
  for (Job running : jobIndex.keySet()) {
    String trackingUrl = running.getTrackingURL();
    String jobName = running.getJobName();
    JobID id = running.getID();
    jobs.add(new BlockFixer.JobStatus(id, jobName, trackingUrl));
  }

  lastStatus = new BlockFixer.Status(
      highPriorityFiles, lowPriorityFiles, jobs, highPriorityFileNames);

  RaidNodeMetrics.getInstance().corruptFilesHighPri.set(highPriorityFiles);
  RaidNodeMetrics.getInstance().corruptFilesLowPri.set(lowPriorityFiles);
  LOG.info("Update status done." + lastStatus.toString());
}
/** Handle a failed job. */ private void failJob(Job job) throws IOException { // assume no files have been fixed LOG.error("Job " + job.getID() + "(" + job.getJobName() + ") finished (failed)"); // We do not change metrics here since we do not know for sure if file // fixing failed. for (CorruptFileInfo fileInfo : jobIndex.get(job)) { boolean failed = true; fileInfo.finishJob(job.getJobName(), failed); } numJobsRunning--; }
/**
 * Registers a job against each of the given corrupt files in the file index.
 *
 * <p>Files already present in {@code fileIndex} get the job added to their
 * existing entry; unknown files get a new entry that is stored in the index.
 *
 * @param jobName name of the job handling the files
 * @param corruptFiles paths of the corrupt files assigned to the job
 * @param priority priority the job was submitted with
 * @return the index entries for {@code corruptFiles}, in the same order
 */
private List<CorruptFileInfo> updateFileIndex(
    String jobName, List<String> corruptFiles, int priority) {
  List<CorruptFileInfo> tracked = new ArrayList<CorruptFileInfo>();
  for (String path : corruptFiles) {
    CorruptFileInfo existing = fileIndex.get(path);
    if (existing == null) {
      // First time this file is seen: create and index a new entry.
      CorruptFileInfo created = new CorruptFileInfo(path, jobName, priority);
      fileIndex.put(path, created);
      tracked.add(created);
      continue;
    }
    existing.addJob(jobName, priority);
    tracked.add(existing);
  }
  return tracked;
}
/** Handle a successful job. */ private void succeedJob(Job job, long filesSucceeded, long filesFailed) throws IOException { String jobName = job.getJobName(); LOG.info("Job " + job.getID() + "(" + jobName + ") finished (succeeded)"); if (filesFailed == 0) { // no files have failed for (CorruptFileInfo fileInfo : jobIndex.get(job)) { boolean failed = false; fileInfo.finishJob(jobName, failed); } } else { // we have to look at the output to check which files have failed Set<String> failedFiles = getFailedFiles(job); for (CorruptFileInfo fileInfo : jobIndex.get(job)) { if (failedFiles.contains(fileInfo.getFile().toString())) { boolean failed = true; fileInfo.finishJob(jobName, failed); } else { // call succeed for files that have succeeded or for which no action // was taken boolean failed = false; fileInfo.finishJob(jobName, failed); } } } // report succeeded files to metrics incrFilesFixed(filesSucceeded); incrFileFixFailures(filesFailed); numJobsRunning--; }
// Compute integer priority. Urgency is indicated by higher numbers. Map<String, Integer> computePriorities(FileSystem fs, Map<String, Integer> corruptFiles) throws IOException { Map<String, Integer> corruptFilePriority = new HashMap<String, Integer>(); String[] parityDestPrefixes = destPrefixes(); Set<String> srcDirsToWatchOutFor = new HashSet<String>(); // Loop over parity files once. for (Iterator<String> it = corruptFiles.keySet().iterator(); it.hasNext(); ) { String p = it.next(); if (BlockFixer.isSourceFile(p, parityDestPrefixes)) { continue; } // Find the parent of the parity file. Path parent = new Path(p).getParent(); // If the file was a HAR part file, the parent will end with _raid.har. In // that case, the parity directory is the parent of the parent. if (parent.toUri().getPath().endsWith(RaidNode.HAR_SUFFIX)) { parent = parent.getParent(); } String parentUriPath = parent.toUri().getPath(); // Remove the RAID prefix to get the source dir. srcDirsToWatchOutFor.add(parentUriPath.substring(parentUriPath.indexOf(Path.SEPARATOR, 1))); int numCorrupt = corruptFiles.get(p); int priority = (numCorrupt > 1) ? 1 : 0; CorruptFileInfo fileInfo = fileIndex.get(p); if (fileInfo == null || priority > fileInfo.getHighestPriority()) { corruptFilePriority.put(p, priority); } } // Loop over src files now. for (Iterator<String> it = corruptFiles.keySet().iterator(); it.hasNext(); ) { String p = it.next(); if (BlockFixer.isSourceFile(p, parityDestPrefixes)) { if (BlockFixer.doesParityDirExist(fs, p, parityDestPrefixes)) { int numCorrupt = corruptFiles.get(p); FileStatus stat = fs.getFileStatus(new Path(p)); int priority = 0; if (stat.getReplication() > 1) { // If we have a missing block when replication > 1, it is high pri. priority = 1; } else { // Replication == 1. Assume Reed Solomon parity exists. // If we have more than one missing block when replication == 1, then // high pri. priority = (numCorrupt > 1) ? 
1 : 0; } // If priority is low, check if the scan of corrupt parity files found // the src dir to be risky. if (priority == 0) { Path parent = new Path(p).getParent(); String parentUriPath = parent.toUri().getPath(); if (srcDirsToWatchOutFor.contains(parentUriPath)) { priority = 1; } } CorruptFileInfo fileInfo = fileIndex.get(p); if (fileInfo == null || priority > fileInfo.getHighestPriority()) { corruptFilePriority.put(p, priority); } } } } return corruptFilePriority; }