private void myTearDown() throws Exception {
  if (cnode != null) {
    cnode.stop();
    cnode.join();
  }
  if (mr != null) {
    mr.shutdown();
  }
  if (dfsCluster != null) {
    dfsCluster.shutdown();
  }
}
private void estimateSavings() {
  for (RaidPolicyPathPair p : raidPolicyPathPairList) {
    ErasureCodeType code = p.policy.getErasureCode();
    int stripeSize = RaidNode.getStripeLength(jobconf);
    int paritySize = RaidNode.parityLength(code, jobconf);
    int targetRepl = Integer.parseInt(p.policy.getProperty("targetReplication"));
    int parityRepl = Integer.parseInt(p.policy.getProperty("metaReplication"));
    for (FileStatus st : p.srcPaths) {
      long saving = RaidNode.savingFromRaidingFile(
          st, stripeSize, paritySize, targetRepl, parityRepl);
      totalSaving += saving;
    }
  }
}
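// A back-of-the-envelope sketch of the accounting that
// RaidNode.savingFromRaidingFile is expected to perform; the real method is
// authoritative and may differ in rounding and unit details. This helper
// (exampleSavingBytes is illustrative, not part of RaidNode) shows the idea:
// lowering a file's replication from currentRepl to targetRepl saves source
// replicas, while each stripe of stripeSize source blocks adds paritySize
// parity blocks stored at parityRepl.
private static long exampleSavingBytes(long blocks, long blockSize,
    int stripeSize, int paritySize, int currentRepl, int targetRepl,
    int parityRepl) {
  long stripes = (blocks + stripeSize - 1) / stripeSize; // ceiling division
  long savedSrcReplicas = blocks * (currentRepl - targetRepl);
  long addedParityReplicas = stripes * paritySize * parityRepl;
  return (savedSrcReplicas - addedParityReplicas) * blockSize;
}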
/** Run a FileOperation. */
public void map(Text key, PolicyInfo policy,
    OutputCollector<WritableComparable, Text> out, Reporter reporter)
    throws IOException {
  this.reporter = reporter;
  try {
    LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
    Path p = new Path(key.toString());
    FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p);
    st.clear();
    RaidNode.doRaid(jobconf, policy, fs, st, reporter);
    ++succeedcount;
    reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
    reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
    reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
    reporter.incrCounter(Counter.META_SIZE, st.metaSize);
    reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FILES_FAILED, 1);
    String s = "FAIL: " + policy + ", " + key + " "
        + StringUtils.stringifyException(e);
    out.collect(null, new Text(s));
    LOG.info(s);
  } finally {
    reporter.setStatus(getCountString());
  }
}
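// For reference, the Counter enum that the increments above assume would look
// roughly like the following (reconstructed from the names used in map(); the
// authoritative definition lives elsewhere in this class):
//
//   static enum Counter {
//     FILES_SUCCEEDED, FILES_FAILED,
//     PROCESSED_BLOCKS, PROCESSED_SIZE,
//     META_BLOCKS, META_SIZE
//   }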
/** {@inheritDoc} */
@Override
public void stop() {
  if (stopRequested) {
    return;
  }
  super.stop();
  if (jobMonitor != null) {
    jobMonitor.running = false;
  }
  if (jobMonitorThread != null) {
    jobMonitorThread.interrupt();
  }
}
/** {@inheritDoc} */
@Override
public void join() {
  super.join();
  try {
    if (jobMonitorThread != null) {
      jobMonitorThread.join();
    }
  } catch (InterruptedException ie) {
    // ignore; shutdown is already in progress
  }
}
public ConfigManager(Configuration conf)
    throws IOException, SAXException, RaidConfigurationException,
    ClassNotFoundException, ParserConfigurationException {
  this.conf = conf;
  this.configFileName = conf.get("raid.config.file");
  this.doReload = conf.getBoolean("raid.config.reload", true);
  this.reloadInterval = conf.getLong("raid.config.reload.interval", RELOAD_INTERVAL);
  this.periodicity = conf.getLong("raid.policy.rescan.interval", RESCAN_INTERVAL);
  this.harPartfileSize = conf.getLong("raid.har.partfile.size", HAR_PARTFILE_SIZE);
  this.maxJobsPerPolicy = conf.getInt("raid.distraid.max.jobs", DISTRAID_MAX_JOBS);
  this.maxFilesPerJob = conf.getInt("raid.distraid.max.files", DISTRAID_MAX_FILES);
  if (configFileName == null) {
    String msg = "No raid.config.file given in conf - "
        + "the Hadoop Raid utility cannot run. Aborting....";
    LOG.warn(msg);
    throw new IOException(msg);
  }
  reloadConfigs();
  lastSuccessfulReload = RaidNode.now();
  lastReloadAttempt = RaidNode.now();
  running = true;
}
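// Usage sketch: the keys are exactly the ones read by the constructor above;
// the file path and values shown here are illustrative, not defaults from the
// source.
//
//   Configuration conf = new Configuration();
//   conf.set("raid.config.file", "/etc/hadoop/raid.xml"); // required
//   conf.setBoolean("raid.config.reload", true);
//   conf.setLong("raid.config.reload.interval", 10 * 1000L);
//   ConfigManager configMgr = new ConfigManager(conf); // parses the policy file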
/**
 * Creates a new JobConf for a DistRaid job, based on the configuration passed in.
 *
 * @param conf the base configuration to clone
 * @return a JobConf set up for the map-only DistRaid job
 */
private static JobConf createJobConf(Configuration conf) {
  JobConf jobconf = new JobConf(conf, DistRaid.class);
  jobName = NAME + " " + dateForm.format(new Date(RaidNode.now()));
  jobconf.setUser(RaidNode.JOBUSER);
  jobconf.setJobName(jobName);
  jobconf.setMapSpeculativeExecution(false);
  RaidUtils.parseAndSetOptions(jobconf, SCHEDULER_OPTION_LABEL);
  jobconf.setJarByClass(DistRaid.class);
  jobconf.setInputFormat(DistRaidInputFormat.class);
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);
  jobconf.setMapperClass(DistRaidMapper.class);
  jobconf.setNumReduceTasks(0);
  return jobconf;
}
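// Hypothetical caller, sketching how the returned JobConf could be submitted
// through the old mapred API; the actual submission path used by DistRaid may
// differ.
//
//   JobConf job = createJobConf(conf);
//   RunningJob running = new JobClient(job).submitJob(job);
//   running.waitForCompletion();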
/**
 * Reloads the config file if it hasn't been loaded in a while.
 *
 * @return true if the file was reloaded
 */
public synchronized boolean reloadConfigsIfNecessary() {
  long time = RaidNode.now();
  if (time > lastReloadAttempt + reloadInterval) {
    lastReloadAttempt = time;
    try {
      File file = new File(configFileName);
      long lastModified = file.lastModified();
      if (lastModified > lastSuccessfulReload
          && time > lastModified + RELOAD_WAIT) {
        reloadConfigs();
        lastSuccessfulReload = time;
        lastReloadAttemptFailed = false;
        return true;
      }
    } catch (Exception e) {
      if (!lastReloadAttemptFailed) {
        LOG.error("Failed to reload config file - "
            + "will use existing configuration.", e);
      }
      lastReloadAttemptFailed = true;
    }
  }
  return false;
}
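// A minimal sketch of the polling loop a daemon thread might run to drive
// this method; the names (running, configMgr, SLEEP_TIME) are illustrative.
//
//   while (running) {
//     if (configMgr.reloadConfigsIfNecessary()) {
//       LOG.info("Raid policy configuration reloaded");
//     }
//     Thread.sleep(SLEEP_TIME);
//   }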
public void testMultiplePriorities() throws Exception {
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/home/test");
  int stripeLength = 3;
  short repl = 1;
  mySetup(stripeLength);
  Codec codec = Codec.getCodec("rs");
  LOG.info("Starting testMultiplePriorities");
  try {
    // Create test files and raid the directory.
    Path[] files = TestRaidDfs.createTestFiles(
        dirPath, fileSizes, blockSizes, crcs, seeds, fileSys, (short) 1);
    FileStatus stat = fileSys.getFileStatus(dirPath);
    RaidNode.doRaid(conf, stat, new Path(codec.parityDirectory), codec,
        new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE,
        false, repl, repl);
    Integer[] corruptBlockIdxs = new Integer[] {0, 2};
    LOG.info("Corrupt blocks " + Arrays.toString(corruptBlockIdxs)
        + " of directory " + dirPath);
    TestDirectoryRaidDfs.corruptBlocksInDirectory(
        conf, dirPath, crcs, corruptBlockIdxs, fileSys, dfsCluster, false, true);

    // Create the block fixer and fix.
    FakeDistBlockIntegrityMonitor distBlockFixer =
        new FakeDistBlockIntegrityMonitor(conf);
    assertEquals(0, distBlockFixer.submittedJobs.size());

    // Wait for one job to be submitted.
    long startTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startTime < 120000
        && distBlockFixer.submittedJobs.size() == 0) {
      distBlockFixer.getCorruptionMonitor().checkAndReconstructBlocks();
      LOG.info("Waiting for jobs to submit");
      Thread.sleep(10000);
    }
    int submittedJob = distBlockFixer.submittedJobs.size();
    LOG.info("Already submitted " + submittedJob + " jobs");
    assertTrue("Should submit at least one job", submittedJob >= 1);

    // Corrupt two more blocks.
    corruptBlockIdxs = new Integer[] {4, 5};
    LOG.info("Corrupt blocks " + Arrays.toString(corruptBlockIdxs)
        + " of directory " + dirPath);
    TestDirectoryRaidDfs.corruptBlocksInDirectory(
        conf, dirPath, crcs, corruptBlockIdxs, fileSys, dfsCluster, false, true);

    // A new job should be submitted since two more blocks are corrupt.
    startTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startTime < 120000
        && distBlockFixer.submittedJobs.size() == submittedJob) {
      distBlockFixer.getCorruptionMonitor().checkAndReconstructBlocks();
      LOG.info("Waiting for more jobs to submit");
      Thread.sleep(10000);
    }
    LOG.info("Already submitted " + distBlockFixer.submittedJobs.size() + " jobs");
    assertTrue("Should submit at least one more job",
        distBlockFixer.submittedJobs.size() - submittedJob >= 1);
  } finally {
    myTearDown();
  }
}
/**
 * Tests that the distributed block fixer obeys the limit on how many jobs
 * to submit simultaneously.
 */
@Test
public void testMaxPendingJobs() throws Exception {
  LOG.info("Test testMaxPendingJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(
      dirPath1, fileSizes, blockSizes, crcs1, seeds1, fileSys, (short) 1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(
      dirPath2, fileSizes, blockSizes, crcs2, seeds2, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  LOG.info("Test testMaxPendingJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  localConf.setLong("raid.blockfix.maxpendingjobs", 1L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop();
    cnode.join();
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
        0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt directory 1.
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1,
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer =
        (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();

    while (blockFixer.jobsRunning() < 1
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testMaxPendingJobs waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job not running", 1, blockFixer.jobsRunning());

    // Corrupt directory 2.
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);

    // Wait until the files in both directories are fixed.
    while (blockFixer.getNumFilesFixed() < 6
        && System.currentTimeMillis() - start < 240000) {
      // Make sure the block fixer does not start a second job while
      // the first one is still running.
      assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, false, 6L,
        getTotal(rsNumCorruptBlocksInFiles1) + getTotal(rsNumCorruptBlocksInFiles2));
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testMaxPendingJobs exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
}
/** Tests that we can have two concurrent jobs fixing files (dist block fixer). */
@Test
public void testConcurrentJobs() throws Exception {
  LOG.info("Test testConcurrentJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(
      dirPath1, fileSizes, blockSizes, crcs1, seeds1, fileSys, (short) 1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(
      dirPath2, fileSizes, blockSizes, crcs2, seeds2, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  LOG.info("Test testConcurrentJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  localConf.setLong(BlockIntegrityMonitor.BLOCKCHECK_INTERVAL, 15000L);
  localConf.setLong(
      DistBlockIntegrityMonitor.RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY,
      15000L);
  localConf.setLong(
      DistBlockIntegrityMonitor.RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY,
      3600000);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop();
    cnode.join();
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
        0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt directory 1.
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1,
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer =
        (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();

    // All files are HIGH-PRI corrupt files.
    while (blockFixer.jobsRunning() < 1
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testConcurrentJobs waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job 1 not running", 1, blockFixer.jobsRunning());

    // Corrupt directory 2.
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);

    // 1 LOW-PRI file and 2 HIGH-PRI files.
    while (blockFixer.jobsRunning() < 3
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testConcurrentJobs waiting for fixing jobs 2 and 3 to start");
      Thread.sleep(1000);
    }
    assertTrue("at least 3 jobs should be running", blockFixer.jobsRunning() >= 3);

    while (blockFixer.getNumFilesFixed() < 6
        && System.currentTimeMillis() - start < 240000) {
      LOG.info("Test testConcurrentJobs waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, false, 6L,
        getTotal(rsNumCorruptBlocksInFiles1) + getTotal(rsNumCorruptBlocksInFiles2));
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testConcurrentJobs exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
}
/** Corrupts a parity file and waits for it to get fixed. */
private void implParityBlockFix(String testName, boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(
      dirPath, fileSizes, blockSizes, crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraid/user/dhruba");
  Path parityFile = new Path("/destraid/user/dhruba/raidtest");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    long parityCRC = RaidDFSUtil.getCRC(fileSys, parityFile);
    FileStatus parityStat = fileSys.getFileStatus(parityFile);
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
        dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
        0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt parity blocks for different stripes.
    int[] corruptBlockIdxs = new int[] {0, 1, 2};
    for (int idx : corruptBlockIdxs) {
      corruptBlock(locs.get(idx).getBlock(), dfsCluster);
    }
    RaidDFSUtil.reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs,
        2 * blockSize);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted", 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
        parityFile.toUri().getPath(), corruptFiles[0]);

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(3000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, local, 1L, corruptBlockIdxs.length);

    long checkCRC = RaidDFSUtil.getCRC(fileSys, parityFile);
    assertEquals("file not fixed", parityCRC, checkCRC);
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
/**
 * Tests integrity of a generated block. Creates a file and deletes a block
 * entirely, waits for the block to be regenerated, then stops the RaidNode
 * and corrupts the generated block. Verifies that corruption in the
 * generated block can be detected by clients.
 */
private void generatedBlockTestCommon(String testName, int blockToCorrupt,
    boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(
      dirPath, fileSizes, blockSizes, crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraid/user/dhruba");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
        0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    Integer[] corruptBlockIdxs = new Integer[] {blockToCorrupt};
    TestDirectoryRaidDfs.corruptBlocksInDirectory(
        conf, dirPath, crcs, corruptBlockIdxs, fileSys, dfsCluster, false, true);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("files not corrupted", corruptBlockIdxs.length, corruptFiles.length);
    int corruptFileIdx = -1;
    for (int i = 0; i < files.length; i++) {
      if (files[i].toUri().getPath().equals(corruptFiles[0])) {
        corruptFileIdx = i;
        break;
      }
    }
    assertNotSame("Wrong corrupt file", -1, corruptFileIdx);

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, local, 1L, corruptBlockIdxs.length);

    // Stop the RaidNode.
    cnode.stop();
    cnode.join();
    cnode = null;

    // The block has successfully been reconstructed.
    dfs = getDFS(conf, dfs);
    assertTrue("file not fixed",
        TestRaidDfs.validateFile(dfs, files[corruptFileIdx],
            fileSizes[corruptFileIdx], crcs[corruptFileIdx]));

    // Now corrupt the generated block.
    TestDirectoryRaidDfs.corruptBlocksInDirectory(
        conf, dirPath, crcs, corruptBlockIdxs, dfs, dfsCluster, false, false);
    try {
      TestRaidDfs.validateFile(dfs, files[corruptFileIdx],
          fileSizes[corruptFileIdx], crcs[corruptFileIdx]);
      fail("Expected exception not thrown");
    } catch (org.apache.hadoop.fs.ChecksumException ce) {
      // expected: the client detects the corruption
    } catch (org.apache.hadoop.fs.BlockMissingException bme) {
      // expected: the client detects the missing block
    }
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
/**
 * Creates a file with three stripes, corrupts a block each in two stripes,
 * and waits for the file to be fixed.
 */
private void implDirBlockFix(boolean local, boolean hasStripeInfo,
    boolean corruptStripe) throws Exception {
  LOG.info("Test testDirBlockFix started. local:" + local
      + " hasStripeInfo:" + hasStripeInfo + " corruptStripe:" + corruptStripe);
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtestrs");
  Path[] files = TestRaidDfs.createTestFiles(
      dirPath, fileSizes, blockSizes, crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba");
  LOG.info("Test testDirBlockFix created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  // Do not allow multiple running jobs.
  localConf.setLong("raid.blockfix.maxpendingjobs", 1L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
        0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    if (!hasStripeInfo) {
      // Clear out all stripes.
      LocalStripeStore lss = new LocalStripeStore();
      lss.initialize(localConf, false, dfs);
      lss.clear();
    }
    if (corruptStripe) {
      LocalStripeStore lss = new LocalStripeStore();
      lss.initialize(localConf, false, dfs);
      Set<List<Block>> corruptCandidates =
          new HashSet<List<Block>>(lss.stripeSet.keySet());
      for (List<Block> lb : corruptCandidates) {
        for (Codec codec : Codec.getCodecs()) {
          StripeInfo si = lss.getStripe(codec, lb.get(0));
          if (si == null) {
            continue;
          }
          String oldSi = si.toString();
          Collections.rotate(si.parityBlocks, 1);
          Collections.rotate(si.srcBlocks, 1);
          lss.putStripe(codec, si.parityBlocks, si.srcBlocks);
          String newSi = lss.getStripe(codec, lb.get(0)).toString();
          LOG.info("Corrupt the stripe info old : " + oldSi + " new : " + newSi);
        }
      }
    }
    this.corruptFiles(dirPath, crcs, rsCorruptFileIdx1, dfs, files,
        rsNumCorruptBlocksInFiles1);

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 3
        && cnode.blockIntegrityMonitor.getNumFileFixFailures() < 3
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test testDirBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    long totalCorruptBlocks = getTotal(rsNumCorruptBlocksInFiles1);
    if (hasStripeInfo) {
      if (!corruptStripe) {
        TestBlockFixer.verifyMetrics(fileSys, cnode, local, 3L, totalCorruptBlocks);
        dfs = getDFS(conf, dfs);
        for (int i = 0; i < fileSizes.length; i++) {
          assertTrue("file " + files[i] + " not fixed",
              TestRaidDfs.validateFile(dfs, files[i], fileSizes[i], crcs[i]));
        }
      } else {
        TestBlockFixer.verifyMetrics(fileSys, cnode, local, 0L, 0L);
        assertTrue("should fail to fix at least 3 files",
            cnode.blockIntegrityMonitor.getNumFileFixFailures() >= 3L);
        TestBlockFixer.verifyMetrics(fileSys, cnode,
            LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, LOGRESULTS.FAILURE, 3L, true);
        // Will throw a stripe-mismatch exception for the first block of each
        // of the 3 files.
        TestBlockFixer.verifyMetrics(fileSys, cnode,
            LOGTYPES.OFFLINE_RECONSTRUCTION_STRIPE_VERIFICATION,
            LOGRESULTS.FAILURE, 3L, true);
      }
    } else {
      TestBlockFixer.verifyMetrics(fileSys, cnode, local, 0L, 0L);
      assertTrue("should fail to fix at least 3 files",
          cnode.blockIntegrityMonitor.getNumFileFixFailures() >= 3L);
      TestBlockFixer.verifyMetrics(fileSys, cnode,
          LOGTYPES.OFFLINE_RECONSTRUCTION_GET_STRIPE, LOGRESULTS.FAILURE,
          totalCorruptBlocks, true);
      TestBlockFixer.verifyMetrics(fileSys, cnode,
          LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, LOGRESULTS.FAILURE, 3L, true);
    }
  } catch (Exception e) {
    LOG.info("Test testDirBlockFix Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test testDirBlockFix completed.");
}