@Test
public void testGetCompanionBlocks() throws IOException {
  try {
    setupCluster(false, 1L, racks1, hosts1);
    String[] files = new String[] {"/dir/file1", "/dir/file2", "/dir/file3"};
    Codec codec = Codec.getCodec("rs");
    for (String file : files) {
      TestRaidDfs.createTestFile(fs, new Path(file), 3, 2, 8192L);
    }
    FileStatus stat = fs.getFileStatus(new Path("/dir"));
    RaidNode.doRaid(conf, stat, new Path(codec.parityDirectory), codec,
        new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE, false, 1, 1);
    Collection<LocatedBlock> companionBlocks;

    for (int i = 0; i < 2; i++) {
      for (int j = 0; j < 2; j++) {
        companionBlocks = getCompanionBlocks(namesystem, policy,
            getBlocks(namesystem, files[i]).get(j).getBlock());
        Assert.assertEquals(8, companionBlocks.size());
      }
    }

    companionBlocks = getCompanionBlocks(namesystem, policy,
        getBlocks(namesystem, files[2]).get(0).getBlock());
    Assert.assertEquals(8, companionBlocks.size());

    companionBlocks = getCompanionBlocks(namesystem, policy,
        getBlocks(namesystem, files[2]).get(1).getBlock());
    Assert.assertEquals(4, companionBlocks.size());

    String parityFile = "/raidrs/dir";

    for (int i = 0; i < 3; i++) {
      companionBlocks = getCompanionBlocks(namesystem, policy,
          getBlocks(namesystem, parityFile).get(i).getBlock());
      Assert.assertEquals(8, companionBlocks.size());
    }

    for (int i = 3; i < 6; i++) {
      companionBlocks = getCompanionBlocks(namesystem, policy,
          getBlocks(namesystem, parityFile).get(i).getBlock());
      Assert.assertEquals(4, companionBlocks.size());
    }
  } finally {
    closeCluster();
  }
}
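// The getBlocks helper used above is not shown in this section. A minimal
// sketch follows, assuming the FSNamesystem.getBlockLocations(String, long, long)
// API available on this branch; the real helper may differ.
private static List<LocatedBlock> getBlocks(FSNamesystem namesystem, String file)
    throws IOException {
  // Fetch every block of the file (offset 0, unbounded length).
  return namesystem.getBlockLocations(file, 0, Long.MAX_VALUE).getLocatedBlocks();
}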
/** removes a specified block from MiniDFS storage and reports it as corrupt */
private void removeAndReportBlock(DistributedFileSystem blockDfs,
    Path filePath, LocatedBlock block) throws IOException {
  TestRaidDfs.corruptBlock(filePath, block.getBlock(), NUM_DATANODES, true);

  // report the deleted block to the namenode
  LocatedBlock[] toReport = {block};
  blockDfs.getClient().namenode.reportBadBlocks(toReport);
}
/**
 * This test starts datanodes in simulated mode and runs chooseReplicaToDelete
 * over every block to measure the average processing time.
 */
@Test
public void testDirXORChooseReplicasToDeletePerformance() throws Exception {
  try {
    setupCluster(true, 1L, racks1, hosts1);
    // create test files
    int numFiles = 1000;
    long blockSize = 1024L;
    String parentDir = "/dir/";
    for (int i = 0; i < numFiles; i++) {
      String file = parentDir + "file" + i;
      TestRaidDfs.createTestFile(fs, new Path(file), 3, 1, blockSize);
    }
    LOG.info("Created " + numFiles + " files");
    Codec code = Codec.getCodec("xor");
    FSNamesystem fsNameSys = cluster.getNameNode().namesystem;
    for (DatanodeDescriptor dd : fsNameSys.datanodeMap.values()) {
      LOG.info(dd);
    }
    // create a fake parity file
    long numStripes = RaidNode.numStripes(numFiles, code.stripeLength);
    TestRaidDfs.createTestFile(fs, new Path(code.parityDirectory, "dir"), 3,
        (int) numStripes * code.parityLength, blockSize);

    long startTime = System.currentTimeMillis();
    long total = 0L;
    fsNameSys.readLock();
    for (BlocksMap.BlockInfo bi : fsNameSys.blocksMap.getBlocks()) {
      fsNameSys.replicator.chooseReplicaToDelete(bi.getINode(), bi, (short) 3,
          fsNameSys.datanodeMap.values(), new ArrayList<DatanodeDescriptor>());
      total++;
    }
    fsNameSys.readUnlock();
    LOG.info("Average chooseReplicaToDelete time: "
        + ((double) (System.currentTimeMillis() - startTime) / total));
  } finally {
    closeCluster();
  }
}
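// RaidNode.numStripes used above computes how many stripes cover the given
// number of source blocks. A minimal sketch of the arithmetic, assuming the
// standard ceiling division; see RaidNode for the actual implementation.
private static long numStripesSketch(long numBlocks, int stripeLength) {
  // Ceiling of numBlocks / stripeLength: each stripe holds stripeLength
  // source blocks, and a partial final stripe still needs its own parity.
  return (numBlocks + stripeLength - 1) / stripeLength;
}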
/**
 * This test creates a directory with 3 files and its fake parity file.
 * We decommission all nodes in rack2 to make sure all data is stored on
 * rack1 machines.
 * Then we bring the rack2 machines back to the normal state and create a
 * non-raided file, too small to be raided, in the directory with 4 replicas
 * (1 in rack1 and 3 in rack2).
 * Then we reduce the replication to 3 to trigger chooseReplicaToDelete.
 * We verify the remaining replicas have 1 in rack1 and 2 in rack2.
 */
@Test
public void testChooseReplicasToDeleteForSmallFile() throws Exception {
  try {
    setupCluster(false, 512L, racks2, hosts2);
    // create test files
    int numFiles = 4;
    long blockSize = 1024L;
    String parentDir = "/dir/";
    DFSClient client = getDfsClient(cluster.getNameNode(), conf);
    DatanodeInfo[] infos = client.datanodeReport(DatanodeReportType.LIVE);
    ArrayList<String> rack2nodes = new ArrayList<String>();
    ArrayList<DatanodeInfo> rack2di = new ArrayList<DatanodeInfo>();
    for (DatanodeInfo di : infos) {
      if (di.getHostName().contains("rack2")) {
        rack2nodes.add(di.getName());
        rack2di.add(cluster.getNameNode().namesystem.getDatanode(di));
      }
    }
    LOG.info("Decommission rack2 nodes");
    writeConfigFile(excludeFile, rack2nodes);
    cluster.getNameNode().namesystem.refreshNodes(conf);
    waitState(rack2di, AdminStates.DECOMMISSIONED);
    for (int i = 0; i < numFiles; i++) {
      if (i == 2) {
        continue; // file2 is created later as the small non-raided file
      }
      String file = parentDir + "file" + i;
      Path filePath = new Path(file);
      TestRaidDfs.createTestFile(fs, filePath, 1, 1, blockSize);
      printLocatedBlocks(filePath);
    }
    LOG.info("Created " + (numFiles - 1) + " files");
    // create a fake parity file
    Codec code = Codec.getCodec("xor");
    long numStripes = RaidNode.numStripes(numFiles, code.stripeLength);
    Path parityPath = new Path(code.parityDirectory, "dir");
    TestRaidDfs.createTestFile(fs, parityPath, 1,
        (int) numStripes * code.parityLength, blockSize);
    LOG.info("Created parity file: " + parityPath);
    printLocatedBlocks(parityPath);
    LOG.info("Bring rack2 nodes back out of decommission");
    writeConfigFile(excludeFile, null);
    cluster.getNameNode().namesystem.refreshNodes(conf);
    waitState(rack2di, AdminStates.NORMAL);
    Path smallFilePath = new Path(parentDir + "file2");
    TestRaidDfs.createTestFile(fs, smallFilePath, 4, 1, 256L);
    assertEquals("all datanodes should have replicas", hosts2.length,
        printLocatedBlocks(smallFilePath));
    LOG.info("Created small file: " + smallFilePath);
    LOG.info("Reduce replication to 3");
    dfs.setReplication(smallFilePath, (short) 3);
    long startTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startTime < 120000
        && printLocatedBlocks(smallFilePath) == 4) {
      Thread.sleep(1000);
    }
    LocatedBlocks lbs = dfs.getLocatedBlocks(smallFilePath, 0L, Integer.MAX_VALUE);
    boolean hasRack1 = false;
    for (DatanodeInfo di : lbs.getLocatedBlocks().get(0).getLocations()) {
      if (di.getNetworkLocation().contains("rack1")) {
        hasRack1 = true;
        break;
      }
    }
    assertTrue("we should keep the replica in rack1", hasRack1);
  } finally {
    closeCluster();
  }
}
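// printLocatedBlocks and waitState are helpers not shown in this section.
// Minimal sketches follow, assuming the behavior the test relies on: the
// first returns the number of replica locations of the file's first block
// (logging them along the way), the second polls until every given datanode
// reaches the requested admin state. The real helpers may differ.
private int printLocatedBlocksSketch(Path filePath) throws IOException {
  LocatedBlocks lbs = dfs.getLocatedBlocks(filePath, 0L, Integer.MAX_VALUE);
  LOG.info("Blocks of " + filePath + ": " + lbs.getLocatedBlocks());
  return lbs.getLocatedBlocks().get(0).getLocations().length;
}

private void waitStateSketch(List<DatanodeInfo> nodes, AdminStates state)
    throws InterruptedException {
  for (DatanodeInfo di : nodes) {
    while (di.getAdminState() != state) {
      Thread.sleep(1000); // poll until the datanode reaches the target state
    }
  }
}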
@Test
public void testMultiplePriorities() throws Exception {
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/home/test");
  int stripeLength = 3;
  short repl = 1;
  mySetup(stripeLength);
  Codec codec = Codec.getCodec("rs");
  LOG.info("Starting testMultiplePriorities");
  try {
    // Create test files and raid the directory.
    Path[] files = TestRaidDfs.createTestFiles(dirPath, fileSizes, blockSizes,
        crcs, seeds, fileSys, (short) 1);
    FileStatus stat = fileSys.getFileStatus(dirPath);
    RaidNode.doRaid(conf, stat, new Path(codec.parityDirectory), codec,
        new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE, false, repl, repl);

    Integer[] corruptBlockIdxs = new Integer[] {0, 2};
    LOG.info("Corrupt blocks " + Arrays.toString(corruptBlockIdxs)
        + " of directory " + dirPath);
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath, crcs,
        corruptBlockIdxs, fileSys, dfsCluster, false, true);

    // Create the block fixer and fix.
    FakeDistBlockIntegrityMonitor distBlockFixer =
        new FakeDistBlockIntegrityMonitor(conf);
    assertEquals(0, distBlockFixer.submittedJobs.size());

    // Wait for one job to be submitted.
    long startTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startTime < 120000
        && distBlockFixer.submittedJobs.size() == 0) {
      distBlockFixer.getCorruptionMonitor().checkAndReconstructBlocks();
      LOG.info("Waiting for jobs to submit");
      Thread.sleep(10000);
    }
    int submittedJob = distBlockFixer.submittedJobs.size();
    LOG.info("Already submitted " + submittedJob + " jobs");
    assertTrue("should submit at least 1 job", submittedJob >= 1);

    // Corrupt two more blocks.
    corruptBlockIdxs = new Integer[] {4, 5};
    LOG.info("Corrupt blocks " + Arrays.toString(corruptBlockIdxs)
        + " of directory " + dirPath);
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath, crcs,
        corruptBlockIdxs, fileSys, dfsCluster, false, true);

    // A new job should be submitted since two more blocks are corrupt.
    startTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startTime < 120000
        && distBlockFixer.submittedJobs.size() == submittedJob) {
      distBlockFixer.getCorruptionMonitor().checkAndReconstructBlocks();
      LOG.info("Waiting for more jobs to submit");
      Thread.sleep(10000);
    }
    LOG.info("Already submitted " + distBlockFixer.submittedJobs.size() + " jobs");
    assertTrue("should submit at least 1 more job",
        distBlockFixer.submittedJobs.size() - submittedJob >= 1);
  } finally {
    myTearDown();
  }
}
/**
 * tests that the distributed block fixer obeys the limit on how many jobs
 * to submit simultaneously.
 */
@Test
public void testMaxPendingJobs() throws Exception {
  LOG.info("Test testMaxPendingJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(dirPath1, fileSizes, blockSizes,
      crcs1, seeds1, fileSys, (short) 1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(dirPath2, fileSizes, blockSizes,
      crcs2, seeds2, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  LOG.info("Test testMaxPendingJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  localConf.setLong("raid.blockfix.maxpendingjobs", 1L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop();
    cnode.join();
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files", 0,
        cnode.blockIntegrityMonitor.getNumFilesFixed());

    // corrupt directory 1
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1,
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer =
        (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();
    while (blockFixer.jobsRunning() < 1
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testMaxPendingJobs waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job not running", 1, blockFixer.jobsRunning());

    // corrupt directory 2
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);

    // wait until the files in both directories are fixed
    while (blockFixer.getNumFilesFixed() < 6
        && System.currentTimeMillis() - start < 240000) {
      // make sure the block fixer does not start a second job while
      // the first one is still running
      assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, false, 6L,
        getTotal(rsNumCorruptBlocksInFiles1) + getTotal(rsNumCorruptBlocksInFiles2));
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testMaxPendingJobs exception " + e
        + StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
}
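// getTotal used above is not shown in this section. A minimal sketch,
// assuming it simply sums the expected per-file corrupt block counts
// (here taken as a long[]); the real helper may differ.
private static long getTotalSketch(long[] numCorruptBlocksInFiles) {
  long total = 0L;
  for (long n : numCorruptBlocksInFiles) {
    total += n; // accumulate corrupt blocks across all files
  }
  return total;
}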
/** tests that we can have multiple concurrent jobs fixing files (dist block fixer) */
@Test
public void testConcurrentJobs() throws Exception {
  LOG.info("Test testConcurrentJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(dirPath1, fileSizes, blockSizes,
      crcs1, seeds1, fileSys, (short) 1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(dirPath2, fileSizes, blockSizes,
      crcs2, seeds2, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  LOG.info("Test testConcurrentJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  localConf.setLong(BlockIntegrityMonitor.BLOCKCHECK_INTERVAL, 15000L);
  localConf.setLong(
      DistBlockIntegrityMonitor.RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY, 15000L);
  localConf.setLong(
      DistBlockIntegrityMonitor.RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY,
      3600000L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop();
    cnode.join();
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files", 0,
        cnode.blockIntegrityMonitor.getNumFilesFixed());

    // corrupt directory 1
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1,
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer =
        (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();

    // All files are HIGH-PRI corrupt files.
    while (blockFixer.jobsRunning() < 1
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testConcurrentJobs waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job 1 not running", 1, blockFixer.jobsRunning());

    // corrupt directory 2
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);

    // 1 LOW-PRI file and 2 HIGH-PRI files.
    while (blockFixer.jobsRunning() < 3
        && System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testConcurrentJobs waiting for fixing jobs 2 and 3 to start");
      Thread.sleep(1000);
    }
    assertTrue("at least 3 jobs should be running", blockFixer.jobsRunning() >= 3);

    while (blockFixer.getNumFilesFixed() < 6
        && System.currentTimeMillis() - start < 240000) {
      LOG.info("Test testConcurrentJobs waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, false, 6L,
        getTotal(rsNumCorruptBlocksInFiles1) + getTotal(rsNumCorruptBlocksInFiles2));
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testConcurrentJobs exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
}
/** Corrupt a parity file and wait for it to get fixed. */
private void implParityBlockFix(String testName, boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(dirPath, fileSizes, blockSizes,
      crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraid/user/dhruba");
  Path parityFile = new Path("/destraid/user/dhruba/raidtest");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    long parityCRC = RaidDFSUtil.getCRC(fileSys, parityFile);
    FileStatus parityStat = fileSys.getFileStatus(parityFile);
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(dfs,
        parityFile.toUri().getPath(), 0, parityStat.getLen());
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files", 0,
        cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt parity blocks for different stripes.
    int[] corruptBlockIdxs = new int[] {0, 1, 2};
    for (int idx : corruptBlockIdxs) {
      corruptBlock(locs.get(idx).getBlock(), dfsCluster);
    }
    RaidDFSUtil.reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs,
        2 * blockSize);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted", 1, corruptFiles.length);
    assertEquals("wrong file corrupted", corruptFiles[0],
        parityFile.toUri().getPath());

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(3000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, local, 1L, corruptBlockIdxs.length);

    long checkCRC = RaidDFSUtil.getCRC(fileSys, parityFile);
    assertEquals("file not fixed", parityCRC, checkCRC);
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e
        + StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
/**
 * Tests integrity of generated blocks. Create a file and delete a block
 * entirely. Wait for the block to be regenerated. Now stop RaidNode and
 * corrupt the generated block. Test that corruption in the generated block
 * can be detected by clients.
 */
private void generatedBlockTestCommon(String testName, int blockToCorrupt,
    boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(dirPath, fileSizes, blockSizes,
      crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraid/user/dhruba");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files", 0,
        cnode.blockIntegrityMonitor.getNumFilesFixed());

    Integer[] corruptBlockIdxs = new Integer[] {blockToCorrupt};
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath, crcs,
        corruptBlockIdxs, fileSys, dfsCluster, false, true);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("files not corrupted", corruptBlockIdxs.length,
        corruptFiles.length);
    int corruptFileIdx = -1;
    for (int i = 0; i < files.length; i++) {
      if (files[i].toUri().getPath().equals(corruptFiles[0])) {
        corruptFileIdx = i;
        break;
      }
    }
    assertNotSame("Wrong corrupt file", -1, corruptFileIdx);

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    TestBlockFixer.verifyMetrics(fileSys, cnode, local, 1L, corruptBlockIdxs.length);

    // Stop RaidNode.
    cnode.stop();
    cnode.join();
    cnode = null;

    // The block has successfully been reconstructed.
    dfs = getDFS(conf, dfs);
    assertTrue("file not fixed", TestRaidDfs.validateFile(dfs,
        files[corruptFileIdx], fileSizes[corruptFileIdx], crcs[corruptFileIdx]));

    // Now corrupt the generated block.
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath, crcs,
        corruptBlockIdxs, dfs, dfsCluster, false, false);
    try {
      TestRaidDfs.validateFile(dfs, files[corruptFileIdx],
          fileSizes[corruptFileIdx], crcs[corruptFileIdx]);
      fail("Expected exception not thrown");
    } catch (org.apache.hadoop.fs.ChecksumException ce) {
      // expected: the client detected the corruption
    } catch (org.apache.hadoop.fs.BlockMissingException bme) {
      // expected: the client detected the missing block
    }
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
/**
 * Create a file with three stripes, corrupt a block each in two stripes,
 * and wait for the file to be fixed.
 */
private void implDirBlockFix(boolean local, boolean hasStripeInfo,
    boolean corruptStripe) throws Exception {
  LOG.info("Test testDirBlockFix started. local:" + local
      + " hasStripeInfo:" + hasStripeInfo + " corruptStripe:" + corruptStripe);
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtestrs");
  Path[] files = TestRaidDfs.createTestFiles(dirPath, fileSizes, blockSizes,
      crcs, seeds, fileSys, (short) 1);
  Path destPath = new Path("/destraidrs/user/dhruba");
  LOG.info("Test testDirBlockFix created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);
  // Do not allow multiple running jobs.
  localConf.setLong("raid.blockfix.maxpendingjobs", 1L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop();
    cnode.join();

    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files", 0,
        cnode.blockIntegrityMonitor.getNumFilesFixed());
    if (!hasStripeInfo) {
      // clear out all stripes
      LocalStripeStore lss = new LocalStripeStore();
      lss.initialize(localConf, false, dfs);
      lss.clear();
    }
    if (corruptStripe) {
      LocalStripeStore lss = new LocalStripeStore();
      lss.initialize(localConf, false, dfs);
      Set<List<Block>> corruptCandidates =
          new HashSet<List<Block>>(lss.stripeSet.keySet());
      for (List<Block> lb : corruptCandidates) {
        for (Codec codec : Codec.getCodecs()) {
          StripeInfo si = lss.getStripe(codec, lb.get(0));
          if (si == null) {
            continue;
          }
          String oldSi = si.toString();
          Collections.rotate(si.parityBlocks, 1);
          Collections.rotate(si.srcBlocks, 1);
          lss.putStripe(codec, si.parityBlocks, si.srcBlocks);
          String newSi = lss.getStripe(codec, lb.get(0)).toString();
          LOG.info("Corrupt the stripe info old : " + oldSi + " new : " + newSi);
        }
      }
    }
    this.corruptFiles(dirPath, crcs, rsCorruptFileIdx1, dfs, files,
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 3
        && cnode.blockIntegrityMonitor.getNumFileFixFailures() < 3
        && System.currentTimeMillis() - start < 120000) {
      LOG.info("Test testDirBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    long totalCorruptBlocks = getTotal(rsNumCorruptBlocksInFiles1);
    if (hasStripeInfo) {
      if (!corruptStripe) {
        TestBlockFixer.verifyMetrics(fileSys, cnode, local, 3L, totalCorruptBlocks);
        dfs = getDFS(conf, dfs);
        for (int i = 0; i < fileSizes.length; i++) {
          assertTrue("file " + files[i] + " not fixed",
              TestRaidDfs.validateFile(dfs, files[i], fileSizes[i], crcs[i]));
        }
      } else {
        TestBlockFixer.verifyMetrics(fileSys, cnode, local, 0L, 0L);
        assertTrue("should fail to fix at least 3 files",
            cnode.blockIntegrityMonitor.getNumFileFixFailures() >= 3L);
        TestBlockFixer.verifyMetrics(fileSys, cnode,
            LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, LOGRESULTS.FAILURE, 3L, true);
        // Will throw a stripe mismatch exception for the first block of
        // each of the 3 files.
        TestBlockFixer.verifyMetrics(fileSys, cnode,
            LOGTYPES.OFFLINE_RECONSTRUCTION_STRIPE_VERIFICATION,
            LOGRESULTS.FAILURE, 3L, true);
      }
    } else {
      TestBlockFixer.verifyMetrics(fileSys, cnode, local, 0L, 0L);
      assertTrue("should fail to fix at least 3 files",
          cnode.blockIntegrityMonitor.getNumFileFixFailures() >= 3L);
      TestBlockFixer.verifyMetrics(fileSys, cnode,
          LOGTYPES.OFFLINE_RECONSTRUCTION_GET_STRIPE, LOGRESULTS.FAILURE,
          totalCorruptBlocks, true);
      TestBlockFixer.verifyMetrics(fileSys, cnode,
          LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, LOGRESULTS.FAILURE, 3L, true);
    }
  } catch (Exception e) {
    LOG.info("Test testDirBlockFix Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test testDirBlockFix completed.");
}