/** * Verify that the NameNode is able to still use <tt>READ_ONLY_SHARED</tt> replicas even when the * single NORMAL replica is offline (and the effective replication count is 0). */ @Test public void testNormalReplicaOffline() throws Exception { // Stop the datanode hosting the NORMAL replica cluster.stopDataNode(normalDataNode.getXferAddr()); // Force NameNode to detect that the datanode is down BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), normalDataNode.getXferAddr()); // The live replica count should now be zero (since the NORMAL replica is offline) NumberReplicas numberReplicas = blockManager.countNodes(block); assertThat(numberReplicas.liveReplicas(), is(0)); // The block should be reported as under-replicated BlockManagerTestUtil.updateState(blockManager); assertThat(blockManager.getUnderReplicatedBlocksCount(), is(1L)); // The BlockManager should be able to heal the replication count back to 1 // by triggering an inter-datanode replication from one of the READ_ONLY_SHARED replicas BlockManagerTestUtil.computeAllPendingWork(blockManager); DFSTestUtil.waitForReplication(cluster, extendedBlock, 1, 1, 0); // There should now be 2 *locations* for the block, and 1 *replica* assertThat(getLocatedBlock().getLocations().length, is(2)); validateNumberReplicas(1); }
/** * Verify that corrupt <tt>READ_ONLY_SHARED</tt> replicas aren't counted towards the corrupt * replicas total. */ @Test public void testReadOnlyReplicaCorrupt() throws Exception { // "Corrupt" a READ_ONLY_SHARED replica by reporting it as a bad replica client.reportBadBlocks( new LocatedBlock[] { new LocatedBlock(extendedBlock, new DatanodeInfo[] {readOnlyDataNode}) }); // There should now be only 1 *location* for the block as the READ_ONLY_SHARED is corrupt waitForLocations(1); // However, the corrupt READ_ONLY_SHARED replica should *not* affect the overall corrupt // replicas count NumberReplicas numberReplicas = blockManager.countNodes(block); assertThat(numberReplicas.corruptReplicas(), is(0)); }
/**
 * Asserts that the block under test has exactly {@code expectedReplicas} live replicas and that
 * every other replica counter, as well as the BlockManager's under-replicated and excess block
 * counts, is zero.
 *
 * @param expectedReplicas the number of live replicas the block is expected to have
 * @throws IOException declared for callers; not thrown explicitly by this helper
 */
private void validateNumberReplicas(int expectedReplicas) throws IOException {
  // Per-block replica counters.
  final NumberReplicas counts = blockManager.countNodes(block);
  assertThat(counts.liveReplicas(), is(expectedReplicas));
  assertThat(counts.excessReplicas(), is(0));
  assertThat(counts.corruptReplicas(), is(0));
  assertThat(counts.decommissionedReplicas(), is(0));
  assertThat(counts.replicasOnStaleNodes(), is(0));

  // BlockManager-wide counters, read after updateState has been called.
  BlockManagerTestUtil.updateState(blockManager);
  assertThat(blockManager.getUnderReplicatedBlocksCount(), is(0L));
  assertThat(blockManager.getExcessBlocksCount(), is(0L));
}
/**
 * Checks a single namespace entry and accumulates the findings in {@code res}.
 *
 * <p>Directories are listed page by page and every child is checked recursively; if the
 * directory is contained in {@code snapshottableDirs}, its snapshot directory is checked first.
 * Symlinks are only counted. For regular files the block locations are fetched and every block
 * is examined for corruption, over-/under-replication, placement-policy violations and missing
 * replicas. Progress and problems are written to {@code out}; how verbose that output is depends
 * on the {@code showFiles}, {@code showOpenFiles}, {@code showBlocks}, {@code showLocations} and
 * {@code showRacks} flags.
 *
 * @param parent path of the parent directory, used to build the full path of {@code file}
 * @param file status of the entry to check
 * @param res accumulator for the counters gathered during the check
 * @throws IOException declared by the method; presumably propagated from the NameNode calls
 *     made here (only {@code FileNotFoundException} from the block-location lookup is caught)
 */
@VisibleForTesting
void check(String parent, HdfsFileStatus file, Result res) throws IOException {
  String path = file.getFullName(parent);
  boolean isOpen = false;

  if (file.isDir()) {
    if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
      // Build "<path>/<DOT_SNAPSHOT_DIR>" without doubling the separator.
      String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
          + HdfsConstants.DOT_SNAPSHOT_DIR;
      // NOTE(review): the result of getFileInfo is passed on without a null check, and check()
      // dereferences its 'file' argument immediately -- confirm it cannot be null here.
      HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
      check(snapshotPath, snapshotFileInfo, res);
    }
    byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
    DirectoryListing thisListing;
    if (showFiles) {
      out.println(path + " <dir>");
    }
    res.totalDirs++;
    // Page through the directory listing, resuming each request after the last returned name.
    do {
      assert lastReturnedName != null;
      thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
      if (thisListing == null) {
        // No listing available: stop processing this directory.
        return;
      }
      HdfsFileStatus[] files = thisListing.getPartialListing();
      for (int i = 0; i < files.length; i++) {
        check(path, files[i], res);
      }
      lastReturnedName = thisListing.getLastName();
    } while (thisListing.hasMore());
    return;
  }

  if (file.isSymlink()) {
    // Symlinks are counted (and optionally printed) but not inspected any further.
    if (showFiles) {
      out.println(path + " <symlink>");
    }
    res.totalSymlinks++;
    return;
  }

  long fileLen = file.getLen();
  // Get block locations without updating the file access time
  // and without block access tokens
  LocatedBlocks blocks;
  try {
    blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
  } catch (FileNotFoundException fnfe) {
    // Treated the same as a null result below.
    blocks = null;
  }
  if (blocks == null) { // the file is deleted
    return;
  }

  isOpen = blocks.isUnderConstruction();
  if (isOpen && !showOpenFiles) {
    // We collect these stats about open files to report with default options
    res.totalOpenFilesSize += fileLen;
    res.totalOpenFilesBlocks += blocks.locatedBlockCount();
    res.totalOpenFiles++;
    return;
  }

  res.totalFiles++;
  res.totalSize += fileLen;
  res.totalBlocks += blocks.locatedBlockCount();

  // Per-file header: a detailed line for open or listed files, otherwise a single progress dot.
  if (showOpenFiles && isOpen) {
    out.print(
        path + " " + fileLen + " bytes, " + blocks.locatedBlockCount()
            + " block(s), OPENFORWRITE: ");
  } else if (showFiles) {
    out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
  } else {
    out.print('.');
  }
  // Break the line and flush after every 100 files counted.
  if (res.totalFiles % 100 == 0) {
    out.println();
    out.flush();
  }

  // Per-file tallies; 'report' collects one line per block for the showBlocks output.
  int missing = 0;
  int corrupt = 0;
  long missize = 0;
  int underReplicatedPerFile = 0;
  int misReplicatedPerFile = 0;
  StringBuilder report = new StringBuilder();
  int i = 0;
  for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
    ExtendedBlock block = lBlk.getBlock();
    boolean isCorrupt = lBlk.isCorrupt();
    String blkName = block.toString();
    DatanodeInfo[] locs = lBlk.getLocations();
    NumberReplicas numberReplicas =
        namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
    int liveReplicas = numberReplicas.liveReplicas();
    res.totalReplicas += liveReplicas;
    short targetFileReplication = file.getReplication();
    res.numExpectedReplicas += targetFileReplication;

    // More live replicas than the file's target replication: record the surplus.
    if (liveReplicas > targetFileReplication) {
      res.excessiveReplicas += (liveReplicas - targetFileReplication);
      res.numOverReplicatedBlocks += 1;
    }

    // Check if block is Corrupt
    if (isCorrupt) {
      corrupt++;
      res.corruptBlocks++;
      out.print(
          "\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block "
              + block.getBlockName() + "\n");
    }

    if (liveReplicas >= minReplication) res.numMinReplicatedBlocks++;

    // Under-replicated only applies when at least one live replica exists; blocks with zero
    // live replicas are reported as MISSING further down instead.
    if (liveReplicas < targetFileReplication && liveReplicas > 0) {
      res.missingReplicas += (targetFileReplication - liveReplicas);
      res.numUnderReplicatedBlocks += 1;
      underReplicatedPerFile++;
      if (!showFiles) {
        // The file header was not printed, so prefix the message with the path.
        out.print("\n" + path + ": ");
      }
      out.println(
          " Under replicated " + block + ". Target Replicas is " + targetFileReplication
              + " but found " + liveReplicas + " replica(s).");
    }

    // verify block placement policy
    BlockPlacementStatus blockPlacementStatus =
        bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
    if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
      res.numMisReplicatedBlocks++;
      misReplicatedPerFile++;
      if (!showFiles) {
        // Start a fresh line unless an under-replication message for this file already did.
        if (underReplicatedPerFile == 0) out.println();
        out.print(path + ": ");
      }
      out.println(
          " Replica placement policy is violated for " + block + ". "
              + blockPlacementStatus.getErrorDescription());
    }

    // Append this block's line to the per-file block report.
    report.append(i + ". " + blkName + " len=" + block.getNumBytes());
    if (liveReplicas == 0) {
      report.append(" MISSING!");
      res.addMissing(block.toString(), block.getNumBytes());
      missing++;
      missize += block.getNumBytes();
    } else {
      report.append(" repl=" + liveReplicas);
      if (showLocations || showRacks) {
        // Bracketed, comma-separated list of locations; the NodeBase path of each location
        // is used instead when showRacks is set.
        StringBuilder sb = new StringBuilder("[");
        for (int j = 0; j < locs.length; j++) {
          if (j > 0) {
            sb.append(", ");
          }
          if (showRacks) sb.append(NodeBase.getPath(locs[j]));
          else sb.append(locs[j]);
        }
        sb.append(']');
        report.append(" " + sb.toString());
      }
    }
    report.append('\n');
    i++;
  }

  // A file with any missing or corrupt block counts as a corrupt file.
  if ((missing > 0) || (corrupt > 0)) {
    if (!showFiles && (missing > 0)) {
      out.print(
          "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
    }
    res.corruptFiles++;
    if (isOpen) {
      // Open files are never moved or deleted.
      LOG.info("Fsck: ignoring open file " + path);
    } else {
      if (doMove) copyBlocksToLostFound(parent, file, blocks);
      if (doDelete) deleteCorruptedFile(path);
    }
  }

  // Per-file trailer: finishes the header line printed above when files are being listed.
  if (showFiles) {
    if (missing > 0) {
      out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
    } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
      out.print(" OK\n");
    }
    if (showBlocks) {
      out.print(report.toString() + "\n");
    }
  }
}