/**
 * Keep accessing the given file until the namenode reports that the given block in the file
 * contains the given number of corrupt replicas.
 */
public static void waitCorruptReplicas(
    FileSystem fs, FSNamesystem ns, Path file, ExtendedBlock b, int corruptRepls)
    throws IOException, TimeoutException {
  int count = 0;
  final int ATTEMPTS = 50;
  int repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock());
  while (repls != corruptRepls && count < ATTEMPTS) {
    try {
      IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), 512, true);
    } catch (IOException e) {
      // Swallow exceptions
    }
    System.out.println("Waiting for " + corruptRepls + " corrupt replicas");
    repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock());
    count++;
  }
  if (count == ATTEMPTS) {
    throw new TimeoutException(
        "Timed out waiting for corrupt replicas."
            + " Waiting for " + corruptRepls + ", but only found " + repls);
  }
}
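// A minimal usage sketch (not from the original sources): a test might corrupt a single
// replica and then block until the namenode notices it. The names fs, cluster and filePath
// are assumptions for illustration.
ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, filePath);
waitCorruptReplicas(fs, cluster.getNamesystem(), filePath, firstBlock, 1);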
int addSomeBlocks(SimulatedFSDataset fsdataset, int startingBlockId) throws IOException {
  int bytesAdded = 0;
  for (int i = startingBlockId; i < startingBlockId + NUMBLOCKS; ++i) {
    ExtendedBlock b = new ExtendedBlock(bpid, i, 0, 0);
    // We pass the expected length as zero; fsdataset should use the size of the
    // actual data written.
    ReplicaInPipelineInterface bInfo =
        fsdataset.createRbw(StorageType.DEFAULT, b, false).getReplica();
    ReplicaOutputStreams out =
        bInfo.createStreams(true, DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512));
    try {
      OutputStream dataOut = out.getDataOut();
      assertEquals(0, fsdataset.getLength(b));
      for (int j = 1; j <= blockIdToLen(i); ++j) {
        dataOut.write(j);
        assertEquals(j, bInfo.getBytesOnDisk()); // correct length even as we write
        bytesAdded++;
      }
    } finally {
      out.close();
    }
    b.setNumBytes(blockIdToLen(i));
    fsdataset.finalizeBlock(b);
    assertEquals(blockIdToLen(i), fsdataset.getLength(b));
  }
  return bytesAdded;
}
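// A hypothetical caller sketch: run addSomeBlocks against a simulated dataset and check that
// the reported used space matches the bytes written. getSimulatedFSDataset() is an assumed
// helper of the surrounding test class, not part of the snippet above.
SimulatedFSDataset fsdataset = getSimulatedFSDataset();
int bytesAdded = addSomeBlocks(fsdataset, 1);
assertEquals(bytesAdded, fsdataset.getDfsUsed());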
/**
 * The following test first creates a file. It verifies the block information from a datanode.
 * Then, it updates the block with new information and verifies again.
 */
@Test
public void testBlockMetaDataInfo() throws Exception {
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    cluster.waitActive();

    // create a file
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    String filestr = "/foo";
    Path filepath = new Path(filestr);
    DFSTestUtil.createFile(dfs, filepath, 1024L, (short) 3, 0L);
    assertTrue(dfs.exists(filepath));

    // get block info
    LocatedBlock locatedblock =
        getLastLocatedBlock(DFSClientAdapter.getDFSClient(dfs).getNamenode(), filestr);
    DatanodeInfo[] datanodeinfo = locatedblock.getLocations();
    assertTrue(datanodeinfo.length > 0);

    // connect to a data node
    DataNode datanode = cluster.getDataNode(datanodeinfo[0].getIpcPort());
    InterDatanodeProtocol idp =
        DataNodeTestUtils.createInterDatanodeProtocolProxy(datanode, datanodeinfo[0], conf);

    // stop block scanner, so we could compare lastScanTime
    DataNodeTestUtils.shutdownBlockScanner(datanode);

    // verify BlockMetaDataInfo
    ExtendedBlock b = locatedblock.getBlock();
    InterDatanodeProtocol.LOG.info("b=" + b + ", " + b.getClass());
    checkMetaInfo(b, datanode);
    long recoveryId = b.getGenerationStamp() + 1;
    idp.initReplicaRecovery(new RecoveringBlock(b, locatedblock.getLocations(), recoveryId));

    // verify updateBlock
    ExtendedBlock newblock =
        new ExtendedBlock(
            b.getBlockPoolId(), b.getBlockId(), b.getNumBytes() / 2, b.getGenerationStamp() + 1);
    idp.updateReplicaUnderRecovery(b, recoveryId, newblock.getNumBytes());
    checkMetaInfo(newblock, datanode);

    // Verify correct null response trying to init recovery for a missing block
    ExtendedBlock badBlock = new ExtendedBlock("fake-pool", b.getBlockId(), 0, 0);
    assertNull(
        idp.initReplicaRecovery(
            new RecoveringBlock(badBlock, locatedblock.getLocations(), recoveryId)));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
private int getTrueReplication(MiniDFSCluster cluster, ExtendedBlock block) throws IOException {
  int count = 0;
  for (DataNode dn : cluster.getDataNodes()) {
    if (DataNodeTestUtils.getFSDataset(dn)
            .getStoredBlock(block.getBlockPoolId(), block.getBlockId())
        != null) {
      count++;
    }
  }
  return count;
}
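// A minimal sketch, assuming cluster, fs and filePath come from the surrounding test: compare
// the number of replicas actually present on the datanodes with the expected replication factor.
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
assertEquals("unexpected on-disk replica count", 3, getTrueReplication(cluster, block));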
/** Convert an ExtendedBlock to a Json map. */
private static Map<String, Object> toJsonMap(final ExtendedBlock extendedblock) {
  if (extendedblock == null) {
    return null;
  }

  final Map<String, Object> m = new TreeMap<String, Object>();
  m.put("blockPoolId", extendedblock.getBlockPoolId());
  m.put("blockId", extendedblock.getBlockId());
  m.put("numBytes", extendedblock.getNumBytes());
  m.put("generationStamp", extendedblock.getGenerationStamp());
  return m;
}
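// A small illustrative sketch of the resulting map; the block pool id and numbers below are
// made up for the example. Note that TreeMap orders the keys alphabetically.
ExtendedBlock b = new ExtendedBlock("BP-1-127.0.0.1-1400000000000", 1073741825L, 512L, 1001L);
Map<String, Object> m = toJsonMap(b);
// m -> {blockId=1073741825, blockPoolId=BP-1-127.0.0.1-1400000000000,
//       generationStamp=1001, numBytes=512}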
/**
 * Corrupt a block on a data node. Replaces the block file content with the byte sequence
 * 0, 1, ..., BLOCK_SIZE - 1 (each value cast to a byte).
 *
 * @param block the ExtendedBlock to be corrupted
 * @param dn the data node where the block needs to be corrupted
 * @throws FileNotFoundException
 * @throws IOException
 */
private static void corruptBlock(final ExtendedBlock block, final DataNode dn)
    throws FileNotFoundException, IOException {
  final File f =
      DataNodeTestUtils.getBlockFile(dn, block.getBlockPoolId(), block.getLocalBlock());
  final RandomAccessFile raFile = new RandomAccessFile(f, "rw");
  final byte[] bytes = new byte[(int) BLOCK_SIZE];
  for (int i = 0; i < BLOCK_SIZE; i++) {
    bytes[i] = (byte) (i);
  }
  raFile.write(bytes);
  raFile.close();
}
/**
 * Try to access a block on a data node. Throws an IOException if the access fails.
 *
 * @param datanode
 * @param lblock
 * @throws IOException
 */
private void accessBlock(DatanodeInfo datanode, LocatedBlock lblock) throws IOException {
  InetSocketAddress targetAddr = null;
  ExtendedBlock block = lblock.getBlock();

  targetAddr = NetUtils.createSocketAddr(datanode.getXferAddr());

  BlockReader blockReader =
      new BlockReaderFactory(new DfsClientConf(conf))
          .setInetSocketAddress(targetAddr)
          .setBlock(block)
          .setFileName(
              BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId()))
          .setBlockToken(lblock.getBlockToken())
          .setStartOffset(0)
          .setLength(-1)
          .setVerifyChecksum(true)
          .setClientName("TestDataNodeVolumeFailure")
          .setDatanodeInfo(datanode)
          .setCachingStrategy(CachingStrategy.newDefaultStrategy())
          .setClientCacheContext(ClientContext.getFromConf(conf))
          .setConfiguration(conf)
          .setTracer(FsTracer.get(conf))
          .setRemotePeerFactory(
              new RemotePeerFactory() {
                @Override
                public Peer newConnectedPeer(
                    InetSocketAddress addr,
                    Token<BlockTokenIdentifier> blockToken,
                    DatanodeID datanodeId)
                    throws IOException {
                  Peer peer = null;
                  Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
                  try {
                    sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                    sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                    peer = DFSUtilClient.peerFromSocket(sock);
                  } finally {
                    if (peer == null) {
                      IOUtils.closeSocket(sock);
                    }
                  }
                  return peer;
                }
              })
          .build();
  blockReader.close();
}
/**
 * Setup a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link
 * State#READ_ONLY_SHARED} replicas.
 */
@Before
public void setup() throws IOException, InterruptedException {
  conf = new HdfsConfiguration();
  SimulatedFSDataset.setFactory(conf);

  Configuration[] overlays = new Configuration[NUM_DATANODES];
  for (int i = 0; i < overlays.length; i++) {
    overlays[i] = new Configuration();
    if (i == RO_NODE_INDEX) {
      overlays[i].setEnum(
          SimulatedFSDataset.CONFIG_PROPERTY_STATE,
          i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
    }
  }

  cluster =
      new MiniDFSCluster.Builder(conf)
          .numDataNodes(NUM_DATANODES)
          .dataNodeConfOverlays(overlays)
          .build();
  fs = cluster.getFileSystem();
  blockManager = cluster.getNameNode().getNamesystem().getBlockManager();
  datanodeManager = blockManager.getDatanodeManager();
  client =
      new DFSClient(
          new InetSocketAddress("localhost", cluster.getNameNodePort()),
          cluster.getConfiguration(0));

  for (int i = 0; i < NUM_DATANODES; i++) {
    DataNode dataNode = cluster.getDataNodes().get(i);
    validateStorageState(
        BlockManagerTestUtil.getStorageReportsForDatanode(
            datanodeManager.getDatanode(dataNode.getDatanodeId())),
        i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
  }

  // Create a 1 block file
  DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed);

  LocatedBlock locatedBlock = getLocatedBlock();
  extendedBlock = locatedBlock.getBlock();
  block = extendedBlock.getLocalBlock();

  assertThat(locatedBlock.getLocations().length, is(1));
  normalDataNode = locatedBlock.getLocations()[0];
  readOnlyDataNode =
      datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId());
  assertThat(normalDataNode, is(not(readOnlyDataNode)));

  validateNumberReplicas(1);

  // Inject the block into the datanode with READ_ONLY_SHARED storage
  cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block));

  // There should now be 2 *locations* for the block.
  // Must wait until the NameNode has processed the block report for the injected blocks.
  waitForLocations(2);
}
/**
 * Access each block of the file on the second DataNode until one access fails.
 *
 * @param path
 * @param size
 * @throws IOException
 */
private void triggerFailure(String path, long size) throws IOException {
  NamenodeProtocols nn = cluster.getNameNodeRpc();
  List<LocatedBlock> locatedBlocks = nn.getBlockLocations(path, 0, size).getLocatedBlocks();

  for (LocatedBlock lb : locatedBlocks) {
    DatanodeInfo dinfo = lb.getLocations()[1];
    ExtendedBlock b = lb.getBlock();
    try {
      accessBlock(dinfo, lb);
    } catch (IOException e) {
      System.out.println(
          "Failure triggered, on block: "
              + b.getBlockId()
              + "; corresponding volume should be removed by now");
      break;
    }
  }
}
/** Create a file with one block and corrupt some/all of the block replicas. */
private void createAFileWithCorruptedBlockReplicas(
    Path filePath, short repl, int corruptBlockCount)
    throws IOException, AccessControlException, FileNotFoundException, UnresolvedLinkException,
        InterruptedException, TimeoutException {
  DFSTestUtil.createFile(dfs, filePath, BLOCK_SIZE, repl, 0);
  DFSTestUtil.waitReplication(dfs, filePath, repl);
  // Locate the file blocks by asking name node
  final LocatedBlocks locatedblocks =
      dfs.dfs.getNamenode().getBlockLocations(filePath.toString(), 0L, BLOCK_SIZE);
  Assert.assertEquals(repl, locatedblocks.get(0).getLocations().length);
  // The file only has one block
  LocatedBlock lblock = locatedblocks.get(0);
  DatanodeInfo[] datanodeinfos = lblock.getLocations();
  ExtendedBlock block = lblock.getBlock();
  // corrupt some/all of the block replicas
  for (int i = 0; i < corruptBlockCount; i++) {
    DatanodeInfo dninfo = datanodeinfos[i];
    final DataNode dn = cluster.getDataNode(dninfo.getIpcPort());
    corruptBlock(block, dn);
    LOG.debug("Corrupted block " + block.getBlockName() + " on data node " + dninfo);
  }
}
/** Get a BlockReader for the given block. */
public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
    throws IOException {
  InetSocketAddress targetAddr = null;
  Socket sock = null;
  ExtendedBlock block = testBlock.getBlock();
  DatanodeInfo[] nodes = testBlock.getLocations();
  targetAddr = NetUtils.createSocketAddr(nodes[0].getName());
  sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
  sock.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
  sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);

  return BlockReaderFactory.newBlockReader(
      new DFSClient.Conf(conf),
      sock,
      targetAddr.toString() + ":" + block.getBlockId(),
      block,
      testBlock.getBlockToken(),
      offset,
      lenToRead,
      conf.getInt("io.file.buffer.size", 4096),
      true,
      "");
}
/*
 * Wait up to 20s for the given block to be replicated across
 * the requested number of racks, with the requested number of
 * replicas, and the requested number of replicas still needed.
 */
public static void waitForReplication(
    MiniDFSCluster cluster, ExtendedBlock b, int racks, int replicas, int neededReplicas)
    throws IOException, TimeoutException, InterruptedException {
  int curRacks = 0;
  int curReplicas = 0;
  int curNeededReplicas = 0;
  int count = 0;
  final int ATTEMPTS = 20;

  do {
    Thread.sleep(1000);
    int[] r = BlockManagerTestUtil.getReplicaInfo(cluster.getNamesystem(), b.getLocalBlock());
    curRacks = r[0];
    curReplicas = r[1];
    curNeededReplicas = r[2];
    count++;
  } while ((curRacks != racks || curReplicas != replicas || curNeededReplicas != neededReplicas)
      && count < ATTEMPTS);

  if (count == ATTEMPTS) {
    throw new TimeoutException(
        "Timed out waiting for replication."
            + " Needed replicas = " + neededReplicas
            + " Cur needed replicas = " + curNeededReplicas
            + " Replicas = " + replicas
            + " Cur replicas = " + curReplicas
            + " Racks = " + racks
            + " Cur racks = " + curRacks);
  }
}
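// A minimal usage sketch, assuming cluster and b come from the surrounding test: after a
// datanode failure, wait for the block to again have 3 replicas spread over 2 racks with
// nothing still marked as needed.
waitForReplication(cluster, b, 2, 3, 0);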
/*
 * XXX (ab) Bulk of this method is copied verbatim from {@link DFSClient}, which is
 * bad. Both places should be refactored to provide a method to copy blocks
 * around.
 */
private void copyBlock(DFSClient dfs, LocatedBlock lblock, OutputStream fos) throws Exception {
  int failures = 0;
  InetSocketAddress targetAddr = null;
  TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
  BlockReader blockReader = null;
  ExtendedBlock block = lblock.getBlock();

  while (blockReader == null) {
    DatanodeInfo chosenNode;

    try {
      chosenNode = bestNode(dfs, lblock.getLocations(), deadNodes);
      targetAddr = NetUtils.createSocketAddr(chosenNode.getXferAddr());
    } catch (IOException ie) {
      if (failures >= DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT) {
        throw new IOException("Could not obtain block " + lblock, ie);
      }
      LOG.info("Could not obtain block from any node: " + ie);
      try {
        Thread.sleep(10000);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear();
      failures++;
      continue;
    }
    try {
      String file =
          BlockReaderFactory.getFileName(targetAddr, block.getBlockPoolId(), block.getBlockId());
      blockReader =
          new BlockReaderFactory(dfs.getConf())
              .setFileName(file)
              .setBlock(block)
              .setBlockToken(lblock.getBlockToken())
              .setStartOffset(0)
              .setLength(-1)
              .setVerifyChecksum(true)
              .setClientName("fsck")
              .setDatanodeInfo(chosenNode)
              .setInetSocketAddress(targetAddr)
              .setCachingStrategy(CachingStrategy.newDropBehind())
              .setClientCacheContext(dfs.getClientContext())
              .setConfiguration(namenode.conf)
              .setRemotePeerFactory(
                  new RemotePeerFactory() {
                    @Override
                    public Peer newConnectedPeer(InetSocketAddress addr) throws IOException {
                      Peer peer = null;
                      Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
                      try {
                        s.connect(addr, HdfsServerConstants.READ_TIMEOUT);
                        s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
                        peer =
                            TcpPeerServer.peerFromSocketAndKey(
                                s, namenode.getRpcServer().getDataEncryptionKey());
                      } finally {
                        if (peer == null) {
                          IOUtils.closeQuietly(s);
                        }
                      }
                      return peer;
                    }
                  })
              .build();
    } catch (IOException ex) {
      // Put chosen node into dead list, continue
      LOG.info("Failed to connect to " + targetAddr + ":" + ex);
      deadNodes.add(chosenNode);
    }
  }

  byte[] buf = new byte[1024];
  int cnt = 0;
  boolean success = true;
  long bytesRead = 0;
  try {
    while ((cnt = blockReader.read(buf, 0, buf.length)) > 0) {
      fos.write(buf, 0, cnt);
      bytesRead += cnt;
    }
    if (bytesRead != block.getNumBytes()) {
      throw new IOException(
          "Recorded block size is " + block.getNumBytes() + ", but datanode returned "
              + bytesRead + " bytes");
    }
  } catch (Exception e) {
    LOG.error("Error reading block", e);
    success = false;
  } finally {
    blockReader.close();
  }
  if (!success) {
    throw new Exception("Could not copy block data for " + lblock.getBlock());
  }
}
/** Test for {@link FsDatasetImpl#updateReplicaUnderRecovery(ExtendedBlock, long, long)} */
@Test
public void testUpdateReplicaUnderRecovery() throws IOException {
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    cluster.waitActive();
    String bpid = cluster.getNamesystem().getBlockPoolId();

    // create a file
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    String filestr = "/foo";
    Path filepath = new Path(filestr);
    DFSTestUtil.createFile(dfs, filepath, 1024L, (short) 3, 0L);

    // get block info
    final LocatedBlock locatedblock =
        getLastLocatedBlock(DFSClientAdapter.getDFSClient(dfs).getNamenode(), filestr);
    final DatanodeInfo[] datanodeinfo = locatedblock.getLocations();
    Assert.assertTrue(datanodeinfo.length > 0);

    // get DataNode and FSDataset objects
    final DataNode datanode = cluster.getDataNode(datanodeinfo[0].getIpcPort());
    Assert.assertTrue(datanode != null);

    // initReplicaRecovery
    final ExtendedBlock b = locatedblock.getBlock();
    final long recoveryid = b.getGenerationStamp() + 1;
    final long newlength = b.getNumBytes() - 1;
    final FsDatasetSpi<?> fsdataset = DataNodeTestUtils.getFSDataset(datanode);
    final ReplicaRecoveryInfo rri =
        fsdataset.initReplicaRecovery(new RecoveringBlock(b, null, recoveryid));

    // check replica
    final ReplicaInfo replica =
        FsDatasetTestUtil.fetchReplicaInfo(fsdataset, bpid, b.getBlockId());
    Assert.assertEquals(ReplicaState.RUR, replica.getState());

    // check meta data before update
    FsDatasetImpl.checkReplicaFiles(replica);

    // case "THIS IS NOT SUPPOSED TO HAPPEN"
    // with (block length) != (stored replica's on disk length).
    {
      // create a block with same id and gs but different length.
      final ExtendedBlock tmp =
          new ExtendedBlock(
              b.getBlockPoolId(), rri.getBlockId(), rri.getNumBytes() - 1,
              rri.getGenerationStamp());
      try {
        // update should fail
        fsdataset.updateReplicaUnderRecovery(tmp, recoveryid, newlength);
        Assert.fail();
      } catch (IOException ioe) {
        System.out.println("GOOD: getting " + ioe);
      }
    }

    // update
    final String storageID =
        fsdataset.updateReplicaUnderRecovery(
            new ExtendedBlock(b.getBlockPoolId(), rri), recoveryid, newlength);
    assertTrue(storageID != null);
  } finally {
    if (cluster != null) cluster.shutdown();
  }
}
public static void checkMetaInfo(ExtendedBlock b, DataNode dn) throws IOException {
  Block metainfo =
      DataNodeTestUtils.getFSDataset(dn).getStoredBlock(b.getBlockPoolId(), b.getBlockId());
  Assert.assertEquals(b.getBlockId(), metainfo.getBlockId());
  Assert.assertEquals(b.getNumBytes(), metainfo.getNumBytes());
}
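// A hypothetical sketch, assuming locatedblock and cluster are available in the test: check
// that every datanode holding a replica agrees with the namenode-reported block metadata.
ExtendedBlock b = locatedblock.getBlock();
for (DatanodeInfo info : locatedblock.getLocations()) {
  checkMetaInfo(b, cluster.getDataNode(info.getIpcPort()));
}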
// try reading a block using a BlockReader directly
protected void tryRead(final Configuration conf, LocatedBlock lblock, boolean shouldSucceed) {
  InetSocketAddress targetAddr = null;
  IOException ioe = null;
  BlockReader blockReader = null;
  ExtendedBlock block = lblock.getBlock();
  try {
    DatanodeInfo[] nodes = lblock.getLocations();
    targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());

    blockReader =
        new BlockReaderFactory(new DfsClientConf(conf))
            .setFileName(
                BlockReaderFactory.getFileName(
                    targetAddr, "test-blockpoolid", block.getBlockId()))
            .setBlock(block)
            .setBlockToken(lblock.getBlockToken())
            .setInetSocketAddress(targetAddr)
            .setStartOffset(0)
            .setLength(-1)
            .setVerifyChecksum(true)
            .setClientName("TestBlockTokenWithDFS")
            .setDatanodeInfo(nodes[0])
            .setCachingStrategy(CachingStrategy.newDefaultStrategy())
            .setClientCacheContext(ClientContext.getFromConf(conf))
            .setConfiguration(conf)
            .setTracer(FsTracer.get(conf))
            .setRemotePeerFactory(
                new RemotePeerFactory() {
                  @Override
                  public Peer newConnectedPeer(
                      InetSocketAddress addr,
                      Token<BlockTokenIdentifier> blockToken,
                      DatanodeID datanodeId)
                      throws IOException {
                    Peer peer = null;
                    Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
                    try {
                      sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                      sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                      peer = DFSUtilClient.peerFromSocket(sock);
                    } finally {
                      if (peer == null) {
                        IOUtils.closeSocket(sock);
                      }
                    }
                    return peer;
                  }
                })
            .build();
  } catch (IOException ex) {
    ioe = ex;
  } finally {
    if (blockReader != null) {
      try {
        blockReader.close();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
  if (shouldSucceed) {
    Assert.assertNotNull(
        "OP_READ_BLOCK: access token is invalid, " + "when it is expected to be valid",
        blockReader);
  } else {
    Assert.assertNotNull(
        "OP_READ_BLOCK: access token is valid, " + "when it is expected to be invalid", ioe);
    Assert.assertTrue(
        "OP_READ_BLOCK failed due to reasons other than access token: ",
        ioe instanceof InvalidBlockTokenException);
  }
}
/**
 * Open a DataInputStream to a DataNode so that it can be read from. We get block ID and the IDs
 * of the destinations at startup, from the namenode.
 */
private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
  if (target >= getFileLength()) {
    throw new IOException("Attempted to read past end of file");
  }

  // Will be getting a new BlockReader.
  if (blockReader != null) {
    closeBlockReader(blockReader);
    blockReader = null;
  }

  //
  // Connect to best DataNode for desired Block, with potential offset
  //
  DatanodeInfo chosenNode = null;
  int refetchToken = 1; // only need to get a new access token once
  boolean connectFailedOnce = false;

  while (true) {
    //
    // Compute desired block
    //
    LocatedBlock targetBlock = getBlockAt(target, true);
    assert (target == pos) : "Wrong position " + pos + " expect " + target;
    long offsetIntoBlock = target - targetBlock.getStartOffset();

    DNAddrPair retval = chooseDataNode(targetBlock);
    chosenNode = retval.info;
    InetSocketAddress targetAddr = retval.addr;

    try {
      ExtendedBlock blk = targetBlock.getBlock();
      Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken();
      blockReader =
          getBlockReader(
              targetAddr,
              chosenNode,
              src,
              blk,
              accessToken,
              offsetIntoBlock,
              blk.getNumBytes() - offsetIntoBlock,
              buffersize,
              verifyChecksum,
              dfsClient.clientName);
      if (connectFailedOnce) {
        DFSClient.LOG.info(
            "Successfully connected to " + targetAddr + " for block " + blk.getBlockId());
      }
      return chosenNode;
    } catch (IOException ex) {
      if (ex instanceof InvalidBlockTokenException && refetchToken > 0) {
        DFSClient.LOG.info(
            "Will fetch a new access token and retry, "
                + "access token was invalid when connecting to "
                + targetAddr
                + " : "
                + ex);
        /*
         * Get a new access token and retry. Retry is needed in 2 cases. 1)
         * When both NN and DN re-started while DFSClient holding a cached
         * access token. 2) In the case that NN fails to update its
         * access key at pre-set interval (by a wide margin) and
         * subsequently restarts. In this case, DN re-registers itself with
         * NN and receives a new access key, but DN will delete the old
         * access key from its memory since it's considered expired based on
         * the estimated expiration date.
         */
        refetchToken--;
        fetchBlockAt(target);
      } else {
        connectFailedOnce = true;
        DFSClient.LOG.warn(
            "Failed to connect to "
                + targetAddr
                + " for block"
                + ", add to deadNodes and continue. "
                + ex,
            ex);
        // Put chosen node into dead list, continue
        addToDeadNodes(chosenNode);
      }
    }
  }
}
/**
 * Test that copy on write for blocks works correctly
 *
 * @throws IOException an exception might be thrown
 */
@Test
public void testCopyOnWrite() throws IOException {
  Configuration conf = new HdfsConfiguration();
  if (simulatedStorage) {
    conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  FileSystem fs = cluster.getFileSystem();
  InetSocketAddress addr = new InetSocketAddress("localhost", cluster.getNameNodePort());
  DFSClient client = new DFSClient(addr, conf);
  try {
    // create a new file, write to it and close it.
    //
    Path file1 = new Path("/filestatus.dat");
    FSDataOutputStream stm = AppendTestUtil.createFile(fs, file1, 1);
    writeFile(stm);
    stm.close();

    // Get a handle to the datanode
    DataNode[] dn = cluster.listDataNodes();
    assertTrue("There should be only one datanode but found " + dn.length, dn.length == 1);

    LocatedBlocks locations =
        client.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
    List<LocatedBlock> blocks = locations.getLocatedBlocks();
    FSDataset dataset = (FSDataset) dn[0].data;

    //
    // Create hard links for a few of the blocks
    //
    for (int i = 0; i < blocks.size(); i = i + 2) {
      ExtendedBlock b = blocks.get(i).getBlock();
      File f = dataset.getFile(b.getBlockPoolId(), b.getLocalBlock());
      File link = new File(f.toString() + ".link");
      System.out.println("Creating hardlink for File " + f + " to " + link);
      HardLink.createHardLink(f, link);
    }

    //
    // Detach all blocks. This should remove hardlinks (if any)
    //
    for (int i = 0; i < blocks.size(); i++) {
      ExtendedBlock b = blocks.get(i).getBlock();
      System.out.println("testCopyOnWrite detaching block " + b);
      assertTrue("Detaching block " + b + " should have returned true", dataset.unlinkBlock(b, 1));
    }

    // Since the blocks were already detached earlier, these calls should
    // return false
    //
    for (int i = 0; i < blocks.size(); i++) {
      ExtendedBlock b = blocks.get(i).getBlock();
      System.out.println("testCopyOnWrite detaching block " + b);
      assertTrue(
          "Detaching block " + b + " should have returned false", !dataset.unlinkBlock(b, 1));
    }
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
@VisibleForTesting
void check(String parent, HdfsFileStatus file, Result res) throws IOException {
  String path = file.getFullName(parent);
  boolean isOpen = false;

  if (file.isDir()) {
    if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
      String snapshotPath =
          (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
              + HdfsConstants.DOT_SNAPSHOT_DIR;
      HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
      check(snapshotPath, snapshotFileInfo, res);
    }
    byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
    DirectoryListing thisListing;
    if (showFiles) {
      out.println(path + " <dir>");
    }
    res.totalDirs++;
    do {
      assert lastReturnedName != null;
      thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
      if (thisListing == null) {
        return;
      }
      HdfsFileStatus[] files = thisListing.getPartialListing();
      for (int i = 0; i < files.length; i++) {
        check(path, files[i], res);
      }
      lastReturnedName = thisListing.getLastName();
    } while (thisListing.hasMore());
    return;
  }
  if (file.isSymlink()) {
    if (showFiles) {
      out.println(path + " <symlink>");
    }
    res.totalSymlinks++;
    return;
  }
  long fileLen = file.getLen();
  // Get block locations without updating the file access time
  // and without block access tokens
  LocatedBlocks blocks;
  try {
    blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
  } catch (FileNotFoundException fnfe) {
    blocks = null;
  }
  if (blocks == null) { // the file is deleted
    return;
  }
  isOpen = blocks.isUnderConstruction();
  if (isOpen && !showOpenFiles) {
    // We collect these stats about open files to report with default options
    res.totalOpenFilesSize += fileLen;
    res.totalOpenFilesBlocks += blocks.locatedBlockCount();
    res.totalOpenFiles++;
    return;
  }
  res.totalFiles++;
  res.totalSize += fileLen;
  res.totalBlocks += blocks.locatedBlockCount();
  if (showOpenFiles && isOpen) {
    out.print(
        path + " " + fileLen + " bytes, " + blocks.locatedBlockCount()
            + " block(s), OPENFORWRITE: ");
  } else if (showFiles) {
    out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
  } else {
    out.print('.');
  }
  if (res.totalFiles % 100 == 0) {
    out.println();
    out.flush();
  }
  int missing = 0;
  int corrupt = 0;
  long missize = 0;
  int underReplicatedPerFile = 0;
  int misReplicatedPerFile = 0;
  StringBuilder report = new StringBuilder();
  int i = 0;
  for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
    ExtendedBlock block = lBlk.getBlock();
    boolean isCorrupt = lBlk.isCorrupt();
    String blkName = block.toString();
    DatanodeInfo[] locs = lBlk.getLocations();
    NumberReplicas numberReplicas =
        namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
    int liveReplicas = numberReplicas.liveReplicas();
    res.totalReplicas += liveReplicas;
    short targetFileReplication = file.getReplication();
    res.numExpectedReplicas += targetFileReplication;
    if (liveReplicas > targetFileReplication) {
      res.excessiveReplicas += (liveReplicas - targetFileReplication);
      res.numOverReplicatedBlocks += 1;
    }
    // Check if block is Corrupt
    if (isCorrupt) {
      corrupt++;
      res.corruptBlocks++;
      out.print(
          "\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block "
              + block.getBlockName() + "\n");
    }
    if (liveReplicas >= minReplication) {
      res.numMinReplicatedBlocks++;
    }
    if (liveReplicas < targetFileReplication && liveReplicas > 0) {
      res.missingReplicas += (targetFileReplication - liveReplicas);
      res.numUnderReplicatedBlocks += 1;
      underReplicatedPerFile++;
      if (!showFiles) {
        out.print("\n" + path + ": ");
      }
      out.println(
          " Under replicated " + block + ". Target Replicas is " + targetFileReplication
              + " but found " + liveReplicas + " replica(s).");
    }
    // verify block placement policy
    BlockPlacementStatus blockPlacementStatus =
        bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
    if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
      res.numMisReplicatedBlocks++;
      misReplicatedPerFile++;
      if (!showFiles) {
        if (underReplicatedPerFile == 0) {
          out.println();
        }
        out.print(path + ": ");
      }
      out.println(
          " Replica placement policy is violated for " + block + ". "
              + blockPlacementStatus.getErrorDescription());
    }
    report.append(i + ". " + blkName + " len=" + block.getNumBytes());
    if (liveReplicas == 0) {
      report.append(" MISSING!");
      res.addMissing(block.toString(), block.getNumBytes());
      missing++;
      missize += block.getNumBytes();
    } else {
      report.append(" repl=" + liveReplicas);
      if (showLocations || showRacks) {
        StringBuilder sb = new StringBuilder("[");
        for (int j = 0; j < locs.length; j++) {
          if (j > 0) {
            sb.append(", ");
          }
          if (showRacks) {
            sb.append(NodeBase.getPath(locs[j]));
          } else {
            sb.append(locs[j]);
          }
        }
        sb.append(']');
        report.append(" " + sb.toString());
      }
    }
    report.append('\n');
    i++;
  }
  if ((missing > 0) || (corrupt > 0)) {
    if (!showFiles && (missing > 0)) {
      out.print(
          "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
    }
    res.corruptFiles++;
    if (isOpen) {
      LOG.info("Fsck: ignoring open file " + path);
    } else {
      if (doMove) {
        copyBlocksToLostFound(parent, file, blocks);
      }
      if (doDelete) {
        deleteCorruptedFile(path);
      }
    }
  }
  if (showFiles) {
    if (missing > 0) {
      out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
    } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
      out.print(" OK\n");
    }
    if (showBlocks) {
      out.print(report.toString() + "\n");
    }
  }
}
/**
 * The following test first creates a file with a few blocks. It randomly truncates the replica
 * of the last block stored in each datanode. Finally, it triggers block synchronization to
 * synchronize all stored blocks.
 */
public void testBlockSynchronization() throws Exception {
  final int ORG_FILE_SIZE = 3000;
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(5).build();
    cluster.waitActive();

    // create a file
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    String filestr = "/foo";
    Path filepath = new Path(filestr);
    DFSTestUtil.createFile(dfs, filepath, ORG_FILE_SIZE, REPLICATION_NUM, 0L);
    assertTrue(dfs.exists(filepath));
    DFSTestUtil.waitReplication(dfs, filepath, REPLICATION_NUM);

    // get block info for the last block
    LocatedBlock locatedblock =
        TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr);
    DatanodeInfo[] datanodeinfos = locatedblock.getLocations();
    assertEquals(REPLICATION_NUM, datanodeinfos.length);

    // connect to data nodes
    DataNode[] datanodes = new DataNode[REPLICATION_NUM];
    for (int i = 0; i < REPLICATION_NUM; i++) {
      datanodes[i] = cluster.getDataNode(datanodeinfos[i].getIpcPort());
      assertTrue(datanodes[i] != null);
    }

    // verify Block Info
    ExtendedBlock lastblock = locatedblock.getBlock();
    DataNode.LOG.info("newblocks=" + lastblock);
    for (int i = 0; i < REPLICATION_NUM; i++) {
      checkMetaInfo(lastblock, datanodes[i]);
    }

    DataNode.LOG.info("dfs.dfs.clientName=" + dfs.dfs.clientName);
    cluster.getNameNodeRpc().append(filestr, dfs.dfs.clientName);

    // expire lease to trigger block recovery.
    waitLeaseRecovery(cluster);

    Block[] updatedmetainfo = new Block[REPLICATION_NUM];
    long oldSize = lastblock.getNumBytes();
    lastblock =
        TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr).getBlock();
    long currentGS = lastblock.getGenerationStamp();
    for (int i = 0; i < REPLICATION_NUM; i++) {
      updatedmetainfo[i] =
          DataNodeTestUtils.getFSDataset(datanodes[i])
              .getStoredBlock(lastblock.getBlockPoolId(), lastblock.getBlockId());
      assertEquals(lastblock.getBlockId(), updatedmetainfo[i].getBlockId());
      assertEquals(oldSize, updatedmetainfo[i].getNumBytes());
      assertEquals(currentGS, updatedmetainfo[i].getGenerationStamp());
    }

    // verify that lease recovery does not occur when namenode is in safemode
    System.out.println("Testing that lease recovery cannot happen during safemode.");
    filestr = "/foo.safemode";
    filepath = new Path(filestr);
    dfs.create(filepath, (short) 1);
    cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
    assertTrue(dfs.dfs.exists(filestr));
    DFSTestUtil.waitReplication(dfs, filepath, (short) 1);
    waitLeaseRecovery(cluster);
    // verify that we still cannot recover the lease
    LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem());
    assertTrue("Found " + lm.countLease() + " lease, expected 1", lm.countLease() == 1);
    cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/**
 * Test the case that a replica is reported corrupt while it is not in blocksMap. Make sure that
 * ArrayIndexOutOfBounds is not thrown. See Hadoop-4351.
 *
 * <p>TODO HOPS This test fails as it tries to remove a non-existing replica. Calling
 * findAndMarkBlockAsCorrupt from a DataNode that does not store any replica for this specific
 * block will lead to a tuple did not exist exception. The reason for this is that
 * BlockManager.removeStoredBlock is called with a node that does not store a replica and hence
 * the delete will not be able to succeed during commit.
 */
@Test
public void testArrayOutOfBoundsException() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitActive();

    FileSystem fs = cluster.getFileSystem();
    final Path FILE_PATH = new Path("/tmp.txt");
    final long FILE_LEN = 1L;
    DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 2, 1L);

    // get the block
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(0, 0);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    assertTrue("Data directory does not exist", dataDir.exists());
    ExtendedBlock blk = getBlock(bpid, dataDir);
    if (blk == null) {
      storageDir = cluster.getInstanceStorageDir(0, 1);
      dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      blk = getBlock(bpid, dataDir);
    }
    assertFalse(
        "Data directory does not contain any blocks or there was an " + "IO error", blk == null);

    // start a third datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(datanodes.size(), 3);
    DataNode dataNode = datanodes.get(2);

    // report corrupted block by the third datanode
    DatanodeRegistration dnR =
        DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId());
    // Get the storage id of one of the storages on the datanode
    String storageId =
        cluster
            .getNamesystem()
            .getBlockManager()
            .getDatanodeManager()
            .getDatanode(dataNode.getDatanodeId())
            .getStorageInfos()[0]
            .getStorageID();
    cluster
        .getNamesystem()
        .getBlockManager()
        .findAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), storageId, "some test reason");

    // open the file
    fs.open(FILE_PATH);

    // clean up
    fs.delete(FILE_PATH, false);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}