/*
 * Since the NameNode does not persist any locations of the block, an
 * addBlock() retry call after a NN restart should re-select the locations
 * and return them to the client. See HDFS-5257.
 */
@Test
public void testAddBlockRetryShouldReturnBlockWithLocations() throws Exception {
  final String src = "/testAddBlockRetryShouldReturnBlockWithLocations";
  NamenodeProtocols nameNodeRpc = cluster.getNameNodeRpc();
  // create file
  nameNodeRpc.create(src, FsPermission.getFileDefault(), "clientName",
      new EnumSetWritable<CreateFlag>(EnumSet.of(CreateFlag.CREATE)), true,
      (short) 3, 1024, null);
  // start first addBlock()
  LOG.info("Starting first addBlock for " + src);
  LocatedBlock lb1 = nameNodeRpc.addBlock(src, "clientName", null, null,
      INodeId.GRANDFATHER_INODE_ID, null);
  assertTrue("Block locations should be present",
      lb1.getLocations().length > 0);

  cluster.restartNameNode();
  nameNodeRpc = cluster.getNameNodeRpc();
  LocatedBlock lb2 = nameNodeRpc.addBlock(src, "clientName", null, null,
      INodeId.GRANDFATHER_INODE_ID, null);
  assertEquals("Blocks are not equal", lb1.getBlock(), lb2.getBlock());
  assertTrue("Wrong locations with retry", lb2.getLocations().length > 0);
}
/**
 * Setup a {@link MiniDFSCluster}.
 * Create a block with both {@link State#NORMAL} and
 * {@link State#READ_ONLY_SHARED} replicas.
 */
@Before
public void setup() throws IOException, InterruptedException {
  conf = new HdfsConfiguration();
  SimulatedFSDataset.setFactory(conf);

  Configuration[] overlays = new Configuration[NUM_DATANODES];
  for (int i = 0; i < overlays.length; i++) {
    overlays[i] = new Configuration();
    if (i == RO_NODE_INDEX) {
      overlays[i].setEnum(SimulatedFSDataset.CONFIG_PROPERTY_STATE,
          READ_ONLY_SHARED);
    }
  }

  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(NUM_DATANODES)
      .dataNodeConfOverlays(overlays)
      .build();
  fs = cluster.getFileSystem();
  blockManager = cluster.getNameNode().getNamesystem().getBlockManager();
  datanodeManager = blockManager.getDatanodeManager();
  client = new DFSClient(
      new InetSocketAddress("localhost", cluster.getNameNodePort()),
      cluster.getConfiguration(0));

  for (int i = 0; i < NUM_DATANODES; i++) {
    DataNode dataNode = cluster.getDataNodes().get(i);
    validateStorageState(
        BlockManagerTestUtil.getStorageReportsForDatanode(
            datanodeManager.getDatanode(dataNode.getDatanodeId())),
        i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
  }

  // Create a 1 block file
  DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE,
      (short) 1, seed);

  LocatedBlock locatedBlock = getLocatedBlock();
  extendedBlock = locatedBlock.getBlock();
  block = extendedBlock.getLocalBlock();

  assertThat(locatedBlock.getLocations().length, is(1));
  normalDataNode = locatedBlock.getLocations()[0];
  readOnlyDataNode = datanodeManager.getDatanode(
      cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId());
  assertThat(normalDataNode, is(not(readOnlyDataNode)));

  validateNumberReplicas(1);

  // Inject the block into the datanode with READ_ONLY_SHARED storage
  cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block));

  // There should now be 2 *locations* for the block.
  // Must wait until the NameNode has processed the block report for the
  // injected blocks.
  waitForLocations(2);
}
/**
 * The following test first creates a file.
 * It verifies the block information from a datanode.
 * Then, it updates the block with new information and verifies again.
 */
@Test
public void testBlockMetaDataInfo() throws Exception {
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    cluster.waitActive();

    // create a file
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    String filestr = "/foo";
    Path filepath = new Path(filestr);
    DFSTestUtil.createFile(dfs, filepath, 1024L, (short) 3, 0L);
    assertTrue(dfs.exists(filepath));

    // get block info
    LocatedBlock locatedblock = getLastLocatedBlock(
        DFSClientAdapter.getDFSClient(dfs).getNamenode(), filestr);
    DatanodeInfo[] datanodeinfo = locatedblock.getLocations();
    assertTrue(datanodeinfo.length > 0);

    // connect to a data node
    DataNode datanode = cluster.getDataNode(datanodeinfo[0].getIpcPort());
    InterDatanodeProtocol idp =
        DataNodeTestUtils.createInterDatanodeProtocolProxy(
            datanode, datanodeinfo[0], conf);

    // stop block scanner, so we could compare lastScanTime
    DataNodeTestUtils.shutdownBlockScanner(datanode);

    // verify BlockMetaDataInfo
    ExtendedBlock b = locatedblock.getBlock();
    InterDatanodeProtocol.LOG.info("b=" + b + ", " + b.getClass());
    checkMetaInfo(b, datanode);
    long recoveryId = b.getGenerationStamp() + 1;
    idp.initReplicaRecovery(
        new RecoveringBlock(b, locatedblock.getLocations(), recoveryId));

    // verify updateBlock
    ExtendedBlock newblock = new ExtendedBlock(b.getBlockPoolId(),
        b.getBlockId(), b.getNumBytes() / 2, b.getGenerationStamp() + 1);
    idp.updateReplicaUnderRecovery(b, recoveryId, newblock.getNumBytes());
    checkMetaInfo(newblock, datanode);

    // Verify correct null response trying to init recovery for a missing block
    ExtendedBlock badBlock = new ExtendedBlock("fake-pool", b.getBlockId(), 0, 0);
    assertNull(idp.initReplicaRecovery(
        new RecoveringBlock(badBlock, locatedblock.getLocations(), recoveryId)));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/** Read the block length from one of the datanodes. */
private long readBlockLength(LocatedBlock locatedblock) throws IOException {
  assert locatedblock != null : "LocatedBlock cannot be null";
  int replicaNotFoundCount = locatedblock.getLocations().length;

  for (DatanodeInfo datanode : locatedblock.getLocations()) {
    ClientDatanodeProtocol cdp = null;
    try {
      cdp = DFSUtil.createClientDatanodeProtocolProxy(datanode,
          dfsClient.conf, dfsClient.getConf().socketTimeout, locatedblock);

      final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock());
      if (n >= 0) {
        return n;
      }
    } catch (IOException ioe) {
      if (ioe instanceof RemoteException
          && (((RemoteException) ioe).unwrapRemoteException()
              instanceof ReplicaNotFoundException)) {
        // special case : replica might not be on the DN, treat as 0 length
        replicaNotFoundCount--;
      }

      if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode "
            + datanode + " for block " + locatedblock.getBlock(), ioe);
      }
    } finally {
      if (cdp != null) {
        RPC.stopProxy(cdp);
      }
    }
  }

  // The Namenode told us about these locations, but none of them know about
  // the replica. This means we hit the race between pipeline creation start
  // and end. We require ReplicaNotFoundException from every location because
  // some other exception could have happened on a DN that actually has the
  // replica, and we want to report that error instead.
  if (replicaNotFoundCount == 0) {
    return 0;
  }

  throw new IOException("Cannot obtain block length for " + locatedblock);
}
private void checkForCorruptOpenFiles(FileStatus file, List<FileStatus> corruptFiles)
    throws IOException {
  String filePath = file.getPath().toUri().getPath();

  if (file.isDir()) {
    for (FileStatus fileStatus : nn.namesystem.dir.getListing(filePath)) {
      checkForCorruptOpenFiles(fileStatus, corruptFiles);
    }
  } else {
    LeaseManager.Lease lease =
        nn.getNamesystem().leaseManager.getLeaseByPath(filePath);
    // Condition:
    //   1. lease has expired hard limit
    //   2. the file is open for write
    //   3. the last block has 0 locations
    if (lease != null && lease.expiredHardLimit()) {
      LocatedBlocks blocks =
          nn.getNamesystem().getBlockLocations(filePath, 0, file.getLen());
      List<LocatedBlock> locatedBlockList = blocks.getLocatedBlocks();
      LocatedBlock lastBlock =
          locatedBlockList.get(locatedBlockList.size() - 1);

      if (blocks.isUnderConstruction() && lastBlock.getLocations().length == 0) {
        corruptFiles.add(file);
      }
    }
  }
}
public void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src)
    throws IOException {
  ServerName sn = AbstractFSWALProvider.getServerNameFromWALDirectoryName(conf, src);
  if (sn == null) {
    // It's not a WAL
    return;
  }

  // OK, so it's a WAL
  String hostName = sn.getHostname();
  if (LOG.isTraceEnabled()) {
    LOG.trace(src + " is a WAL file, so reordering blocks, last hostname will be: "
        + hostName);
  }

  // Just check all blocks
  for (LocatedBlock lb : lbs.getLocatedBlocks()) {
    DatanodeInfo[] dnis = lb.getLocations();
    if (dnis != null && dnis.length > 1) {
      boolean found = false;
      for (int i = 0; i < dnis.length - 1 && !found; i++) {
        if (hostName.equals(dnis[i].getHostName())) {
          // advance the other locations by one and put this one last
          DatanodeInfo toLast = dnis[i];
          System.arraycopy(dnis, i + 1, dnis, i, dnis.length - i - 1);
          dnis[dnis.length - 1] = toLast;
          found = true;
        }
      }
    }
  }
}
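/*
 * Illustrative sketch only, not part of the class above: it replays the same
 * shift-and-append move from reorderBlocks() on a plain String array so the
 * effect is easy to see. The host names (rs0, rs1, rs2) are hypothetical
 * stand-ins for DatanodeInfo entries; "rs1" plays the role of the WAL writer.
 */
public class ReorderDemo {
  public static void main(String[] args) {
    String walWriterHost = "rs1"; // hypothetical WAL writer's host
    String[] hosts = {"rs0", "rs1", "rs2"};
    for (int i = 0; i < hosts.length - 1; i++) {
      if (walWriterHost.equals(hosts[i])) {
        // same move as reorderBlocks(): shift the tail left, append the match
        String toLast = hosts[i];
        System.arraycopy(hosts, i + 1, hosts, i, hosts.length - i - 1);
        hosts[hosts.length - 1] = toLast;
        break;
      }
    }
    // prints [rs0, rs2, rs1]: the writer's local replica is now tried last
    System.out.println(java.util.Arrays.toString(hosts));
  }
}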
/**
 * Test that {@link PlacementMonitor} moves a block correctly.
 * @throws Exception
 */
@Test
public void testMoveBlock() throws Exception {
  setupCluster();
  try {
    Path path = new Path("/dir/file");
    DFSTestUtil.createFile(fs, path, 1, (short) 1, 0L);
    DFSTestUtil.waitReplication(fs, path, (short) 1);
    FileStatus status = fs.getFileStatus(path);
    LocatedBlocks blocks =
        namenode.getBlockLocations(path.toString(), 0, status.getLen());
    Assert.assertEquals(1, blocks.getLocatedBlocks().size());
    LocatedBlock block = blocks.getLocatedBlocks().get(0);
    Assert.assertEquals(1, block.getLocations().length);
    DatanodeInfo source = block.getLocations()[0];

    // Pick any datanode other than the source as the target, then exclude
    // everything except the target from the candidate set.
    Set<DatanodeInfo> excluded = new HashSet<DatanodeInfo>();
    for (DatanodeInfo d : datanodes) {
      excluded.add(d);
    }
    excluded.remove(source);
    DatanodeInfo target = excluded.iterator().next();
    excluded.add(source);
    excluded.remove(target);

    BlockMover.BlockMoveAction action =
        blockMover.new BlockMoveAction(block, source, excluded, 1);
    LOG.info("Start moving block from " + source + " to " + target);
    action.run();
    LOG.info("Done moving block");

    boolean blockMoved = false;
    for (int i = 0; i < 100; ++i) {
      blocks = namenode.getBlockLocations(path.toString(), 0, status.getLen());
      block = blocks.getLocatedBlocks().get(0);
      if (block.getLocations().length == 1
          && block.getLocations()[0].equals(target)) {
        blockMoved = true;
        break;
      }
      Thread.sleep(100L);
    }
    Assert.assertTrue(blockMoved);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
    if (placementMonitor != null) {
      placementMonitor.stop();
    }
  }
}
/**
 * Verify the number of corrupted block replicas by fetching the block
 * location from the name node.
 */
private void verifyCorruptedBlockCount(Path filePath, int expectedReplicas)
    throws AccessControlException, FileNotFoundException,
    UnresolvedLinkException, IOException {
  final LocatedBlocks lBlocks = dfs.dfs.getNamenode().getBlockLocations(
      filePath.toUri().getPath(), 0, Long.MAX_VALUE);
  // we expect only the first block of the file is used for this test
  LocatedBlock firstLocatedBlock = lBlocks.get(0);
  Assert.assertEquals(expectedReplicas,
      firstLocatedBlock.getLocations().length);
}
private DNAddrPair chooseDataNode(LocatedBlock block) throws IOException {
  while (true) {
    DatanodeInfo[] nodes = block.getLocations();
    try {
      DatanodeInfo chosenNode = bestNode(nodes, deadNodes);
      InetSocketAddress targetAddr =
          NetUtils.createSocketAddr(chosenNode.getXferAddr());
      return new DNAddrPair(chosenNode, targetAddr);
    } catch (IOException ie) {
      String blockInfo = block.getBlock() + " file=" + src;
      if (failures >= dfsClient.getMaxBlockAcquireFailures()) {
        throw new BlockMissingException(src,
            "Could not obtain block: " + blockInfo, block.getStartOffset());
      }

      if (nodes == null || nodes.length == 0) {
        DFSClient.LOG.info("No node available for block: " + blockInfo);
      }
      DFSClient.LOG.info("Could not obtain block " + block.getBlock()
          + " from any node: " + ie
          + ". Will get new block locations from namenode and retry...");
      try {
        // Introduce a random factor to the wait time before another retry.
        // The wait time depends on the number of failures and a random factor.
        // On the first BlockMissingException, the wait time is a random number
        // between 0..3000 ms. If the first retry still fails, we will wait a
        // 3000 ms grace period before the 2nd retry. At the second retry, the
        // waiting window is expanded to 6000 ms, alleviating the request rate
        // to the server. Similarly, the 3rd retry waits a 6000 ms grace period
        // before retrying, and the waiting window is expanded to 9000 ms.
        double waitTime =
            timeWindow * failures // grace period for the last round of attempt
            + timeWindow * (failures + 1)
                * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure
        DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1)
            + " IOException, will wait for " + waitTime + " msec.");
        Thread.sleep((long) waitTime);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear(); // 2nd option is to remove only nodes[blockId]
      openInfo();
      block = getBlockAt(block.getStartOffset(), false);
      failures++;
      continue;
    }
  }
}
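/*
 * Illustrative sketch only, not part of DFSInputStream: it evaluates the
 * wait-time formula from chooseDataNode() above for the first three failures,
 * assuming timeWindow = 3000 ms (the documented default of
 * dfs.client.retry.window.base). The grace period grows linearly while the
 * random window widens by one timeWindow per failure.
 */
public class RetryWindowDemo {
  public static void main(String[] args) {
    final int timeWindow = 3000; // assumed dfs.client.retry.window.base
    final java.util.Random rand = new java.util.Random();
    for (int failures = 0; failures < 3; failures++) {
      int grace = timeWindow * failures;        // fixed lower bound
      int window = timeWindow * (failures + 1); // width of the random span
      double waitTime = grace + window * rand.nextDouble();
      System.out.printf("failure #%d: wait in [%d, %d) ms, sampled %.0f ms%n",
          failures + 1, grace, grace + window, waitTime);
    }
  }
}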
private int printLocatedBlocks(Path filePath) throws Exception {
  LocatedBlocks lbs = dfs.getLocatedBlocks(filePath, 0L, Integer.MAX_VALUE);
  StringBuilder sb = new StringBuilder();
  sb.append("Path " + filePath + ":");
  int maxRepl = 0;
  for (LocatedBlock lb : lbs.getLocatedBlocks()) {
    sb.append(lb.getBlock());
    sb.append(":");
    for (DatanodeInfo loc : lb.getLocations()) {
      sb.append(loc.getHostName());
      sb.append(" ");
    }
    if (lb.getLocations().length > maxRepl) {
      maxRepl = lb.getLocations().length;
    }
  }
  LOG.info(sb.toString());
  return maxRepl;
}
private void addBlocks(String fileName, String clientName) throws IOException {
  for (int jdx = 0; jdx < blocksPerFile; jdx++) {
    LocatedBlock loc = nameNode.addBlock(fileName, clientName);
    for (DatanodeInfo dnInfo : loc.getLocations()) {
      int dnIdx = Arrays.binarySearch(datanodes, dnInfo.getName());
      datanodes[dnIdx].addBlock(loc.getBlock());
      nameNode.blockReceived(datanodes[dnIdx].dnRegistration,
          new Block[] {loc.getBlock()}, new String[] {""});
    }
  }
}
/**
 * TC11: Racing rename
 * @throws IOException an exception might be thrown
 */
public void testTC11() throws Exception {
  final Path p = new Path("/TC11/foo");
  System.out.println("p=" + p);

  // a. Create file and write one block of data. Close file.
  final int len1 = (int) BLOCK_SIZE;
  {
    FSDataOutputStream out = fs.create(p, false, buffersize, REPLICATION, BLOCK_SIZE);
    AppendTestUtil.write(out, 0, len1);
    out.close();
  }

  // b. Reopen file in "append" mode. Append half block of data.
  FSDataOutputStream out = fs.append(p);
  final int len2 = (int) BLOCK_SIZE / 2;
  AppendTestUtil.write(out, len1, len2);
  out.hflush();

  // c. Rename file to file.new.
  final Path pnew = new Path(p + ".new");
  assertTrue(fs.rename(p, pnew));

  // d. Close file handle that was opened in (b).
  try {
    out.close();
    fail("close() should throw an exception");
  } catch (Exception e) {
    AppendTestUtil.LOG.info("GOOD!", e);
  }

  // wait for the lease recovery
  cluster.setLeasePeriod(1000, 1000);
  AppendTestUtil.sleep(5000);

  // check block sizes
  final long len = fs.getFileStatus(pnew).getLen();
  final LocatedBlocks locatedblocks =
      fs.dfs.getNamenode().getBlockLocations(pnew.toString(), 0L, len);
  final int numblock = locatedblocks.locatedBlockCount();
  for (int i = 0; i < numblock; i++) {
    final LocatedBlock lb = locatedblocks.get(i);
    final Block blk = lb.getBlock();
    final long size = lb.getBlockSize();
    if (i < numblock - 1) {
      assertEquals(BLOCK_SIZE, size);
    }
    for (DatanodeInfo datanodeinfo : lb.getLocations()) {
      final DataNode dn = cluster.getDataNode(datanodeinfo.getIpcPort());
      final Block metainfo = dn.data.getStoredBlock(blk.getBlockId());
      assertEquals(size, metainfo.getNumBytes());
    }
  }
}
private BlockInfo createBlockInfo(Path file, LocatedBlock b) {
  DatanodeInfo[] locations = b.getLocations();
  String[] hosts = new String[locations.length];
  String[] names = new String[locations.length];
  for (int i = 0; i < locations.length; ++i) {
    DatanodeInfo d = locations[i];
    hosts[i] = d.getHost();
    names[i] = d.getName();
  }
  BlockLocation loc =
      new BlockLocation(names, hosts, b.getStartOffset(), b.getBlockSize());
  return new BlockInfo(loc, file);
}
/** Read the entire buffer. */
@Override
public synchronized int read(byte buf[], int off, int len) throws IOException {
  dfsClient.checkOpen();
  if (closed) {
    throw new IOException("Stream closed");
  }
  Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap =
      new HashMap<ExtendedBlock, Set<DatanodeInfo>>();
  failures = 0;
  if (pos < getFileLength()) {
    int retries = 2;
    while (retries > 0) {
      try {
        if (pos > blockEnd) {
          currentNode = blockSeekTo(pos);
        }
        int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
        int result = readBuffer(buf, off, realLen, corruptedBlockMap);

        if (result >= 0) {
          pos += result;
        } else {
          // got an EOS from the reader though we expect more data on it.
          throw new IOException("Unexpected EOS from the reader");
        }
        if (dfsClient.stats != null && result != -1) {
          dfsClient.stats.incrementBytesRead(result);
        }
        return result;
      } catch (ChecksumException ce) {
        throw ce;
      } catch (IOException e) {
        if (retries == 1) {
          DFSClient.LOG.warn("DFS Read", e);
        }
        blockEnd = -1;
        if (currentNode != null) {
          addToDeadNodes(currentNode);
        }
        if (--retries == 0) {
          throw e;
        }
      } finally {
        // Report block replica corruption whether the read was successful
        // or a ChecksumException occurred.
        reportCheckSumFailure(corruptedBlockMap,
            currentLocatedBlock.getLocations().length);
      }
    }
  }
  return -1;
}
/** Convert a LocatedBlock to a Json map. */
private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock)
    throws IOException {
  if (locatedblock == null) {
    return null;
  }

  final Map<String, Object> m = new TreeMap<String, Object>();
  m.put("blockToken", toJsonMap(locatedblock.getBlockToken()));
  m.put("isCorrupt", locatedblock.isCorrupt());
  m.put("startOffset", locatedblock.getStartOffset());
  m.put("block", toJsonMap(locatedblock.getBlock()));
  m.put("locations", toJsonArray(locatedblock.getLocations()));
  return m;
}
private void waitForLocations(int locations)
    throws IOException, InterruptedException {
  for (int tries = 0; tries < RETRIES; ) {
    try {
      LocatedBlock locatedBlock = getLocatedBlock();
      assertThat(locatedBlock.getLocations().length, is(locations));
      break;
    } catch (AssertionError e) {
      if (++tries < RETRIES) {
        Thread.sleep(1000);
      } else {
        throw e;
      }
    }
  }
}
/**
 * Read bytes starting from the specified position.
 *
 * @param position start read from this position
 * @param buffer read buffer
 * @param offset offset into buffer
 * @param length number of bytes to read
 * @return actual number of bytes read
 */
@Override
public int read(long position, byte[] buffer, int offset, int length)
    throws IOException {
  // sanity checks
  dfsClient.checkOpen();
  if (closed) {
    throw new IOException("Stream closed");
  }
  failures = 0;
  long filelen = getFileLength();
  if ((position < 0) || (position >= filelen)) {
    return -1;
  }
  int realLen = length;
  if ((position + length) > filelen) {
    realLen = (int) (filelen - position);
  }

  // determine the block and byte range within the block
  // corresponding to position and realLen
  List<LocatedBlock> blockRange = getBlockRange(position, realLen);
  int remaining = realLen;
  Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap =
      new HashMap<ExtendedBlock, Set<DatanodeInfo>>();
  for (LocatedBlock blk : blockRange) {
    long targetStart = position - blk.getStartOffset();
    long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
    try {
      fetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1,
          buffer, offset, corruptedBlockMap);
    } finally {
      // Check and report if any block replicas are corrupted.
      // BlockMissingException may be caught if all block replicas are
      // corrupted.
      reportCheckSumFailure(corruptedBlockMap, blk.getLocations().length);
    }

    remaining -= bytesToRead;
    position += bytesToRead;
    offset += bytesToRead;
  }
  assert remaining == 0 : "Wrong number of bytes read.";
  if (dfsClient.stats != null) {
    dfsClient.stats.incrementBytesRead(realLen);
  }
  return realLen;
}
/**
 * Go to each block on the 2nd DataNode until it fails...
 *
 * @param path file whose blocks are accessed
 * @param size length of the file
 * @throws IOException
 */
private void triggerFailure(String path, long size) throws IOException {
  NamenodeProtocols nn = cluster.getNameNodeRpc();
  List<LocatedBlock> locatedBlocks =
      nn.getBlockLocations(path, 0, size).getLocatedBlocks();

  for (LocatedBlock lb : locatedBlocks) {
    DatanodeInfo dinfo = lb.getLocations()[1];
    ExtendedBlock b = lb.getBlock();
    try {
      accessBlock(dinfo, lb);
    } catch (IOException e) {
      System.out.println("Failure triggered, on block: " + b.getBlockId()
          + "; corresponding volume should be removed by now");
      break;
    }
  }
}
/**
 * TC7: Corrupted replicas are present.
 * @throws IOException an exception might be thrown
 */
public void testTC7() throws Exception {
  final short repl = 2;
  final Path p = new Path("/TC7/foo");
  System.out.println("p=" + p);

  // a. Create file with replication factor of 2. Write half block of data. Close file.
  final int len1 = (int) (BLOCK_SIZE / 2);
  {
    FSDataOutputStream out = fs.create(p, false, buffersize, repl, BLOCK_SIZE);
    AppendTestUtil.write(out, 0, len1);
    out.close();
  }
  DFSTestUtil.waitReplication(fs, p, repl);

  // b. Log into one datanode that has one replica of this block.
  //    Find the block file on this datanode and truncate it to zero size.
  final LocatedBlocks locatedblocks =
      fs.dfs.getNamenode().getBlockLocations(p.toString(), 0L, len1);
  assertEquals(1, locatedblocks.locatedBlockCount());
  final LocatedBlock lb = locatedblocks.get(0);
  final Block blk = lb.getBlock();
  assertEquals(len1, lb.getBlockSize());
  DatanodeInfo[] datanodeinfos = lb.getLocations();
  assertEquals(repl, datanodeinfos.length);
  final DataNode dn = cluster.getDataNode(datanodeinfos[0].getIpcPort());
  final FSDataset data = (FSDataset) dn.getFSDataset();
  final RandomAccessFile raf = new RandomAccessFile(data.getBlockFile(blk), "rw");
  AppendTestUtil.LOG.info("dn=" + dn + ", blk=" + blk
      + " (length=" + blk.getNumBytes() + ")");
  assertEquals(len1, raf.length());
  raf.setLength(0);
  raf.close();

  // c. Open file in "append mode". Append a new block worth of data. Close file.
  final int len2 = (int) BLOCK_SIZE;
  {
    FSDataOutputStream out = fs.append(p);
    AppendTestUtil.write(out, len1, len2);
    out.close();
  }

  // d. Reopen file and read two blocks worth of data.
  AppendTestUtil.check(fs, p, len1 + len2);
}
/** Create a file with one block and corrupt some/all of the block replicas. */
private void createAFileWithCorruptedBlockReplicas(Path filePath, short repl,
    int corruptBlockCount) throws IOException, AccessControlException,
    FileNotFoundException, UnresolvedLinkException, InterruptedException,
    TimeoutException {
  DFSTestUtil.createFile(dfs, filePath, BLOCK_SIZE, repl, 0);
  DFSTestUtil.waitReplication(dfs, filePath, repl);
  // Locate the file blocks by asking name node
  final LocatedBlocks locatedblocks = dfs.dfs.getNamenode()
      .getBlockLocations(filePath.toString(), 0L, BLOCK_SIZE);
  Assert.assertEquals(repl, locatedblocks.get(0).getLocations().length);
  // The file only has one block
  LocatedBlock lblock = locatedblocks.get(0);
  DatanodeInfo[] datanodeinfos = lblock.getLocations();
  ExtendedBlock block = lblock.getBlock();
  // corrupt some/all of the block replicas
  for (int i = 0; i < corruptBlockCount; i++) {
    DatanodeInfo dninfo = datanodeinfos[i];
    final DataNode dn = cluster.getDataNode(dninfo.getIpcPort());
    corruptBlock(block, dn);
    LOG.debug("Corrupted block " + block.getBlockName()
        + " on data node " + dninfo);
  }
}
public static DatanodeInfo bestNode(LocatedBlock blk) throws IOException {
  TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
  DatanodeInfo chosenNode = null;
  int failures = 0;
  Socket s = null;
  DatanodeInfo[] nodes = blk.getLocations();
  if (nodes == null || nodes.length == 0) {
    throw new IOException("No nodes contain this block");
  }
  while (s == null) {
    if (chosenNode == null) {
      // pick a random node that is not already known to be dead
      do {
        chosenNode = nodes[rand.nextInt(nodes.length)];
      } while (deadNodes.contains(chosenNode));
    }

    // just ping to check whether the node is alive
    InetSocketAddress targetAddr = NetUtils.createSocketAddr(
        chosenNode.getHost() + ":" + chosenNode.getInfoPort());

    try {
      s = new Socket();
      s.connect(targetAddr, HdfsConstants.READ_TIMEOUT);
      s.setSoTimeout(HdfsConstants.READ_TIMEOUT);
    } catch (IOException e) {
      deadNodes.add(chosenNode);
      // force a fresh pick among the remaining live nodes on the next round
      chosenNode = null;
      s.close();
      s = null;
      failures++;
    }
    if (failures == nodes.length) {
      throw new IOException(
          "Could not reach the block containing the data. Please try again");
    }
  }
  s.close();
  return chosenNode;
}
/** Get a BlockReader for the given block. */
public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
    throws IOException {
  InetSocketAddress targetAddr = null;
  Socket sock = null;
  ExtendedBlock block = testBlock.getBlock();
  DatanodeInfo[] nodes = testBlock.getLocations();
  targetAddr = NetUtils.createSocketAddr(nodes[0].getName());
  sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
  sock.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
  sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);

  return BlockReaderFactory.newBlockReader(
      new DFSClient.Conf(conf),
      sock, targetAddr.toString() + ":" + block.getBlockId(), block,
      testBlock.getBlockToken(),
      offset, lenToRead,
      conf.getInt("io.file.buffer.size", 4096),
      true, "");
}
/**
 * Count datanodes that have copies of the blocks for a file
 * and put the counts into the map.
 *
 * @param map map from block id to its accumulated location counts
 * @param path file to look up
 * @param size length of the file
 * @return total number of block replicas found
 * @throws IOException
 */
private int countNNBlocks(Map<String, BlockLocs> map, String path, long size)
    throws IOException {
  int total = 0;

  NamenodeProtocols nn = cluster.getNameNodeRpc();
  List<LocatedBlock> locatedBlocks =
      nn.getBlockLocations(path, 0, size).getLocatedBlocks();
  // System.out.println("Number of blocks: " + locatedBlocks.size());

  for (LocatedBlock lb : locatedBlocks) {
    String blockId = "" + lb.getBlock().getBlockId();
    // System.out.print(blockId + ": ");
    DatanodeInfo[] dn_locs = lb.getLocations();
    BlockLocs bl = map.get(blockId);
    if (bl == null) {
      bl = new BlockLocs();
    }
    // System.out.print(dn_info.name + ",");
    total += dn_locs.length;
    bl.num_locs += dn_locs.length;
    map.put(blockId, bl);
    // System.out.println();
  }
  return total;
}
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
  LocatedBlocks newInfo =
      DFSClient.callGetBlockLocations(dfsClient.namenode, src, 0, prefetchSize);
  if (DFSClient.LOG.isDebugEnabled()) {
    DFSClient.LOG.debug("newInfo = " + newInfo);
  }
  if (newInfo == null) {
    throw new IOException("Cannot open filename " + src);
  }

  if (locatedBlocks != null) {
    Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
    Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
    while (oldIter.hasNext() && newIter.hasNext()) {
      if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) {
        throw new IOException("Blocklist for " + src + " has changed!");
      }
    }
  }
  locatedBlocks = newInfo;
  long lastBlockBeingWrittenLength = 0;
  if (!locatedBlocks.isLastBlockComplete()) {
    final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
    if (last != null) {
      if (last.getLocations().length == 0) {
        return -1;
      }
      final long len = readBlockLength(last);
      last.getBlock().setNumBytes(len);
      lastBlockBeingWrittenLength = len;
    }
  }

  currentNode = null;
  return lastBlockBeingWrittenLength;
}
/**
 * The following test first creates a file with a few blocks. It randomly
 * truncates the replica of the last block stored in each datanode. Finally,
 * it triggers block synchronization to synchronize all stored blocks.
 */
public void testBlockSynchronization() throws Exception {
  final int ORG_FILE_SIZE = 3000;
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(5).build();
    cluster.waitActive();

    // create a file
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    String filestr = "/foo";
    Path filepath = new Path(filestr);
    DFSTestUtil.createFile(dfs, filepath, ORG_FILE_SIZE, REPLICATION_NUM, 0L);
    assertTrue(dfs.exists(filepath));
    DFSTestUtil.waitReplication(dfs, filepath, REPLICATION_NUM);

    // get block info for the last block
    LocatedBlock locatedblock =
        TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr);
    DatanodeInfo[] datanodeinfos = locatedblock.getLocations();
    assertEquals(REPLICATION_NUM, datanodeinfos.length);

    // connect to data nodes
    DataNode[] datanodes = new DataNode[REPLICATION_NUM];
    for (int i = 0; i < REPLICATION_NUM; i++) {
      datanodes[i] = cluster.getDataNode(datanodeinfos[i].getIpcPort());
      assertTrue(datanodes[i] != null);
    }

    // verify Block Info
    ExtendedBlock lastblock = locatedblock.getBlock();
    DataNode.LOG.info("newblocks=" + lastblock);
    for (int i = 0; i < REPLICATION_NUM; i++) {
      checkMetaInfo(lastblock, datanodes[i]);
    }

    DataNode.LOG.info("dfs.dfs.clientName=" + dfs.dfs.clientName);
    cluster.getNameNodeRpc().append(filestr, dfs.dfs.clientName);

    // expire lease to trigger block recovery.
    waitLeaseRecovery(cluster);

    Block[] updatedmetainfo = new Block[REPLICATION_NUM];
    long oldSize = lastblock.getNumBytes();
    lastblock = TestInterDatanodeProtocol.getLastLocatedBlock(
        dfs.dfs.getNamenode(), filestr).getBlock();
    long currentGS = lastblock.getGenerationStamp();
    for (int i = 0; i < REPLICATION_NUM; i++) {
      updatedmetainfo[i] = DataNodeTestUtils.getFSDataset(datanodes[i])
          .getStoredBlock(lastblock.getBlockPoolId(), lastblock.getBlockId());
      assertEquals(lastblock.getBlockId(), updatedmetainfo[i].getBlockId());
      assertEquals(oldSize, updatedmetainfo[i].getNumBytes());
      assertEquals(currentGS, updatedmetainfo[i].getGenerationStamp());
    }

    // verify that lease recovery does not occur when namenode is in safemode
    System.out.println("Testing that lease recovery cannot happen during safemode.");
    filestr = "/foo.safemode";
    filepath = new Path(filestr);
    dfs.create(filepath, (short) 1);
    cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
    assertTrue(dfs.dfs.exists(filestr));
    DFSTestUtil.waitReplication(dfs, filepath, (short) 1);
    waitLeaseRecovery(cluster);
    // verify that we still cannot recover the lease
    LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem());
    assertTrue("Found " + lm.countLease() + " lease, expected 1",
        lm.countLease() == 1);
    cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
private void copyBlocksToLostFound(String parent, HdfsFileStatus file,
    LocatedBlocks blocks) throws IOException {
  final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
  final String fullName = file.getFullName(parent);
  OutputStream fos = null;
  try {
    if (!lfInited) {
      lostFoundInit(dfs);
    }
    if (!lfInitedOk) {
      throw new IOException("failed to initialize lost+found");
    }
    String target = lostFound + fullName;
    if (hdfsPathExists(target)) {
      LOG.warn("Fsck: can't copy the remains of " + fullName + " to "
          + "lost+found, because " + target + " already exists.");
      return;
    }
    if (!namenode.getRpcServer().mkdirs(target, file.getPermission(), true)) {
      throw new IOException("failed to create directory " + target);
    }
    // create chains
    int chain = 0;
    boolean copyError = false;
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
      LocatedBlock lblock = lBlk;
      DatanodeInfo[] locs = lblock.getLocations();
      if (locs == null || locs.length == 0) {
        if (fos != null) {
          fos.flush();
          fos.close();
          fos = null;
        }
        continue;
      }
      if (fos == null) {
        fos = dfs.create(target + "/" + chain, true);
        if (fos == null) {
          throw new IOException("Failed to copy " + fullName
              + " to /lost+found: could not store chain " + chain);
        }
        chain++;
      }

      // copy the block. It's a pity it's not abstracted from DFSInputStream ...
      try {
        copyBlock(dfs, lblock, fos);
      } catch (Exception e) {
        LOG.error("Fsck: could not copy block " + lblock.getBlock()
            + " to " + target, e);
        fos.flush();
        fos.close();
        fos = null;
        internalError = true;
        copyError = true;
      }
    }
    if (copyError) {
      LOG.warn("Fsck: there were errors copying the remains of the "
          + "corrupted file " + fullName + " to /lost+found");
    } else {
      LOG.info("Fsck: copied the remains of the corrupted file "
          + fullName + " to /lost+found");
    }
  } catch (Exception e) {
    LOG.error("copyBlocksToLostFound: error processing " + fullName, e);
    internalError = true;
  } finally {
    if (fos != null) {
      fos.close();
    }
    dfs.close();
  }
}
/**
 * Create a file, write something, fsync but not close.
 * Then change lease period and wait for lease recovery.
 * Finally, read the block directly from each Datanode and verify the content.
 */
public void testLeaseExpireHardLimit() throws Exception {
  System.out.println("testLeaseExpireHardLimit start");
  final long leasePeriod = 1000;
  final int DATANODE_NUM = 3;

  Configuration conf = new Configuration();
  conf.setInt("heartbeat.recheck.interval", 1000);
  conf.setInt("dfs.heartbeat.interval", 1);

  // create cluster
  MiniDFSCluster cluster = new MiniDFSCluster(conf, DATANODE_NUM, true, null);
  DistributedFileSystem dfs = null;
  try {
    cluster.waitActive();
    dfs = (DistributedFileSystem) cluster.getFileSystem();

    // create a new file.
    final String f = DIR + "foo";
    final Path fpath = new Path(f);
    FSDataOutputStream out = TestFileCreation.createFile(dfs, fpath, DATANODE_NUM);
    out.write("something".getBytes());
    out.sync();
    int actualRepl = ((DFSClient.DFSOutputStream) (out.getWrappedStream()))
        .getNumCurrentReplicas();
    assertTrue(f + " should be replicated to " + DATANODE_NUM + " datanodes.",
        actualRepl == DATANODE_NUM);

    // set the soft and hard limit to be 1 second so that the
    // namenode triggers lease recovery
    cluster.setLeasePeriod(leasePeriod, leasePeriod);
    // wait for the lease to expire
    try {
      Thread.sleep(5 * leasePeriod);
    } catch (InterruptedException e) {
    }

    LocatedBlocks locations =
        dfs.dfs.namenode.getBlockLocations(f, 0, Long.MAX_VALUE);
    assertEquals(1, locations.locatedBlockCount());
    LocatedBlock locatedblock = locations.getLocatedBlocks().get(0);
    int successcount = 0;
    for (DatanodeInfo datanodeinfo : locatedblock.getLocations()) {
      DataNode datanode = cluster.getDataNode(datanodeinfo.ipcPort);
      FSDataset dataset = (FSDataset) datanode.data;
      Block b = dataset.getStoredBlock(locatedblock.getBlock().getBlockId());
      File blockfile = dataset.findBlockFile(b.getBlockId());
      System.out.println("blockfile=" + blockfile);
      if (blockfile != null) {
        BufferedReader in = new BufferedReader(new FileReader(blockfile));
        assertEquals("something", in.readLine());
        in.close();
        successcount++;
      }
    }
    System.out.println("successcount=" + successcount);
    assertTrue(successcount > 0);
  } finally {
    IOUtils.closeStream(dfs);
    cluster.shutdown();
  }
  System.out.println("testLeaseExpireHardLimit successful");
}
/** Retry addBlock() while another thread is in chooseTarget(). See HDFS-4452. */
@Test
public void testRetryAddBlockWhileInChooseTarget() throws Exception {
  final String src = "/testRetryAddBlockWhileInChooseTarget";

  FSNamesystem ns = cluster.getNamesystem();
  BlockManager spyBM = spy(ns.getBlockManager());
  final NamenodeProtocols nn = cluster.getNameNodeRpc();

  // substitute mocked BlockManager into FSNamesystem
  Class<? extends FSNamesystem> nsClass = ns.getClass();
  Field bmField = nsClass.getDeclaredField("blockManager");
  bmField.setAccessible(true);
  bmField.set(ns, spyBM);

  doAnswer(new Answer<DatanodeStorageInfo[]>() {
    @Override
    public DatanodeStorageInfo[] answer(InvocationOnMock invocation)
        throws Throwable {
      LOG.info("chooseTarget for " + src);
      DatanodeStorageInfo[] ret =
          (DatanodeStorageInfo[]) invocation.callRealMethod();
      count++;
      if (count == 1) { // run second addBlock()
        LOG.info("Starting second addBlock for " + src);
        nn.addBlock(src, "clientName", null, null,
            INodeId.GRANDFATHER_INODE_ID, null);
        LocatedBlocks lbs = nn.getBlockLocations(src, 0, Long.MAX_VALUE);
        assertEquals("Must be one block", 1, lbs.getLocatedBlocks().size());
        lb2 = lbs.get(0);
        assertEquals("Wrong replication", REPLICATION, lb2.getLocations().length);
      }
      return ret;
    }
  }).when(spyBM).chooseTarget4NewBlock(
      Mockito.anyString(), Mockito.anyInt(), Mockito.<DatanodeDescriptor>any(),
      Mockito.<HashSet<Node>>any(), Mockito.anyLong(),
      Mockito.<List<String>>any(), Mockito.anyByte());

  // create file
  nn.create(src, FsPermission.getFileDefault(), "clientName",
      new EnumSetWritable<CreateFlag>(EnumSet.of(CreateFlag.CREATE)), true,
      (short) 3, 1024, null);

  // start first addBlock()
  LOG.info("Starting first addBlock for " + src);
  nn.addBlock(src, "clientName", null, null, INodeId.GRANDFATHER_INODE_ID, null);

  // check locations
  LocatedBlocks lbs = nn.getBlockLocations(src, 0, Long.MAX_VALUE);
  assertEquals("Must be one block", 1, lbs.getLocatedBlocks().size());
  lb1 = lbs.get(0);
  assertEquals("Wrong replication", REPLICATION, lb1.getLocations().length);
  assertEquals("Blocks are not equal", lb1.getBlock(), lb2.getBlock());
}
/*
 * XXX (ab) The bulk of this method is copied verbatim from {@link DFSClient},
 * which is bad. Both places should be refactored to provide a method to copy
 * blocks around.
 */
private void copyBlock(DFSClient dfs, LocatedBlock lblock, OutputStream fos)
    throws Exception {
  int failures = 0;
  InetSocketAddress targetAddr = null;
  TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
  BlockReader blockReader = null;
  ExtendedBlock block = lblock.getBlock();

  while (blockReader == null) {
    DatanodeInfo chosenNode;

    try {
      chosenNode = bestNode(dfs, lblock.getLocations(), deadNodes);
      targetAddr = NetUtils.createSocketAddr(chosenNode.getXferAddr());
    } catch (IOException ie) {
      if (failures >= DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT) {
        throw new IOException("Could not obtain block " + lblock, ie);
      }
      LOG.info("Could not obtain block from any node: " + ie);
      try {
        Thread.sleep(10000);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear();
      failures++;
      continue;
    }
    try {
      String file = BlockReaderFactory.getFileName(targetAddr,
          block.getBlockPoolId(), block.getBlockId());
      blockReader = new BlockReaderFactory(dfs.getConf())
          .setFileName(file)
          .setBlock(block)
          .setBlockToken(lblock.getBlockToken())
          .setStartOffset(0)
          .setLength(-1)
          .setVerifyChecksum(true)
          .setClientName("fsck")
          .setDatanodeInfo(chosenNode)
          .setInetSocketAddress(targetAddr)
          .setCachingStrategy(CachingStrategy.newDropBehind())
          .setClientCacheContext(dfs.getClientContext())
          .setConfiguration(namenode.conf)
          .setRemotePeerFactory(new RemotePeerFactory() {
            @Override
            public Peer newConnectedPeer(InetSocketAddress addr)
                throws IOException {
              Peer peer = null;
              Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
              try {
                s.connect(addr, HdfsServerConstants.READ_TIMEOUT);
                s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
                peer = TcpPeerServer.peerFromSocketAndKey(s,
                    namenode.getRpcServer().getDataEncryptionKey());
              } finally {
                if (peer == null) {
                  IOUtils.closeQuietly(s);
                }
              }
              return peer;
            }
          })
          .build();
    } catch (IOException ex) {
      // Put chosen node into dead list, continue
      LOG.info("Failed to connect to " + targetAddr + ":" + ex);
      deadNodes.add(chosenNode);
    }
  }

  byte[] buf = new byte[1024];
  int cnt = 0;
  boolean success = true;
  long bytesRead = 0;
  try {
    while ((cnt = blockReader.read(buf, 0, buf.length)) > 0) {
      fos.write(buf, 0, cnt);
      bytesRead += cnt;
    }
    if (bytesRead != block.getNumBytes()) {
      throw new IOException("Recorded block size is " + block.getNumBytes()
          + ", but datanode returned " + bytesRead + " bytes");
    }
  } catch (Exception e) {
    LOG.error("Error reading block", e);
    success = false;
  } finally {
    blockReader.close();
  }
  if (!success) {
    throw new Exception("Could not copy block data for " + lblock.getBlock());
  }
}
// try reading a block using a BlockReader directly
protected void tryRead(final Configuration conf, LocatedBlock lblock,
    boolean shouldSucceed) {
  InetSocketAddress targetAddr = null;
  IOException ioe = null;
  BlockReader blockReader = null;
  ExtendedBlock block = lblock.getBlock();
  try {
    DatanodeInfo[] nodes = lblock.getLocations();
    targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());

    blockReader = new BlockReaderFactory(new DfsClientConf(conf))
        .setFileName(BlockReaderFactory.getFileName(targetAddr,
            "test-blockpoolid", block.getBlockId()))
        .setBlock(block)
        .setBlockToken(lblock.getBlockToken())
        .setInetSocketAddress(targetAddr)
        .setStartOffset(0)
        .setLength(-1)
        .setVerifyChecksum(true)
        .setClientName("TestBlockTokenWithDFS")
        .setDatanodeInfo(nodes[0])
        .setCachingStrategy(CachingStrategy.newDefaultStrategy())
        .setClientCacheContext(ClientContext.getFromConf(conf))
        .setConfiguration(conf)
        .setTracer(FsTracer.get(conf))
        .setRemotePeerFactory(new RemotePeerFactory() {
          @Override
          public Peer newConnectedPeer(InetSocketAddress addr,
              Token<BlockTokenIdentifier> blockToken, DatanodeID datanodeId)
              throws IOException {
            Peer peer = null;
            Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
            try {
              sock.connect(addr, HdfsConstants.READ_TIMEOUT);
              sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
              peer = DFSUtilClient.peerFromSocket(sock);
            } finally {
              if (peer == null) {
                IOUtils.closeSocket(sock);
              }
            }
            return peer;
          }
        })
        .build();
  } catch (IOException ex) {
    ioe = ex;
  } finally {
    if (blockReader != null) {
      try {
        blockReader.close();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
  if (shouldSucceed) {
    Assert.assertNotNull("OP_READ_BLOCK: access token is invalid, "
        + "when it is expected to be valid", blockReader);
  } else {
    Assert.assertNotNull("OP_READ_BLOCK: access token is valid, "
        + "when it is expected to be invalid", ioe);
    Assert.assertTrue("OP_READ_BLOCK failed due to reasons other than "
        + "access token: ", ioe instanceof InvalidBlockTokenException);
  }
}