/**
 * Get a BlockReader for the given block.
 *
 * <p>Opens a new socket to the first location listed for {@code testBlock} and builds a reader
 * over it with checksum verification enabled and an empty client name. The IO buffer size is read
 * from {@code io.file.buffer.size} (4096 when unset).
 *
 * @param testBlock the located block to read; only its first location is contacted
 * @param offset the start offset handed to the reader
 * @param lenToRead the read length handed to the reader
 * @return a new BlockReader backed by a freshly connected socket
 * @throws IOException if the block lists no locations, or if connecting to the datanode or
 *     creating the reader fails
 */
public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
    throws IOException {
  ExtendedBlock block = testBlock.getBlock();
  DatanodeInfo[] nodes = testBlock.getLocations();
  if (nodes == null || nodes.length == 0) {
    // Fail with a descriptive IOException instead of an ArrayIndexOutOfBoundsException.
    throw new IOException("No datanode locations for block " + block);
  }

  InetSocketAddress targetAddr = NetUtils.createSocketAddr(nodes[0].getName());
  Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();

  // On success the socket is handed to the returned reader; on any failure it must be
  // closed here, because nothing else holds a reference to it.
  boolean success = false;
  try {
    sock.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
    sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);

    BlockReader reader =
        BlockReaderFactory.newBlockReader(
            new DFSClient.Conf(conf),
            sock,
            targetAddr.toString() + ":" + block.getBlockId(),
            block,
            testBlock.getBlockToken(),
            offset,
            lenToRead,
            conf.getInt("io.file.buffer.size", 4096),
            true,
            "");
    success = true;
    return reader;
  } finally {
    if (!success) {
      try {
        sock.close();
      } catch (IOException ignored) {
        // Best-effort cleanup; the original failure is the one that propagates.
      }
    }
  }
}
/** * Retrieve a BlockReader suitable for reading. This method will reuse the cached connection to * the DN if appropriate. Otherwise, it will create a new connection. * * @param dnAddr Address of the datanode * @param chosenNode Chosen datanode information * @param file File location * @param block The Block object * @param blockToken The access token for security * @param startOffset The read offset, relative to block head * @param len The number of bytes to read * @param bufferSize The IO buffer size (not the client buffer size) * @param verifyChecksum Whether to verify checksum * @param clientName Client name * @return New BlockReader instance */ protected BlockReader getBlockReader( InetSocketAddress dnAddr, DatanodeInfo chosenNode, String file, ExtendedBlock block, Token<BlockTokenIdentifier> blockToken, long startOffset, long len, int bufferSize, boolean verifyChecksum, String clientName) throws IOException { if (dfsClient.shouldTryShortCircuitRead(dnAddr)) { return DFSClient.getLocalBlockReader( dfsClient.conf, src, block, blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset); } IOException err = null; boolean fromCache = true; // Allow retry since there is no way of knowing whether the cached socket // is good until we actually use it. for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) { Socket sock = null; // Don't use the cache on the last attempt - it's possible that there // are arbitrarily many unusable sockets in the cache, but we don't // want to fail the read. if (retries < nCachedConnRetry) { sock = socketCache.get(dnAddr); } if (sock == null) { fromCache = false; sock = dfsClient.socketFactory.createSocket(); // TCP_NODELAY is crucial here because of bad interactions between // Nagle's Algorithm and Delayed ACKs. With connection keepalive // between the client and DN, the conversation looks like: // 1. Client -> DN: Read block X // 2. DN -> Client: data for block X // 3. 
Client -> DN: Status OK (successful read) // 4. Client -> DN: Read block Y // The fact that step #3 and #4 are both in the client->DN direction // triggers Nagling. If the DN is using delayed ACKs, this results // in a delay of 40ms or more. // // TCP_NODELAY disables nagling and thus avoids this performance // disaster. sock.setTcpNoDelay(true); NetUtils.connect( sock, dnAddr, dfsClient.getRandomLocalInterfaceAddr(), dfsClient.getConf().socketTimeout); sock.setSoTimeout(dfsClient.getConf().socketTimeout); } try { // The OP_READ_BLOCK request is sent as we make the BlockReader BlockReader reader = BlockReaderFactory.newBlockReader( dfsClient.getConf(), sock, file, block, blockToken, startOffset, len, bufferSize, verifyChecksum, clientName); return reader; } catch (IOException ex) { // Our socket is no good. DFSClient.LOG.debug("Error making BlockReader. Closing stale " + sock, ex); sock.close(); err = ex; } } throw err; }