/** BlockInStream for remote block. */ public class RemoteBlockInStream extends BlockInStream { private static final int BUFFER_SIZE = UserConf.get().REMOTE_READ_BUFFER_SIZE_BYTE; private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE); private ClientBlockInfo mBlockInfo; private InputStream mCheckpointInputStream = null; private long mReadByte; private ByteBuffer mCurrentBuffer = null; private long mBufferStartPosition = 0; private boolean mRecache = true; private BlockOutStream mBlockOutStream = null; private Object mUFSConf = null; /** * @param file the file the block belongs to * @param readType the InStream's read type * @param blockIndex the index of the block in the file * @throws IOException */ RemoteBlockInStream(TachyonFile file, ReadType readType, int blockIndex) throws IOException { this(file, readType, blockIndex, null); } /** * @param file the file the block belongs to * @param readType the InStream's read type * @param blockIndex the index of the block in the file * @param ufsConf the under file system configuration * @throws IOException */ RemoteBlockInStream(TachyonFile file, ReadType readType, int blockIndex, Object ufsConf) throws IOException { super(file, readType, blockIndex); mBlockInfo = TFS.getClientBlockInfo(FILE.FID, BLOCK_INDEX); mReadByte = 0; mBufferStartPosition = 0; if (!FILE.isComplete()) { throw new IOException("File " + FILE.getPath() + " is not ready to read"); } mRecache = readType.isCache(); if (mRecache) { mBlockOutStream = new BlockOutStream(file, WriteType.TRY_CACHE, blockIndex); } updateCurrentBuffer(); mUFSConf = ufsConf; if (mCurrentBuffer == null) { setupStreamFromUnderFs(mBlockInfo.offset, mUFSConf); if (mCheckpointInputStream == null) { TFS.reportLostFile(FILE.FID); throw new IOException("Can not find the block " + FILE + " " + BLOCK_INDEX); } } } @Override public void close() throws IOException { if (!mClosed) { if (mRecache) { mBlockOutStream.cancel(); } if (mCheckpointInputStream != null) { 
mCheckpointInputStream.close(); } } mClosed = true; } private void doneRecache() throws IOException { if (mRecache) { mBlockOutStream.close(); } } @Override public int read() throws IOException { mReadByte++; if (mReadByte > mBlockInfo.length) { doneRecache(); return -1; } if (mCurrentBuffer != null) { if (mCurrentBuffer.remaining() == 0) { mBufferStartPosition = mReadByte - 1; updateCurrentBuffer(); } if (mCurrentBuffer != null) { int ret = mCurrentBuffer.get() & 0xFF; if (mRecache) { mBlockOutStream.write(ret); } return ret; } setupStreamFromUnderFs(mBlockInfo.offset + mReadByte - 1, mUFSConf); } int ret = mCheckpointInputStream.read() & 0xFF; if (mRecache) { mBlockOutStream.write(ret); } return ret; } @Override public int read(byte b[]) throws IOException { return read(b, 0, b.length); } @Override public int read(byte b[], int off, int len) throws IOException { if (b == null) { throw new NullPointerException(); } else if (off < 0 || len < 0 || len > b.length - off) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; } long ret = mBlockInfo.length - mReadByte; if (ret < len) { len = (int) ret; } if (ret == 0) { return -1; } if (mCurrentBuffer != null) { if (mCurrentBuffer.remaining() == 0) { mBufferStartPosition = mReadByte; updateCurrentBuffer(); } if (mCurrentBuffer != null) { ret = Math.min(ret, mCurrentBuffer.remaining()); ret = Math.min(ret, len); mCurrentBuffer.get(b, off, (int) ret); mReadByte += ret; if (mRecache) { mBlockOutStream.write(b, off, (int) ret); if (mReadByte == mBlockInfo.length) { doneRecache(); } } return (int) ret; } setupStreamFromUnderFs(mBlockInfo.offset + mReadByte, mUFSConf); } ret = mCheckpointInputStream.read(b, off, len); mReadByte += ret; if (mRecache) { mBlockOutStream.write(b, off, (int) ret); if (mReadByte == mBlockInfo.length) { doneRecache(); } } return (int) ret; } private ByteBuffer readRemoteByteBuffer(ClientBlockInfo blockInfo, long offset, long len) { ByteBuffer buf = null; try { List<NetAddress> 
blockLocations = blockInfo.getLocations(); LOG.info("Block locations:" + blockLocations); for (int k = 0; k < blockLocations.size(); k++) { String host = blockLocations.get(k).mHost; int port = blockLocations.get(k).mSecondaryPort; // The data is not in remote machine's memory if port == -1. if (port == -1) { continue; } if (host.equals(InetAddress.getLocalHost().getHostName()) || host.equals(InetAddress.getLocalHost().getHostAddress())) { String localFileName = CommonUtils.concat(TFS.getRootFolder(), blockInfo.blockId); LOG.warn("Master thinks the local machine has data " + localFileName + "! But not!"); } LOG.info( host + ":" + port + " current host is " + InetAddress.getLocalHost().getHostName() + " " + InetAddress.getLocalHost().getHostAddress()); try { buf = retrieveByteBufferFromRemoteMachine( new InetSocketAddress(host, port), blockInfo.blockId, offset, len); if (buf != null) { break; } } catch (IOException e) { LOG.error(e.getMessage()); buf = null; } } } catch (IOException e) { LOG.error("Failed to get read data from remote " + e.getMessage()); buf = null; } return buf; } private ByteBuffer retrieveByteBufferFromRemoteMachine( InetSocketAddress address, long blockId, long offset, long length) throws IOException { SocketChannel socketChannel = SocketChannel.open(); socketChannel.connect(address); LOG.info("Connected to remote machine " + address + " sent"); DataServerMessage sendMsg = DataServerMessage.createBlockRequestMessage(blockId, offset, length); while (!sendMsg.finishSending()) { sendMsg.send(socketChannel); } LOG.info("Data " + blockId + " to remote machine " + address + " sent"); DataServerMessage recvMsg = DataServerMessage.createBlockResponseMessage(false, blockId); while (!recvMsg.isMessageReady()) { int numRead = recvMsg.recv(socketChannel); if (numRead == -1) { LOG.warn("Read nothing"); } } LOG.info("Data " + blockId + " from remote machine " + address + " received"); socketChannel.close(); if (!recvMsg.isMessageReady()) { LOG.info("Data " + 
blockId + " from remote machine is not ready."); return null; } if (recvMsg.getBlockId() < 0) { LOG.info("Data " + recvMsg.getBlockId() + " is not in remote machine."); return null; } return recvMsg.getReadOnlyData(); } @Override public void seek(long pos) throws IOException { if (pos < 0) { throw new IOException("pos is negative: " + pos); } mRecache = false; if (mCurrentBuffer != null) { mReadByte = pos; if (mBufferStartPosition <= pos && pos < mBufferStartPosition + mCurrentBuffer.limit()) { mCurrentBuffer.position((int) (pos - mBufferStartPosition)); } else { mBufferStartPosition = pos; updateCurrentBuffer(); } } else { if (mCheckpointInputStream != null) { mCheckpointInputStream.close(); } setupStreamFromUnderFs(mBlockInfo.offset + pos, mUFSConf); } } private void setupStreamFromUnderFs(long offset, Object conf) throws IOException { String checkpointPath = TFS.getUfsPath(FILE.FID); if (!checkpointPath.equals("")) { LOG.info("May stream from underlayer fs: " + checkpointPath); UnderFileSystem underfsClient = UnderFileSystem.get(checkpointPath, conf); try { mCheckpointInputStream = underfsClient.open(checkpointPath); while (offset > 0) { long skipped = mCheckpointInputStream.skip(offset); offset -= skipped; if (skipped == 0) { throw new IOException( "Failed to find the start position " + offset + " for block " + mBlockInfo); } } } catch (IOException e) { LOG.error( "Failed to read from checkpoint " + checkpointPath + " for File " + FILE.FID + "\n" + e); mCheckpointInputStream = null; } } } @Override public long skip(long n) throws IOException { if (n <= 0) { return 0; } long ret = mBlockInfo.length - mReadByte; if (ret > n) { ret = n; } if (mCurrentBuffer != null) { if (mCurrentBuffer.remaining() < ret) { mBufferStartPosition = mReadByte + ret; updateCurrentBuffer(); } if (mCurrentBuffer != null) { if (ret > 0) { if (mRecache) { mBlockOutStream.cancel(); } mRecache = false; } return (int) ret; } setupStreamFromUnderFs(mBlockInfo.offset + mReadByte, mUFSConf); } 
long tmp = mCheckpointInputStream.skip(ret); ret = Math.min(ret, tmp); mReadByte += ret; if (ret > 0) { if (mRecache) { mBlockOutStream.cancel(); } mRecache = false; } return ret; } private void updateCurrentBuffer() throws IOException { long length = BUFFER_SIZE; if (mBufferStartPosition + length > mBlockInfo.length) { length = mBlockInfo.length - mBufferStartPosition; } LOG.info( String.format( "Try to find remote worker and read block %d from %d, with len %d", mBlockInfo.blockId, mBufferStartPosition, length)); mCurrentBuffer = readRemoteByteBuffer(mBlockInfo, mBufferStartPosition, length); if (mCurrentBuffer == null) { mBlockInfo = TFS.getClientBlockInfo(FILE.FID, BLOCK_INDEX); mCurrentBuffer = readRemoteByteBuffer(mBlockInfo, mBufferStartPosition, length); } } }
/**
 * Command-line entry point of the performance benchmark.
 *
 * <p>Expects exactly nine arguments (master address, file name, block size in bytes, blocks per
 * file, debug flag, thread count, files per thread, test case number, base file number). With any
 * other argument count the usage text is printed and the JVM exits with status -1. Otherwise the
 * benchmark settings are stored, the selected test case (1-6) is run, the collected results are
 * printed on one line, and the JVM exits with status 0.
 *
 * @param args the nine command-line arguments described above
 */
public static void main(String[] args)
    throws IOException, InvalidPathException, FileAlreadyExistException {
  // Guard clause: anything but nine arguments only prints the usage text.
  if (args.length != 9) {
    String usage =
        "java -cp target/tachyon-" + Version.VERSION
            + "-jar-with-dependencies.jar tachyon.examples.Performance "
            + "<MasterIp> <FileName> <WriteBlockSizeInBytes> <BlocksPerFile> "
            + "<DebugMode:true/false> <Threads> <FilesPerThread> <TestCaseNumber> <BaseFileNumber>\n"
            + "1: Files Write Test\n"
            + "2: Files Read Test\n"
            + "3: RamFile Write Test \n"
            + "4: RamFile Read Test \n"
            + "5: ByteBuffer Write Test \n"
            + "6: ByteBuffer Read Test \n";
    System.out.println(usage);
    System.exit(-1);
  }

  // Arguments are parsed strictly in positional order.
  MASTER_ADDRESS = args[0];
  FILE_NAME = args[1];
  BLOCK_SIZE_BYTES = Integer.parseInt(args[2]);
  BLOCKS_PER_FILE = Integer.parseInt(args[3]);
  DEBUG_MODE = ("true".equals(args[4]));
  THREADS = Integer.parseInt(args[5]);
  FILES = Integer.parseInt(args[6]) * THREADS;
  final int selectedTest = Integer.parseInt(args[7]);
  BASE_FILE_NUMBER = Integer.parseInt(args[8]);

  // Derived sizes.
  FILE_BYTES = BLOCKS_PER_FILE * BLOCK_SIZE_BYTES;
  FILES_BYTES = 1L * FILE_BYTES * FILES;

  // Common suffix of the result line; each test case puts its own label in front of it.
  RESULT_PREFIX = String.format(
      "Threads %d FilesPerThread %d TotalFiles %d "
          + "BLOCK_SIZE_KB %d BLOCKS_PER_FILE %d FILE_SIZE_MB %d "
          + "Tachyon_WRITE_BUFFER_SIZE_KB %d BaseFileNumber %d : ",
      THREADS,
      FILES / THREADS,
      FILES,
      BLOCK_SIZE_BYTES / 1024,
      BLOCKS_PER_FILE,
      CommonUtils.getMB(FILE_BYTES),
      UserConf.get().FILE_BUFFER_BYTES / 1024,
      BASE_FILE_NUMBER);

  switch (selectedTest) {
    case 1:
      RESULT_PREFIX = "TachyonFilesWriteTest " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      MTC = TachyonFS.get(MASTER_ADDRESS);
      createFiles();
      TachyonTest(true);
      break;
    case 2:
      RESULT_PREFIX = "TachyonFilesReadTest " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      MTC = TachyonFS.get(MASTER_ADDRESS);
      TachyonTest(false);
      break;
    case 3:
      RESULT_PREFIX = "RamFile Write " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      memoryCopyTest(true, false);
      break;
    case 4:
      RESULT_PREFIX = "RamFile Read " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      memoryCopyTest(false, false);
      break;
    case 5:
      RESULT_PREFIX = "ByteBuffer Write Test " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      memoryCopyTest(true, true);
      break;
    case 6:
      RESULT_PREFIX = "ByteBuffer Read Test " + RESULT_PREFIX;
      LOG.info(RESULT_PREFIX);
      memoryCopyTest(false, true);
      break;
    default:
      CommonUtils.runtimeException("No Test Case " + selectedTest);
      break;
  }

  // Emit every collected result, space separated, on a single line.
  int slot = 0;
  while (slot < RESULT_ARRAY_SIZE) {
    System.out.print(Results[slot] + " ");
    slot++;
  }
  System.out.println();
  System.exit(0);
}
public class HdfsFileInputStream extends InputStream implements Seekable, PositionedReadable { private static Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE); private long mCurrentPosition; private TachyonFS mTFS; private int mFileId; private Path mHdfsPath; private Configuration mHadoopConf; private int mHadoopBufferSize; private FSDataInputStream mHdfsInputStream = null; private InStream mTachyonFileInputStream = null; private int mBufferLimit = 0; private int mBufferPosition = 0; private byte mBuffer[] = new byte[UserConf.get().FILE_BUFFER_BYTES * 4]; public HdfsFileInputStream( TachyonFS tfs, int fileId, Path hdfsPath, Configuration conf, int bufferSize) { LOG.debug( "PartitionInputStreamHdfs(" + tfs + ", " + fileId + ", " + hdfsPath + ", " + conf + ", " + bufferSize + ")"); mCurrentPosition = 0; mTFS = tfs; mFileId = fileId; mHdfsPath = hdfsPath; mHadoopConf = conf; mHadoopBufferSize = bufferSize; TachyonFile tachyonFile = mTFS.getFile(mFileId); try { mTachyonFileInputStream = tachyonFile.getInStream(ReadType.CACHE); } catch (IOException e) { LOG.error(e.getMessage()); return; } } /** * Read upto the specified number of bytes, from a given position within a file, and return the * number of bytes read. This does not change the current offset of a file, and is thread-safe. */ @Override public int read(long position, byte[] buffer, int offset, int length) throws IOException { throw new IOException("Not supported"); // TODO Auto-generated method stub // return 0; } /** * Read number of bytes equalt to the length of the buffer, from a given position within a file. * This does not change the current offset of a file, and is thread-safe. */ @Override public void readFully(long position, byte[] buffer) throws IOException { // TODO Auto-generated method stub throw new IOException("Not supported"); } /** * Read the specified number of bytes, from a given position within a file. This does not change * the current offset of a file, and is thread-safe. 
*/ @Override public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { // TODO Auto-generated method stub throw new IOException("Not supported"); } /** Return the current offset from the start of the file */ @Override public long getPos() throws IOException { return mCurrentPosition; } /** * Seek to the given offset from the start of the file. The next read() will be from that * location. Can't seek past the end of the file. */ @Override public void seek(long pos) throws IOException { if (pos == mCurrentPosition) { return; } if (pos < mCurrentPosition) { throw new IOException( "Not supported to seek to " + pos + " . Current Position is " + mCurrentPosition); } if (mTachyonFileInputStream != null) { long needSkip = pos - mCurrentPosition; while (needSkip > 0) { needSkip -= mTachyonFileInputStream.skip(needSkip); } mCurrentPosition = pos; } else if (mHdfsInputStream != null) { mHdfsInputStream.seek(pos); mCurrentPosition = pos; } } /** Seeks a different copy of the data. Returns true if found a new source, false otherwise. 
*/ @Override public boolean seekToNewSource(long targetPos) throws IOException { throw new IOException("Not supported"); // TODO Auto-generated method stub // return false; } @Override public int read() throws IOException { if (mTachyonFileInputStream != null) { int ret = 0; try { ret = mTachyonFileInputStream.read(); mCurrentPosition++; return ret; } catch (IOException e) { LOG.error(e.getMessage(), e); mTachyonFileInputStream = null; } } if (mHdfsInputStream != null) { return readFromHdfsBuffer(); } FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf); mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize); mHdfsInputStream.seek(mCurrentPosition); return readFromHdfsBuffer(); } @Override public int read(byte b[]) throws IOException { throw new IOException("Not supported"); } @Override public int read(byte b[], int off, int len) throws IOException { if (mTachyonFileInputStream != null) { int ret = 0; try { ret = mTachyonFileInputStream.read(b, off, len); mCurrentPosition += ret; return ret; } catch (IOException e) { LOG.error(e.getMessage(), e); mTachyonFileInputStream = null; } } if (mHdfsInputStream != null) { b[off] = (byte) readFromHdfsBuffer(); if (b[off] == -1) { return -1; } return 1; } FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf); mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize); mHdfsInputStream.seek(mCurrentPosition); b[off] = (byte) readFromHdfsBuffer(); if (b[off] == -1) { return -1; } return 1; } private int readFromHdfsBuffer() throws IOException { if (mBufferPosition < mBufferLimit) { return mBuffer[mBufferPosition++]; } LOG.error("Reading from HDFS directly"); while ((mBufferLimit = mHdfsInputStream.read(mBuffer)) == 0) { LOG.error("Read 0 bytes in readFromHdfsBuffer for " + mHdfsPath); } if (mBufferLimit == -1) { return -1; } mBufferPosition = 0; return mBuffer[mBufferPosition++]; } @Override public void close() throws IOException { if (mTachyonFileInputStream != null) { mTachyonFileInputStream.close(); } if (mHdfsInputStream != 
null) { mHdfsInputStream.close(); } } }