/** * Creates a new file output stream. * * @param path the file path * @param options the client options * @throws IOException if an I/O error occurs */ public FileOutStream(AlluxioURI path, OutStreamOptions options) throws IOException { mUri = Preconditions.checkNotNull(path); mNonce = IdUtils.getRandomNonNegativeLong(); mBlockSize = options.getBlockSizeBytes(); mAlluxioStorageType = options.getAlluxioStorageType(); mUnderStorageType = options.getUnderStorageType(); mContext = FileSystemContext.INSTANCE; mPreviousBlockOutStreams = new LinkedList<BufferedBlockOutStream>(); if (mUnderStorageType.isSyncPersist()) { updateUfsPath(); String tmpPath = PathUtils.temporaryFileName(mNonce, mUfsPath); UnderFileSystem ufs = UnderFileSystem.get(tmpPath, ClientContext.getConf()); // TODO(jiri): Implement collection of temporary files left behind by dead clients. mUnderStorageOutputStream = ufs.create(tmpPath, (int) mBlockSize); } else { mUfsPath = null; mUnderStorageOutputStream = null; } mClosed = false; mCanceled = false; mShouldCacheCurrentBlock = mAlluxioStorageType.isStore(); mBytesWritten = 0; mLocationPolicy = Preconditions.checkNotNull( options.getLocationPolicy(), PreconditionMessage.FILE_WRITE_LOCATION_POLICY_UNSPECIFIED); }
/** * Gets a stream to write data to a block. The stream can only be backed by Alluxio storage. * * @param blockId the block to write * @param blockSize the standard block size to write, or -1 if the block already exists (and this * stream is just storing the block in Alluxio again) * @param address the address of the worker to write the block to, fails if the worker cannot * serve the request * @return a {@link BufferedBlockOutStream} which can be used to write data to the block in a * streaming fashion * @throws IOException if the block cannot be written */ public BufferedBlockOutStream getOutStream(long blockId, long blockSize, WorkerNetAddress address) throws IOException { if (blockSize == -1) { try (CloseableResource<BlockMasterClient> blockMasterClientResource = mContext.acquireMasterClientResource()) { blockSize = blockMasterClientResource.get().getBlockInfo(blockId).getLength(); } catch (AlluxioException e) { throw new IOException(e); } } // No specified location to write to. if (address == null) { throw new RuntimeException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage()); } // Location is local. if (NetworkAddressUtils.getLocalHostName(ClientContext.getConf()).equals(address.getHost())) { if (mContext.hasLocalWorker()) { return new LocalBlockOutStream(blockId, blockSize); } else { throw new IOException(ExceptionMessage.NO_LOCAL_WORKER.getMessage("write")); } } // Location is specified and it is remote. return new RemoteBlockOutStream(blockId, blockSize, address); }
/**
 * Constructs a new stream for reading a file from HDFS.
 *
 * <p>The internal buffer is sized to four times the configured
 * {@code Constants.USER_FILE_BUFFER_BYTES} value.
 *
 * @param uri the Alluxio file URI
 * @param conf Hadoop configuration
 * @param bufferSize the buffer size
 * @param stats filesystem statistics
 * @throws IOException if the underlying file does not exist or its stream cannot be created
 * @throws IllegalArgumentException if four times the configured buffer size does not fit in an
 *         {@code int}
 */
public HdfsFileInputStream(AlluxioURI uri, org.apache.hadoop.conf.Configuration conf,
    int bufferSize, org.apache.hadoop.fs.FileSystem.Statistics stats) throws IOException {
  // One placeholder per argument; the previous format string had a dangling fifth "{}".
  LOG.debug("HdfsFileInputStream({}, {}, {}, {})", uri, conf, bufferSize, stats);

  Configuration configuration = ClientContext.getConf();
  long bufferBytes = configuration.getBytes(Constants.USER_FILE_BUFFER_BYTES);
  // Multiply while still a long and narrow afterwards, so an oversized setting is rejected by
  // the checked cast instead of silently overflowing int arithmetic.
  mBuffer = new byte[Ints.checkedCast(bufferBytes * 4)];
  mCurrentPosition = 0;

  FileSystem fs = FileSystem.Factory.get();
  mHadoopConf = conf;
  mHadoopBufferSize = bufferSize;
  mStatistics = stats;

  try {
    mFileInfo = fs.getStatus(uri);
    mHdfsPath = new Path(mFileInfo.getUfsPath());
    mAlluxioFileInputStream =
        fs.openFile(uri, OpenFileOptions.defaults().setReadType(ReadType.CACHE));
  } catch (FileDoesNotExistException e) {
    // NOTE(review): mHdfsPath is only assigned once getStatus has succeeded, so it may still be
    // unset when this message is built.
    FileNotFoundException notFound = new FileNotFoundException(
        ExceptionMessage.HDFS_FILE_NOT_FOUND.getMessage(mHdfsPath, uri));
    // FileNotFoundException has no cause-taking constructor; attach the cause explicitly.
    notFound.initCause(e);
    throw notFound;
  } catch (AlluxioException e) {
    throw new IOException(e);
  }
}
/** * Obtains a client for a remote based on the given network address. Illegal argument exception is * thrown if the hostname is the local hostname. Runtime exception is thrown if the client cannot * be created with a connection to the hostname. * * @param address the address of the worker * @return a worker client with a connection to the specified hostname */ private BlockWorkerClient acquireRemoteWorkerClient(WorkerNetAddress address) { // If we couldn't find a worker, crash. if (address == null) { // TODO(calvin): Better exception usage. throw new RuntimeException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage()); } Preconditions.checkArgument( !address.getHost().equals(NetworkAddressUtils.getLocalHostName()), PreconditionMessage.REMOTE_CLIENT_BUT_LOCAL_HOSTNAME); long clientId = IdUtils.getRandomNonNegativeLong(); return new RetryHandlingBlockWorkerClient( address, ClientContext.getBlockClientExecutorService(), clientId, false); }
/**
 * Writes a single byte to the current Alluxio block stream (when the current block is being
 * cached) and to the under storage stream (when persisting synchronously).
 *
 * @param b the byte to write
 * @throws IOException if the under storage write fails, or if the cache write failure is
 *         propagated by {@code handleCacheWriteException}
 */
@Override
public void write(int b) throws IOException {
  if (mShouldCacheCurrentBlock) {
    try {
      // Move on to a fresh block when there is none yet or the current one is full.
      boolean needsNewBlock =
          mCurrentBlockOutStream == null || mCurrentBlockOutStream.remaining() == 0;
      if (needsNewBlock) {
        getNextBlock();
      }
      mCurrentBlockOutStream.write(b);
    } catch (IOException e) {
      // Cache write failures are delegated rather than thrown directly from here.
      handleCacheWriteException(e);
    }
  }

  if (mUnderStorageType.isSyncPersist()) {
    mUnderStorageOutputStream.write(b);
    ClientContext.getClientMetrics().incBytesWrittenUfs(1);
  }

  mBytesWritten += 1;
}
/**
 * Writes {@code len} bytes of {@code b}, starting at {@code off}, to the current Alluxio block
 * stream(s) (when the current block is being cached) and to the under storage stream (when
 * persisting synchronously). Data that does not fit into the current block spills over into
 * subsequent blocks.
 *
 * @param b the source buffer, must not be null
 * @param off the offset of the first byte to write, must be non-negative
 * @param len the number of bytes to write, must be non-negative and fit within {@code b}
 * @throws IllegalArgumentException if the buffer is null or the offset/length are out of bounds
 * @throws IOException if the under storage write fails, or if the cache write failure is
 *         propagated by {@code handleCacheWriteException}
 */
@Override
public void write(byte[] b, int off, int len) throws IOException {
  Preconditions.checkArgument(b != null, PreconditionMessage.ERR_WRITE_BUFFER_NULL);
  // Compare against (b.length - off) instead of computing (len + off): the sum can overflow
  // int and wrap negative, which would let an out-of-bounds request pass the check.
  Preconditions.checkArgument(off >= 0 && len >= 0 && len <= b.length - off,
      PreconditionMessage.ERR_BUFFER_STATE, b.length, off, len);

  if (mShouldCacheCurrentBlock) {
    try {
      int bytesLeft = len;
      int position = off;
      while (bytesLeft > 0) {
        // Move on to a fresh block when there is none yet or the current one is full.
        if (mCurrentBlockOutStream == null || mCurrentBlockOutStream.remaining() == 0) {
          getNextBlock();
        }
        // Write as much as fits in the current block; the result never exceeds bytesLeft, so
        // the narrowing cast is lossless.
        int chunk = (int) Math.min(mCurrentBlockOutStream.remaining(), bytesLeft);
        mCurrentBlockOutStream.write(b, position, chunk);
        position += chunk;
        bytesLeft -= chunk;
      }
    } catch (IOException e) {
      // Cache write failures are delegated rather than thrown directly from here.
      handleCacheWriteException(e);
    }
  }

  if (mUnderStorageType.isSyncPersist()) {
    mUnderStorageOutputStream.write(b, off, len);
    ClientContext.getClientMetrics().incBytesWrittenUfs(len);
  }

  mBytesWritten += len;
}
/**
 * Opens a stream for reading the data of a block from Alluxio storage. A location on the local
 * host is tried first; otherwise, or if opening the local stream fails, the first reported
 * location is read remotely.
 *
 * @param blockId the block to read from
 * @return a {@link BlockInStream} which can be used to read the data in a streaming fashion
 * @throws IOException if the block information cannot be fetched or the block has no location
 *         in Alluxio
 */
public BufferedBlockInStream getInStream(long blockId) throws IOException {
  BlockInfo blockInfo;
  try (CloseableResource<BlockMasterClient> blockMasterResource =
      mContext.acquireMasterClientResource()) {
    blockInfo = blockMasterResource.get().getBlockInfo(blockId);
  } catch (AlluxioException e) {
    throw new IOException(e);
  }

  if (blockInfo.getLocations().isEmpty()) {
    throw new IOException("Block " + blockId + " is not available in Alluxio");
  }

  // TODO(calvin): Get location via a policy.
  // Although blockInfo.locations are sorted by tier, we prefer reading from the local worker.
  // But when there is no local worker or there are no local blocks, we prefer the first
  // location in blockInfo.locations that is nearest to memory tier.
  // Assuming if there is no local worker, there are no local blocks in blockInfo.locations.
  // TODO(cc): Check mContext.hasLocalWorker before finding for a local block when the TODO
  // for hasLocalWorker is fixed.
  String localHostName = NetworkAddressUtils.getLocalHostName(ClientContext.getConf());
  for (BlockLocation candidate : blockInfo.getLocations()) {
    WorkerNetAddress candidateAddress = candidate.getWorkerAddress();
    if (!candidateAddress.getHost().equals(localHostName)) {
      // Not on this host; keep looking for a local copy.
      continue;
    }
    // There is a local worker and the block is local.
    try {
      return new LocalBlockInStream(blockId, blockInfo.getLength());
    } catch (IOException e) {
      LOG.warn("Failed to open local stream for block " + blockId + ". " + e.getMessage());
      // Getting a local stream failed, do not try again
      break;
    }
  }

  // No local worker/block, get the first location since it's nearest to memory tier.
  WorkerNetAddress firstAddress = blockInfo.getLocations().get(0).getWorkerAddress();
  return new RemoteBlockInStream(blockId, blockInfo.getLength(), firstAddress);
}
/**
 * Returns the context for the master address that {@link ClientContext} reports, by delegating
 * to the address-based overload of this method.
 *
 * @return the context created or cached before
 */
public static synchronized BlockStoreContext get() {
  return get(
      ClientContext.getMasterAddress());
}
@Override public void close() throws IOException { if (mClosed) { return; } if (mCurrentBlockOutStream != null) { mPreviousBlockOutStreams.add(mCurrentBlockOutStream); } CompleteFileOptions options = CompleteFileOptions.defaults(); if (mUnderStorageType.isSyncPersist()) { String tmpPath = PathUtils.temporaryFileName(mNonce, mUfsPath); UnderFileSystem ufs = UnderFileSystem.get(tmpPath, ClientContext.getConf()); if (mCanceled) { // TODO(yupeng): Handle this special case in under storage integrations. mUnderStorageOutputStream.close(); if (!ufs.exists(tmpPath)) { // Location of the temporary file has changed, recompute it. updateUfsPath(); tmpPath = PathUtils.temporaryFileName(mNonce, mUfsPath); } ufs.delete(tmpPath, false); } else { mUnderStorageOutputStream.flush(); mUnderStorageOutputStream.close(); if (!ufs.exists(tmpPath)) { // Location of the temporary file has changed, recompute it. updateUfsPath(); tmpPath = PathUtils.temporaryFileName(mNonce, mUfsPath); } if (!ufs.rename(tmpPath, mUfsPath)) { throw new IOException("Failed to rename " + tmpPath + " to " + mUfsPath); } options.setUfsLength(ufs.getFileSize(mUfsPath)); } } if (mAlluxioStorageType.isStore()) { try { if (mCanceled) { for (BufferedBlockOutStream bos : mPreviousBlockOutStreams) { bos.cancel(); } } else { for (BufferedBlockOutStream bos : mPreviousBlockOutStreams) { bos.close(); } } } catch (IOException e) { handleCacheWriteException(e); } } // Complete the file if it's ready to be completed. if (!mCanceled && (mUnderStorageType.isSyncPersist() || mAlluxioStorageType.isStore())) { FileSystemMasterClient masterClient = mContext.acquireMasterClient(); try { masterClient.completeFile(mUri, options); } catch (AlluxioException e) { throw new IOException(e); } finally { mContext.releaseMasterClient(masterClient); } } if (mUnderStorageType.isAsyncPersist()) { scheduleAsyncPersist(); } mClosed = true; }
/** Default implementation of {@link KeyValueStoreReader} to access an Alluxio key-value store. */ @NotThreadSafe class BaseKeyValueStoreReader implements KeyValueStoreReader { private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE); private final Configuration mConf = ClientContext.getConf(); private final InetSocketAddress mMasterAddress = ClientContext.getMasterAddress(); private final KeyValueMasterClient mMasterClient; /** A list of partitions of the store. */ private final List<PartitionInfo> mPartitions; /** * Constructs a {@link BaseKeyValueStoreReader} instance. * * @param uri URI of the key-value store * @throws IOException if non-Alluxio error occurs * @throws AlluxioException if Alluxio error occurs */ BaseKeyValueStoreReader(AlluxioURI uri) throws IOException, AlluxioException { // TODO(binfan): use a thread pool to manage the client. LOG.info("Create KeyValueStoreReader for {}", uri); mMasterClient = new KeyValueMasterClient(mMasterAddress, mConf); mPartitions = mMasterClient.getPartitionInfo(uri); mMasterClient.close(); } @Override public void close() {} @Override public byte[] get(byte[] key) throws IOException, AlluxioException { ByteBuffer value = get(ByteBuffer.wrap(key)); if (value == null) { return null; } return BufferUtils.newByteArrayFromByteBuffer(value); } @Override public ByteBuffer get(ByteBuffer key) throws IOException, AlluxioException { Preconditions.checkNotNull(key); // TODO(binfan): improve the inefficient for-loop to binary search. 
for (PartitionInfo partition : mPartitions) { // NOTE: keyStart and keyLimit are both inclusive if (key.compareTo(partition.bufferForKeyStart()) >= 0 && key.compareTo(partition.bufferForKeyLimit()) <= 0) { long blockId = partition.getBlockId(); KeyValuePartitionReader reader = KeyValuePartitionReader.Factory.create(blockId); try { ByteBuffer value = reader.get(key); return value; } finally { reader.close(); } } } return null; } @Override public KeyValueIterator iterator() throws IOException, AlluxioException { return new KeyValueStoreIterator(mPartitions); } @Override public int size() throws IOException, AlluxioException { int totalSize = 0; // TODO(cc): Put size into PartitionInfo. for (PartitionInfo partition : mPartitions) { KeyValuePartitionReader partitionReader = KeyValuePartitionReader.Factory.create(partition.getBlockId()); totalSize += partitionReader.size(); partitionReader.close(); } return totalSize; } }