/** Move a persisted replica from lazypersist directory to a subdirectory under finalized. */ File activateSavedReplica(Block b, File metaFile, File blockFile) throws IOException { final File blockDir = DatanodeUtil.idToBlockDir(finalizedDir, b.getBlockId()); final File targetBlockFile = new File(blockDir, blockFile.getName()); final File targetMetaFile = new File(blockDir, metaFile.getName()); FileUtils.moveFile(blockFile, targetBlockFile); FsDatasetImpl.LOG.info("Moved " + blockFile + " to " + targetBlockFile); FileUtils.moveFile(metaFile, targetMetaFile); FsDatasetImpl.LOG.info("Moved " + metaFile + " to " + targetMetaFile); return targetBlockFile; }
/** * Read in the cached DU value and return it if it is less than 600 seconds old (DU update * interval). Slight imprecision of dfsUsed is not critical and skipping DU can significantly * shorten the startup time. If the cached value is not available or too old, -1 is returned. */ long loadDfsUsed() { long cachedDfsUsed; long mtime; Scanner sc; try { sc = new Scanner(new File(currentDir, DU_CACHE_FILE), "UTF-8"); } catch (FileNotFoundException fnfe) { return -1; } try { // Get the recorded dfsUsed from the file. if (sc.hasNextLong()) { cachedDfsUsed = sc.nextLong(); } else { return -1; } // Get the recorded mtime from the file. if (sc.hasNextLong()) { mtime = sc.nextLong(); } else { return -1; } // Return the cached value if mtime is okay. if (mtime > 0 && (Time.now() - mtime < 600000L)) { FsDatasetImpl.LOG.info("Cached dfsUsed found for " + currentDir + ": " + cachedDfsUsed); return cachedDfsUsed; } return -1; } finally { sc.close(); } }
@Override public void run() { boolean succeeded = false; final FsDatasetImpl dataset = (FsDatasetImpl) datanode.getFSDataset(); try (FsVolumeReference ref = this.targetVolume) { int smallBufferSize = DFSUtil.getSmallBufferSize(EMPTY_HDFS_CONF); // No FsDatasetImpl lock for the file copy File targetFiles[] = FsDatasetImpl.copyBlockFiles( blockId, genStamp, metaFile, blockFile, lazyPersistDir, true, smallBufferSize); // Lock FsDataSetImpl during onCompleteLazyPersist callback dataset.onCompleteLazyPersist( bpId, blockId, creationTime, targetFiles, (FsVolumeImpl) targetVolume.getVolume()); succeeded = true; } catch (Exception e) { FsDatasetImpl.LOG.warn( "LazyWriter failed to async persist RamDisk block pool id: " + bpId + "block Id: " + blockId, e); } finally { if (!succeeded) { dataset.onFailLazyPersist(bpId, blockId); } } }
/** Asynchronously lazy persist the block from the RamDisk to Disk. */ void submitLazyPersistTask( String bpId, long blockId, long genStamp, long creationTime, File metaFile, File blockFile, FsVolumeReference target) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug( "LazyWriter schedule async task to persist RamDisk block pool id: " + bpId + " block id: " + blockId); } FsVolumeImpl volume = (FsVolumeImpl) target.getVolume(); File lazyPersistDir = volume.getLazyPersistDir(bpId); if (!lazyPersistDir.exists() && !lazyPersistDir.mkdirs()) { FsDatasetImpl.LOG.warn("LazyWriter failed to create " + lazyPersistDir); throw new IOException( "LazyWriter fail to find or create lazy persist dir: " + lazyPersistDir.toString()); } ReplicaLazyPersistTask lazyPersistTask = new ReplicaLazyPersistTask( bpId, blockId, genStamp, creationTime, blockFile, metaFile, target, lazyPersistDir); execute(volume.getCurrentDir(), lazyPersistTask); }
/** Write the current dfsUsed to the cache file. */ void saveDfsUsed() { File outFile = new File(currentDir, DU_CACHE_FILE); if (outFile.exists() && !outFile.delete()) { FsDatasetImpl.LOG.warn("Failed to delete old dfsUsed file in " + outFile.getParent()); } try { long used = getDfsUsed(); try (Writer out = new OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")) { // mtime is written last, so that truncated writes won't be valid. out.write(Long.toString(used) + " " + Long.toString(Time.now())); out.flush(); } } catch (IOException ioe) { // If write failed, the volume might be bad. Since the cache file is // not critical, log the error and continue. FsDatasetImpl.LOG.warn("Failed to write dfsUsed to " + outFile, ioe); } }
void getVolumeMap(ReplicaMap volumeMap, final RamDiskReplicaTracker lazyWriteReplicaMap) throws IOException { // Recover lazy persist replicas, they will be added to the volumeMap // when we scan the finalized directory. if (lazypersistDir.exists()) { int numRecovered = moveLazyPersistReplicasToFinalized(lazypersistDir); FsDatasetImpl.LOG.info("Recovered " + numRecovered + " replicas from " + lazypersistDir); } // add finalized replicas addToReplicasMap(volumeMap, finalizedDir, lazyWriteReplicaMap, true); // add rbw replicas addToReplicasMap(volumeMap, rbwDir, lazyWriteReplicaMap, false); }
void addToReplicasMap( ReplicaMap volumeMap, File dir, final RamDiskReplicaTracker lazyWriteReplicaMap, boolean isFinalized) throws IOException { File files[] = FileUtil.listFiles(dir); for (File file : files) { if (file.isDirectory()) { addToReplicasMap(volumeMap, file, lazyWriteReplicaMap, isFinalized); } if (isFinalized && FsDatasetUtil.isUnlinkTmpFile(file)) { throw new RuntimeException("未实现"); } if (!Block.isBlockFilename(file)) continue; long genStamp = FsDatasetUtil.getGenerationStampFromFile(files, file); long blockId = Block.filename2id(file.getName()); ReplicaInfo newReplica = null; if (isFinalized) { newReplica = new FinalizedReplica(blockId, file.length(), genStamp, volume, file.getParentFile()); } else { boolean loadRwr = true; File restartMeta = new File(file.getParent() + File.pathSeparator + "." + file.getName() + ".restart"); try { if (!restartMeta.delete()) { FsDatasetImpl.LOG.warn("Failed to delete restart meta file: " + restartMeta.getPath()); } } finally { } if (loadRwr) { LOG.error("未实现"); continue; } } ReplicaInfo oldReplica = volumeMap.get(bpid, newReplica.getBlockId()); if (oldReplica == null) { volumeMap.add(bpid, newReplica); } else { throw new RuntimeException("未实现"); } if (newReplica.getVolume().isTransientStorage()) { throw new RuntimeException("未实现"); } else { lazyWriteReplicaMap.discardReplica(bpid, blockId, false); } } }
/** * Find out the number of bytes in the block that match its crc. * * <p>This algorithm assumes that data corruption caused by unexpected datanode shutdown occurs * only in the last crc chunk. So it checks only the last chunk. * * @param blockFile the block file * @param genStamp generation stamp of the block * @return the number of valid bytes */ private long validateIntegrityAndSetLength(File blockFile, long genStamp) { DataInputStream checksumIn = null; InputStream blockIn = null; try { final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp); long blockFileLen = blockFile.length(); long metaFileLen = metaFile.length(); int crcHeaderLen = DataChecksum.getChecksumHeaderSize(); if (!blockFile.exists() || blockFileLen == 0 || !metaFile.exists() || metaFileLen < crcHeaderLen) { return 0; } checksumIn = new DataInputStream( new BufferedInputStream( new FileInputStream(metaFile), HdfsConstants.IO_FILE_BUFFER_SIZE)); // read and handle the common header here. For now just a version final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile); int bytesPerChecksum = checksum.getBytesPerChecksum(); int checksumSize = checksum.getChecksumSize(); long numChunks = Math.min( (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum, (metaFileLen - crcHeaderLen) / checksumSize); if (numChunks == 0) { return 0; } IOUtils.skipFully(checksumIn, (numChunks - 1) * checksumSize); blockIn = new FileInputStream(blockFile); long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum; IOUtils.skipFully(blockIn, lastChunkStartPos); int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos); byte[] buf = new byte[lastChunkSize + checksumSize]; checksumIn.readFully(buf, lastChunkSize, checksumSize); IOUtils.readFully(blockIn, buf, 0, lastChunkSize); checksum.update(buf, 0, lastChunkSize); long validFileLength; if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc validFileLength = lastChunkStartPos + lastChunkSize; } else { // last chunck is corrupt validFileLength = lastChunkStartPos; } // truncate if extra bytes are present without CRC if (blockFile.length() > validFileLength) { RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw"); try { // truncate blockFile blockRAF.setLength(validFileLength); } finally { blockRAF.close(); } } return validFileLength; } catch (IOException e) { FsDatasetImpl.LOG.warn(e); return 0; } finally { IOUtils.closeStream(checksumIn); IOUtils.closeStream(blockIn); } }
/** * Add replicas under the given directory to the volume map * * @param volumeMap the replicas map * @param dir an input directory * @param lazyWriteReplicaMap Map of replicas on transient storage. * @param isFinalized true if the directory has finalized replicas; false if the directory has rbw * replicas */ void addToReplicasMap( ReplicaMap volumeMap, File dir, final RamDiskReplicaTracker lazyWriteReplicaMap, boolean isFinalized) throws IOException { File files[] = FileUtil.listFiles(dir); for (File file : files) { if (file.isDirectory()) { addToReplicasMap(volumeMap, file, lazyWriteReplicaMap, isFinalized); } if (isFinalized && FsDatasetUtil.isUnlinkTmpFile(file)) { file = recoverTempUnlinkedBlock(file); if (file == null) { // the original block still exists, so we cover it // in another iteration and can continue here continue; } } if (!Block.isBlockFilename(file)) continue; long genStamp = FsDatasetUtil.getGenerationStampFromFile(files, file); long blockId = Block.filename2id(file.getName()); ReplicaInfo newReplica = null; if (isFinalized) { newReplica = new FinalizedReplica(blockId, file.length(), genStamp, volume, file.getParentFile()); } else { boolean loadRwr = true; File restartMeta = new File(file.getParent() + File.pathSeparator + "." + file.getName() + ".restart"); Scanner sc = null; try { sc = new Scanner(restartMeta, "UTF-8"); // The restart meta file exists if (sc.hasNextLong() && (sc.nextLong() > Time.now())) { // It didn't expire. Load the replica as a RBW. // We don't know the expected block length, so just use 0 // and don't reserve any more space for writes. newReplica = new ReplicaBeingWritten( blockId, validateIntegrityAndSetLength(file, genStamp), genStamp, volume, file.getParentFile(), null, 0); loadRwr = false; } sc.close(); if (!restartMeta.delete()) { FsDatasetImpl.LOG.warn("Failed to delete restart meta file: " + restartMeta.getPath()); } } catch (FileNotFoundException fnfe) { // nothing to do hereFile dir = } finally { if (sc != null) { sc.close(); } } // Restart meta doesn't exist or expired. if (loadRwr) { newReplica = new ReplicaWaitingToBeRecovered( blockId, validateIntegrityAndSetLength(file, genStamp), genStamp, volume, file.getParentFile()); } } ReplicaInfo oldReplica = volumeMap.get(bpid, newReplica.getBlockId()); if (oldReplica == null) { volumeMap.add(bpid, newReplica); } else { // We have multiple replicas of the same block so decide which one // to keep. newReplica = resolveDuplicateReplicas(newReplica, oldReplica, volumeMap); } // If we are retaining a replica on transient storage make sure // it is in the lazyWriteReplicaMap so it can be persisted // eventually. if (newReplica.getVolume().isTransientStorage()) { lazyWriteReplicaMap.addReplica(bpid, blockId, (FsVolumeImpl) newReplica.getVolume()); } else { lazyWriteReplicaMap.discardReplica(bpid, blockId, false); } } }