/**
 * Move a finalized replica's block file (and its meta file) into the
 * finalized directory and account for the space used.
 */
File addBlock(Block b, File f) throws IOException {
  // Blocks are fanned out into a two-level subdir tree keyed by block ID.
  File blockDir = DatanodeUtil.idToBlockDir(finalizedDir, b.getBlockId());
  if (!blockDir.exists()) {
    if (!blockDir.mkdirs()) {
      throw new IOException("Failed to mkdirs " + blockDir);
    }
  }
  File blockFile = FsDatasetImpl.moveBlockFiles(b, f, blockDir);
  File metaFile = FsDatasetUtil.getMetaFile(blockFile, b.getGenerationStamp());
  // Track the additional on-disk usage: block bytes plus the meta file.
  dfsUsage.incDfsUsed(b.getNumBytes() + metaFile.length());
  return blockFile;
}
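// For context, DatanodeUtil.idToBlockDir spreads blocks across a two-level
// subdir tree so no single directory grows unbounded. A minimal sketch of
// that mapping, assuming the 32x32 layout; the mask, "subdir" prefix, and
// example block ID below are illustrative, not copied from DatanodeUtil.
import java.io.File;

class BlockDirSketch {
  static final String SUBDIR_PREFIX = "subdir"; // assumed prefix

  static File idToBlockDir(File root, long blockId) {
    int d1 = (int) ((blockId >> 16) & 0x1F); // top-level bucket, 0..31
    int d2 = (int) ((blockId >> 8) & 0x1F);  // second-level bucket, 0..31
    return new File(root,
        SUBDIR_PREFIX + d1 + File.separator + SUBDIR_PREFIX + d2);
  }

  public static void main(String[] args) {
    // Block ID 1073741825 lands in finalized/subdir0/subdir0 here.
    System.out.println(idToBlockDir(new File("finalized"), 1073741825L));
  }
}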
/**
 * Recover an unlinked tmp file on datanode restart. If the original block
 * does not exist, then the tmp file is renamed to be the original file name
 * and the original name is returned; otherwise the tmp file is deleted and
 * null is returned.
 */
File recoverTempUnlinkedBlock(File unlinkedTmp) throws IOException {
  File blockFile = FsDatasetUtil.getOrigFile(unlinkedTmp);
  if (blockFile.exists()) {
    // If the original block file still exists, then no recovery is needed.
    if (!unlinkedTmp.delete()) {
      throw new IOException("Unable to cleanup unlinked tmp file "
          + unlinkedTmp);
    }
    return null;
  } else {
    if (!unlinkedTmp.renameTo(blockFile)) {
      throw new IOException("Unable to rename unlinked tmp file "
          + unlinkedTmp);
    }
    return blockFile;
  }
}
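// Hedged sketch of the naming convention recoverTempUnlinkedBlock relies on:
// an unlink tmp file is assumed to be the original block file name plus an
// ".unlinked" suffix, so recovery is just a suffix strip. The suffix constant
// and helper names below are illustrative, not FsDatasetUtil's own.
import java.io.File;
import java.io.IOException;

class UnlinkTmpSketch {
  static final String UNLINK_SUFFIX = ".unlinked"; // assumed suffix

  static boolean isUnlinkTmpFile(File f) {
    return f.getName().endsWith(UNLINK_SUFFIX);
  }

  static File getOrigFile(File unlinkTmp) throws IOException {
    String name = unlinkTmp.getName();
    if (!isUnlinkTmpFile(unlinkTmp)) {
      throw new IOException("Not an unlink tmp file: " + unlinkTmp);
    }
    return new File(unlinkTmp.getParentFile(),
        name.substring(0, name.length() - UNLINK_SUFFIX.length()));
  }
}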
/**
 * Find out the number of bytes in the block that match its crc.
 *
 * <p>This algorithm assumes that data corruption caused by unexpected
 * datanode shutdown occurs only in the last crc chunk. So it checks only
 * the last chunk.
 *
 * @param blockFile the block file
 * @param genStamp generation stamp of the block
 * @return the number of valid bytes
 */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  DataInputStream checksumIn = null;
  InputStream blockIn = null;
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0
        || !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    checksumIn = new DataInputStream(new BufferedInputStream(
        new FileInputStream(metaFile), HdfsConstants.IO_FILE_BUFFER_SIZE));

    // read and handle the common header here. For now just a version
    final DataChecksum checksum =
        BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
    int bytesPerChecksum = checksum.getBytesPerChecksum();
    int checksumSize = checksum.getChecksumSize();
    long numChunks = Math.min(
        (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
        (metaFileLen - crcHeaderLen) / checksumSize);
    if (numChunks == 0) {
      return 0;
    }
    IOUtils.skipFully(checksumIn, (numChunks - 1) * checksumSize);
    blockIn = new FileInputStream(blockFile);
    long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
    IOUtils.skipFully(blockIn, lastChunkStartPos);
    int lastChunkSize = (int) Math.min(
        bytesPerChecksum, blockFileLen - lastChunkStartPos);
    byte[] buf = new byte[lastChunkSize + checksumSize];
    checksumIn.readFully(buf, lastChunkSize, checksumSize);
    IOUtils.readFully(blockIn, buf, 0, lastChunkSize);

    checksum.update(buf, 0, lastChunkSize);
    long validFileLength;
    if (checksum.compare(buf, lastChunkSize)) {
      // last chunk matches crc
      validFileLength = lastChunkStartPos + lastChunkSize;
    } else {
      // last chunk is corrupt
      validFileLength = lastChunkStartPos;
    }

    // truncate if extra bytes are present without CRC
    if (blockFile.length() > validFileLength) {
      RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
      try {
        // truncate blockFile
        blockRAF.setLength(validFileLength);
      } finally {
        blockRAF.close();
      }
    }
    return validFileLength;
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn(e);
    return 0;
  } finally {
    IOUtils.closeStream(checksumIn);
    IOUtils.closeStream(blockIn);
  }
}
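// A standalone walk-through of the chunk arithmetic above, with made-up
// example sizes (the constants are illustrative, not read from real HDFS
// files): a 1,049,000-byte block checksummed every 512 bytes with 4-byte
// CRCs has 2049 chunks, and only the trailing 424-byte chunk is verified.
class LastChunkMathSketch {
  public static void main(String[] args) {
    long blockFileLen = 1_049_000L; // example block data length
    int crcHeaderLen = 7;           // assumed meta header length
    int bytesPerChecksum = 512;     // common dfs.bytes-per-checksum
    int checksumSize = 4;           // CRC32/CRC32C checksum width
    long metaFileLen = crcHeaderLen + 2049L * checksumSize;

    // Take the smaller chunk count implied by the two files, so a torn
    // write to either file only puts the trailing chunk in doubt.
    long numChunks = Math.min(
        (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum, // 2049
        (metaFileLen - crcHeaderLen) / checksumSize);             // 2049

    long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;  // 1,048,576
    int lastChunkSize = (int) Math.min(
        bytesPerChecksum, blockFileLen - lastChunkStartPos);      // 424

    // A CRC match keeps the whole file; a mismatch truncates back to the
    // last verified chunk boundary.
    System.out.println("valid if CRC ok:  " + (lastChunkStartPos + lastChunkSize));
    System.out.println("valid if corrupt: " + lastChunkStartPos);
  }
}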
/**
 * Add replicas under the given directory to the volume map.
 *
 * @param volumeMap the replicas map
 * @param dir an input directory
 * @param lazyWriteReplicaMap Map of replicas on transient storage.
 * @param isFinalized true if the directory has finalized replicas;
 *                    false if the directory has rbw replicas
 */
void addToReplicasMap(ReplicaMap volumeMap, File dir,
    final RamDiskReplicaTracker lazyWriteReplicaMap, boolean isFinalized)
    throws IOException {
  File[] files = FileUtil.listFiles(dir);
  for (File file : files) {
    if (file.isDirectory()) {
      addToReplicasMap(volumeMap, file, lazyWriteReplicaMap, isFinalized);
    }

    if (isFinalized && FsDatasetUtil.isUnlinkTmpFile(file)) {
      file = recoverTempUnlinkedBlock(file);
      if (file == null) {
        // the original block still exists, so we cover it
        // in another iteration and can continue here
        continue;
      }
    }
    if (!Block.isBlockFilename(file)) {
      continue;
    }

    long genStamp = FsDatasetUtil.getGenerationStampFromFile(files, file);
    long blockId = Block.filename2id(file.getName());
    ReplicaInfo newReplica = null;
    if (isFinalized) {
      newReplica = new FinalizedReplica(blockId,
          file.length(), genStamp, volume, file.getParentFile());
    } else {
      boolean loadRwr = true;
      // Note: File.separator, not File.pathSeparator, to build the path.
      File restartMeta = new File(file.getParent()
          + File.separator + "." + file.getName() + ".restart");
      Scanner sc = null;
      try {
        sc = new Scanner(restartMeta, "UTF-8");
        // The restart meta file exists
        if (sc.hasNextLong() && (sc.nextLong() > Time.now())) {
          // It didn't expire. Load the replica as a RBW.
          // We don't know the expected block length, so just use 0
          // and don't reserve any more space for writes.
          newReplica = new ReplicaBeingWritten(blockId,
              validateIntegrityAndSetLength(file, genStamp),
              genStamp, volume, file.getParentFile(), null, 0);
          loadRwr = false;
        }
        sc.close();
        if (!restartMeta.delete()) {
          FsDatasetImpl.LOG.warn("Failed to delete restart meta file: "
              + restartMeta.getPath());
        }
      } catch (FileNotFoundException fnfe) {
        // nothing to do here
      } finally {
        if (sc != null) {
          sc.close();
        }
      }
      // Restart meta doesn't exist or expired.
      if (loadRwr) {
        newReplica = new ReplicaWaitingToBeRecovered(blockId,
            validateIntegrityAndSetLength(file, genStamp),
            genStamp, volume, file.getParentFile());
      }
    }

    ReplicaInfo oldReplica = volumeMap.get(bpid, newReplica.getBlockId());
    if (oldReplica == null) {
      volumeMap.add(bpid, newReplica);
    } else {
      // We have multiple replicas of the same block so decide which one
      // to keep.
      newReplica = resolveDuplicateReplicas(newReplica, oldReplica, volumeMap);
    }

    // If we are retaining a replica on transient storage make sure
    // it is in the lazyWriteReplicaMap so it can be persisted
    // eventually.
    if (newReplica.getVolume().isTransientStorage()) {
      lazyWriteReplicaMap.addReplica(bpid, blockId,
          (FsVolumeImpl) newReplica.getVolume());
    } else {
      lazyWriteReplicaMap.discardReplica(bpid, blockId, false);
    }
  }
}
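// The Scanner logic above reads back a restart meta file that the datanode
// is expected to write during a restart-style shutdown. A hedged sketch of
// that writer side, assuming the file holds a single expiry timestamp in
// milliseconds; the helper name and expiry-window parameter are illustrative,
// and System.currentTimeMillis() stands in for Hadoop's Time.now().
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

class RestartMetaSketch {
  static void writeRestartMeta(File blockFile, long expiryWindowMs)
      throws IOException {
    // Same ".<blockfile>.restart" naming that addToReplicasMap looks for.
    File restartMeta = new File(blockFile.getParent()
        + File.separator + "." + blockFile.getName() + ".restart");
    try (FileWriter out = new FileWriter(restartMeta)) {
      // One long: wall-clock time after which the RBW is no longer trusted.
      out.write(Long.toString(System.currentTimeMillis() + expiryWindowMs));
    }
  }
}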