/**
 * Walks {@code dir} and registers the finalized block files found there in {@code volumeMap}.
 *
 * <p>This variant only supports the plain finalized case. The other paths are not implemented
 * here: an unlink-tmp file in a finalized directory, an already-registered replica, and a replica
 * on transient storage each raise a {@link RuntimeException}; for rbw directories the restart
 * meta file of each block file is deleted, an error is logged and the block file is skipped.
 *
 * @param volumeMap the replicas map to populate
 * @param dir the directory to scan
 * @param lazyWriteReplicaMap tracker from which non-transient replicas are discarded
 * @param isFinalized true if {@code dir} holds finalized replicas; false if it holds rbw replicas
 * @throws IOException declared for the helpers called while listing and parsing files
 */
void addToReplicasMap(
    ReplicaMap volumeMap,
    File dir,
    final RamDiskReplicaTracker lazyWriteReplicaMap,
    boolean isFinalized)
    throws IOException {
  final File[] entries = FileUtil.listFiles(dir);
  for (File entry : entries) {
    if (entry.isDirectory()) {
      // Descend first; the directory entry itself still runs through the checks below.
      addToReplicasMap(volumeMap, entry, lazyWriteReplicaMap, isFinalized);
    }

    if (isFinalized && FsDatasetUtil.isUnlinkTmpFile(entry)) {
      throw new RuntimeException("未实现");
    }
    if (!Block.isBlockFilename(entry)) {
      continue;
    }

    final long genStamp = FsDatasetUtil.getGenerationStampFromFile(entries, entry);
    final long blockId = Block.filename2id(entry.getName());

    if (!isFinalized) {
      // rbw directory: drop the restart meta file, then give up on this block file.
      // NOTE(review): the path is joined with File.pathSeparator, not File.separator - confirm
      // this matches whatever code writes the ".restart" file before changing it.
      final File restartMeta =
          new File(
              entry.getParent() + File.pathSeparator + "." + entry.getName() + ".restart");
      if (!restartMeta.delete()) {
        FsDatasetImpl.LOG.warn("Failed to delete restart meta file: " + restartMeta.getPath());
      }
      LOG.error("未实现");
      continue;
    }

    final ReplicaInfo newReplica =
        new FinalizedReplica(blockId, entry.length(), genStamp, volume, entry.getParentFile());

    // Duplicate replicas are not supported by this variant.
    if (volumeMap.get(bpid, newReplica.getBlockId()) != null) {
      throw new RuntimeException("未实现");
    }
    volumeMap.add(bpid, newReplica);

    // Neither are replicas that live on transient storage.
    if (newReplica.getVolume().isTransientStorage()) {
      throw new RuntimeException("未实现");
    }
    lazyWriteReplicaMap.discardReplica(bpid, blockId, false);
  }
}
/** * This method is invoked during DN startup when volumes are scanned to build up the volumeMap. * * <p>Given two replicas, decide which one to keep. The preference is as follows: 1. Prefer the * replica with the higher generation stamp. 2. If generation stamps are equal, prefer the replica * with the larger on-disk length. 3. If on-disk length is the same, prefer the replica on * persistent storage volume. 4. All other factors being equal, keep replica1. * * <p>The other replica is removed from the volumeMap and is deleted from its storage volume. * * @param replica1 * @param replica2 * @param volumeMap * @return the replica that is retained. * @throws IOException */ ReplicaInfo resolveDuplicateReplicas( final ReplicaInfo replica1, final ReplicaInfo replica2, final ReplicaMap volumeMap) throws IOException { if (!deleteDuplicateReplicas) { // Leave both block replicas in place. return replica1; } ReplicaInfo replicaToKeep; ReplicaInfo replicaToDelete; if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) { replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp() ? replica1 : replica2; } else if (replica1.getNumBytes() != replica2.getNumBytes()) { replicaToKeep = replica1.getNumBytes() > replica2.getNumBytes() ? replica1 : replica2; } else if (replica1.getVolume().isTransientStorage() && !replica2.getVolume().isTransientStorage()) { replicaToKeep = replica2; } else { replicaToKeep = replica1; } replicaToDelete = (replicaToKeep == replica1) ? replica2 : replica1; if (LOG.isDebugEnabled()) { LOG.debug( "resolveDuplicateReplicas decide to keep " + replicaToKeep + ". Will try to delete " + replicaToDelete); } // Update volumeMap. volumeMap.add(bpid, replicaToKeep); // Delete the files on disk. Failure here is okay. 
final File blockFile = replicaToDelete.getBlockFile(); if (!blockFile.delete()) { LOG.warn("Failed to delete block file " + blockFile); } final File metaFile = replicaToDelete.getMetaFile(); if (!metaFile.delete()) { LOG.warn("Failed to delete meta file " + metaFile); } return replicaToKeep; }
/** * Generate testing environment and return a collection of blocks on which to run the tests. * * @param bpid Block pool ID to generate blocks for * @param dataSet Namespace in which to insert blocks * @return Contrived blocks for further testing. * @throws IOException */ private ExtendedBlock[] setup(String bpid, FsDatasetImpl dataSet) throws IOException { // setup replicas map ExtendedBlock[] blocks = new ExtendedBlock[] { new ExtendedBlock(bpid, 1, 1, 2001), new ExtendedBlock(bpid, 2, 1, 2002), new ExtendedBlock(bpid, 3, 1, 2003), new ExtendedBlock(bpid, 4, 1, 2004), new ExtendedBlock(bpid, 5, 1, 2005), new ExtendedBlock(bpid, 6, 1, 2006) }; ReplicaMap replicasMap = dataSet.volumeMap; FsVolumeImpl vol = (FsVolumeImpl) dataSet.volumes.getNextVolume(StorageType.DEFAULT, 0).getVolume(); ReplicaInfo replicaInfo = new FinalizedReplica( blocks[FINALIZED].getLocalBlock(), vol, vol.getCurrentDir().getParentFile()); replicasMap.add(bpid, replicaInfo); replicaInfo.getBlockFile().createNewFile(); replicaInfo.getMetaFile().createNewFile(); replicasMap.add( bpid, new ReplicaInPipeline( blocks[TEMPORARY].getBlockId(), blocks[TEMPORARY].getGenerationStamp(), vol, vol.createTmpFile(bpid, blocks[TEMPORARY].getLocalBlock()).getParentFile(), 0)); replicaInfo = new ReplicaBeingWritten( blocks[RBW].getLocalBlock(), vol, vol.createRbwFile(bpid, blocks[RBW].getLocalBlock()).getParentFile(), null); replicasMap.add(bpid, replicaInfo); replicaInfo.getBlockFile().createNewFile(); replicaInfo.getMetaFile().createNewFile(); replicasMap.add( bpid, new ReplicaWaitingToBeRecovered( blocks[RWR].getLocalBlock(), vol, vol.createRbwFile(bpid, blocks[RWR].getLocalBlock()).getParentFile())); replicasMap.add( bpid, new ReplicaUnderRecovery( new FinalizedReplica( blocks[RUR].getLocalBlock(), vol, vol.getCurrentDir().getParentFile()), 2007)); return blocks; }
/** * Add replicas under the given directory to the volume map * * @param volumeMap the replicas map * @param dir an input directory * @param lazyWriteReplicaMap Map of replicas on transient storage. * @param isFinalized true if the directory has finalized replicas; false if the directory has rbw * replicas */ void addToReplicasMap( ReplicaMap volumeMap, File dir, final RamDiskReplicaTracker lazyWriteReplicaMap, boolean isFinalized) throws IOException { File files[] = FileUtil.listFiles(dir); for (File file : files) { if (file.isDirectory()) { addToReplicasMap(volumeMap, file, lazyWriteReplicaMap, isFinalized); } if (isFinalized && FsDatasetUtil.isUnlinkTmpFile(file)) { file = recoverTempUnlinkedBlock(file); if (file == null) { // the original block still exists, so we cover it // in another iteration and can continue here continue; } } if (!Block.isBlockFilename(file)) continue; long genStamp = FsDatasetUtil.getGenerationStampFromFile(files, file); long blockId = Block.filename2id(file.getName()); ReplicaInfo newReplica = null; if (isFinalized) { newReplica = new FinalizedReplica(blockId, file.length(), genStamp, volume, file.getParentFile()); } else { boolean loadRwr = true; File restartMeta = new File(file.getParent() + File.pathSeparator + "." + file.getName() + ".restart"); Scanner sc = null; try { sc = new Scanner(restartMeta, "UTF-8"); // The restart meta file exists if (sc.hasNextLong() && (sc.nextLong() > Time.now())) { // It didn't expire. Load the replica as a RBW. // We don't know the expected block length, so just use 0 // and don't reserve any more space for writes. 
newReplica = new ReplicaBeingWritten( blockId, validateIntegrityAndSetLength(file, genStamp), genStamp, volume, file.getParentFile(), null, 0); loadRwr = false; } sc.close(); if (!restartMeta.delete()) { FsDatasetImpl.LOG.warn("Failed to delete restart meta file: " + restartMeta.getPath()); } } catch (FileNotFoundException fnfe) { // nothing to do hereFile dir = } finally { if (sc != null) { sc.close(); } } // Restart meta doesn't exist or expired. if (loadRwr) { newReplica = new ReplicaWaitingToBeRecovered( blockId, validateIntegrityAndSetLength(file, genStamp), genStamp, volume, file.getParentFile()); } } ReplicaInfo oldReplica = volumeMap.get(bpid, newReplica.getBlockId()); if (oldReplica == null) { volumeMap.add(bpid, newReplica); } else { // We have multiple replicas of the same block so decide which one // to keep. newReplica = resolveDuplicateReplicas(newReplica, oldReplica, volumeMap); } // If we are retaining a replica on transient storage make sure // it is in the lazyWriteReplicaMap so it can be persisted // eventually. if (newReplica.getVolume().isTransientStorage()) { lazyWriteReplicaMap.addReplica(bpid, blockId, (FsVolumeImpl) newReplica.getVolume()); } else { lazyWriteReplicaMap.discardReplica(bpid, blockId, false); } } }
/** Test {@link FsDatasetImpl#initReplicaRecovery(String, ReplicaMap, Block, long)} */ @Test public void testInitReplicaRecovery() throws IOException { final long firstblockid = 10000L; final long gs = 7777L; final long length = 22L; final ReplicaMap map = new ReplicaMap(this); String bpid = "BP-TEST"; final Block[] blocks = new Block[5]; for (int i = 0; i < blocks.length; i++) { blocks[i] = new Block(firstblockid + i, length, gs); map.add(bpid, createReplicaInfo(blocks[i])); } { // normal case final Block b = blocks[0]; final ReplicaInfo originalInfo = map.get(bpid, b); final long recoveryid = gs + 1; final ReplicaRecoveryInfo recoveryInfo = FsDatasetImpl.initReplicaRecovery(bpid, map, blocks[0], recoveryid); assertEquals(originalInfo, recoveryInfo); final ReplicaUnderRecovery updatedInfo = (ReplicaUnderRecovery) map.get(bpid, b); Assert.assertEquals(originalInfo.getBlockId(), updatedInfo.getBlockId()); Assert.assertEquals(recoveryid, updatedInfo.getRecoveryID()); // recover one more time final long recoveryid2 = gs + 2; final ReplicaRecoveryInfo recoveryInfo2 = FsDatasetImpl.initReplicaRecovery(bpid, map, blocks[0], recoveryid2); assertEquals(originalInfo, recoveryInfo2); final ReplicaUnderRecovery updatedInfo2 = (ReplicaUnderRecovery) map.get(bpid, b); Assert.assertEquals(originalInfo.getBlockId(), updatedInfo2.getBlockId()); Assert.assertEquals(recoveryid2, updatedInfo2.getRecoveryID()); // case RecoveryInProgressException try { FsDatasetImpl.initReplicaRecovery(bpid, map, b, recoveryid); Assert.fail(); } catch (RecoveryInProgressException ripe) { System.out.println("GOOD: getting " + ripe); } } { // BlockRecoveryFI_01: replica not found final long recoveryid = gs + 1; final Block b = new Block(firstblockid - 1, length, gs); ReplicaRecoveryInfo r = FsDatasetImpl.initReplicaRecovery(bpid, map, b, recoveryid); Assert.assertNull("Data-node should not have this replica.", r); } { // BlockRecoveryFI_02: "THIS IS NOT SUPPOSED TO HAPPEN" with recovery id < gs final 
long recoveryid = gs - 1; final Block b = new Block(firstblockid + 1, length, gs); try { FsDatasetImpl.initReplicaRecovery(bpid, map, b, recoveryid); Assert.fail(); } catch (IOException ioe) { System.out.println("GOOD: getting " + ioe); } } // BlockRecoveryFI_03: Replica's gs is less than the block's gs { final long recoveryid = gs + 1; final Block b = new Block(firstblockid, length, gs + 1); try { FsDatasetImpl.initReplicaRecovery(bpid, map, b, recoveryid); fail( "InitReplicaRecovery should fail because replica's " + "gs is less than the block's gs"); } catch (IOException e) { e.getMessage() .startsWith("replica.getGenerationStamp() < block.getGenerationStamp(), block="); } } }