/**
  * Keep accessing the given file until the namenode reports that the given block in the file
  * contains the given number of corrupt replicas.
  */
 public static void waitCorruptReplicas(
     FileSystem fs, FSNamesystem ns, Path file, ExtendedBlock b, int corruptRepls)
     throws IOException, TimeoutException {
   int count = 0;
   final int ATTEMPTS = 50;
   int repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock());
   while (repls != corruptRepls && count < ATTEMPTS) {
     try {
       IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), 512, true);
     } catch (IOException e) {
       // Swallow exceptions
     }
     System.out.println("Waiting for " + corruptRepls + " corrupt replicas");
     repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock());
     count++;
   }
    // Check the condition itself so a success on the final attempt is not
    // misreported as a timeout.
    if (repls != corruptRepls) {
     throw new TimeoutException(
         "Timed out waiting for corrupt replicas."
             + " Waiting for "
             + corruptRepls
             + ", but only found "
             + repls);
   }
 }
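 // A minimal call-site sketch for the helper above. Assumes a running
 // MiniDFSCluster named "cluster", a FileSystem "fs", and a single-block file
 // at "path"; corruptReplica is an assumed MiniDFSCluster test hook, and all
 // names here are illustrative, not part of the original snippet.
 void exampleWaitForOneCorruptReplica(
     MiniDFSCluster cluster, FileSystem fs, Path path) throws Exception {
   ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, path);
   cluster.corruptReplica(0, blk); // damage the replica on datanode 0
   // Poll the namenode until it records exactly one corrupt replica.
   waitCorruptReplicas(fs, cluster.getNamesystem(), path, blk, 1);
 }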
 int addSomeBlocks(SimulatedFSDataset fsdataset, int startingBlockId) throws IOException {
   int bytesAdded = 0;
   for (int i = startingBlockId; i < startingBlockId + NUMBLOCKS; ++i) {
     ExtendedBlock b = new ExtendedBlock(bpid, i, 0, 0);
      // We pass the expected length as zero; fsdataset should use the size of
      // the actual data written.
     ReplicaInPipelineInterface bInfo =
         fsdataset.createRbw(StorageType.DEFAULT, b, false).getReplica();
     ReplicaOutputStreams out =
         bInfo.createStreams(true, DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512));
     try {
       OutputStream dataOut = out.getDataOut();
       assertEquals(0, fsdataset.getLength(b));
       for (int j = 1; j <= blockIdToLen(i); ++j) {
         dataOut.write(j);
         assertEquals(j, bInfo.getBytesOnDisk()); // correct length even as we write
         bytesAdded++;
       }
     } finally {
       out.close();
     }
     b.setNumBytes(blockIdToLen(i));
     fsdataset.finalizeBlock(b);
     assertEquals(blockIdToLen(i), fsdataset.getLength(b));
   }
   return bytesAdded;
 }
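 // addSomeBlocks relies on NUMBLOCKS, bpid, and blockIdToLen defined elsewhere
 // in its test class. A minimal sketch of what they might look like; the
 // concrete values are assumptions, not the original test's definitions.
 static final int NUMBLOCKS = 10; // assumed number of blocks added per call
 final String bpid = "BP-TEST"; // block pool id, normally read from the cluster
 // Deterministic per-block length, so the expected size in the assertions
 // above can be derived from the block id alone.
 static long blockIdToLen(long blockId) {
   return blockId * 79L; // any deterministic mapping works
 }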
  /**
   * The following test first creates a file. It verifies the block information from a datanode.
   * Then, it updates the block with new information and verifies again.
   */
  @Test
  public void testBlockMetaDataInfo() throws Exception {
    MiniDFSCluster cluster = null;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
      cluster.waitActive();

      // create a file
      DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
      String filestr = "/foo";
      Path filepath = new Path(filestr);
      DFSTestUtil.createFile(dfs, filepath, 1024L, (short) 3, 0L);
      assertTrue(dfs.exists(filepath));

      // get block info
      LocatedBlock locatedblock =
          getLastLocatedBlock(DFSClientAdapter.getDFSClient(dfs).getNamenode(), filestr);
      DatanodeInfo[] datanodeinfo = locatedblock.getLocations();
      assertTrue(datanodeinfo.length > 0);

      // connect to a data node
      DataNode datanode = cluster.getDataNode(datanodeinfo[0].getIpcPort());
      InterDatanodeProtocol idp =
          DataNodeTestUtils.createInterDatanodeProtocolProxy(datanode, datanodeinfo[0], conf);

      // stop the block scanner so we can compare lastScanTime
      DataNodeTestUtils.shutdownBlockScanner(datanode);

      // verify BlockMetaDataInfo
      ExtendedBlock b = locatedblock.getBlock();
      InterDatanodeProtocol.LOG.info("b=" + b + ", " + b.getClass());
      checkMetaInfo(b, datanode);
      long recoveryId = b.getGenerationStamp() + 1;
      idp.initReplicaRecovery(new RecoveringBlock(b, locatedblock.getLocations(), recoveryId));

      // verify updateBlock
      ExtendedBlock newblock =
          new ExtendedBlock(
              b.getBlockPoolId(), b.getBlockId(), b.getNumBytes() / 2, b.getGenerationStamp() + 1);
      idp.updateReplicaUnderRecovery(b, recoveryId, newblock.getNumBytes());
      checkMetaInfo(newblock, datanode);

      // Verify correct null response trying to init recovery for a missing block
      ExtendedBlock badBlock = new ExtendedBlock("fake-pool", b.getBlockId(), 0, 0);
      assertNull(
          idp.initReplicaRecovery(
              new RecoveringBlock(badBlock, locatedblock.getLocations(), recoveryId)));
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
 private int getTrueReplication(MiniDFSCluster cluster, ExtendedBlock block) throws IOException {
   int count = 0;
   for (DataNode dn : cluster.getDataNodes()) {
     if (DataNodeTestUtils.getFSDataset(dn)
             .getStoredBlock(block.getBlockPoolId(), block.getBlockId())
         != null) {
       count++;
     }
   }
   return count;
 }
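 // A hedged polling sketch built on getTrueReplication: wait until the
 // datanodes' on-disk state reaches the target replication. GenericTestUtils
 // comes from Hadoop's test utilities; the target of 3 and the timeouts are
 // assumptions for illustration.
 private void waitForTrueReplication(final MiniDFSCluster cluster, final ExtendedBlock block)
     throws Exception {
   GenericTestUtils.waitFor(
       () -> {
         try {
           return getTrueReplication(cluster, block) == 3;
         } catch (IOException e) {
           return false; // keep polling on transient errors
         }
       },
       500, 30000); // check every 500 ms, give up after 30 s
 }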
  /** Convert an ExtendedBlock to a Json map. */
  private static Map<String, Object> toJsonMap(final ExtendedBlock extendedblock) {
    if (extendedblock == null) {
      return null;
    }

    final Map<String, Object> m = new TreeMap<String, Object>();
    m.put("blockPoolId", extendedblock.getBlockPoolId());
    m.put("blockId", extendedblock.getBlockId());
    m.put("numBytes", extendedblock.getNumBytes());
    m.put("generationStamp", extendedblock.getGenerationStamp());
    return m;
  }
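  // Because the map is a TreeMap, its keys serialize in a stable alphabetical
  // order. An illustrative serialization with Jackson (an assumed dependency,
  // not one the surrounding code uses); writeValueAsString throws
  // JsonProcessingException, which extends IOException.
  private static String toJsonString(final ExtendedBlock extendedblock) throws IOException {
    return new com.fasterxml.jackson.databind.ObjectMapper()
        .writeValueAsString(toJsonMap(extendedblock));
    // e.g. {"blockId":42,"blockPoolId":"BP-1","generationStamp":1001,"numBytes":1024}
  }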
 /**
  * Corrupt a block on a data node by overwriting the start of the block file with the byte
  * sequence 0, 1, ..., BLOCK_SIZE - 1.
  *
  * @param block the ExtendedBlock to be corrupted
  * @param dn the data node where the block needs to be corrupted
  * @throws FileNotFoundException if the block file cannot be located
  * @throws IOException if writing the replacement bytes fails
  */
 private static void corruptBlock(final ExtendedBlock block, final DataNode dn)
     throws FileNotFoundException, IOException {
   final File f =
       DataNodeTestUtils.getBlockFile(dn, block.getBlockPoolId(), block.getLocalBlock());
    final byte[] bytes = new byte[(int) BLOCK_SIZE];
    for (int i = 0; i < BLOCK_SIZE; i++) {
      bytes[i] = (byte) i;
    }
    // try-with-resources guarantees the file handle is released even if the
    // write fails
    try (RandomAccessFile raFile = new RandomAccessFile(f, "rw")) {
      raFile.write(bytes);
    }
 }
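 // Hypothetical follow-up once every replica of a block has been corrupted
 // this way: a client read should surface the damage. The exception type
 // depends on how many replicas were corrupted; fs, filePath, and the buffer
 // size are assumptions for illustration.
 void exampleReadShouldFail(FileSystem fs, Path filePath) throws IOException {
   try (FSDataInputStream in = fs.open(filePath)) {
     IOUtils.copyBytes(in, new IOUtils.NullOutputStream(), 512, false);
     Assert.fail("read of a fully corrupted block should not succeed");
   } catch (ChecksumException | BlockMissingException expected) {
     // corruption was detected on the read path
   }
 }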
  /**
   * Try to access a block on a data node; an IOException is thrown on failure.
   *
   * @param datanode the datanode hosting a replica of the block
   * @param lblock the located block to read
   * @throws IOException if the block cannot be read
   */
  private void accessBlock(DatanodeInfo datanode, LocatedBlock lblock) throws IOException {
    InetSocketAddress targetAddr = null;
    ExtendedBlock block = lblock.getBlock();

    targetAddr = NetUtils.createSocketAddr(datanode.getXferAddr());

    BlockReader blockReader =
        new BlockReaderFactory(new DfsClientConf(conf))
            .setInetSocketAddress(targetAddr)
            .setBlock(block)
            .setFileName(
                BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId()))
            .setBlockToken(lblock.getBlockToken())
            .setStartOffset(0)
            .setLength(-1)
            .setVerifyChecksum(true)
            .setClientName("TestDataNodeVolumeFailure")
            .setDatanodeInfo(datanode)
            .setCachingStrategy(CachingStrategy.newDefaultStrategy())
            .setClientCacheContext(ClientContext.getFromConf(conf))
            .setConfiguration(conf)
            .setTracer(FsTracer.get(conf))
            .setRemotePeerFactory(
                new RemotePeerFactory() {
                  @Override
                  public Peer newConnectedPeer(
                      InetSocketAddress addr,
                      Token<BlockTokenIdentifier> blockToken,
                      DatanodeID datanodeId)
                      throws IOException {
                    Peer peer = null;
                    Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
                    try {
                      sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                      sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                      peer = DFSUtilClient.peerFromSocket(sock);
                    } finally {
                      if (peer == null) {
                        IOUtils.closeSocket(sock);
                      }
                    }
                    return peer;
                  }
                })
            .build();
    blockReader.close();
  }
  /**
   * Setup a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link
   * State#READ_ONLY_SHARED} replicas.
   */
  @Before
  public void setup() throws IOException, InterruptedException {
    conf = new HdfsConfiguration();
    SimulatedFSDataset.setFactory(conf);

    Configuration[] overlays = new Configuration[NUM_DATANODES];
    for (int i = 0; i < overlays.length; i++) {
      overlays[i] = new Configuration();
      if (i == RO_NODE_INDEX) {
        overlays[i].setEnum(SimulatedFSDataset.CONFIG_PROPERTY_STATE, READ_ONLY_SHARED);
      }
    }

    cluster =
        new MiniDFSCluster.Builder(conf)
            .numDataNodes(NUM_DATANODES)
            .dataNodeConfOverlays(overlays)
            .build();
    fs = cluster.getFileSystem();
    blockManager = cluster.getNameNode().getNamesystem().getBlockManager();
    datanodeManager = blockManager.getDatanodeManager();
    client =
        new DFSClient(
            new InetSocketAddress("localhost", cluster.getNameNodePort()),
            cluster.getConfiguration(0));

    for (int i = 0; i < NUM_DATANODES; i++) {
      DataNode dataNode = cluster.getDataNodes().get(i);
      validateStorageState(
          BlockManagerTestUtil.getStorageReportsForDatanode(
              datanodeManager.getDatanode(dataNode.getDatanodeId())),
          i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
    }

    // Create a 1 block file
    DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed);

    LocatedBlock locatedBlock = getLocatedBlock();
    extendedBlock = locatedBlock.getBlock();
    block = extendedBlock.getLocalBlock();

    assertThat(locatedBlock.getLocations().length, is(1));
    normalDataNode = locatedBlock.getLocations()[0];
    readOnlyDataNode =
        datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId());
    assertThat(normalDataNode, is(not(readOnlyDataNode)));

    validateNumberReplicas(1);

    // Inject the block into the datanode with READ_ONLY_SHARED storage
    cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block));

    // There should now be 2 *locations* for the block
    // Must wait until the NameNode has processed the block report for the injected blocks
    waitForLocations(2);
  }
  /**
   * Access each block of the file on its 2nd DataNode (location index 1) until one access fails.
   *
   * @param path the file whose blocks are read
   * @param size the file length, used to fetch all block locations
   * @throws IOException
   */
  private void triggerFailure(String path, long size) throws IOException {
    NamenodeProtocols nn = cluster.getNameNodeRpc();
    List<LocatedBlock> locatedBlocks = nn.getBlockLocations(path, 0, size).getLocatedBlocks();

    for (LocatedBlock lb : locatedBlocks) {
      DatanodeInfo dinfo = lb.getLocations()[1];
      ExtendedBlock b = lb.getBlock();
      try {
        accessBlock(dinfo, lb);
      } catch (IOException e) {
        System.out.println(
            "Failure triggered, on block: "
                + b.getBlockId()
                + "; corresponding volume should be removed by now");
        break;
      }
    }
  }
 /** Create a file with one block and corrupt some/all of the block replicas. */
 private void createAFileWithCorruptedBlockReplicas(
     Path filePath, short repl, int corruptBlockCount)
     throws IOException, AccessControlException, FileNotFoundException, UnresolvedLinkException,
         InterruptedException, TimeoutException {
   DFSTestUtil.createFile(dfs, filePath, BLOCK_SIZE, repl, 0);
   DFSTestUtil.waitReplication(dfs, filePath, repl);
   // Locate the file blocks by asking name node
   final LocatedBlocks locatedblocks =
       dfs.dfs.getNamenode().getBlockLocations(filePath.toString(), 0L, BLOCK_SIZE);
   Assert.assertEquals(repl, locatedblocks.get(0).getLocations().length);
   // The file only has one block
   LocatedBlock lblock = locatedblocks.get(0);
   DatanodeInfo[] datanodeinfos = lblock.getLocations();
   ExtendedBlock block = lblock.getBlock();
    // corrupt some or all of the block replicas
   for (int i = 0; i < corruptBlockCount; i++) {
     DatanodeInfo dninfo = datanodeinfos[i];
     final DataNode dn = cluster.getDataNode(dninfo.getIpcPort());
     corruptBlock(block, dn);
     LOG.debug("Corrupted block " + block.getBlockName() + " on data node " + dninfo);
   }
 }
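 // A possible test body wiring this helper to waitCorruptReplicas from earlier
 // in this section; the path and counts are illustrative assumptions.
 void exampleCorruptTwoOfThree() throws Exception {
   Path p = new Path("/corrupt-replicas.dat");
   createAFileWithCorruptedBlockReplicas(p, (short) 3, 2); // corrupt 2 of 3
   ExtendedBlock blk = DFSTestUtil.getFirstBlock(dfs, p);
   // Poll the namenode until both corrupt replicas are recorded.
   waitCorruptReplicas(dfs, cluster.getNamesystem(), p, blk, 2);
 }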
  /** Get a BlockReader for the given block. */
  public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
      throws IOException {
    InetSocketAddress targetAddr = null;
    Socket sock = null;
    ExtendedBlock block = testBlock.getBlock();
    DatanodeInfo[] nodes = testBlock.getLocations();
    targetAddr = NetUtils.createSocketAddr(nodes[0].getName());
    sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
    sock.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
    sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);

    return BlockReaderFactory.newBlockReader(
        new DFSClient.Conf(conf),
        sock,
        targetAddr.toString() + ":" + block.getBlockId(),
        block,
        testBlock.getBlockToken(),
        offset,
        lenToRead,
        conf.getInt("io.file.buffer.size", 4096),
        true,
        "");
  }
  /*
   * Wait up to 20s for the given block to be replicated across
   * the requested number of racks, with the requested number of
   * replicas, and the requested number of replicas still needed.
   */
  public static void waitForReplication(
      MiniDFSCluster cluster, ExtendedBlock b, int racks, int replicas, int neededReplicas)
      throws IOException, TimeoutException, InterruptedException {
    int curRacks = 0;
    int curReplicas = 0;
    int curNeededReplicas = 0;
    int count = 0;
    final int ATTEMPTS = 20;

    do {
      Thread.sleep(1000);
      int[] r = BlockManagerTestUtil.getReplicaInfo(cluster.getNamesystem(), b.getLocalBlock());
      curRacks = r[0];
      curReplicas = r[1];
      curNeededReplicas = r[2];
      count++;
    } while ((curRacks != racks || curReplicas != replicas || curNeededReplicas != neededReplicas)
        && count < ATTEMPTS);

    // Check the condition itself so a success on the final attempt is not
    // misreported as a timeout.
    if (curRacks != racks || curReplicas != replicas || curNeededReplicas != neededReplicas) {
      throw new TimeoutException(
          "Timed out waiting for replication."
              + " Needed replicas = "
              + neededReplicas
              + " Cur needed replicas = "
              + curNeededReplicas
              + " Replicas = "
              + replicas
              + " Cur replicas = "
              + curReplicas
              + " Racks = "
              + racks
              + " Cur racks = "
              + curRacks);
    }
  }
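  // A call-site sketch, e.g. after restarting a datanode: wait until the block
  // sits on 2 racks with 3 live replicas and no re-replication work is still
  // pending. The counts are assumptions for illustration.
  //
  //   waitForReplication(cluster, block, 2, 3, 0);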
  /*
   * XXX (ab) Bulk of this method is copied verbatim from {@link DFSClient}, which is
   * bad. Both places should be refactored to provide a method to copy blocks
   * around.
   */
  private void copyBlock(DFSClient dfs, LocatedBlock lblock, OutputStream fos) throws Exception {
    int failures = 0;
    InetSocketAddress targetAddr = null;
    TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
    BlockReader blockReader = null;
    ExtendedBlock block = lblock.getBlock();

    while (blockReader == null) {
      DatanodeInfo chosenNode;

      try {
        chosenNode = bestNode(dfs, lblock.getLocations(), deadNodes);
        targetAddr = NetUtils.createSocketAddr(chosenNode.getXferAddr());
      } catch (IOException ie) {
        if (failures >= DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT) {
          throw new IOException("Could not obtain block " + lblock, ie);
        }
        LOG.info("Could not obtain block from any node:  " + ie);
        try {
          Thread.sleep(10000);
        } catch (InterruptedException iex) {
          Thread.currentThread().interrupt(); // preserve the interrupt status
        }
        deadNodes.clear();
        failures++;
        continue;
      }
      try {
        String file =
            BlockReaderFactory.getFileName(targetAddr, block.getBlockPoolId(), block.getBlockId());
        blockReader =
            new BlockReaderFactory(dfs.getConf())
                .setFileName(file)
                .setBlock(block)
                .setBlockToken(lblock.getBlockToken())
                .setStartOffset(0)
                .setLength(-1)
                .setVerifyChecksum(true)
                .setClientName("fsck")
                .setDatanodeInfo(chosenNode)
                .setInetSocketAddress(targetAddr)
                .setCachingStrategy(CachingStrategy.newDropBehind())
                .setClientCacheContext(dfs.getClientContext())
                .setConfiguration(namenode.conf)
                .setRemotePeerFactory(
                    new RemotePeerFactory() {
                      @Override
                      public Peer newConnectedPeer(InetSocketAddress addr) throws IOException {
                        Peer peer = null;
                        Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
                        try {
                          s.connect(addr, HdfsServerConstants.READ_TIMEOUT);
                          s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
                          peer =
                              TcpPeerServer.peerFromSocketAndKey(
                                  s, namenode.getRpcServer().getDataEncryptionKey());
                        } finally {
                          if (peer == null) {
                            IOUtils.closeQuietly(s);
                          }
                        }
                        return peer;
                      }
                    })
                .build();
      } catch (IOException ex) {
        // Put chosen node into dead list, continue
        LOG.info("Failed to connect to " + targetAddr + ":" + ex);
        deadNodes.add(chosenNode);
      }
    }
    byte[] buf = new byte[1024];
    int cnt = 0;
    boolean success = true;
    long bytesRead = 0;
    try {
      while ((cnt = blockReader.read(buf, 0, buf.length)) > 0) {
        fos.write(buf, 0, cnt);
        bytesRead += cnt;
      }
      if (bytesRead != block.getNumBytes()) {
        throw new IOException(
            "Recorded block size is "
                + block.getNumBytes()
                + ", but datanode returned "
                + bytesRead
                + " bytes");
      }
    } catch (Exception e) {
      LOG.error("Error reading block", e);
      success = false;
    } finally {
      blockReader.close();
    }
    if (!success) {
      throw new Exception("Could not copy block data for " + lblock.getBlock());
    }
  }
  /** Test for {@link FsDatasetImpl#updateReplicaUnderRecovery(ExtendedBlock, long, long)} */
  @Test
  public void testUpdateReplicaUnderRecovery() throws IOException {
    MiniDFSCluster cluster = null;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
      cluster.waitActive();
      String bpid = cluster.getNamesystem().getBlockPoolId();

      // create a file
      DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
      String filestr = "/foo";
      Path filepath = new Path(filestr);
      DFSTestUtil.createFile(dfs, filepath, 1024L, (short) 3, 0L);

      // get block info
      final LocatedBlock locatedblock =
          getLastLocatedBlock(DFSClientAdapter.getDFSClient(dfs).getNamenode(), filestr);
      final DatanodeInfo[] datanodeinfo = locatedblock.getLocations();
      Assert.assertTrue(datanodeinfo.length > 0);

      // get DataNode and FSDataset objects
      final DataNode datanode = cluster.getDataNode(datanodeinfo[0].getIpcPort());
      Assert.assertNotNull(datanode);

      // initReplicaRecovery
      final ExtendedBlock b = locatedblock.getBlock();
      final long recoveryid = b.getGenerationStamp() + 1;
      final long newlength = b.getNumBytes() - 1;
      final FsDatasetSpi<?> fsdataset = DataNodeTestUtils.getFSDataset(datanode);
      final ReplicaRecoveryInfo rri =
          fsdataset.initReplicaRecovery(new RecoveringBlock(b, null, recoveryid));

      // check replica
      final ReplicaInfo replica =
          FsDatasetTestUtil.fetchReplicaInfo(fsdataset, bpid, b.getBlockId());
      Assert.assertEquals(ReplicaState.RUR, replica.getState());

      // check meta data before update
      FsDatasetImpl.checkReplicaFiles(replica);

      // case "THIS IS NOT SUPPOSED TO HAPPEN"
      // with (block length) != (stored replica's on disk length).
      {
        // create a block with same id and gs but different length.
        final ExtendedBlock tmp =
            new ExtendedBlock(
                b.getBlockPoolId(),
                rri.getBlockId(),
                rri.getNumBytes() - 1,
                rri.getGenerationStamp());
        try {
          // update should fail
          fsdataset.updateReplicaUnderRecovery(tmp, recoveryid, newlength);
          Assert.fail();
        } catch (IOException ioe) {
          System.out.println("GOOD: getting " + ioe);
        }
      }

      // update
      final String storageID =
          fsdataset.updateReplicaUnderRecovery(
              new ExtendedBlock(b.getBlockPoolId(), rri), recoveryid, newlength);
      Assert.assertNotNull(storageID);

    } finally {
      if (cluster != null) cluster.shutdown();
    }
  }
 public static void checkMetaInfo(ExtendedBlock b, DataNode dn) throws IOException {
   Block metainfo =
       DataNodeTestUtils.getFSDataset(dn).getStoredBlock(b.getBlockPoolId(), b.getBlockId());
   Assert.assertEquals(b.getBlockId(), metainfo.getBlockId());
   Assert.assertEquals(b.getNumBytes(), metainfo.getNumBytes());
 }
  // try reading a block using a BlockReader directly
  protected void tryRead(final Configuration conf, LocatedBlock lblock, boolean shouldSucceed) {
    InetSocketAddress targetAddr = null;
    IOException ioe = null;
    BlockReader blockReader = null;
    ExtendedBlock block = lblock.getBlock();
    try {
      DatanodeInfo[] nodes = lblock.getLocations();
      targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());

      blockReader =
          new BlockReaderFactory(new DfsClientConf(conf))
              .setFileName(
                  BlockReaderFactory.getFileName(
                      targetAddr, "test-blockpoolid", block.getBlockId()))
              .setBlock(block)
              .setBlockToken(lblock.getBlockToken())
              .setInetSocketAddress(targetAddr)
              .setStartOffset(0)
              .setLength(-1)
              .setVerifyChecksum(true)
              .setClientName("TestBlockTokenWithDFS")
              .setDatanodeInfo(nodes[0])
              .setCachingStrategy(CachingStrategy.newDefaultStrategy())
              .setClientCacheContext(ClientContext.getFromConf(conf))
              .setConfiguration(conf)
              .setTracer(FsTracer.get(conf))
              .setRemotePeerFactory(
                  new RemotePeerFactory() {
                    @Override
                    public Peer newConnectedPeer(
                        InetSocketAddress addr,
                        Token<BlockTokenIdentifier> blockToken,
                        DatanodeID datanodeId)
                        throws IOException {
                      Peer peer = null;
                      Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
                      try {
                        sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                        sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                        peer = DFSUtilClient.peerFromSocket(sock);
                      } finally {
                        if (peer == null) {
                          IOUtils.closeSocket(sock);
                        }
                      }
                      return peer;
                    }
                  })
              .build();
    } catch (IOException ex) {
      ioe = ex;
    } finally {
      if (blockReader != null) {
        try {
          blockReader.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }
    if (shouldSucceed) {
      Assert.assertNotNull(
          "OP_READ_BLOCK: access token is invalid, " + "when it is expected to be valid",
          blockReader);
    } else {
      Assert.assertNotNull(
          "OP_READ_BLOCK: access token is valid, " + "when it is expected to be invalid", ioe);
      Assert.assertTrue(
          "OP_READ_BLOCK failed due to reasons other than access token: ",
          ioe instanceof InvalidBlockTokenException);
    }
  }
  /**
   * Open a DataInputStream to a DataNode so that it can be read from. We get the block ID and the
   * IDs of the destinations from the namenode.
   */
  private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
    if (target >= getFileLength()) {
      throw new IOException("Attempted to read past end of file");
    }

    // Will be getting a new BlockReader.
    if (blockReader != null) {
      closeBlockReader(blockReader);
      blockReader = null;
    }

    //
    // Connect to best DataNode for desired Block, with potential offset
    //
    DatanodeInfo chosenNode = null;
    int refetchToken = 1; // only need to get a new access token once

    boolean connectFailedOnce = false;

    while (true) {
      //
      // Compute desired block
      //
      LocatedBlock targetBlock = getBlockAt(target, true);
      assert (target == pos) : "Wrong position " + pos + ", expected " + target;
      long offsetIntoBlock = target - targetBlock.getStartOffset();

      DNAddrPair retval = chooseDataNode(targetBlock);
      chosenNode = retval.info;
      InetSocketAddress targetAddr = retval.addr;

      try {
        ExtendedBlock blk = targetBlock.getBlock();
        Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken();
        blockReader =
            getBlockReader(
                targetAddr,
                chosenNode,
                src,
                blk,
                accessToken,
                offsetIntoBlock,
                blk.getNumBytes() - offsetIntoBlock,
                buffersize,
                verifyChecksum,
                dfsClient.clientName);
        if (connectFailedOnce) {
          DFSClient.LOG.info(
              "Successfully connected to " + targetAddr + " for block " + blk.getBlockId());
        }
        return chosenNode;
      } catch (IOException ex) {
        if (ex instanceof InvalidBlockTokenException && refetchToken > 0) {
          DFSClient.LOG.info(
              "Will fetch a new access token and retry, "
                  + "access token was invalid when connecting to "
                  + targetAddr
                  + " : "
                  + ex);
          /*
           * Get a new access token and retry. Retry is needed in 2 cases. 1)
           * When both NN and DN re-started while DFSClient holding a cached
           * access token. 2) In the case that NN fails to update its
           * access key at pre-set interval (by a wide margin) and
           * subsequently restarts. In this case, DN re-registers itself with
           * NN and receives a new access key, but DN will delete the old
           * access key from its memory since it's considered expired based on
           * the estimated expiration date.
           */
          refetchToken--;
          fetchBlockAt(target);
        } else {
          connectFailedOnce = true;
          DFSClient.LOG.warn(
              "Failed to connect to "
                  + targetAddr
                  + " for block"
                  + ", add to deadNodes and continue. "
                  + ex,
              ex);
          // Put chosen node into dead list, continue
          addToDeadNodes(chosenNode);
        }
      }
    }
  }
  /**
   * Test that copy-on-write for blocks works correctly.
   *
   * @throws IOException if an unexpected I/O error occurs
   */
  @Test
  public void testCopyOnWrite() throws IOException {
    Configuration conf = new HdfsConfiguration();
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    InetSocketAddress addr = new InetSocketAddress("localhost", cluster.getNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    try {

      // create a new file, write to it and close it.
      //
      Path file1 = new Path("/filestatus.dat");
      FSDataOutputStream stm = AppendTestUtil.createFile(fs, file1, 1);
      writeFile(stm);
      stm.close();

      // Get a handle to the datanode
      DataNode[] dn = cluster.listDataNodes();
      assertTrue("There should be only one datanode but found " + dn.length, dn.length == 1);

      LocatedBlocks locations =
          client.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
      List<LocatedBlock> blocks = locations.getLocatedBlocks();
      FSDataset dataset = (FSDataset) dn[0].data;

      //
      // Create hard links for a few of the blocks
      //
      for (int i = 0; i < blocks.size(); i = i + 2) {
        ExtendedBlock b = blocks.get(i).getBlock();
        File f = dataset.getFile(b.getBlockPoolId(), b.getLocalBlock());
        File link = new File(f.toString() + ".link");
        System.out.println("Creating hardlink for File " + f + " to " + link);
        HardLink.createHardLink(f, link);
      }

      //
      // Detach all blocks. This should remove hardlinks (if any)
      //
      for (int i = 0; i < blocks.size(); i++) {
        ExtendedBlock b = blocks.get(i).getBlock();
        System.out.println("testCopyOnWrite detaching block " + b);
        assertTrue(
            "Detaching block " + b + " should have returned true", dataset.unlinkBlock(b, 1));
      }

      // Since the blocks were already detached earlier, these calls should
      // return false
      //
      for (int i = 0; i < blocks.size(); i++) {
        ExtendedBlock b = blocks.get(i).getBlock();
        System.out.println("testCopyOnWrite detaching block " + b);
        assertTrue(
            "Detaching block " + b + " should have returned false", !dataset.unlinkBlock(b, 1));
      }

    } finally {
      fs.close();
      cluster.shutdown();
    }
  }
  @VisibleForTesting
  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
    String path = file.getFullName(parent);
    boolean isOpen = false;

    if (file.isDir()) {
      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
        String snapshotPath =
            (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
                + HdfsConstants.DOT_SNAPSHOT_DIR;
        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
        check(snapshotPath, snapshotFileInfo, res);
      }
      byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
      DirectoryListing thisListing;
      if (showFiles) {
        out.println(path + " <dir>");
      }
      res.totalDirs++;
      do {
        assert lastReturnedName != null;
        thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
        if (thisListing == null) {
          return;
        }
        HdfsFileStatus[] files = thisListing.getPartialListing();
        for (int i = 0; i < files.length; i++) {
          check(path, files[i], res);
        }
        lastReturnedName = thisListing.getLastName();
      } while (thisListing.hasMore());
      return;
    }
    if (file.isSymlink()) {
      if (showFiles) {
        out.println(path + " <symlink>");
      }
      res.totalSymlinks++;
      return;
    }
    long fileLen = file.getLen();
    // Get block locations without updating the file access time
    // and without block access tokens
    LocatedBlocks blocks;
    try {
      blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
    } catch (FileNotFoundException fnfe) {
      blocks = null;
    }
    if (blocks == null) { // the file is deleted
      return;
    }
    isOpen = blocks.isUnderConstruction();
    if (isOpen && !showOpenFiles) {
      // We collect these stats about open files to report with default options
      res.totalOpenFilesSize += fileLen;
      res.totalOpenFilesBlocks += blocks.locatedBlockCount();
      res.totalOpenFiles++;
      return;
    }
    res.totalFiles++;
    res.totalSize += fileLen;
    res.totalBlocks += blocks.locatedBlockCount();
    if (showOpenFiles && isOpen) {
      out.print(
          path
              + " "
              + fileLen
              + " bytes, "
              + blocks.locatedBlockCount()
              + " block(s), OPENFORWRITE: ");
    } else if (showFiles) {
      out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
    } else {
      out.print('.');
    }
    if (res.totalFiles % 100 == 0) {
      out.println();
      out.flush();
    }
    int missing = 0;
    int corrupt = 0;
    long missize = 0;
    int underReplicatedPerFile = 0;
    int misReplicatedPerFile = 0;
    StringBuilder report = new StringBuilder();
    int i = 0;
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
      ExtendedBlock block = lBlk.getBlock();
      boolean isCorrupt = lBlk.isCorrupt();
      String blkName = block.toString();
      DatanodeInfo[] locs = lBlk.getLocations();
      NumberReplicas numberReplicas =
          namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
      int liveReplicas = numberReplicas.liveReplicas();
      res.totalReplicas += liveReplicas;
      short targetFileReplication = file.getReplication();
      res.numExpectedReplicas += targetFileReplication;
      if (liveReplicas > targetFileReplication) {
        res.excessiveReplicas += (liveReplicas - targetFileReplication);
        res.numOverReplicatedBlocks += 1;
      }
      // Check if block is Corrupt
      if (isCorrupt) {
        corrupt++;
        res.corruptBlocks++;
        out.print(
            "\n"
                + path
                + ": CORRUPT blockpool "
                + block.getBlockPoolId()
                + " block "
                + block.getBlockName()
                + "\n");
      }
      if (liveReplicas >= minReplication) res.numMinReplicatedBlocks++;
      if (liveReplicas < targetFileReplication && liveReplicas > 0) {
        res.missingReplicas += (targetFileReplication - liveReplicas);
        res.numUnderReplicatedBlocks += 1;
        underReplicatedPerFile++;
        if (!showFiles) {
          out.print("\n" + path + ": ");
        }
        out.println(
            " Under replicated "
                + block
                + ". Target Replicas is "
                + targetFileReplication
                + " but found "
                + liveReplicas
                + " replica(s).");
      }
      // verify block placement policy
      BlockPlacementStatus blockPlacementStatus =
          bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
      if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
        res.numMisReplicatedBlocks++;
        misReplicatedPerFile++;
        if (!showFiles) {
          if (underReplicatedPerFile == 0) out.println();
          out.print(path + ": ");
        }
        out.println(
            " Replica placement policy is violated for "
                + block
                + ". "
                + blockPlacementStatus.getErrorDescription());
      }
      report.append(i + ". " + blkName + " len=" + block.getNumBytes());
      if (liveReplicas == 0) {
        report.append(" MISSING!");
        res.addMissing(block.toString(), block.getNumBytes());
        missing++;
        missize += block.getNumBytes();
      } else {
        report.append(" repl=" + liveReplicas);
        if (showLocations || showRacks) {
          StringBuilder sb = new StringBuilder("[");
          for (int j = 0; j < locs.length; j++) {
            if (j > 0) {
              sb.append(", ");
            }
            if (showRacks) sb.append(NodeBase.getPath(locs[j]));
            else sb.append(locs[j]);
          }
          sb.append(']');
          report.append(" " + sb.toString());
        }
      }
      report.append('\n');
      i++;
    }
    if ((missing > 0) || (corrupt > 0)) {
      if (!showFiles && (missing > 0)) {
        out.print(
            "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
      }
      res.corruptFiles++;
      if (isOpen) {
        LOG.info("Fsck: ignoring open file " + path);
      } else {
        if (doMove) copyBlocksToLostFound(parent, file, blocks);
        if (doDelete) deleteCorruptedFile(path);
      }
    }
    if (showFiles) {
      if (missing > 0) {
        out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
      } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
        out.print(" OK\n");
      }
      if (showBlocks) {
        out.print(report.toString() + "\n");
      }
    }
  }
  /**
   * The following test first creates a file with a few blocks. It randomly truncates the replica
   * of the last block stored on each datanode. Finally, it triggers block synchronization to
   * synchronize all stored blocks.
   */
  public void testBlockSynchronization() throws Exception {
    final int ORG_FILE_SIZE = 3000;
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    MiniDFSCluster cluster = null;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(5).build();
      cluster.waitActive();

      // create a file
      DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
      String filestr = "/foo";
      Path filepath = new Path(filestr);
      DFSTestUtil.createFile(dfs, filepath, ORG_FILE_SIZE, REPLICATION_NUM, 0L);
      assertTrue(dfs.exists(filepath));
      DFSTestUtil.waitReplication(dfs, filepath, REPLICATION_NUM);

      // get block info for the last block
      LocatedBlock locatedblock =
          TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr);
      DatanodeInfo[] datanodeinfos = locatedblock.getLocations();
      assertEquals(REPLICATION_NUM, datanodeinfos.length);

      // connect to data nodes
      DataNode[] datanodes = new DataNode[REPLICATION_NUM];
      for (int i = 0; i < REPLICATION_NUM; i++) {
        datanodes[i] = cluster.getDataNode(datanodeinfos[i].getIpcPort());
        assertTrue(datanodes[i] != null);
      }

      // verify Block Info
      ExtendedBlock lastblock = locatedblock.getBlock();
      DataNode.LOG.info("newblocks=" + lastblock);
      for (int i = 0; i < REPLICATION_NUM; i++) {
        checkMetaInfo(lastblock, datanodes[i]);
      }

      DataNode.LOG.info("dfs.dfs.clientName=" + dfs.dfs.clientName);
      cluster.getNameNodeRpc().append(filestr, dfs.dfs.clientName);

      // expire lease to trigger block recovery.
      waitLeaseRecovery(cluster);

      Block[] updatedmetainfo = new Block[REPLICATION_NUM];
      long oldSize = lastblock.getNumBytes();
      lastblock =
          TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr).getBlock();
      long currentGS = lastblock.getGenerationStamp();
      for (int i = 0; i < REPLICATION_NUM; i++) {
        updatedmetainfo[i] =
            DataNodeTestUtils.getFSDataset(datanodes[i])
                .getStoredBlock(lastblock.getBlockPoolId(), lastblock.getBlockId());
        assertEquals(lastblock.getBlockId(), updatedmetainfo[i].getBlockId());
        assertEquals(oldSize, updatedmetainfo[i].getNumBytes());
        assertEquals(currentGS, updatedmetainfo[i].getGenerationStamp());
      }

      // verify that lease recovery does not occur when namenode is in safemode
      System.out.println("Testing that lease recovery cannot happen during safemode.");
      filestr = "/foo.safemode";
      filepath = new Path(filestr);
      dfs.create(filepath, (short) 1);
      cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
      assertTrue(dfs.dfs.exists(filestr));
      DFSTestUtil.waitReplication(dfs, filepath, (short) 1);
      waitLeaseRecovery(cluster);
      // verify that we still cannot recover the lease
      LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem());
      assertTrue("Found " + lm.countLease() + " lease, expected 1", lm.countLease() == 1);
      cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
  /**
   * Test the case where a replica is reported corrupt while it is not in blocksMap. Make sure that
   * an ArrayIndexOutOfBoundsException is not thrown. See HADOOP-4351.
   *
   * <p>TODO HOPS: This test fails as it tries to remove a non-existing replica. Calling
   * findAndMarkBlockAsCorrupt from a DataNode that does not store any replica for this specific
   * block leads to a "tuple did not exist" exception, because BlockManager.removeStoredBlock is
   * called with a node that does not store a replica, so the delete cannot succeed during commit.
   */
  @Test
  public void testArrayOutOfBoundsException() throws Exception {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new HdfsConfiguration();
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      cluster.waitActive();

      FileSystem fs = cluster.getFileSystem();
      final Path FILE_PATH = new Path("/tmp.txt");
      final long FILE_LEN = 1L;
      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 2, 1L);

      // get the block
      final String bpid = cluster.getNamesystem().getBlockPoolId();
      File storageDir = cluster.getInstanceStorageDir(0, 0);
      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      assertTrue("Data directory does not exist", dataDir.exists());
      ExtendedBlock blk = getBlock(bpid, dataDir);
      if (blk == null) {
        storageDir = cluster.getInstanceStorageDir(0, 1);
        dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        blk = getBlock(bpid, dataDir);
      }
      assertFalse(
          "Data directory does not contain any blocks or there was an IO error", blk == null);

      // start a third datanode
      cluster.startDataNodes(conf, 1, true, null, null);
      ArrayList<DataNode> datanodes = cluster.getDataNodes();
      assertEquals(3, datanodes.size());
      DataNode dataNode = datanodes.get(2);

      // report corrupted block by the third datanode
      DatanodeRegistration dnR =
          DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId());

      // Get the storage id of one of the storages on the datanode
      String storageId =
          cluster
              .getNamesystem()
              .getBlockManager()
              .getDatanodeManager()
              .getDatanode(dataNode.getDatanodeId())
              .getStorageInfos()[0]
              .getStorageID();

      cluster
          .getNamesystem()
          .getBlockManager()
          .findAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), storageId, "some test reason");

      // open the file
      fs.open(FILE_PATH);

      // clean up
      fs.delete(FILE_PATH, false);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }