@Test
  public void testDnFencing() throws Exception {
    // Create a file with replication level 3.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_FILE_PATH);

    // Drop its replication count to 1, so it becomes over-replicated.
    // Then compute the invalidation of the extra blocks and trigger
    // heartbeats so the invalidations are flushed to the DNs.
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
    cluster.triggerHeartbeats();

    // Transition nn2 to active even though nn1 still thinks it's active.
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);
    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("NN2 Metadata immediately after failover");
    doMetasave(nn2);

    // Even though NN2 considers the blocks over-replicated, it should
    // postpone the block invalidation because the DNs are still "stale".
    assertEquals(30, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The blocks should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);

    banner("Waiting for the actual block files to get deleted from DNs.");
    waitForTrueReplication(cluster, block, 1);
  }
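  /**
   * Choose the datanode to which a WebHDFS request for the given path should
   * be redirected. For CREATE, prefer a node close to the client as chosen by
   * the block placement policy; for OPEN, GETFILECHECKSUM and APPEND, prefer
   * a node that holds a replica of the relevant block. Fall back to a random
   * node from the network topology.
   */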
  static DatanodeInfo chooseDatanode(
      final NameNode namenode,
      final String path,
      final HttpOpParam.Op op,
      final long openOffset,
      final long blocksize,
      Configuration conf)
      throws IOException {
    final BlockManager bm = namenode.getNamesystem().getBlockManager();

    if (op == PutOpParam.Op.CREATE) {
      // choose a datanode near the client
      final DatanodeDescriptor clientNode =
          bm.getDatanodeManager().getDatanodeByHost(getRemoteAddress());
      if (clientNode != null) {
        final DatanodeDescriptor[] datanodes =
            bm.getBlockPlacementPolicy().chooseTarget(path, 1, clientNode, null, blocksize);
        if (datanodes.length > 0) {
          return datanodes[0];
        }
      }
    } else if (op == GetOpParam.Op.OPEN
        || op == GetOpParam.Op.GETFILECHECKSUM
        || op == PostOpParam.Op.APPEND) {
      // choose a datanode containing a replica
      final NamenodeProtocols np = namenode.getRpcServer();
      final HdfsFileStatus status = np.getFileInfo(path);
      if (status == null) {
        throw new FileNotFoundException("File " + path + " not found.");
      }
      final long len = status.getLen();
      if (op == GetOpParam.Op.OPEN) {
        if (openOffset < 0L || (openOffset >= len && len > 0)) {
          throw new IOException(
              "Offset="
                  + openOffset
                  + " out of the range [0, "
                  + len
                  + "); "
                  + op
                  + ", path="
                  + path);
        }
      }

      if (len > 0) {
        final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
        final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
        final int count = locations.locatedBlockCount();
        if (count > 0) {
          return JspHelper.bestNode(locations.get(0).getLocations(), false, conf);
        }
      }
    }

    return (DatanodeDescriptor)
        bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT);
  }
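 /**
  * Check whether the given path exists in HDFS by looking up its file info on
  * the NameNode; a null status or a FileNotFoundException means it does not.
  */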
 boolean hdfsPathExists(String path)
     throws AccessControlException, UnresolvedLinkException, IOException {
   try {
     HdfsFileStatus hfs = namenode.getRpcServer().getFileInfo(path);
     return (hfs != null);
   } catch (FileNotFoundException e) {
     return false;
   }
 }
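 /**
  * Delete a corrupted file (recursively) via the NameNode RPC server; any
  * failure is logged and recorded as an internal fsck error.
  */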
 private void deleteCorruptedFile(String path) {
   try {
     namenode.getRpcServer().delete(path, true);
     LOG.info("Fsck: deleted corrupt file " + path);
   } catch (Exception e) {
     LOG.error("Fsck: error deleting corrupted file " + path, e);
     internalError = true;
   }
 }
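  /**
   * Handle the WebHDFS DELETE operation: delete the given path (optionally
   * recursive) through the NameNode and return the boolean result as JSON.
   */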
  private Response delete(
      final UserGroupInformation ugi,
      final DelegationParam delegation,
      final UserParam username,
      final DoAsParam doAsUser,
      final String fullpath,
      final DeleteOpParam op,
      final RecursiveParam recursive)
      throws IOException {
    final NameNode namenode = (NameNode) context.getAttribute("name.node");

    switch (op.getValue()) {
      case DELETE:
        {
          final boolean b = namenode.getRpcServer().delete(fullpath, recursive.getValue());
          final String js = JsonUtil.toJsonString("boolean", b);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      default:
        throw new UnsupportedOperationException(op + " is not supported");
    }
  }
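  /**
   * Handle WebHDFS POST operations: APPEND is answered with a temporary
   * redirect to a chosen datanode, while CONCAT is executed directly on the
   * NameNode.
   */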
  private Response post(
      final UserGroupInformation ugi,
      final DelegationParam delegation,
      final UserParam username,
      final DoAsParam doAsUser,
      final String fullpath,
      final PostOpParam op,
      final ConcatSourcesParam concatSrcs,
      final BufferSizeParam bufferSize)
      throws IOException, URISyntaxException {
    final NameNode namenode = (NameNode) context.getAttribute("name.node");

    switch (op.getValue()) {
      case APPEND:
        {
          final URI uri =
              redirectURI(
                  namenode,
                  ugi,
                  delegation,
                  username,
                  doAsUser,
                  fullpath,
                  op.getValue(),
                  -1L,
                  -1L,
                  bufferSize);
          return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case CONCAT:
        {
          namenode.getRpcServer().concat(fullpath, concatSrcs.getAbsolutePaths());
          return Response.ok().build();
        }
      default:
        throw new UnsupportedOperationException(op + " is not supported");
    }
  }
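  /**
   * Handle WebHDFS GET operations. OPEN and GETFILECHECKSUM are redirected to
   * a datanode; GET_BLOCK_LOCATIONS, GETFILESTATUS, LISTSTATUS,
   * GETCONTENTSUMMARY, GETDELEGATIONTOKEN and GETHOMEDIRECTORY are answered
   * directly from the NameNode as JSON.
   */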
  private Response get(
      final UserGroupInformation ugi,
      final DelegationParam delegation,
      final UserParam username,
      final DoAsParam doAsUser,
      final String fullpath,
      final GetOpParam op,
      final OffsetParam offset,
      final LengthParam length,
      final RenewerParam renewer,
      final BufferSizeParam bufferSize)
      throws IOException, URISyntaxException {
    final NameNode namenode = (NameNode) context.getAttribute("name.node");
    final NamenodeProtocols np = namenode.getRpcServer();

    switch (op.getValue()) {
      case OPEN:
        {
          final URI uri =
              redirectURI(
                  namenode,
                  ugi,
                  delegation,
                  username,
                  doAsUser,
                  fullpath,
                  op.getValue(),
                  offset.getValue(),
                  -1L,
                  offset,
                  length,
                  bufferSize);
          return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case GET_BLOCK_LOCATIONS:
        {
          final long offsetValue = offset.getValue();
          final Long lengthValue = length.getValue();
          final LocatedBlocks locatedblocks =
              np.getBlockLocations(
                  fullpath, offsetValue, lengthValue != null ? lengthValue : Long.MAX_VALUE);
          final String js = JsonUtil.toJsonString(locatedblocks);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case GETFILESTATUS:
        {
          final HdfsFileStatus status = np.getFileInfo(fullpath);
          if (status == null) {
            throw new FileNotFoundException("File does not exist: " + fullpath);
          }

          final String js = JsonUtil.toJsonString(status, true);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case LISTSTATUS:
        {
          final StreamingOutput streaming = getListingStream(np, fullpath);
          return Response.ok(streaming).type(MediaType.APPLICATION_JSON).build();
        }
      case GETCONTENTSUMMARY:
        {
          final ContentSummary contentsummary = np.getContentSummary(fullpath);
          final String js = JsonUtil.toJsonString(contentsummary);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case GETFILECHECKSUM:
        {
          final URI uri =
              redirectURI(
                  namenode, ugi, delegation, username, doAsUser, fullpath, op.getValue(), -1L, -1L);
          return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case GETDELEGATIONTOKEN:
        {
          if (delegation.getValue() != null) {
            throw new IllegalArgumentException(delegation.getName() + " parameter is not null.");
          }
          final Token<? extends TokenIdentifier> token =
              generateDelegationToken(namenode, ugi, renewer.getValue());
          final String js = JsonUtil.toJsonString(token);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case GETHOMEDIRECTORY:
        {
          final String js =
              JsonUtil.toJsonString(
                  org.apache.hadoop.fs.Path.class.getSimpleName(),
                  WebHdfsFileSystem.getHomeDirectoryString(ugi));
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      default:
        throw new UnsupportedOperationException(op + " is not supported");
    }
  }
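  /**
   * Handle WebHDFS PUT operations. CREATE is redirected to a chosen datanode;
   * the metadata operations (MKDIRS, CREATESYMLINK, RENAME, SETREPLICATION,
   * SETOWNER, SETPERMISSION, SETTIMES, RENEWDELEGATIONTOKEN and
   * CANCELDELEGATIONTOKEN) are executed directly against the NameNode.
   */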
  private Response put(
      final UserGroupInformation ugi,
      final DelegationParam delegation,
      final UserParam username,
      final DoAsParam doAsUser,
      final String fullpath,
      final PutOpParam op,
      final DestinationParam destination,
      final OwnerParam owner,
      final GroupParam group,
      final PermissionParam permission,
      final OverwriteParam overwrite,
      final BufferSizeParam bufferSize,
      final ReplicationParam replication,
      final BlockSizeParam blockSize,
      final ModificationTimeParam modificationTime,
      final AccessTimeParam accessTime,
      final RenameOptionSetParam renameOptions,
      final CreateParentParam createParent,
      final TokenArgumentParam delegationTokenArgument)
      throws IOException, URISyntaxException {

    final Configuration conf = (Configuration) context.getAttribute(JspHelper.CURRENT_CONF);
    final NameNode namenode = (NameNode) context.getAttribute("name.node");
    final NamenodeProtocols np = namenode.getRpcServer();

    switch (op.getValue()) {
      case CREATE:
        {
          final URI uri =
              redirectURI(
                  namenode,
                  ugi,
                  delegation,
                  username,
                  doAsUser,
                  fullpath,
                  op.getValue(),
                  -1L,
                  blockSize.getValue(conf),
                  permission,
                  overwrite,
                  bufferSize,
                  replication,
                  blockSize);
          return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case MKDIRS:
        {
          final boolean b = np.mkdirs(fullpath, permission.getFsPermission(), true);
          final String js = JsonUtil.toJsonString("boolean", b);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case CREATESYMLINK:
        {
          np.createSymlink(
              destination.getValue(),
              fullpath,
              PermissionParam.getDefaultFsPermission(),
              createParent.getValue());
          return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case RENAME:
        {
          final EnumSet<Options.Rename> s = renameOptions.getValue();
          if (s.isEmpty()) {
            final boolean b = np.rename(fullpath, destination.getValue());
            final String js = JsonUtil.toJsonString("boolean", b);
            return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
          } else {
            np.rename2(fullpath, destination.getValue(), s.toArray(new Options.Rename[s.size()]));
            return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
          }
        }
      case SETREPLICATION:
        {
          final boolean b = np.setReplication(fullpath, replication.getValue(conf));
          final String js = JsonUtil.toJsonString("boolean", b);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case SETOWNER:
        {
          if (owner.getValue() == null && group.getValue() == null) {
            throw new IllegalArgumentException("Both owner and group are empty.");
          }

          np.setOwner(fullpath, owner.getValue(), group.getValue());
          return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case SETPERMISSION:
        {
          np.setPermission(fullpath, permission.getFsPermission());
          return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case SETTIMES:
        {
          np.setTimes(fullpath, modificationTime.getValue(), accessTime.getValue());
          return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      case RENEWDELEGATIONTOKEN:
        {
          final Token<DelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
          token.decodeFromUrlString(delegationTokenArgument.getValue());
          final long expiryTime = np.renewDelegationToken(token);
          final String js = JsonUtil.toJsonString("long", expiryTime);
          return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
        }
      case CANCELDELEGATIONTOKEN:
        {
          final Token<DelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
          token.decodeFromUrlString(delegationTokenArgument.getValue());
          np.cancelDelegationToken(token);
          return Response.ok().type(MediaType.APPLICATION_OCTET_STREAM).build();
        }
      default:
        throw new UnsupportedOperationException(op + " is not supported");
    }
  }
  /**
   * Test case that reduces replication of a file with a lot of blocks and then fails over right
   * after those blocks enter the DN invalidation queues on the active. Ensures that fencing is
   * correct and no replicas are lost.
   */
  @Test
  public void testNNClearsCommandsOnFailoverWithReplChanges() throws Exception {
    // Make lots of blocks to increase chances of triggering a bug.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 1, 1L);

    banner("rolling NN1's edit log, forcing catch-up");
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    // Get some new replicas reported so that NN2 now considers
    // them over-replicated and schedules some more deletions
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 2);
    while (BlockManagerTestUtil.getComputedDatanodeWork(nn1.getNamesystem().getBlockManager())
        > 0) {
      LOG.info("Getting more replication work computed");
    }
    BlockManager bm1 = nn1.getNamesystem().getBlockManager();
    while (bm1.getPendingReplicationBlocksCount() > 0) {
      BlockManagerTestUtil.updateState(bm1);
      cluster.triggerHeartbeats();
      Thread.sleep(1000);
    }

    banner("triggering BRs");
    cluster.triggerBlockReports();

    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);

    banner("computing invalidation on nn1");

    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
    doMetasave(nn1);

    banner("computing invalidation on nn2");
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    doMetasave(nn2);

    // Dump some info for debugging purposes.
    banner("Metadata immediately before failover");
    doMetasave(nn2);

    // Transition nn2 to active even though nn1 still thinks it's active
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);

    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("Metadata immediately after failover");
    doMetasave(nn2);

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The blocks should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    HATestUtil.waitForNNToIssueDeletions(nn2);
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
  }
  /**
   * Test case which restarts the standby node in such a way that, when it exits safemode, it will
   * want to invalidate a bunch of over-replicated block replicas. Ensures that if we failover at
   * this point it won't lose data.
   */
  @Test
  public void testNNClearsCommandsOnFailoverAfterStartup() throws Exception {
    // Make lots of blocks to increase chances of triggering a bug.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);

    banner("Shutting down NN2");
    cluster.shutdownNameNode(1);

    banner("Setting replication to 1, rolling edit log.");
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
    nn1.getRpcServer().rollEditLog();

    // Start NN2 again. When it starts up, it will see all of the
    // blocks as over-replicated, since it has the metadata for
    // replication=1, but the DNs haven't yet processed the deletions.
    banner("Starting NN2 again.");
    cluster.restartNameNode(1);
    nn2 = cluster.getNameNode(1);

    banner("triggering BRs");
    cluster.triggerBlockReports();

    // We expect that both NN1 and NN2 will have some number of
    // deletions queued up for the DNs.
    banner("computing invalidation on nn1");
    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());

    banner("computing invalidation on nn2");
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    // Dump some info for debugging purposes.
    banner("Metadata immediately before failover");
    doMetasave(nn2);

    // Transition nn2 to active even though nn1 still thinks it's active
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);

    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("Metadata immediately after failover");
    doMetasave(nn2);

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The blocks should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    HATestUtil.waitForNNToIssueDeletions(nn2);
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
  }
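  /**
   * Exercise block access tokens end to end: read the test file through
   * several input streams, let the short-lived tokens expire, and verify that
   * reads fail with expired or mismatched tokens but succeed again once new
   * tokens are fetched, across datanode and namenode restarts.
   */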
  protected void doTestRead(Configuration conf, MiniDFSCluster cluster, boolean isStriped)
      throws Exception {
    final int numDataNodes = cluster.getDataNodes().size();
    final NameNode nn = cluster.getNameNode();
    final NamenodeProtocols nnProto = nn.getRpcServer();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second) initially
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);

    Path fileToRead = new Path(FILE_TO_READ);
    FileSystem fs = cluster.getFileSystem();
    byte[] expected = generateBytes(FILE_SIZE);
    createFile(fs, fileToRead, expected);

    /*
     * setup for testing expiration handling of cached tokens
     */

    // read using blockSeekTo(). Acquired tokens are cached in in1
    FSDataInputStream in1 = fs.open(fileToRead);
    assertTrue(checkFile1(in1, expected));
    // read using blockSeekTo(). Acquired tokens are cached in in2
    FSDataInputStream in2 = fs.open(fileToRead);
    assertTrue(checkFile1(in2, expected));
    // read using fetchBlockByteRange(). Acquired tokens are cached in in3
    FSDataInputStream in3 = fs.open(fileToRead);
    assertTrue(checkFile2(in3, expected));

    /*
     * testing READ interface on DN using a BlockReader
     */
    // Construct a DFSClient against the NameNode and close it right away; the
    // block locations below are fetched directly through the NN RPC proxy.
    DFSClient client = null;
    try {
      client = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
    } finally {
      if (client != null) client.close();
    }
    List<LocatedBlock> locatedBlocks =
        nnProto.getBlockLocations(FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
    LocatedBlock lblock = locatedBlocks.get(0); // first block
    // verify token is not expired
    assertFalse(isBlockTokenExpired(lblock));
    // read with valid token, should succeed
    tryRead(conf, lblock, true);

    /*
     * wait till the token for the first block (and hence the tokens cached in
     * in1, in2 and in3, which were issued with the same 1-second lifetime)
     * expires
     */

    while (!isBlockTokenExpired(lblock)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    /*
     * continue testing READ interface on DN using a BlockReader
     */

    // verify token is expired
    assertTrue(isBlockTokenExpired(lblock));
    // read should fail
    tryRead(conf, lblock, false);
    // use a valid new token
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    // read should succeed
    tryRead(conf, lblock, true);
    // use a token with wrong blockID
    long rightId = lblock.getBlock().getBlockId();
    long wrongId = rightId + 1;
    lblock.getBlock().setBlockId(wrongId);
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    lblock.getBlock().setBlockId(rightId);
    // read should fail
    tryRead(conf, lblock, false);
    // use a token with wrong access modes
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.WRITE);
    // read should fail
    tryRead(conf, lblock, false);

    // set a long token lifetime for future tokens
    SecurityTestUtil.setBlockTokenLifetime(sm, 600 * 1000L);

    /*
     * testing that when cached tokens are expired, DFSClient will re-fetch
     * tokens transparently for READ.
     */

    // confirm all tokens cached in in1 are expired by now
    List<LocatedBlock> lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm all tokens cached in in2 are expired by now
    List<LocatedBlock> lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently (testing
    // via another interface method)
    if (isStriped) {
      // striped block doesn't support seekToNewSource
      in2.seek(0);
    } else {
      assertTrue(in2.seekToNewSource(0));
    }
    assertTrue(checkFile1(in2, expected));

    // confirm all tokens cached in in3 are expired by now
    List<LocatedBlock> lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after datanodes are restarted on the same ports, cached
     * tokens should still work and there is no need to fetch new tokens from
     * namenode. This test should run while namenode is down (to make sure no
     * new tokens can be fetched from namenode).
     */

    // restart datanodes on the same ports that they currently use
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    cluster.shutdownNameNode(0);

    // confirm tokens cached in in1 are still valid
    lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm tokens cached in in2 are still valid
    lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertFalse(isBlockTokenExpired(blk));
    }

    // verify blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // confirm tokens cached in in3 are still valid
    lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when namenode is restarted, cached tokens should still
     * work and there is no need to fetch new tokens from namenode. Like the
     * previous test, this test should also run while namenode is down. The
     * setup for this test depends on the previous test.
     */

    // restart the namenode and then shut it down for test
    cluster.restartNameNode(0);
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify again blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after both namenode and datanodes got restarted (namenode
     * first, followed by datanodes), DFSClient can't access DN without
     * re-fetching tokens and is able to re-fetch tokens transparently. The
     * setup of this test depends on the previous test.
     */

    // restore the cluster and restart the datanodes for test
    cluster.restartNameNode(0);
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    // shutdown namenode so that DFSClient can't get new tokens from namenode
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() fails (cached tokens become invalid)
    in1.seek(0);
    assertFalse(checkFile1(in1, expected));
    // verify fetchBlockByteRange() fails (cached tokens become invalid)
    assertFalse(checkFile2(in3, expected));

    // restart the namenode to allow DFSClient to re-fetch tokens
    cluster.restartNameNode(0);
    // verify blockSeekTo() works again (by transparently re-fetching
    // tokens from namenode)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() works again (by transparently
    // re-fetching tokens from namenode)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when datanodes are restarted on different ports, DFSClient
     * is able to re-fetch tokens transparently to connect to them
     */

    // restart datanodes on newly assigned ports
    assertTrue(cluster.restartDataNodes(false));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify blockSeekTo() is able to re-fetch token transparently
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));
  }
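  /**
   * Copy whatever block replicas of a corrupted file are still reachable into
   * lost+found, writing each contiguous run of available blocks as a separate
   * numbered "chain" file under the target directory.
   */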
  private void copyBlocksToLostFound(String parent, HdfsFileStatus file, LocatedBlocks blocks)
      throws IOException {
    final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
    final String fullName = file.getFullName(parent);
    OutputStream fos = null;
    try {
      if (!lfInited) {
        lostFoundInit(dfs);
      }
      if (!lfInitedOk) {
        throw new IOException("failed to initialize lost+found");
      }
      String target = lostFound + fullName;
      if (hdfsPathExists(target)) {
        LOG.warn(
            "Fsck: can't copy the remains of "
                + fullName
                + " to "
                + "lost+found, because "
                + target
                + " already exists.");
        return;
      }
      if (!namenode.getRpcServer().mkdirs(target, file.getPermission(), true)) {
        throw new IOException("failed to create directory " + target);
      }
      // create chains
      int chain = 0;
      boolean copyError = false;
      for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
        LocatedBlock lblock = lBlk;
        DatanodeInfo[] locs = lblock.getLocations();
        if (locs == null || locs.length == 0) {
          if (fos != null) {
            fos.flush();
            fos.close();
            fos = null;
          }
          continue;
        }
        if (fos == null) {
          fos = dfs.create(target + "/" + chain, true);
          if (fos == null) {
            throw new IOException(
                "Failed to copy " + fullName + " to /lost+found: could not store chain " + chain);
          }
          chain++;
        }

        // copy the block. It's a pity it's not abstracted from DFSInputStream ...
        try {
          copyBlock(dfs, lblock, fos);
        } catch (Exception e) {
          LOG.error("Fsck: could not copy block " + lblock.getBlock() + " to " + target, e);
          fos.flush();
          fos.close();
          fos = null;
          internalError = true;
          copyError = true;
        }
      }
      if (copyError) {
        LOG.warn(
            "Fsck: there were errors copying the remains of the "
                + "corrupted file "
                + fullName
                + " to /lost+found");
      } else {
        LOG.info("Fsck: copied the remains of the corrupted file " + fullName + " to /lost+found");
      }
    } catch (Exception e) {
      LOG.error("copyBlocksToLostFound: error processing " + fullName, e);
      internalError = true;
    } finally {
      if (fos != null) fos.close();
      dfs.close();
    }
  }
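  /**
   * Recursively check the file or directory described by the given status:
   * descend into directories (including the .snapshot directory of
   * snapshottable paths), tally replication, corruption and placement
   * statistics into the Result, and optionally move or delete corrupted
   * files.
   */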
  @VisibleForTesting
  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
    String path = file.getFullName(parent);
    boolean isOpen = false;

    if (file.isDir()) {
      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
        String snapshotPath =
            (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
                + HdfsConstants.DOT_SNAPSHOT_DIR;
        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
        check(snapshotPath, snapshotFileInfo, res);
      }
      byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
      DirectoryListing thisListing;
      if (showFiles) {
        out.println(path + " <dir>");
      }
      res.totalDirs++;
      do {
        assert lastReturnedName != null;
        thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
        if (thisListing == null) {
          return;
        }
        HdfsFileStatus[] files = thisListing.getPartialListing();
        for (int i = 0; i < files.length; i++) {
          check(path, files[i], res);
        }
        lastReturnedName = thisListing.getLastName();
      } while (thisListing.hasMore());
      return;
    }
    if (file.isSymlink()) {
      if (showFiles) {
        out.println(path + " <symlink>");
      }
      res.totalSymlinks++;
      return;
    }
    long fileLen = file.getLen();
    // Get block locations without updating the file access time
    // and without block access tokens
    LocatedBlocks blocks;
    try {
      blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
    } catch (FileNotFoundException fnfe) {
      blocks = null;
    }
    if (blocks == null) { // the file is deleted
      return;
    }
    isOpen = blocks.isUnderConstruction();
    if (isOpen && !showOpenFiles) {
      // We collect these stats about open files to report with default options
      res.totalOpenFilesSize += fileLen;
      res.totalOpenFilesBlocks += blocks.locatedBlockCount();
      res.totalOpenFiles++;
      return;
    }
    res.totalFiles++;
    res.totalSize += fileLen;
    res.totalBlocks += blocks.locatedBlockCount();
    if (showOpenFiles && isOpen) {
      out.print(
          path
              + " "
              + fileLen
              + " bytes, "
              + blocks.locatedBlockCount()
              + " block(s), OPENFORWRITE: ");
    } else if (showFiles) {
      out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
    } else {
      out.print('.');
    }
    if (res.totalFiles % 100 == 0) {
      out.println();
      out.flush();
    }
    int missing = 0;
    int corrupt = 0;
    long missize = 0;
    int underReplicatedPerFile = 0;
    int misReplicatedPerFile = 0;
    StringBuilder report = new StringBuilder();
    int i = 0;
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
      ExtendedBlock block = lBlk.getBlock();
      boolean isCorrupt = lBlk.isCorrupt();
      String blkName = block.toString();
      DatanodeInfo[] locs = lBlk.getLocations();
      NumberReplicas numberReplicas =
          namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
      int liveReplicas = numberReplicas.liveReplicas();
      res.totalReplicas += liveReplicas;
      short targetFileReplication = file.getReplication();
      res.numExpectedReplicas += targetFileReplication;
      if (liveReplicas > targetFileReplication) {
        res.excessiveReplicas += (liveReplicas - targetFileReplication);
        res.numOverReplicatedBlocks += 1;
      }
      // Check if block is Corrupt
      if (isCorrupt) {
        corrupt++;
        res.corruptBlocks++;
        out.print(
            "\n"
                + path
                + ": CORRUPT blockpool "
                + block.getBlockPoolId()
                + " block "
                + block.getBlockName()
                + "\n");
      }
      if (liveReplicas >= minReplication) res.numMinReplicatedBlocks++;
      if (liveReplicas < targetFileReplication && liveReplicas > 0) {
        res.missingReplicas += (targetFileReplication - liveReplicas);
        res.numUnderReplicatedBlocks += 1;
        underReplicatedPerFile++;
        if (!showFiles) {
          out.print("\n" + path + ": ");
        }
        out.println(
            " Under replicated "
                + block
                + ". Target Replicas is "
                + targetFileReplication
                + " but found "
                + liveReplicas
                + " replica(s).");
      }
      // verify block placement policy
      BlockPlacementStatus blockPlacementStatus =
          bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
      if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
        res.numMisReplicatedBlocks++;
        misReplicatedPerFile++;
        if (!showFiles) {
          if (underReplicatedPerFile == 0) out.println();
          out.print(path + ": ");
        }
        out.println(
            " Replica placement policy is violated for "
                + block
                + ". "
                + blockPlacementStatus.getErrorDescription());
      }
      report.append(i + ". " + blkName + " len=" + block.getNumBytes());
      if (liveReplicas == 0) {
        report.append(" MISSING!");
        res.addMissing(block.toString(), block.getNumBytes());
        missing++;
        missize += block.getNumBytes();
      } else {
        report.append(" repl=" + liveReplicas);
        if (showLocations || showRacks) {
          StringBuilder sb = new StringBuilder("[");
          for (int j = 0; j < locs.length; j++) {
            if (j > 0) {
              sb.append(", ");
            }
            if (showRacks) sb.append(NodeBase.getPath(locs[j]));
            else sb.append(locs[j]);
          }
          sb.append(']');
          report.append(" " + sb.toString());
        }
      }
      report.append('\n');
      i++;
    }
    if ((missing > 0) || (corrupt > 0)) {
      if (!showFiles && (missing > 0)) {
        out.print(
            "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
      }
      res.corruptFiles++;
      if (isOpen) {
        LOG.info("Fsck: ignoring open file " + path);
      } else {
        if (doMove) copyBlocksToLostFound(parent, file, blocks);
        if (doDelete) deleteCorruptedFile(path);
      }
    }
    if (showFiles) {
      if (missing > 0) {
        out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
      } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
        out.print(" OK\n");
      }
      if (showBlocks) {
        out.print(report.toString() + "\n");
      }
    }
  }
  /** Check files on DFS, starting from the indicated path. */
  public void fsck() {
    final long startTime = Time.now();
    try {
      String msg =
          "FSCK started by "
              + UserGroupInformation.getCurrentUser()
              + " from "
              + remoteAddress
              + " for path "
              + path
              + " at "
              + new Date();
      LOG.info(msg);
      out.println(msg);
      namenode.getNamesystem().logFsckEvent(path, remoteAddress);

      if (snapshottableDirs != null) {
        SnapshottableDirectoryStatus[] snapshotDirs =
            namenode.getRpcServer().getSnapshottableDirListing();
        if (snapshotDirs != null) {
          for (SnapshottableDirectoryStatus dir : snapshotDirs) {
            snapshottableDirs.add(dir.getFullPath().toString());
          }
        }
      }

      final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
      if (file != null) {

        if (showCorruptFileBlocks) {
          listCorruptFileBlocks();
          return;
        }

        Result res = new Result(conf);

        check(path, file, res);

        out.println(res);
        out.println(" Number of data-nodes:\t\t" + totalDatanodes);
        out.println(" Number of racks:\t\t" + networktopology.getNumOfRacks());

        out.println(
            "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));

        // If there were internal errors during the fsck operation, we want to
        // return FAILURE_STATUS, even if those errors were not immediately
        // fatal.  Otherwise many unit tests will pass even when there are bugs.
        if (internalError) {
          throw new IOException("fsck encountered internal errors!");
        }

        // DFSck client scans for the string HEALTHY/CORRUPT to check the status
        // of file system and return appropriate code. Changing the output
        // string might break testcases. Also note this must be the last line
        // of the report.
        if (res.isHealthy()) {
          out.print("\n\nThe filesystem under path '" + path + "' " + HEALTHY_STATUS);
        } else {
          out.print("\n\nThe filesystem under path '" + path + "' " + CORRUPT_STATUS);
        }

      } else {
        out.print("\n\nPath '" + path + "' " + NONEXISTENT_STATUS);
      }
    } catch (Exception e) {
      String errMsg = "Fsck on path '" + path + "' " + FAILURE_STATUS;
      LOG.warn(errMsg, e);
      out.println(
          "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));
      out.println(e.getMessage());
      out.print("\n\n" + errMsg);
    } finally {
      out.close();
    }
  }
 /**
  * Get the internal RPC server instance.
  *
  * @return rpc server
  */
 public static Server getRpcServer(NameNode namenode) {
   return ((NameNodeRpcServer) namenode.getRpcServer()).clientRpcServer;
 }