/**
   * Regression test for HDFS-2742. The issue in this bug was: - DN does a block report while file
   * is open. This BR contains the block in RBW state. - Standby queues the RBW state in
   * PendingDatanodeMessages - Standby processes edit logs during failover. Before fixing this bug,
   * it was mistakenly applying the RBW reported state after the block had been completed, causing
   * the block to get marked corrupt. Instead, we should now be applying the RBW message on OP_ADD,
   * and then the FINALIZED message on OP_CLOSE.
   */
  @Test
  public void testBlockReportsWhileFileBeingWritten() throws Exception {
    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
      AppendTestUtil.write(out, 0, 10);
      out.hflush();

      // Block report will include the RBW replica, but will be
      // queued on the StandbyNode.
      cluster.triggerBlockReports();

    } finally {
      IOUtils.closeStream(out);
    }

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

    DFSTestUtil.readFile(fs, TEST_FILE_PATH);
  }
Exemplo n.º 2
0
  private void checkForCorruptOpenFiles(FileStatus file, List<FileStatus> corruptFiles)
      throws IOException {
    String filePath = file.getPath().toUri().getPath();

    if (file.isDir()) {
      for (FileStatus fileStatus : nn.namesystem.dir.getListing(filePath)) {
        checkForCorruptOpenFiles(fileStatus, corruptFiles);
      }

    } else {
      LeaseManager.Lease lease = nn.getNamesystem().leaseManager.getLeaseByPath(filePath);
      // Condition:
      //  1. lease has expired hard limit
      //  2. the file is open for write
      //  3. the last block has 0 locations
      if (lease != null && lease.expiredHardLimit()) {
        LocatedBlocks blocks = nn.getNamesystem().getBlockLocations(filePath, 0, file.getLen());
        List<LocatedBlock> locatedBlockList = blocks.getLocatedBlocks();
        LocatedBlock lastBlock = locatedBlockList.get(locatedBlockList.size() - 1);

        if (blocks.isUnderConstruction() && lastBlock.getLocations().length == 0) {
          corruptFiles.add(file);
        }
      }
    }
  }
 private void doMetasave(NameNode nn2) {
   nn2.getNamesystem().writeLock();
   try {
     PrintWriter pw = new PrintWriter(System.err);
     nn2.getNamesystem().getBlockManager().metaSave(pw);
     pw.flush();
   } finally {
     nn2.getNamesystem().writeUnlock();
   }
 }
  @Test
  public void testDnFencing() throws Exception {
    // Create a file with replication level 3.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_FILE_PATH);

    // Drop its replication count to 1, so it becomes over-replicated.
    // Then compute the invalidation of the extra blocks and trigger
    // heartbeats so the invalidations are flushed to the DNs.
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
    cluster.triggerHeartbeats();

    // Transition nn2 to active even though nn1 still thinks it's active.
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);
    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("NN2 Metadata immediately after failover");
    doMetasave(nn2);

    // Even though NN2 considers the blocks over-replicated, it should
    // post-pone the block invalidation because the DNs are still "stale".
    assertEquals(30, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The blocks should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);

    banner("Waiting for the actual block files to get deleted from DNs.");
    waitForTrueReplication(cluster, block, 1);
  }
  /**
   * Another regression test for HDFS-2742. This tests the following sequence: - DN does a block
   * report while file is open. This BR contains the block in RBW state. - The block report is
   * delayed in reaching the standby. - The file is closed. - The standby processes the OP_ADD and
   * OP_CLOSE operations before the RBW block report arrives. - The standby should not mark the
   * block as corrupt.
   */
  @Test
  public void testRBWReportArrivesAfterEdits() throws Exception {
    final CountDownLatch brFinished = new CountDownLatch(1);
    DelayAnswer delayer =
        new GenericTestUtils.DelayAnswer(LOG) {
          @Override
          protected Object passThrough(InvocationOnMock invocation) throws Throwable {
            try {
              return super.passThrough(invocation);
            } finally {
              // inform the test that our block report went through.
              brFinished.countDown();
            }
          }
        };

    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
      AppendTestUtil.write(out, 0, 10);
      out.hflush();

      DataNode dn = cluster.getDataNodes().get(0);
      DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.spyOnBposToNN(dn, nn2);

      Mockito.doAnswer(delayer)
          .when(spy)
          .blockReport(
              Mockito.<DatanodeRegistration>anyObject(),
              Mockito.anyString(),
              Mockito.<StorageBlockReport[]>anyObject());
      dn.scheduleAllBlockReport(0);
      delayer.waitForCall();

    } finally {
      IOUtils.closeStream(out);
    }

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    delayer.proceed();
    brFinished.await();

    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

    DFSTestUtil.readFile(fs, TEST_FILE_PATH);
  }
  /**
   * Test that, when a block is re-opened for append, the related datanode messages are correctly
   * queued by the SBN because they have future states and genstamps.
   */
  @Test
  public void testQueueingWithAppend() throws Exception {
    int numQueued = 0;
    int numDN = cluster.getDataNodes().size();

    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
      AppendTestUtil.write(out, 0, 10);
      out.hflush();

      // Opening the file will report RBW replicas, but will be
      // queued on the StandbyNode.
      numQueued += numDN; // RBW messages
    } finally {
      IOUtils.closeStream(out);
      numQueued += numDN; // blockReceived messages
    }

    cluster.triggerBlockReports();
    numQueued += numDN;

    try {
      out = fs.append(TEST_FILE_PATH);
      AppendTestUtil.write(out, 10, 10);
      // RBW replicas once it's opened for append
      numQueued += numDN;

    } finally {
      IOUtils.closeStream(out);
      numQueued += numDN; // blockReceived
    }

    cluster.triggerBlockReports();
    numQueued += numDN;

    assertEquals(
        numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

    AppendTestUtil.check(fs, TEST_FILE_PATH, 20);
  }
Exemplo n.º 7
0
  /**
   * The function will verify the token with NameNode if available and will create a
   * UserGroupInformation.
   *
   * <p>Code in this function is copied from JspHelper.getTokenUGI
   *
   * @param identifier Delegation token identifier
   * @param password Delegation token password
   * @param kind the kind of token
   * @param service the service for this token
   * @param servletContext Jetty servlet context which contains the NN address
   * @throws SecurityException Thrown when authentication fails
   */
  private static void verifyToken(
      byte[] identifier, byte[] password, Text kind, Text service, ServletContext servletContext) {
    try {
      Token<DelegationTokenIdentifier> token =
          new Token<DelegationTokenIdentifier>(identifier, password, kind, service);

      ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
      DataInputStream in = new DataInputStream(buf);
      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
      id.readFields(in);

      final NameNode nn = NameNodeHttpServer.getNameNodeFromContext(servletContext);
      if (nn != null) {
        nn.getNamesystem().verifyToken(id, token.getPassword());
      }

      UserGroupInformation userGroupInformation = id.getUser();
      userGroupInformation.addToken(token);
      LOG.debug(
          "user "
              + userGroupInformation.getUserName()
              + " ("
              + userGroupInformation.getShortUserName()
              + ") authenticated");

      // re-login if necessary
      userGroupInformation.checkTGTAndReloginFromKeytab();
    } catch (IOException e) {
      throw new SecurityException("Failed to verify delegation token " + e, e);
    }
  }
Exemplo n.º 8
0
 static {
   try {
     FileSystem.setDefaultUri(CONF, "hdfs://localhost:0");
     CONF.set("dfs.http.address", "0.0.0.0:0");
     NameNode.format(CONF);
     namenode = new NameNode(CONF);
   } catch (IOException e) {
     e.printStackTrace();
     throw (RuntimeException) new RuntimeException().initCause(e);
   }
   FSNamesystem fsNamesystem = namenode.getNamesystem();
   replicator = fsNamesystem.blockManager.replicator;
   cluster = fsNamesystem.clusterMap;
   // construct network topology
   for (int i = 0; i < NUM_OF_DATANODES; i++) {
     cluster.add(dataNodes[i]);
   }
   for (int i = 0; i < NUM_OF_DATANODES; i++) {
     dataNodes[i].updateHeartbeat(
         2 * FSConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
         0L,
         2 * FSConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
         0);
   }
 }
Exemplo n.º 9
0
 /**
  * @return true if safemode is not running, or if safemode has already initialized the replication
  *     queues
  */
 public static boolean safeModeInitializedReplQueues(NameNode nn) {
   SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
   if (smi == null) {
     return true;
   }
   return smi.initializedReplQueues;
 }
Exemplo n.º 10
0
 /** @return the number of blocks marked safe by safemode, or -1 if safemode is not running. */
 public static int getSafeModeSafeBlocks(NameNode nn) throws IOException {
   SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
   if (smi == null) {
     return -1;
   }
   return smi.blockSafe();
 }
Exemplo n.º 11
0
  private static DatanodeInfo chooseDatanode(
      final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset)
      throws IOException {
    if (op == GetOpParam.Op.OPEN
        || op == GetOpParam.Op.GETFILECHECKSUM
        || op == PostOpParam.Op.APPEND) {
      final HdfsFileStatus status = namenode.getFileInfo(path);
      final long len = status.getLen();
      if (op == GetOpParam.Op.OPEN && (openOffset < 0L || openOffset >= len)) {
        throw new IOException(
            "Offset="
                + openOffset
                + " out of the range [0, "
                + len
                + "); "
                + op
                + ", path="
                + path);
      }

      if (len > 0) {
        final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
        final LocatedBlocks locations = namenode.getBlockLocations(path, offset, 1);
        final int count = locations.locatedBlockCount();
        if (count > 0) {
          return JspHelper.bestNode(locations.get(0));
        }
      }
    }

    return namenode.getNamesystem().getRandomDatanode();
  }
Exemplo n.º 12
0
  private void listCorruptOpenFiles() throws IOException {
    int matchedCorruptFilesCount = 0;
    // directory representation of path
    String pathdir = path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR;
    FileStatus pathFileStatus = nn.getNamesystem().getFileInfo(pathdir);
    List<FileStatus> corruptFileStatusList = new ArrayList<FileStatus>();
    checkForCorruptOpenFiles(pathFileStatus, corruptFileStatusList);

    for (FileStatus fileStatus : corruptFileStatusList) {
      String currentPath = fileStatus.getPath().toString();
      if (currentPath.startsWith(pathdir) || currentPath.equals(path)) {
        matchedCorruptFilesCount++;

        // print the header before listing first item
        if (matchedCorruptFilesCount == 1) {
          out.println("Here are a few files that may be corrupted:");
          out.println("===========================================");
        }

        out.println(currentPath);
      }
    }

    out.println();
    out.println(buildSummaryResultForListCorruptFiles(matchedCorruptFilesCount, path));
  }
Exemplo n.º 13
0
  static DatanodeInfo chooseDatanode(
      final NameNode namenode,
      final String path,
      final HttpOpParam.Op op,
      final long openOffset,
      final long blocksize,
      Configuration conf)
      throws IOException {
    final BlockManager bm = namenode.getNamesystem().getBlockManager();

    if (op == PutOpParam.Op.CREATE) {
      // choose a datanode near to client
      final DatanodeDescriptor clientNode =
          bm.getDatanodeManager().getDatanodeByHost(getRemoteAddress());
      if (clientNode != null) {
        final DatanodeDescriptor[] datanodes =
            bm.getBlockPlacementPolicy().chooseTarget(path, 1, clientNode, null, blocksize);
        if (datanodes.length > 0) {
          return datanodes[0];
        }
      }
    } else if (op == GetOpParam.Op.OPEN
        || op == GetOpParam.Op.GETFILECHECKSUM
        || op == PostOpParam.Op.APPEND) {
      // choose a datanode containing a replica
      final NamenodeProtocols np = namenode.getRpcServer();
      final HdfsFileStatus status = np.getFileInfo(path);
      if (status == null) {
        throw new FileNotFoundException("File " + path + " not found.");
      }
      final long len = status.getLen();
      if (op == GetOpParam.Op.OPEN) {
        if (openOffset < 0L || (openOffset >= len && len > 0)) {
          throw new IOException(
              "Offset="
                  + openOffset
                  + " out of the range [0, "
                  + len
                  + "); "
                  + op
                  + ", path="
                  + path);
        }
      }

      if (len > 0) {
        final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
        final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
        final int count = locations.locatedBlockCount();
        if (count > 0) {
          return JspHelper.bestNode(locations.get(0).getLocations(), false, conf);
        }
      }
    }

    return (DatanodeDescriptor)
        bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT);
  }
Exemplo n.º 14
0
 /** @return a randomly chosen datanode. */
 static DatanodeDescriptor getRandomDatanode(final NameNode namenode) {
   return (DatanodeDescriptor)
       namenode
           .getNamesystem()
           .getBlockManager()
           .getDatanodeManager()
           .getNetworkTopology()
           .chooseRandom(NodeBase.ROOT);
 }
  /**
   * testing that APPEND operation can handle token expiration when re-establishing pipeline is
   * needed
   */
  @Test
  public void testAppend() throws Exception {
    MiniDFSCluster cluster = null;
    int numDataNodes = 2;
    Configuration conf = getConf(numDataNodes);

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
      cluster.waitActive();
      assertEquals(numDataNodes, cluster.getDataNodes().size());

      final NameNode nn = cluster.getNameNode();
      final BlockManager bm = nn.getNamesystem().getBlockManager();
      final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

      // set a short token lifetime (1 second)
      SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
      Path fileToAppend = new Path(FILE_TO_APPEND);
      FileSystem fs = cluster.getFileSystem();
      byte[] expected = generateBytes(FILE_SIZE);
      // write a one-byte file
      FSDataOutputStream stm = writeFile(fs, fileToAppend, (short) numDataNodes, BLOCK_SIZE);
      stm.write(expected, 0, 1);
      stm.close();
      // open the file again for append
      stm = fs.append(fileToAppend);
      int mid = expected.length - 1;
      stm.write(expected, 1, mid - 1);
      stm.hflush();

      /*
       * wait till token used in stm expires
       */
      Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
      while (!SecurityTestUtil.isBlockTokenExpired(token)) {
        try {
          Thread.sleep(10);
        } catch (InterruptedException ignored) {
        }
      }

      // remove a datanode to force re-establishing pipeline
      cluster.stopDataNode(0);
      // append the rest of the file
      stm.write(expected, mid, expected.length - mid);
      stm.close();
      // check if append is successful
      FSDataInputStream in5 = fs.open(fileToAppend);
      assertTrue(checkFile1(in5, expected));
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
Exemplo n.º 16
0
  public NameNodeRpcServer(Configuration conf, NameNode nn) throws IOException {
    this.nn = nn;
    this.namesystem = nn.getNamesystem();
    this.metrics = NameNode.getNameNodeMetrics();

    int handlerCount =
        conf.getInt(DFS_NAMENODE_HANDLER_COUNT_KEY, DFS_NAMENODE_HANDLER_COUNT_DEFAULT);
    InetSocketAddress socAddr = nn.getRpcServerAddress(conf);

    InetSocketAddress dnSocketAddr = nn.getServiceRpcServerAddress(conf);
    if (dnSocketAddr != null) {
      int serviceHandlerCount =
          conf.getInt(
              DFS_NAMENODE_SERVICE_HANDLER_COUNT_KEY, DFS_NAMENODE_SERVICE_HANDLER_COUNT_DEFAULT);
      this.serviceRpcServer =
          RPC.getServer(
              NamenodeProtocols.class,
              this,
              dnSocketAddr.getHostName(),
              dnSocketAddr.getPort(),
              serviceHandlerCount,
              false,
              conf,
              namesystem.getDelegationTokenSecretManager());
      this.serviceRPCAddress = this.serviceRpcServer.getListenerAddress();
      nn.setRpcServiceServerAddress(conf, serviceRPCAddress);
    } else {
      serviceRpcServer = null;
      serviceRPCAddress = null;
    }
    this.server =
        RPC.getServer(
            NamenodeProtocols.class,
            this,
            socAddr.getHostName(),
            socAddr.getPort(),
            handlerCount,
            false,
            conf,
            namesystem.getDelegationTokenSecretManager());

    // set service-level authorization security policy
    if (serviceAuthEnabled =
        conf.getBoolean(CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) {
      this.server.refreshServiceAcl(conf, new HDFSPolicyProvider());
      if (this.serviceRpcServer != null) {
        this.serviceRpcServer.refreshServiceAcl(conf, new HDFSPolicyProvider());
      }
    }

    // The rpc-server port can be ephemeral... ensure we have the correct info
    this.rpcAddress = this.server.getListenerAddress();
    nn.setRpcServerAddress(conf, rpcAddress);
  }
 private void listCorruptFileBlocks() throws IOException {
   Collection<FSNamesystem.CorruptFileBlockInfo> corruptFiles =
       namenode.getNamesystem().listCorruptFileBlocks(path, currentCookie);
   int numCorruptFiles = corruptFiles.size();
   String filler;
   if (numCorruptFiles > 0) {
     filler = Integer.toString(numCorruptFiles);
   } else if (currentCookie[0].equals("0")) {
     filler = "no";
   } else {
     filler = "no more";
   }
   out.println("Cookie:\t" + currentCookie[0]);
   for (FSNamesystem.CorruptFileBlockInfo c : corruptFiles) {
     out.println(c.toString());
   }
   out.println("\n\nThe filesystem under path '" + path + "' has " + filler + " CORRUPT files");
   out.println();
 }
Exemplo n.º 18
0
  /**
   * Ensure that the given NameNode marks the specified DataNode as entirely dead/expired.
   *
   * @param nn the NameNode to manipulate
   * @param dnName the name of the DataNode
   */
  public static void noticeDeadDatanode(NameNode nn, String dnName) {
    FSNamesystem namesystem = nn.getNamesystem();
    namesystem.writeLock();
    try {
      DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
      HeartbeatManager hbm = dnm.getHeartbeatManager();
      DatanodeDescriptor[] dnds = hbm.getDatanodes();
      DatanodeDescriptor theDND = null;
      for (DatanodeDescriptor dnd : dnds) {
        if (dnd.getXferAddr().equals(dnName)) {
          theDND = dnd;
        }
      }
      Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);

      synchronized (hbm) {
        DFSTestUtil.setDatanodeDead(theDND);
        hbm.heartbeatCheck();
      }
    } finally {
      namesystem.writeUnlock();
    }
  }
  @BeforeClass
  public static void setupCluster() throws IOException {
    Configuration conf = new HdfsConfiguration();
    final String[] racks = {"/rack1", "/rack1", "/rack1", "/rack2", "/rack2", "/rack2"};
    storages = DFSTestUtil.createDatanodeStorageInfos(racks);
    dataNodes = DFSTestUtil.toDatanodeDescriptor(storages);
    FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
    conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
    File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class);
    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, new File(baseDir, "name").getPath());
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, true);
    DFSTestUtil.formatNameNode(conf);
    namenode = new NameNode(conf);
    int blockSize = 1024;

    dnrList = new ArrayList<DatanodeRegistration>();
    dnManager = namenode.getNamesystem().getBlockManager().getDatanodeManager();

    // Register DNs
    for (int i = 0; i < 6; i++) {
      DatanodeRegistration dnr =
          new DatanodeRegistration(
              dataNodes[i],
              new StorageInfo(NodeType.DATA_NODE),
              new ExportedBlockKeys(),
              VersionInfo.getVersion());
      dnrList.add(dnr);
      dnManager.registerDatanode(dnr);
      dataNodes[i].getStorageInfos()[0].setUtilizationForTesting(
          2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize, 0L,
          2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize, 0L);
      dataNodes[i].updateHeartbeat(
          BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]), 0L, 0L, 0, 0, null);
    }
  }
Exemplo n.º 20
0
 public static HdfsFileStatus getFileInfo(NameNode namenode, String src, boolean resolveLink)
     throws AccessControlException, UnresolvedLinkException, StandbyException, IOException {
   return namenode.getNamesystem().getFileInfo(src, resolveLink);
 }
Exemplo n.º 21
0
 /** Get block locations within the specified range. */
 public static LocatedBlocks getBlockLocations(
     NameNode namenode, String src, long offset, long length) throws IOException {
   return namenode.getNamesystem().getBlockLocations(src, offset, length, false, true, true);
 }
Exemplo n.º 22
0
  /**
   * Test case that reduces replication of a file with a lot of blocks and then fails over right
   * after those blocks enter the DN invalidation queues on the active. Ensures that fencing is
   * correct and no replicas are lost.
   */
  @Test
  public void testNNClearsCommandsOnFailoverWithReplChanges() throws Exception {
    // Make lots of blocks to increase chances of triggering a bug.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 1, 1L);

    banner("rolling NN1's edit log, forcing catch-up");
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    // Get some new replicas reported so that NN2 now considers
    // them over-replicated and schedules some more deletions
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 2);
    while (BlockManagerTestUtil.getComputedDatanodeWork(nn1.getNamesystem().getBlockManager())
        > 0) {
      LOG.info("Getting more replication work computed");
    }
    BlockManager bm1 = nn1.getNamesystem().getBlockManager();
    while (bm1.getPendingReplicationBlocksCount() > 0) {
      BlockManagerTestUtil.updateState(bm1);
      cluster.triggerHeartbeats();
      Thread.sleep(1000);
    }

    banner("triggering BRs");
    cluster.triggerBlockReports();

    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);

    banner("computing invalidation on nn1");

    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
    doMetasave(nn1);

    banner("computing invalidation on nn2");
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    doMetasave(nn2);

    // Dump some info for debugging purposes.
    banner("Metadata immediately before failover");
    doMetasave(nn2);

    // Transition nn2 to active even though nn1 still thinks it's active
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);

    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("Metadata immediately after failover");
    doMetasave(nn2);

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The block should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    HATestUtil.waitForNNToIssueDeletions(nn2);
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
  }
Exemplo n.º 23
0
 public static boolean mkdirs(
     NameNode namenode, String src, PermissionStatus permissions, boolean createParent)
     throws UnresolvedLinkException, IOException {
   return namenode.getNamesystem().mkdirs(src, permissions, createParent);
 }
Exemplo n.º 24
0
 public static void enterSafeMode(NameNode namenode, boolean resourcesLow) throws IOException {
   namenode.getNamesystem().enterSafeMode(resourcesLow);
 }
Exemplo n.º 25
0
  /**
   * Test case which restarts the standby node in such a way that, when it exits safemode, it will
   * want to invalidate a bunch of over-replicated block replicas. Ensures that if we failover at
   * this point it won't lose data.
   */
  @Test
  public void testNNClearsCommandsOnFailoverAfterStartup() throws Exception {
    // Make lots of blocks to increase chances of triggering a bug.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);

    banner("Shutting down NN2");
    cluster.shutdownNameNode(1);

    banner("Setting replication to 1, rolling edit log.");
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
    nn1.getRpcServer().rollEditLog();

    // Start NN2 again. When it starts up, it will see all of the
    // blocks as over-replicated, since it has the metadata for
    // replication=1, but the DNs haven't yet processed the deletions.
    banner("Starting NN2 again.");
    cluster.restartNameNode(1);
    nn2 = cluster.getNameNode(1);

    banner("triggering BRs");
    cluster.triggerBlockReports();

    // We expect that both NN1 and NN2 will have some number of
    // deletions queued up for the DNs.
    banner("computing invalidation on nn1");
    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());

    banner("computing invalidation on nn2");
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    // Dump some info for debugging purposes.
    banner("Metadata immediately before failover");
    doMetasave(nn2);

    // Transition nn2 to active even though nn1 still thinks it's active
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);

    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("Metadata immediately after failover");
    doMetasave(nn2);

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The block should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    HATestUtil.waitForNNToIssueDeletions(nn2);
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
  }
Exemplo n.º 26
0
 public static void leaveSafeMode(NameNode namenode) throws IOException {
   namenode.getNamesystem().leaveSafeMode();
 }
  protected void doTestRead(Configuration conf, MiniDFSCluster cluster, boolean isStriped)
      throws Exception {
    final int numDataNodes = cluster.getDataNodes().size();
    final NameNode nn = cluster.getNameNode();
    final NamenodeProtocols nnProto = nn.getRpcServer();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second) initially
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);

    Path fileToRead = new Path(FILE_TO_READ);
    FileSystem fs = cluster.getFileSystem();
    byte[] expected = generateBytes(FILE_SIZE);
    createFile(fs, fileToRead, expected);

    /*
     * setup for testing expiration handling of cached tokens
     */

    // read using blockSeekTo(). Acquired tokens are cached in in1
    FSDataInputStream in1 = fs.open(fileToRead);
    assertTrue(checkFile1(in1, expected));
    // read using blockSeekTo(). Acquired tokens are cached in in2
    FSDataInputStream in2 = fs.open(fileToRead);
    assertTrue(checkFile1(in2, expected));
    // read using fetchBlockByteRange(). Acquired tokens are cached in in3
    FSDataInputStream in3 = fs.open(fileToRead);
    assertTrue(checkFile2(in3, expected));

    /*
     * testing READ interface on DN using a BlockReader
     */
    DFSClient client = null;
    try {
      client = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
    } finally {
      if (client != null) client.close();
    }
    List<LocatedBlock> locatedBlocks =
        nnProto.getBlockLocations(FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
    LocatedBlock lblock = locatedBlocks.get(0); // first block
    // verify token is not expired
    assertFalse(isBlockTokenExpired(lblock));
    // read with valid token, should succeed
    tryRead(conf, lblock, true);

    /*
     * wait till myToken and all cached tokens in in1, in2 and in3 expire
     */

    while (!isBlockTokenExpired(lblock)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    /*
     * continue testing READ interface on DN using a BlockReader
     */

    // verify token is expired
    assertTrue(isBlockTokenExpired(lblock));
    // read should fail
    tryRead(conf, lblock, false);
    // use a valid new token
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    // read should succeed
    tryRead(conf, lblock, true);
    // use a token with wrong blockID
    long rightId = lblock.getBlock().getBlockId();
    long wrongId = rightId + 1;
    lblock.getBlock().setBlockId(wrongId);
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    lblock.getBlock().setBlockId(rightId);
    // read should fail
    tryRead(conf, lblock, false);
    // use a token with wrong access modes
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.WRITE);
    // read should fail
    tryRead(conf, lblock, false);

    // set a long token lifetime for future tokens
    SecurityTestUtil.setBlockTokenLifetime(sm, 600 * 1000L);

    /*
     * testing that when cached tokens are expired, DFSClient will re-fetch
     * tokens transparently for READ.
     */

    // confirm all tokens cached in in1 are expired by now
    List<LocatedBlock> lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm all tokens cached in in2 are expired by now
    List<LocatedBlock> lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently (testing
    // via another interface method)
    if (isStriped) {
      // striped block doesn't support seekToNewSource
      in2.seek(0);
    } else {
      assertTrue(in2.seekToNewSource(0));
    }
    assertTrue(checkFile1(in2, expected));

    // confirm all tokens cached in in3 are expired by now
    List<LocatedBlock> lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after datanodes are restarted on the same ports, cached
     * tokens should still work and there is no need to fetch new tokens from
     * namenode. This test should run while namenode is down (to make sure no
     * new tokens can be fetched from namenode).
     */

    // restart datanodes on the same ports that they currently use
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    cluster.shutdownNameNode(0);

    // confirm tokens cached in in1 are still valid
    lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm tokens cached in in2 are still valid
    lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertFalse(isBlockTokenExpired(blk));
    }

    // verify blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // confirm tokens cached in in3 are still valid
    lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when namenode is restarted, cached tokens should still
     * work and there is no need to fetch new tokens from namenode. Like the
     * previous test, this test should also run while namenode is down. The
     * setup for this test depends on the previous test.
     */

    // restart the namenode and then shut it down for test
    cluster.restartNameNode(0);
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify again blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after both namenode and datanodes got restarted (namenode
     * first, followed by datanodes), DFSClient can't access DN without
     * re-fetching tokens and is able to re-fetch tokens transparently. The
     * setup of this test depends on the previous test.
     */

    // restore the cluster and restart the datanodes for test
    cluster.restartNameNode(0);
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    // shutdown namenode so that DFSClient can't get new tokens from namenode
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() fails (cached tokens become invalid)
    in1.seek(0);
    assertFalse(checkFile1(in1, expected));
    // verify fetchBlockByteRange() fails (cached tokens become invalid)
    assertFalse(checkFile2(in3, expected));

    // restart the namenode to allow DFSClient to re-fetch tokens
    cluster.restartNameNode(0);
    // verify blockSeekTo() works again (by transparently re-fetching
    // tokens from namenode)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() works again (by transparently
    // re-fetching tokens from namenode)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when datanodes are restarted on different ports, DFSClient
     * is able to re-fetch tokens transparently to connect to them
     */

    // restart datanodes on newly assigned ports
    assertTrue(cluster.restartDataNodes(false));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify blockSeekTo() is able to re-fetch token transparently
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));
  }
  /** Check files on DFS, starting from the indicated path. */
  public void fsck() {
    final long startTime = Time.now();
    try {
      String msg =
          "FSCK started by "
              + UserGroupInformation.getCurrentUser()
              + " from "
              + remoteAddress
              + " for path "
              + path
              + " at "
              + new Date();
      LOG.info(msg);
      out.println(msg);
      namenode.getNamesystem().logFsckEvent(path, remoteAddress);

      if (snapshottableDirs != null) {
        SnapshottableDirectoryStatus[] snapshotDirs =
            namenode.getRpcServer().getSnapshottableDirListing();
        if (snapshotDirs != null) {
          for (SnapshottableDirectoryStatus dir : snapshotDirs) {
            snapshottableDirs.add(dir.getFullPath().toString());
          }
        }
      }

      final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
      if (file != null) {

        if (showCorruptFileBlocks) {
          listCorruptFileBlocks();
          return;
        }

        Result res = new Result(conf);

        check(path, file, res);

        out.println(res);
        out.println(" Number of data-nodes:\t\t" + totalDatanodes);
        out.println(" Number of racks:\t\t" + networktopology.getNumOfRacks());

        out.println(
            "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));

        // If there were internal errors during the fsck operation, we want to
        // return FAILURE_STATUS, even if those errors were not immediately
        // fatal.  Otherwise many unit tests will pass even when there are bugs.
        if (internalError) {
          throw new IOException("fsck encountered internal errors!");
        }

        // DFSck client scans for the string HEALTHY/CORRUPT to check the status
        // of file system and return appropriate code. Changing the output
        // string might break testcases. Also note this must be the last line
        // of the report.
        if (res.isHealthy()) {
          out.print("\n\nThe filesystem under path '" + path + "' " + HEALTHY_STATUS);
        } else {
          out.print("\n\nThe filesystem under path '" + path + "' " + CORRUPT_STATUS);
        }

      } else {
        out.print("\n\nPath '" + path + "' " + NONEXISTENT_STATUS);
      }
    } catch (Exception e) {
      String errMsg = "Fsck on path '" + path + "' " + FAILURE_STATUS;
      LOG.warn(errMsg, e);
      out.println(
          "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));
      out.println(e.getMessage());
      out.print("\n\n" + errMsg);
    } finally {
      out.close();
    }
  }
  @VisibleForTesting
  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
    String path = file.getFullName(parent);
    boolean isOpen = false;

    if (file.isDir()) {
      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
        String snapshotPath =
            (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
                + HdfsConstants.DOT_SNAPSHOT_DIR;
        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
        check(snapshotPath, snapshotFileInfo, res);
      }
      byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
      DirectoryListing thisListing;
      if (showFiles) {
        out.println(path + " <dir>");
      }
      res.totalDirs++;
      do {
        assert lastReturnedName != null;
        thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
        if (thisListing == null) {
          return;
        }
        HdfsFileStatus[] files = thisListing.getPartialListing();
        for (int i = 0; i < files.length; i++) {
          check(path, files[i], res);
        }
        lastReturnedName = thisListing.getLastName();
      } while (thisListing.hasMore());
      return;
    }
    if (file.isSymlink()) {
      if (showFiles) {
        out.println(path + " <symlink>");
      }
      res.totalSymlinks++;
      return;
    }
    long fileLen = file.getLen();
    // Get block locations without updating the file access time
    // and without block access tokens
    LocatedBlocks blocks;
    try {
      blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
    } catch (FileNotFoundException fnfe) {
      blocks = null;
    }
    if (blocks == null) { // the file is deleted
      return;
    }
    isOpen = blocks.isUnderConstruction();
    if (isOpen && !showOpenFiles) {
      // We collect these stats about open files to report with default options
      res.totalOpenFilesSize += fileLen;
      res.totalOpenFilesBlocks += blocks.locatedBlockCount();
      res.totalOpenFiles++;
      return;
    }
    res.totalFiles++;
    res.totalSize += fileLen;
    res.totalBlocks += blocks.locatedBlockCount();
    if (showOpenFiles && isOpen) {
      out.print(
          path
              + " "
              + fileLen
              + " bytes, "
              + blocks.locatedBlockCount()
              + " block(s), OPENFORWRITE: ");
    } else if (showFiles) {
      out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
    } else {
      out.print('.');
    }
    if (res.totalFiles % 100 == 0) {
      out.println();
      out.flush();
    }
    int missing = 0;
    int corrupt = 0;
    long missize = 0;
    int underReplicatedPerFile = 0;
    int misReplicatedPerFile = 0;
    StringBuilder report = new StringBuilder();
    int i = 0;
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
      ExtendedBlock block = lBlk.getBlock();
      boolean isCorrupt = lBlk.isCorrupt();
      String blkName = block.toString();
      DatanodeInfo[] locs = lBlk.getLocations();
      NumberReplicas numberReplicas =
          namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
      int liveReplicas = numberReplicas.liveReplicas();
      res.totalReplicas += liveReplicas;
      short targetFileReplication = file.getReplication();
      res.numExpectedReplicas += targetFileReplication;
      if (liveReplicas > targetFileReplication) {
        res.excessiveReplicas += (liveReplicas - targetFileReplication);
        res.numOverReplicatedBlocks += 1;
      }
      // Check if block is Corrupt
      if (isCorrupt) {
        corrupt++;
        res.corruptBlocks++;
        out.print(
            "\n"
                + path
                + ": CORRUPT blockpool "
                + block.getBlockPoolId()
                + " block "
                + block.getBlockName()
                + "\n");
      }
      if (liveReplicas >= minReplication) res.numMinReplicatedBlocks++;
      if (liveReplicas < targetFileReplication && liveReplicas > 0) {
        res.missingReplicas += (targetFileReplication - liveReplicas);
        res.numUnderReplicatedBlocks += 1;
        underReplicatedPerFile++;
        if (!showFiles) {
          out.print("\n" + path + ": ");
        }
        out.println(
            " Under replicated "
                + block
                + ". Target Replicas is "
                + targetFileReplication
                + " but found "
                + liveReplicas
                + " replica(s).");
      }
      // verify block placement policy
      BlockPlacementStatus blockPlacementStatus =
          bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
      if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
        res.numMisReplicatedBlocks++;
        misReplicatedPerFile++;
        if (!showFiles) {
          if (underReplicatedPerFile == 0) out.println();
          out.print(path + ": ");
        }
        out.println(
            " Replica placement policy is violated for "
                + block
                + ". "
                + blockPlacementStatus.getErrorDescription());
      }
      report.append(i + ". " + blkName + " len=" + block.getNumBytes());
      if (liveReplicas == 0) {
        report.append(" MISSING!");
        res.addMissing(block.toString(), block.getNumBytes());
        missing++;
        missize += block.getNumBytes();
      } else {
        report.append(" repl=" + liveReplicas);
        if (showLocations || showRacks) {
          StringBuilder sb = new StringBuilder("[");
          for (int j = 0; j < locs.length; j++) {
            if (j > 0) {
              sb.append(", ");
            }
            if (showRacks) sb.append(NodeBase.getPath(locs[j]));
            else sb.append(locs[j]);
          }
          sb.append(']');
          report.append(" " + sb.toString());
        }
      }
      report.append('\n');
      i++;
    }
    if ((missing > 0) || (corrupt > 0)) {
      if (!showFiles && (missing > 0)) {
        out.print(
            "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
      }
      res.corruptFiles++;
      if (isOpen) {
        LOG.info("Fsck: ignoring open file " + path);
      } else {
        if (doMove) copyBlocksToLostFound(parent, file, blocks);
        if (doDelete) deleteCorruptedFile(path);
      }
    }
    if (showFiles) {
      if (missing > 0) {
        out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
      } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
        out.print(" OK\n");
      }
      if (showBlocks) {
        out.print(report.toString() + "\n");
      }
    }
  }
Exemplo n.º 30
0
  /**
   * Get {@link UserGroupInformation} and possibly the delegation token out of the request.
   *
   * @param context the Servlet context
   * @param request the http request
   * @param conf configuration
   * @param secureAuthMethod the AuthenticationMethod used in secure mode.
   * @param tryUgiParameter Should it try the ugi parameter?
   * @return a new user from the request
   * @throws AccessControlException if the request has no token
   */
  public static UserGroupInformation getUGI(
      ServletContext context,
      HttpServletRequest request,
      Configuration conf,
      final AuthenticationMethod secureAuthMethod,
      final boolean tryUgiParameter)
      throws IOException {
    final UserGroupInformation ugi;
    final String usernameFromQuery = getUsernameFromQuery(request, tryUgiParameter);
    final String doAsUserFromQuery = request.getParameter(DoAsParam.NAME);

    if (UserGroupInformation.isSecurityEnabled()) {
      final String remoteUser = request.getRemoteUser();
      String tokenString = request.getParameter(DELEGATION_PARAMETER_NAME);
      if (tokenString != null) {
        Token<DelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
        token.decodeFromUrlString(tokenString);
        SecurityUtil.setTokenService(token, NameNode.getAddress(conf));
        token.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND);

        ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
        DataInputStream in = new DataInputStream(buf);
        DelegationTokenIdentifier id = new DelegationTokenIdentifier();
        id.readFields(in);
        if (context != null) {
          NameNode nn = (NameNode) context.getAttribute("name.node");
          if (nn != null) {
            // Verify the token.
            nn.getNamesystem()
                .getDelegationTokenSecretManager()
                .verifyToken(id, token.getPassword());
          }
        }
        ugi = id.getUser();
        if (ugi.getRealUser() == null) {
          // non-proxy case
          checkUsername(ugi.getShortUserName(), usernameFromQuery);
          checkUsername(null, doAsUserFromQuery);
        } else {
          // proxy case
          checkUsername(ugi.getRealUser().getShortUserName(), usernameFromQuery);
          checkUsername(ugi.getShortUserName(), doAsUserFromQuery);
          ProxyUsers.authorize(ugi, request.getRemoteAddr(), conf);
        }
        ugi.addToken(token);
        ugi.setAuthenticationMethod(AuthenticationMethod.TOKEN);
      } else {
        if (remoteUser == null) {
          throw new IOException("Security enabled but user not " + "authenticated by filter");
        }
        final UserGroupInformation realUgi = UserGroupInformation.createRemoteUser(remoteUser);
        checkUsername(realUgi.getShortUserName(), usernameFromQuery);
        // This is not necessarily true, could have been auth'ed by user-facing
        // filter
        realUgi.setAuthenticationMethod(secureAuthMethod);
        ugi = initUGI(realUgi, doAsUserFromQuery, request, true, conf);
      }
    } else { // Security's not on, pull from url
      final UserGroupInformation realUgi =
          usernameFromQuery == null
              ? getDefaultWebUser(conf) // not specified in request
              : UserGroupInformation.createRemoteUser(usernameFromQuery);
      realUgi.setAuthenticationMethod(AuthenticationMethod.SIMPLE);
      ugi = initUGI(realUgi, doAsUserFromQuery, request, false, conf);
    }

    if (LOG.isDebugEnabled()) LOG.debug("getUGI is returning: " + ugi.getShortUserName());
    return ugi;
  }