/**
   * Verify that the NameNode is still able to use <tt>READ_ONLY_SHARED</tt> replicas even when
   * the single <tt>NORMAL</tt> replica is offline (and the effective replication count is 0).
   */
  @Test
  public void testNormalReplicaOffline() throws Exception {
    // Stop the datanode hosting the NORMAL replica
    cluster.stopDataNode(normalDataNode.getXferAddr());

    // Force the NameNode to detect that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), normalDataNode.getXferAddr());

    // The live replica count should now be zero (since the NORMAL replica is offline)
    NumberReplicas numberReplicas = blockManager.countNodes(block);
    assertThat(numberReplicas.liveReplicas(), is(0));

    // The block should be reported as under-replicated
    BlockManagerTestUtil.updateState(blockManager);
    assertThat(blockManager.getUnderReplicatedBlocksCount(), is(1L));

    // The BlockManager should be able to heal the replication count back to 1
    // by triggering an inter-datanode replication from one of the READ_ONLY_SHARED replicas
    BlockManagerTestUtil.computeAllPendingWork(blockManager);

    DFSTestUtil.waitForReplication(cluster, extendedBlock, 1, 1, 0);

    // There should now be 2 *locations* for the block, and 1 *replica*
    assertThat(getLocatedBlock().getLocations().length, is(2));
    validateNumberReplicas(1);
  }
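
  /*
   * The tests in this excerpt rely on the getLocatedBlock() and waitForLocations() helpers, which
   * are not shown here. The two methods below are a minimal sketch of what they could look like,
   * assuming the "client", PATH, BLOCK_SIZE and RETRIES members used elsewhere in the class; they
   * are illustrative rather than the verbatim implementation.
   */
  private LocatedBlock getLocatedBlock() throws IOException {
    // The test file consists of a single block, so exactly one LocatedBlock is expected
    LocatedBlocks locatedBlocks = client.getLocatedBlocks(PATH.toString(), 0, BLOCK_SIZE);
    assertThat(locatedBlocks.getLocatedBlocks().size(), is(1));
    return locatedBlocks.getLocatedBlocks().get(0);
  }

  private void waitForLocations(int locations) throws IOException, InterruptedException {
    // Block reports are processed asynchronously, so poll until the NameNode reports the
    // expected number of locations for the block (or the retry budget is exhausted)
    for (int tries = 0; tries < RETRIES; ) {
      try {
        assertThat(getLocatedBlock().getLocations().length, is(locations));
        break;
      } catch (AssertionError e) {
        if (++tries < RETRIES) {
          Thread.sleep(1000);
        } else {
          throw e;
        }
      }
    }
  }
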
  static DatanodeInfo chooseDatanode(
      final NameNode namenode,
      final String path,
      final HttpOpParam.Op op,
      final long openOffset,
      final long blocksize,
      Configuration conf)
      throws IOException {
    final BlockManager bm = namenode.getNamesystem().getBlockManager();

    if (op == PutOpParam.Op.CREATE) {
      // choose a datanode near the client
      final DatanodeDescriptor clientNode =
          bm.getDatanodeManager().getDatanodeByHost(getRemoteAddress());
      if (clientNode != null) {
        final DatanodeDescriptor[] datanodes =
            bm.getBlockPlacementPolicy().chooseTarget(path, 1, clientNode, null, blocksize);
        if (datanodes.length > 0) {
          return datanodes[0];
        }
      }
    } else if (op == GetOpParam.Op.OPEN
        || op == GetOpParam.Op.GETFILECHECKSUM
        || op == PostOpParam.Op.APPEND) {
      // choose a datanode containing a replica
      final NamenodeProtocols np = namenode.getRpcServer();
      final HdfsFileStatus status = np.getFileInfo(path);
      if (status == null) {
        throw new FileNotFoundException("File " + path + " not found.");
      }
      final long len = status.getLen();
      if (op == GetOpParam.Op.OPEN) {
        if (openOffset < 0L || (openOffset >= len && len > 0)) {
          throw new IOException(
              "Offset="
                  + openOffset
                  + " out of the range [0, "
                  + len
                  + "); "
                  + op
                  + ", path="
                  + path);
        }
      }

      if (len > 0) {
        final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
        final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
        final int count = locations.locatedBlockCount();
        if (count > 0) {
          return JspHelper.bestNode(locations.get(0).getLocations(), false, conf);
        }
      }
    }

    return (DatanodeDescriptor)
        bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT);
  }
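
  /*
   * For context: callers of chooseDatanode() typically redirect the WebHDFS client to the chosen
   * datanode over HTTP. The helper below is a hedged sketch of that step (assuming java.net.URI
   * is available); the method name and URI layout are illustrative assumptions, not the actual
   * DatanodeWebHdfsMethods code, which also appends delegation-token and other query parameters.
   */
  static URI toRedirectUri(final DatanodeInfo dn, final String path, final HttpOpParam.Op op)
      throws URISyntaxException {
    // Point the client at the chosen datanode's HTTP port, preserving the WebHDFS path and the
    // requested operation; all other query parameters are omitted in this sketch.
    return new URI("http", null, dn.getHostName(), dn.getInfoPort(),
        "/webhdfs/v1" + path, "op=" + op.toString(), null);
  }
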
  private void validateNumberReplicas(int expectedReplicas) throws IOException {
    NumberReplicas numberReplicas = blockManager.countNodes(block);
    assertThat(numberReplicas.liveReplicas(), is(expectedReplicas));
    assertThat(numberReplicas.excessReplicas(), is(0));
    assertThat(numberReplicas.corruptReplicas(), is(0));
    assertThat(numberReplicas.decommissionedReplicas(), is(0));
    assertThat(numberReplicas.replicasOnStaleNodes(), is(0));

    BlockManagerTestUtil.updateState(blockManager);
    assertThat(blockManager.getUnderReplicatedBlocksCount(), is(0L));
    assertThat(blockManager.getExcessBlocksCount(), is(0L));
  }
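
  /*
   * The tests and setup in this excerpt reference fields and constants that are declared
   * elsewhere in the class. The declarations below are a hedged sketch of plausible definitions;
   * the concrete values (datanode count, block size, retry count, seed, file path) are
   * assumptions made for illustration only.
   */
  private static final int NUM_DATANODES = 3;
  private static final int RO_NODE_INDEX = NUM_DATANODES - 1; // datanode given READ_ONLY_SHARED storage
  private static final int BLOCK_SIZE = 1024;
  private static final int RETRIES = 10;
  private static final long seed = 0x1BADF00DL;
  private static final Path PATH = new Path("/read-only-shared-block.dat"); // hypothetical path

  private Configuration conf;
  private MiniDFSCluster cluster;
  private DistributedFileSystem fs;
  private DFSClient client;
  private BlockManager blockManager;
  private DatanodeManager datanodeManager;
  private DatanodeInfo normalDataNode;
  private DatanodeInfo readOnlyDataNode;
  private ExtendedBlock extendedBlock;
  private Block block;
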
  /**
   * Set up a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link
   * State#READ_ONLY_SHARED} replicas.
   */
  @Before
  public void setup() throws IOException, InterruptedException {
    conf = new HdfsConfiguration();
    SimulatedFSDataset.setFactory(conf);

    Configuration[] overlays = new Configuration[NUM_DATANODES];
    for (int i = 0; i < overlays.length; i++) {
      overlays[i] = new Configuration();
      if (i == RO_NODE_INDEX) {
        overlays[i].setEnum(SimulatedFSDataset.CONFIG_PROPERTY_STATE, READ_ONLY_SHARED);
      }
    }

    cluster =
        new MiniDFSCluster.Builder(conf)
            .numDataNodes(NUM_DATANODES)
            .dataNodeConfOverlays(overlays)
            .build();
    fs = cluster.getFileSystem();
    blockManager = cluster.getNameNode().getNamesystem().getBlockManager();
    datanodeManager = blockManager.getDatanodeManager();
    client =
        new DFSClient(
            new InetSocketAddress("localhost", cluster.getNameNodePort()),
            cluster.getConfiguration(0));

    for (int i = 0; i < NUM_DATANODES; i++) {
      DataNode dataNode = cluster.getDataNodes().get(i);
      validateStorageState(
          BlockManagerTestUtil.getStorageReportsForDatanode(
              datanodeManager.getDatanode(dataNode.getDatanodeId())),
          i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
    }

    // Create a 1 block file
    DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed);

    LocatedBlock locatedBlock = getLocatedBlock();
    extendedBlock = locatedBlock.getBlock();
    block = extendedBlock.getLocalBlock();

    assertThat(locatedBlock.getLocations().length, is(1));
    normalDataNode = locatedBlock.getLocations()[0];
    readOnlyDataNode =
        datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId());
    assertThat(normalDataNode, is(not(readOnlyDataNode)));

    validateNumberReplicas(1);

    // Inject the block into the datanode with READ_ONLY_SHARED storage
    cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block));

    // There should now be 2 *locations* for the block
    // Must wait until the NameNode has processed the block report for the injected blocks
    waitForLocations(2);
  }
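
  /*
   * setup() calls validateStorageState(), which is not shown in this excerpt. The method below is
   * a minimal sketch of it, assuming the StorageReport and DatanodeStorage types from
   * org.apache.hadoop.hdfs.server.protocol: it asserts that every storage attached to the
   * datanode reports the expected state.
   */
  private void validateStorageState(StorageReport[] storageReports, DatanodeStorage.State state) {
    for (StorageReport storageReport : storageReports) {
      DatanodeStorage storage = storageReport.getStorage();
      assertThat(storage.getState(), is(state));
    }
  }
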
  /**
   * Verify that corrupt <tt>READ_ONLY_SHARED</tt> replicas aren't counted towards the corrupt
   * replicas total.
   */
  @Test
  public void testReadOnlyReplicaCorrupt() throws Exception {
    // "Corrupt" a READ_ONLY_SHARED replica by reporting it as a bad replica
    client.reportBadBlocks(
        new LocatedBlock[] {
          new LocatedBlock(extendedBlock, new DatanodeInfo[] {readOnlyDataNode})
        });

    // There should now be only 1 *location* for the block, as the READ_ONLY_SHARED replica is
    // corrupt
    waitForLocations(1);

    // However, the corrupt READ_ONLY_SHARED replica should *not* affect the overall corrupt
    // replicas count
    NumberReplicas numberReplicas = blockManager.countNodes(block);
    assertThat(numberReplicas.corruptReplicas(), is(0));
  }
  /**
   * Test case that reduces replication of a file with a lot of blocks and then fails over right
   * after those blocks enter the DN invalidation queues on the active. Ensures that fencing is
   * correct and no replicas are lost.
   */
  @Test
  public void testNNClearsCommandsOnFailoverWithReplChanges() throws Exception {
    // Make lots of blocks to increase chances of triggering a bug.
    DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 1, 1L);

    banner("rolling NN1's edit log, forcing catch-up");
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    // Increase the replication factor so that new replicas get created and reported. The
    // replication is reduced again below, so that NN2 will consider the extra replicas
    // over-replicated and schedule deletions for them.
    nn1.getRpcServer().setReplication(TEST_FILE, (short) 2);
    while (BlockManagerTestUtil.getComputedDatanodeWork(nn1.getNamesystem().getBlockManager())
        > 0) {
      LOG.info("Getting more replication work computed");
    }
    BlockManager bm1 = nn1.getNamesystem().getBlockManager();
    while (bm1.getPendingReplicationBlocksCount() > 0) {
      BlockManagerTestUtil.updateState(bm1);
      cluster.triggerHeartbeats();
      Thread.sleep(1000);
    }

    banner("triggering BRs");
    cluster.triggerBlockReports();

    nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);

    banner("computing invalidation on nn1");

    BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
    doMetasave(nn1);

    banner("computing invalidation on nn2");
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    doMetasave(nn2);

    // Dump some info for debugging purposes.
    banner("Metadata immediately before failover");
    doMetasave(nn2);

    // Transition nn2 to active even though nn1 still thinks it's active
    banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.abortEditLogs(nn1);
    NameNodeAdapter.enterSafeMode(nn1, false);

    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
    cluster.transitionToActive(1);

    // Check that the standby picked up the replication change.
    assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

    // Dump some info for debugging purposes.
    banner("Metadata immediately after failover");
    doMetasave(nn2);

    banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    banner("Metadata after nodes have all block-reported");
    doMetasave(nn2);

    // The block should no longer be postponed.
    assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

    HATestUtil.waitForNNToIssueDeletions(nn2);
    cluster.triggerHeartbeats();
    HATestUtil.waitForDNDeletions(cluster);
    cluster.triggerDeletionReports();
    assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

    banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.getFileSystem(1);
    DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
  }
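
  /*
   * The failover test above uses the banner() and doMetasave() helpers, which are not shown in
   * this excerpt. The two methods below are a rough sketch of them; the exact log formatting is
   * an assumption.
   */
  private void banner(String string) {
    // Make the major test phases easy to spot in the log output
    LOG.info("\n\n\n\n================================================\n"
        + string + "\n"
        + "==================================================\n\n\n");
  }

  private void doMetasave(NameNode nn) {
    // Dump the BlockManager's state (pending replications, invalidations, postponed blocks, ...)
    // to stderr for debugging; metaSave() must be called under the namesystem write lock.
    nn.getNamesystem().writeLock();
    try {
      PrintWriter pw = new PrintWriter(System.err);
      nn.getNamesystem().getBlockManager().metaSave(pw);
      pw.flush();
    } finally {
      nn.getNamesystem().writeUnlock();
    }
  }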