Beispiel #1
0
 /**
  * Verifies that {@link DataNode#checkDiskErrorAsync()} actually triggers a disk error check.
  * Before the code was refactored, that function was not getting called.
  *
  * <p>The test requests an asynchronous check, sleeps for one check interval, and then asserts
  * that the datanode's last recorded check is no older than the interval plus half an interval
  * of slack.
  *
  * @throws IOException declared for the cluster start-up calls made when no datanode is running
  * @throws InterruptedException if the sleep between the request and the assertion is interrupted
  */
 @Test
 public void testcheckDiskError() throws IOException, InterruptedException {
   // Make sure there is at least one datanode to run the check against.
   if (cluster.getDataNodes().isEmpty()) {
     cluster.startDataNodes(conf, 1, true, null, null);
     cluster.waitActive();
   }
   final DataNode dn = cluster.getDataNodes().get(0);
   final long interval = dn.checkDiskErrorInterval;
   final long slack = interval / 2;

   // Request the disk error check, then wait one full interval for it to run.
   dn.checkDiskErrorAsync();
   Thread.sleep(interval);

   final long lastCheck = dn.getLastDiskErrorCheck();
   final long sinceLastCheck = Time.monotonicNow() - lastCheck;
   assertTrue(
       "Disk Error check is not performed within  " + interval + "  ms",
       sinceLastCheck < interval + slack);
 }
Beispiel #2
0
/**
 * Keeps, for every datanode, the collection of blocks that have recently been invalidated and
 * are thought to live on that datanode.
 *
 * <p>Every method that reads or mutates the map or the block counter is {@code synchronized} on
 * this instance. Handing out invalidation work is held back until {@code pendingPeriodInMs} has
 * elapsed since this object was created; see {@link #getInvalidationDelay()}.
 */
@InterfaceAudience.Private
class InvalidateBlocks {
  /** Mapping: DatanodeInfo -> Collection of Blocks */
  private final Map<DatanodeInfo, LightWeightHashSet<Block>> node2blocks =
      new TreeMap<DatanodeInfo, LightWeightHashSet<Block>>();
  /** The total number of blocks in the map. */
  private long numBlocks = 0L;

  /** Upper bound passed to {@code pollN} when handing out invalidation work. */
  private final int blockInvalidateLimit;

  /** The period of pending time for block invalidation since the NameNode startup */
  private final long pendingPeriodInMs;
  /** The startup time, taken from {@link Time#monotonicNow()} when this object is created. */
  private final long startupTime = Time.monotonicNow();

  /**
   * @param blockInvalidateLimit limit applied to each {@link #invalidateWork} call
   * @param pendingPeriodInMs how long, in milliseconds, invalidation work is withheld after
   *     construction
   */
  InvalidateBlocks(final int blockInvalidateLimit, long pendingPeriodInMs) {
    this.blockInvalidateLimit = blockInvalidateLimit;
    this.pendingPeriodInMs = pendingPeriodInMs;
    printBlockDeletionTime(BlockManager.LOG);
  }

  /** Logs the configured deletion delay and the approximate wall-clock time deletion starts. */
  private void printBlockDeletionTime(final Log log) {
    log.info(
        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY
            + " is set to "
            + DFSUtil.durationToString(pendingPeriodInMs));
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy MMM dd HH:mm:ss");
    Calendar calendar = new GregorianCalendar();
    // Advance by whole seconds using long arithmetic. Calendar.add takes an int amount, and
    // narrowing the second count to int would wrap for periods beyond Integer.MAX_VALUE seconds,
    // logging a bogus start time.
    final long wholeSecondsInMs = (pendingPeriodInMs / 1000) * 1000;
    calendar.setTimeInMillis(calendar.getTimeInMillis() + wholeSecondsInMs);
    log.info("The block deletion will start around " + sdf.format(calendar.getTime()));
  }

  /** @return the number of blocks to be invalidated. */
  synchronized long numBlocks() {
    return numBlocks;
  }

  /**
   * @return true if the given datanode has the given block listed for invalidation. Blocks are
   *     compared including their generation stamps: if a block is pending invalidation but with a
   *     different generation stamp, returns false.
   */
  synchronized boolean contains(final DatanodeInfo dn, final Block block) {
    final LightWeightHashSet<Block> s = node2blocks.get(dn);
    if (s == null) {
      return false; // no blocks pending invalidation for this datanode
    }
    Block blockInSet = s.getElement(block);
    return blockInSet != null && block.getGenerationStamp() == blockInSet.getGenerationStamp();
  }

  /**
   * Add a block to the block collection which will be invalidated on the specified datanode.
   *
   * @param block the block to invalidate
   * @param datanode the datanode the block is recorded against
   * @param log whether to write a block state change log entry when the block is newly added
   */
  synchronized void add(final Block block, final DatanodeInfo datanode, final boolean log) {
    LightWeightHashSet<Block> set = node2blocks.get(datanode);
    if (set == null) {
      set = new LightWeightHashSet<Block>();
      node2blocks.put(datanode, set);
    }
    // Only count (and log) the block if it was not already in the set.
    if (set.add(block)) {
      numBlocks++;
      if (log) {
        NameNode.blockStateChangeLog.info(
            "BLOCK* " + getClass().getSimpleName() + ": add " + block + " to " + datanode);
      }
    }
  }

  /** Remove a datanode, and all blocks recorded against it, from the invalidate set. */
  synchronized void remove(final DatanodeInfo dn) {
    final LightWeightHashSet<Block> blocks = node2blocks.remove(dn);
    if (blocks != null) {
      numBlocks -= blocks.size();
    }
  }

  /** Remove the block from the specified datanode, dropping the datanode entry once empty. */
  synchronized void remove(final DatanodeInfo dn, final Block block) {
    final LightWeightHashSet<Block> v = node2blocks.get(dn);
    if (v != null && v.remove(block)) {
      numBlocks--;
      if (v.isEmpty()) {
        node2blocks.remove(dn);
      }
    }
  }

  /** Print the contents to out. */
  synchronized void dump(final PrintWriter out) {
    final int size = node2blocks.size();
    out.println("Metasave: Blocks " + numBlocks + " waiting deletion from " + size + " datanodes.");
    if (size == 0) {
      return;
    }

    for (Map.Entry<DatanodeInfo, LightWeightHashSet<Block>> entry : node2blocks.entrySet()) {
      final LightWeightHashSet<Block> blocks = entry.getValue();
      if (!blocks.isEmpty()) {
        out.println(entry.getKey());
        out.println(blocks);
      }
    }
  }

  /** @return a new list of the datanodes that currently have an entry in the map. */
  synchronized List<DatanodeInfo> getDatanodes() {
    return new ArrayList<DatanodeInfo>(node2blocks.keySet());
  }

  /**
   * @return the remaining pending time in milliseconds; zero or negative once the pending period
   *     has elapsed
   */
  @VisibleForTesting
  long getInvalidationDelay() {
    return pendingPeriodInMs - (Time.monotonicNow() - startupTime);
  }

  /**
   * Polls blocks pending invalidation for the given datanode (bounded by the configured limit
   * passed to {@code pollN}), hands them to {@link
   * DatanodeDescriptor#addBlocksToBeInvalidated}, and removes them from this collection.
   *
   * @param dn the datanode to collect invalidation work for
   * @return the blocks handed to the datanode, or {@code null} if deletion is still delayed after
   *     startup or no blocks are recorded for the datanode
   */
  synchronized List<Block> invalidateWork(final DatanodeDescriptor dn) {
    final long delay = getInvalidationDelay();
    if (delay > 0) {
      if (BlockManager.LOG.isDebugEnabled()) {
        BlockManager.LOG.debug(
            "Block deletion is delayed during NameNode startup. "
                + "The deletion will start after "
                + delay
                + " ms.");
      }
      return null;
    }
    final LightWeightHashSet<Block> set = node2blocks.get(dn);
    if (set == null) {
      return null;
    }

    // # blocks that can be sent in one message is limited
    final int limit = blockInvalidateLimit;
    final List<Block> toInvalidate = set.pollN(limit);

    // If we send everything in this message, remove this node entry. The set is already empty
    // here, so remove(dn) does not change numBlocks; the polled blocks are subtracted below.
    if (set.isEmpty()) {
      remove(dn);
    }

    dn.addBlocksToBeInvalidated(toInvalidate);
    numBlocks -= toInvalidate.size();
    return toInvalidate;
  }

  /** Drop every datanode entry and reset the block counter to zero. */
  synchronized void clear() {
    node2blocks.clear();
    numBlocks = 0;
  }
}
Beispiel #3
0
 /**
  * @return the remaining pending time: the pending period minus the time elapsed since startup.
  *     The result is zero or negative once the pending period has passed.
  */
 @VisibleForTesting
 long getInvalidationDelay() {
   final long elapsedSinceStartup = Time.monotonicNow() - startupTime;
   return pendingPeriodInMs - elapsedSinceStartup;
 }
  /**
   * Processes a datanode heartbeat, or initializes this node's stats.
   *
   * <p>Records the cache figures, xceiver count, update timestamps and volume failure state,
   * applies every storage report to its storage, and stores the summed capacity, remaining,
   * block-pool-used and DFS-used values as this node's totals. When a failed-storage check is
   * warranted, every known storage that has no report in this heartbeat is passed to {@code
   * updateFailedStorage}. Finally, if the number of known storages differs from the number of
   * reports, {@code pruneStorageMap} is invoked.
   *
   * @param reports one report per storage included in this heartbeat
   * @param cacheCapacity value recorded via {@code setCacheCapacity}
   * @param cacheUsed value recorded via {@code setCacheUsed}
   * @param xceiverCount value recorded via {@code setXceiverCount}
   * @param volFailures number of failed volumes reported with this heartbeat
   * @param volumeFailureSummary volume failure details; may be null
   */
  public void updateHeartbeatState(
      StorageReport[] reports,
      long cacheCapacity,
      long cacheUsed,
      int xceiverCount,
      int volFailures,
      VolumeFailureSummary volumeFailureSummary) {
    // Should we look for storages that are missing from the reports and mark them failed?
    // The scenarios to cover:
    // 1. A storage fails while the DN is running. The current DN implementation does not add a
    //    recovered storage back to its list until restart, so volFailures cannot decrease
    //    within one registration session. An unchanged failure count therefore means no state
    //    change and the check can be skipped (an optimization). Recent DataNodes also send a
    //    VolumeFailureSummary carrying the date/time of the last volume failure; when both the
    //    new and the previously stored summary exist, that timestamp is compared instead, for
    //    greater accuracy.
    // 2. The DN restarted. volFailures might not increase even though there are newly failed
    //    storages, e.g. admins reduced the configured storages. Another corner case: a) there
    //    is one good storage A and one restored good storage B, so storageReports holds only
    //    A; b) A fails; c) the DN restarts before its heartbeat tells the NN about A; d) after
    //    the restart storageReports holds only B. Hence the check always runs on the first
    //    heartbeat since registration.
    final boolean haveBothSummaries =
        volumeFailureSummary != null && this.volumeFailureSummary != null;
    final boolean checkFailedStorages =
        haveBothSummaries
            ? (volumeFailureSummary.getLastVolumeFailureDate()
                > this.volumeFailureSummary.getLastVolumeFailureDate())
            : ((volFailures > this.volumeFailures) || !heartbeatedSinceRegistration);

    // Start from every known storage; the ones that report in below get struck off.
    Set<DatanodeStorageInfo> unreportedStorages = null;
    if (checkFailedStorages) {
      LOG.info(
          "Number of failed storage changes from " + this.volumeFailures + " to " + volFailures);
      synchronized (storageMap) {
        unreportedStorages = new HashSet<>(storageMap.values());
      }
    }

    setCacheCapacity(cacheCapacity);
    setCacheUsed(cacheUsed);
    setXceiverCount(xceiverCount);
    setLastUpdate(Time.now());
    setLastUpdateMonotonic(Time.monotonicNow());
    this.volumeFailures = volFailures;
    this.volumeFailureSummary = volumeFailureSummary;

    // Apply each report to its storage while accumulating node-wide totals.
    long capacitySum = 0;
    long remainingSum = 0;
    long blockPoolUsedSum = 0;
    long dfsUsedSum = 0;
    for (StorageReport storageReport : reports) {
      final DatanodeStorageInfo info = updateStorage(storageReport.getStorage());
      if (checkFailedStorages) {
        unreportedStorages.remove(info);
      }

      info.receivedHeartbeat(storageReport);
      capacitySum += storageReport.getCapacity();
      remainingSum += storageReport.getRemaining();
      blockPoolUsedSum += storageReport.getBlockPoolUsed();
      dfsUsedSum += storageReport.getDfsUsed();
    }
    rollBlocksScheduled(getLastUpdateMonotonic());

    // Update total metrics for the node.
    setCapacity(capacitySum);
    setRemaining(remainingSum);
    setBlockPoolUsed(blockPoolUsedSum);
    setDfsUsed(dfsUsedSum);
    if (checkFailedStorages) {
      updateFailedStorage(unreportedStorages);
    }

    // Compare the storage count under the map's lock, but prune outside that block.
    final boolean storageCountDiffers;
    synchronized (storageMap) {
      storageCountDiffers = storageMap.size() != reports.length;
    }
    if (storageCountDiffers) {
      pruneStorageMap(reports);
    }
  }