/**
  * Test that if we fail a flush, abort gets set on close.
  *
  * @see <a href="https://issues.apache.org/jira/browse/HBASE-4270">HBASE-4270</a>
  * @throws IOException
  * @throws NodeExistsException
  * @throws KeeperException
  */
 @Test
 public void testFailedFlushAborts() throws IOException, NodeExistsException, KeeperException {
   final Server server = new MockServer(HTU, false);
   final RegionServerServices rss = HTU.createMockRegionServerService();
   HTableDescriptor htd = TEST_HTD;
   final HRegionInfo hri =
       new HRegionInfo(htd.getTableName(), HConstants.EMPTY_END_ROW, HConstants.EMPTY_END_ROW);
   HRegion region = HTU.createLocalHRegion(hri, htd);
   try {
     assertNotNull(region);
      // Spy on the region so we can throw an exception when close is called.
     HRegion spy = Mockito.spy(region);
     final boolean abort = false;
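      // Note (Mockito gotcha): when(spy.close(abort)) invokes the real close() once during
      // stubbing; doThrow(...).when(spy).close(abort) is the usual way to stub a spy without
      // that side effect.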
     Mockito.when(spy.close(abort)).thenThrow(new RuntimeException("Mocked failed close!"));
     // The CloseRegionHandler will try to get an HRegion that corresponds
     // to the passed hri -- so insert the region into the online region Set.
     rss.addToOnlineRegions(spy);
     // Assert the Server is NOT stopped before we call close region.
     assertFalse(server.isStopped());
     CloseRegionHandler handler = new CloseRegionHandler(server, rss, hri, false, false, -1);
     boolean throwable = false;
     try {
       handler.process();
     } catch (Throwable t) {
       throwable = true;
     } finally {
       assertTrue(throwable);
       // Abort calls stop so stopped flag should be set.
       assertTrue(server.isStopped());
     }
   } finally {
     HRegion.closeHRegion(region);
   }
 }
 private void OpenRegion(
     Server server, RegionServerServices rss, HTableDescriptor htd, HRegionInfo hri)
     throws IOException, NodeExistsException, KeeperException, DeserializationException {
    // Create its OFFLINE node, which is what the Master sets before sending the OPEN RPC
   ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName());
   OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd);
   rss.getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
   openHandler.process();
   // This parse is not used?
   RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()));
    // Delete the node, which is what the Master does after the region is opened
   ZKAssign.deleteNode(
       server.getZooKeeper(),
       hri.getEncodedName(),
       EventType.RS_ZK_REGION_OPENED,
       server.getServerName());
 }
Code example #3
  /**
   * Submit a log split task to the executor service.
   *
   * @param curTask task to submit
   * @param mode recovery mode of the task
   * @param curTaskZKVersion current version of the task
   * @param reportPeriod minimum interval, in milliseconds, between ownership heartbeats
   */
  void submitTask(
      final String curTask,
      final RecoveryMode mode,
      final int curTaskZKVersion,
      final int reportPeriod) {
    final MutableInt zkVersion = new MutableInt(curTaskZKVersion);

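    // The reporter heartbeats the task: once reportPeriod has elapsed, each progress() call
    // re-asserts ownership of the task znode via attemptToOwnTask and cancels the split if
    // ownership can no longer be renewed.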
    CancelableProgressable reporter =
        new CancelableProgressable() {
          private long last_report_at = 0;

          @Override
          public boolean progress() {
            long t = EnvironmentEdgeManager.currentTime();
            if ((t - last_report_at) > reportPeriod) {
              last_report_at = t;
              int latestZKVersion =
                  attemptToOwnTask(
                      false, watcher, server.getServerName(), curTask, mode, zkVersion.intValue());
              if (latestZKVersion < 0) {
                LOG.warn("Failed to heartbeat the task" + curTask);
                return false;
              }
              zkVersion.setValue(latestZKVersion);
            }
            return true;
          }
        };
    ZkSplitLogWorkerCoordination.ZkSplitTaskDetails splitTaskDetails =
        new ZkSplitLogWorkerCoordination.ZkSplitTaskDetails();
    splitTaskDetails.setTaskNode(curTask);
    splitTaskDetails.setCurTaskZKVersion(zkVersion);

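    // Hand the actual split work off to a WALSplitterHandler on the server's executor
    // service; this method only registers the task and returns.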
    WALSplitterHandler hsh =
        new WALSplitterHandler(
            server,
            this,
            splitTaskDetails,
            reporter,
            this.tasksInProgress,
            splitTaskExecutor,
            mode);
    server.getExecutorService().submit(hsh);
  }
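
  // The reporter above follows the CancelableProgressable contract: long-running split code
  // is expected to call progress() between units of work and stop once it returns false.
  // A minimal sketch of such a caller follows; hasMoreEdits() and splitNextBatch() are
  // hypothetical placeholders, not HBase APIs.
  boolean splitWithHeartbeat(CancelableProgressable reporter) {
    while (hasMoreEdits()) {
      if (reporter != null && !reporter.progress()) {
        // The heartbeat could not renew ownership of the task znode; give up.
        return false;
      }
      splitNextBatch();
    }
    return true;
  }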
Code example #4
 /**
  * Wait for tasks to become available at the /hbase/splitlog znode and grab them one at a time.
  * This policy puts an upper limit on the number of log-splitting tasks that can run
  * simultaneously in a cluster.
  *
  * <p>Synchronization on {@link #taskReadyLock} ensures that the worker tries to grab every task
  * that has been put up.
  *
  * @throws InterruptedException
  */
 @Override
 public void taskLoop() throws InterruptedException {
   while (!shouldStop) {
     int seq_start = taskReadySeq;
      List<String> paths = getTaskList();
     if (paths == null) {
       LOG.warn(
           "Could not get tasks, did someone remove "
               + watcher.splitLogZNode
               + " ... worker thread exiting.");
       return;
     }
      // Prefer the meta WAL if one is present; otherwise start at a random offset.
     int offset = (int) (Math.random() * paths.size());
     for (int i = 0; i < paths.size(); i++) {
       if (DefaultWALProvider.isMetaFile(paths.get(i))) {
         offset = i;
         break;
       }
     }
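      // Walk the task list starting at the chosen offset, wrapping around, until we run out of
      // splitter capacity or are asked to stop.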
     int numTasks = paths.size();
     for (int i = 0; i < numTasks; i++) {
       int idx = (i + offset) % paths.size();
       // don't call ZKSplitLog.getNodeName() because that will lead to
       // double encoding of the path name
       if (this.calculateAvailableSplitters(numTasks) > 0) {
         grabTask(ZKUtil.joinZNode(watcher.splitLogZNode, paths.get(idx)));
       } else {
         LOG.debug(
             "Current region server "
                 + server.getServerName()
                 + " has "
                 + this.tasksInProgress.get()
                 + " tasks in progress and can't take more.");
         break;
       }
       if (shouldStop) {
         return;
       }
     }
     SplitLogCounters.tot_wkr_task_grabing.incrementAndGet();
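      // Wait until the set of tasks changes (taskReadySeq is bumped when task znodes change) or
      // checkInterval elapses; while waiting, drop recovering regions whose znodes are gone.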
     synchronized (taskReadyLock) {
       while (seq_start == taskReadySeq) {
         taskReadyLock.wait(checkInterval);
         if (server != null) {
           // check to see if we have stale recovering regions in our internal memory state
           Map<String, HRegion> recoveringRegions = server.getRecoveringRegions();
           if (!recoveringRegions.isEmpty()) {
             // Make a local copy to prevent ConcurrentModificationException when other threads
             // modify recoveringRegions
             List<String> tmpCopy = new ArrayList<String>(recoveringRegions.keySet());
             int listSize = tmpCopy.size();
             for (int i = 0; i < listSize; i++) {
               String region = tmpCopy.get(i);
               String nodePath = ZKUtil.joinZNode(watcher.recoveringRegionsZNode, region);
               try {
                 if (ZKUtil.checkExists(watcher, nodePath) == -1) {
                   HRegion r = recoveringRegions.remove(region);
                   if (r != null) {
                     r.setRecovering(false);
                   }
                   LOG.debug("Mark recovering region:" + region + " up.");
                 } else {
                    // This check is a defensive (or redundant) mechanism to keep stale recovering
                    // regions out of our internal RS memory state when ZooKeeper (the source of
                    // truth) says otherwise. We stop at the first node that still exists because
                    // in the normal case there should be none, so checking the first one is good
                    // enough.
                   break;
                 }
               } catch (KeeperException e) {
                  // Ignore ZooKeeper errors.
                  LOG.debug("Got a ZooKeeper exception while checking a recovering region", e);
                 break;
               }
             }
           }
         }
       }
     }
   }
 }
Code example #5
  /**
   * Try to grab a 'lock' on the task znode to own and execute the task.
   *
   * @param path zk node for the task
   */
  private void grabTask(String path) {
    Stat stat = new Stat();
    byte[] data;
    synchronized (grabTaskLock) {
      currentTask = path;
      workerInGrabTask = true;
      if (Thread.interrupted()) {
        return;
      }
    }
    try {
      try {
        if ((data = ZKUtil.getDataNoWatch(watcher, path, stat)) == null) {
          SplitLogCounters.tot_wkr_failed_to_grab_task_no_data.incrementAndGet();
          return;
        }
      } catch (KeeperException e) {
        LOG.warn("Failed to get data for znode " + path, e);
        SplitLogCounters.tot_wkr_failed_to_grab_task_exception.incrementAndGet();
        return;
      }
      SplitLogTask slt;
      try {
        slt = SplitLogTask.parseFrom(data);
      } catch (DeserializationException e) {
        LOG.warn("Failed parse data for znode " + path, e);
        SplitLogCounters.tot_wkr_failed_to_grab_task_exception.incrementAndGet();
        return;
      }
      if (!slt.isUnassigned()) {
        SplitLogCounters.tot_wkr_failed_to_grab_task_owned.incrementAndGet();
        return;
      }

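      // Try to atomically claim the task by moving its znode to OWNED with a version check;
      // a negative return value means another worker won the race.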
      currentVersion =
          attemptToOwnTask(
              true, watcher, server.getServerName(), path, slt.getMode(), stat.getVersion());
      if (currentVersion < 0) {
        SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.incrementAndGet();
        return;
      }

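      // A rescan node is a placeholder task the split log manager creates to make workers
      // re-list the tasks; there is no WAL to split, so acknowledge it as done immediately.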
      if (ZKSplitLog.isRescanNode(watcher, currentTask)) {
        ZkSplitLogWorkerCoordination.ZkSplitTaskDetails splitTaskDetails =
            new ZkSplitLogWorkerCoordination.ZkSplitTaskDetails();
        splitTaskDetails.setTaskNode(currentTask);
        splitTaskDetails.setCurTaskZKVersion(new MutableInt(currentVersion));

        endTask(
            new SplitLogTask.Done(server.getServerName(), slt.getMode()),
            SplitLogCounters.tot_wkr_task_acquired_rescan,
            splitTaskDetails);
        return;
      }

      LOG.info("worker " + server.getServerName() + " acquired task " + path);
      SplitLogCounters.tot_wkr_task_acquired.incrementAndGet();
      getDataSetWatchAsync();

      submitTask(path, slt.getMode(), currentVersion, reportPeriod);

      // After a successful submit, sleep a little to allow other RSs to grab the remaining tasks.
      try {
        int sleepTime = RandomUtils.nextInt(500) + 500;
        Thread.sleep(sleepTime);
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while yielding for other region servers", e);
        Thread.currentThread().interrupt();
      }
    } finally {
      synchronized (grabTaskLock) {
        workerInGrabTask = false;
        // clear the interrupt from stopTask() otherwise the next task will
        // suffer
        Thread.interrupted();
      }
    }
  }