Beispiel #1
0
 /**
  * Prepares the given handler, submits it to the executor service and registers it in
  * {@code snapshotHandlers} under the snapshot's table name. If any of those steps throws, a
  * best-effort recursive delete of the snapshot's temporary working directory is attempted before
  * the failure is reported to the caller.
  *
  * <p>NOTE: prepareToTakeSnapshot() called before this one takes care of rejecting the snapshot
  * request if the table is busy with another snapshot/restore operation.
  *
  * @param snapshot the snapshot description
  * @param handler the snapshot handler
  * @throws HBaseSnapshotException raised as a SnapshotCreationException carrying the original
  *     cause when the handler could not be prepared, submitted or registered
  */
 private synchronized void snapshotTable(
     SnapshotDescription snapshot, final TakeSnapshotHandler handler)
     throws HBaseSnapshotException {
   try {
     handler.prepare();
     this.executorService.submit(handler);
     this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
   } catch (Exception e) {
     // cleanup the working directory by trying to delete it from the fs.
     Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
     String cleanupFailure =
         "Couldn't delete working directory ("
             + workingDir
             + ") for snapshot:"
             + ClientSnapshotDescriptionUtils.toString(snapshot);
     try {
       if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) {
         LOG.error(cleanupFailure);
       }
     } catch (IOException e1) {
       // Cleanup is best effort: keep going so the original failure is the one reported, but
       // record why the delete failed instead of dropping the cause.
       LOG.error(cleanupFailure, e1);
     }
     // fail the snapshot, preserving the exception that triggered the cleanup
     throw new SnapshotCreationException(
         "Could not build snapshot handler", e, ProtobufUtil.createSnapshotDesc(snapshot));
   }
 }
  /**
   * Writes every region from the given list into the snapshot manifest, using a dedicated
   * executor that is shut down once the region edits have been handed off. The timeout injector
   * is started first and always completed at the end. Any exception raised along the way is
   * wrapped in a ForeignException and handed to the monitor, and the task status is aborted,
   * instead of the exception being rethrown from here.
   *
   * @param regionsAndLocations region/server pairs of the table; only the region half is used
   */
  // TODO consider parallelizing these operations since they are independent. Right now it's just
  // easier to keep them serial though
  @Override
  public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
      throws IOException, KeeperException {
    try {
      timeoutInjector.start();

      // 1. collect the regions hosting this table; the server half of each pair is not needed
      Set<HRegionInfo> regionInfos = new HashSet<HRegionInfo>();
      for (Pair<HRegionInfo, ServerName> regionAndLocation : regionsAndLocations) {
        regionInfos.add(regionAndLocation.getFirst());
      }

      // 2. for each region, write all the info to disk
      String startMessage =
          "Starting to write region info and WALs for regions for offline snapshot:"
              + ClientSnapshotDescriptionUtils.toString(snapshot);
      LOG.info(startMessage);
      status.setStatus(startMessage);

      ThreadPoolExecutor pool = SnapshotManifest.createExecutor(conf, "DisabledTableSnapshot");
      try {
        // per-region work: register the region with the manifest under the table directory
        ModifyRegionUtils.RegionEditTask addToManifest =
            new ModifyRegionUtils.RegionEditTask() {
              @Override
              public void editRegion(final HRegionInfo regionInfo) throws IOException {
                snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
              }
            };
        ModifyRegionUtils.editRegions(pool, regionInfos, addToManifest);
      } finally {
        pool.shutdown();
      }
    } catch (Exception failure) {
      // make sure we capture the exception to propagate back to the client later
      String detail = failure.getMessage();
      String reason =
          "Failed snapshot "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " due to exception:"
              + detail;
      monitor.receive(new ForeignException(reason, failure));
      status.abort("Snapshot of table: " + snapshotTable + " failed because " + detail);
    } finally {
      LOG.debug(
          "Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot) + " as finished.");

      // 3. stop the timer whether or not the work succeeded; there is nothing left to time
      timeoutInjector.complete();
    }
  }
Beispiel #3
0
  /**
   * Check to make sure that we are OK to run the passed snapshot. Rejects the request when a
   * snapshot is already being taken (on the same table or with the same name) or when a restore is
   * in progress on the snapshot's table. Otherwise the snapshot's working directory is wiped and
   * recreated so the new attempt starts from a clean state.
   *
   * @param snapshot description of the snapshot we want to start
   * @throws HBaseSnapshotException if the request is rejected because of a running
   *     snapshot/restore, or if the filesystem could not be prepared to start the snapshot
   */
  private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
      throws HBaseSnapshotException {
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
    TableName snapshotTable = TableName.valueOf(snapshot.getTable());

    // make sure we aren't already running a snapshot
    if (isTakingSnapshot(snapshot)) {
      // a registered handler for the table means the clash is on the table; otherwise it is on
      // the snapshot name
      SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
      throw new SnapshotCreationException(
          "Rejected taking "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " because we are already running another snapshot "
              + (handler != null
                  ? ("on the same table "
                      + ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
                  : "with the same name"),
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    // make sure we aren't running a restore on the same table
    if (isRestoringTable(snapshotTable)) {
      // carry the snapshot description like every other rejection in this method does
      throw new SnapshotCreationException(
          "Rejected taking "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " because we already have a restore in progress on the same table.",
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    try {
      // delete the working directory, since we aren't running the snapshot. Likely leftovers
      // from a failed attempt. The result is ignored on purpose: mkdirs below is the real check.
      fs.delete(workingDir, true);

      // recreate the working directory for the snapshot
      if (!fs.mkdirs(workingDir)) {
        throw new SnapshotCreationException(
            "Couldn't create working directory (" + workingDir + ") for snapshot",
            ProtobufUtil.createSnapshotDesc(snapshot));
      }
    } catch (HBaseSnapshotException e) {
      // our own failure from above: pass it through untouched rather than re-wrapping it below
      throw e;
    } catch (IOException e) {
      throw new SnapshotCreationException(
          "Exception while checking to see if snapshot could be started.",
          e,
          ProtobufUtil.createSnapshotDesc(snapshot));
    }
  }
Beispiel #4
0
  /**
   * Reports whether the given snapshot has finished.
   *
   * <p>The in-progress handler for the snapshot is looked up (and dropped from the in-progress map
   * if its task is complete). When no handler is tracked, the answer is based on whether the
   * snapshot is already completed; when a handler is tracked, any failure it recorded is rethrown
   * with extra procedure information attached.
   *
   * @param expected description of the snapshot to check; must not be null
   * @return true if snapshot is ready to be restored, false if it is still being taken.
   * @throws IOException if error from HDFS or RPC
   * @throws UnknownSnapshotException if no snapshot was passed, or the snapshot is neither
   *     running nor one of the known completed snapshots
   * @throws HBaseSnapshotException if the tracked handler reported a failure
   */
  public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
    // the request must actually name a snapshot
    if (expected == null) {
      throw new UnknownSnapshotException(
          "No snapshot name passed in request, can't figure out which snapshot you want to check.");
    }

    final String description = ClientSnapshotDescriptionUtils.toString(expected);

    // fetch the sentinel; a finished one is removed from the in-progress snapshots map
    SnapshotSentinel sentinel = removeSentinelIfFinished(this.snapshotHandlers, expected);

    // stop tracking "abandoned" handlers
    cleanupSentinels();

    if (sentinel == null) {
      // Nothing tracked for this snapshot, which means one of:
      //   - someone has already requested the snapshot state
      //   - the requested snapshot was completed long time ago (cleanupSentinels() timeout)
      //   - the snapshot was never requested
      // Report "done" when the snapshot is completed, otherwise complain that it is neither
      // running nor known.
      if (isSnapshotCompleted(expected)) {
        return true;
      }
      throw new UnknownSnapshotException(
          "Snapshot "
              + description
              + " is not currently running or one of the known completed snapshots.");
    }

    // surface any failure recorded by the sentinel
    try {
      sentinel.rethrowExceptionIfFailed();
    } catch (ForeignException failure) {
      // enrich the error with whatever the coordinator knows about the procedure
      Procedure procedure = coordinator.getProcedure(expected.getName());
      String procedureInfo =
          procedure != null
              ? procedure.getStatus()
              : expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
      throw new HBaseSnapshotException(
          "Snapshot " + description + " had an error.  " + procedureInfo,
          failure,
          ProtobufUtil.createSnapshotDesc(expected));
    }

    // finally, report the sentinel's own view of completion
    boolean finished = sentinel.isFinished();
    if (finished) {
      LOG.debug("Snapshot '" + description + "' has completed, notifying client.");
    } else if (LOG.isDebugEnabled()) {
      LOG.debug("Snapshoting '" + description + "' is still in progress!");
    }
    return finished;
  }
  /**
   * Submits one RegionSnapshotTask per region to the task manager and blocks until all of them
   * have completed. Returns immediately when there are no regions. The monitor is polled before
   * submitting and after each submission so that an already-reported error stops the work early.
   *
   * @throws ForeignException propagated from the monitor checks, or wrapping the
   *     InterruptedException when the wait for outstanding tasks is interrupted (the thread's
   *     interrupt status is restored in that case)
   * @throws IllegalStateException if the task manager still has tasks when this method starts
   */
  private void flushSnapshot() throws ForeignException {
    if (regions.isEmpty()) {
      // No regions on this RS, we are basically done.
      return;
    }

    monitor.rethrowException();

    // assert that the taskManager is empty.
    if (taskManager.hasTasks()) {
      throw new IllegalStateException(
          "Attempting to take snapshot "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " but we currently have outstanding tasks");
    }

    // Submit one task per region so the regions are processed in parallel.
    for (Region region : regions) {
      taskManager.submitTask(new RegionSnapshotTask(region));
      // bail out between submissions if an error has been reported in the meantime
      monitor.rethrowException();
    }

    // wait for everything to complete.
    LOG.debug("Flush Snapshot Tasks submitted for " + regions.size() + " regions");
    try {
      taskManager.waitForOutstandingTasks();
    } catch (InterruptedException e) {
      // Converting to ForeignException would otherwise hide the interruption from code further up
      // the stack, so put the interrupt status back before rethrowing.
      Thread.currentThread().interrupt();
      LOG.error("got interrupted exception for " + getMemberName(), e);
      throw new ForeignException(getMemberName(), e);
    }
  }
  /**
   * Marks this snapshot operation as finished and reports the cancellation to the monitor as a
   * ForeignException wrapping a CancellationException. Does nothing when the operation is already
   * marked finished.
   *
   * @param why reason for the cancellation; logged and used as the CancellationException message
   */
  @Override
  public void cancel(String why) {
    if (!finished) {
      this.finished = true;
      LOG.info(
          "Stop taking snapshot="
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " because: "
              + why);
      CancellationException cancellation = new CancellationException(why);
      // the master's server name identifies where the cancellation came from
      String source = master.getServerName().toString();
      monitor.receive(new ForeignException(source, cancellation));
    }
  }
Beispiel #7
0
  /**
   * Submits a RestoreSnapshotProcedure for the given snapshot and records the resulting procedure
   * id against the destination table. The request is refused when the destination table already
   * has a snapshot or a restore in progress.
   *
   * @param snapshot Snapshot Descriptor
   * @param hTableDescriptor Table Descriptor
   * @param nonceGroup unique value to prevent duplicated RPC
   * @param nonce unique value to prevent duplicated RPC
   * @return procId the ID of the restore snapshot procedure
   * @throws HBaseSnapshotException raised as a RestoreSnapshotException when the table is busy or
   *     the procedure could not be submitted
   */
  private synchronized long restoreSnapshot(
      final SnapshotDescription snapshot,
      final HTableDescriptor hTableDescriptor,
      final long nonceGroup,
      final long nonce)
      throws HBaseSnapshotException {
    final TableName targetTable = hTableDescriptor.getTableName();

    // refuse while a snapshot of the same table is being taken
    if (isTakingSnapshot(targetTable)) {
      throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + targetTable);
    }

    // refuse while another restore of the same table is running
    if (isRestoringTable(targetTable)) {
      throw new RestoreSnapshotException("Restore already in progress on the table=" + targetTable);
    }

    try {
      RestoreSnapshotProcedure procedure =
          new RestoreSnapshotProcedure(
              master.getMasterProcedureExecutor().getEnvironment(), hTableDescriptor, snapshot);
      long procId =
          master.getMasterProcedureExecutor().submitProcedure(procedure, nonceGroup, nonce);
      // remember which procedure is restoring this table
      this.restoreTableToProcIdMap.put(targetTable, procId);
      return procId;
    } catch (Exception e) {
      String failure =
          "Couldn't restore the snapshot="
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " on table="
              + targetTable;
      LOG.error(failure, e);
      throw new RestoreSnapshotException(failure, e);
    }
  }
Beispiel #8
0
  /**
   * Take a snapshot based on the enabled/disabled state of the table.
   *
   * <p>The request is rejected if the snapshot is already completed or the table has no
   * descriptor. The description is then rebuilt with a layout version (when none was given) and,
   * with security enabled and a request user present, the owner. The coprocessor pre hook runs
   * before the snapshot is started and the post hook after it has been started.
   *
   * @param snapshot description of the snapshot to take
   * @throws HBaseSnapshotException when a snapshot specific exception occurs.
   * @throws IOException when some sort of generic IO exception occurs.
   */
  public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
    // nothing to do if this snapshot has already been completed
    if (isSnapshotCompleted(snapshot)) {
      throw new SnapshotExistsException(
          "Snapshot '" + snapshot.getName() + "' already stored on the filesystem.",
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    LOG.debug("No existing snapshot, attempting snapshot...");

    // stop tracking "abandoned" handlers
    cleanupSentinels();

    // look up the table descriptor; a missing one means the table does not exist
    HTableDescriptor tableDescriptor;
    try {
      tableDescriptor = master.getTableDescriptors().get(TableName.valueOf(snapshot.getTable()));
    } catch (FileNotFoundException e) {
      String notFound = "Table:" + snapshot.getTable() + " info doesn't exist!";
      LOG.error(notFound);
      throw new SnapshotCreationException(notFound, e, ProtobufUtil.createSnapshotDesc(snapshot));
    } catch (IOException e) {
      throw new SnapshotCreationException(
          "Error while geting table description for table " + snapshot.getTable(),
          e,
          ProtobufUtil.createSnapshotDesc(snapshot));
    }
    if (tableDescriptor == null) {
      throw new SnapshotCreationException(
          "Table '" + snapshot.getTable() + "' doesn't exist, can't take snapshot.",
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    // complete the description: default layout version and, when applicable, the owner
    SnapshotDescription.Builder completed = snapshot.toBuilder();
    if (!snapshot.hasVersion()) {
      completed.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
    }
    User requestUser = RpcServer.getRequestUser();
    if (User.isHBaseSecurityEnabled(master.getConfiguration()) && requestUser != null) {
      completed.setOwner(requestUser.getShortName());
    }
    snapshot = completed.build();

    // pre coprocessor hook
    MasterCoprocessorHost coprocessorHost = master.getMasterCoprocessorHost();
    if (coprocessorHost != null) {
      coprocessorHost.preSnapshot(snapshot, tableDescriptor);
    }

    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
    boolean tableEnabled =
        master.getTableStateManager().isTableState(snapshotTable, TableState.State.ENABLED);
    if (tableEnabled) {
      // enabled table: the region servers do the actual snapshot work
      LOG.debug("Table enabled, starting distributed snapshot.");
      snapshotEnabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    } else if (master.getTableStateManager().isTableState(snapshotTable, TableState.State.DISABLED)) {
      // disabled table: the master creates the snapshot on its own
      LOG.debug("Table is disabled, running snapshot entirely on master.");
      snapshotDisabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    } else {
      // neither enabled nor disabled: refuse, the post hook is not invoked in this case
      LOG.error(
          "Can't snapshot table '"
              + snapshot.getTable()
              + "', isn't open or closed, we don't know what to do!");
      throw new SnapshotCreationException(
          "Table is not entirely open or closed",
          new TablePartiallyOpenException(snapshot.getTable() + " isn't fully open."),
          ProtobufUtil.createSnapshotDesc(snapshot));
    }

    // post coprocessor hook
    if (coprocessorHost != null) {
      coprocessorHost.postSnapshot(snapshot, tableDescriptor);
    }
  }
  /**
   * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} call
   * should get implemented for each snapshot flavor.
   *
   * <p>Steps: write the snapshot info and table descriptor into the working directory, look up the
   * table's regions and their servers, run {@link #snapshotRegions(List)}, consolidate the
   * manifest, verify the snapshot and finally move it from the working directory to the snapshot
   * directory. Any failure aborts the task status, is reported to the monitor and cancels this
   * operation; nothing is rethrown. The working directory is removed if it still exists and the
   * table lock is released in every case.
   */
  @Override
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value = "REC_CATCH_EXCEPTION",
      justification = "Intentional")
  public void process() {
    String msg =
        "Running "
            + snapshot.getType()
            + " table snapshot "
            + snapshot.getName()
            + " "
            + eventType
            + " on table "
            + snapshotTable;
    LOG.info(msg);
    status.setStatus(msg);
    try {
      // If regions move after this meta scan, the region specific snapshot should fail, triggering
      // an external exception that gets captured here.

      // write down the snapshot info in the working directory
      SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
      snapshotManifest.addTableDescriptor(this.htd);
      monitor.rethrowException();

      // the meta table's regions are located through ZooKeeper, everything else through meta
      List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
      if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
        regionsAndLocations =
            new MetaTableLocator().getMetaRegionsAndLocations(server.getZooKeeper());
      } else {
        regionsAndLocations =
            MetaTableAccessor.getTableRegionsAndLocations(
                server.getConnection(), snapshotTable, false);
      }

      // run the snapshot
      snapshotRegions(regionsAndLocations);
      monitor.rethrowException();

      // collect the names of the servers hosting the regions, for the verification step below
      Set<String> serverNames = new HashSet<String>();
      for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
        if (p != null && p.getFirst() != null && p.getSecond() != null) {
          HRegionInfo hri = p.getFirst();
          // offline split regions do not contribute a server
          if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
            continue;
          }
          serverNames.add(p.getSecond().toString());
        }
      }

      // flush the in-memory state, and write the single manifest
      status.setStatus("Consolidate snapshot: " + snapshot.getName());
      snapshotManifest.consolidate();

      // verify the snapshot is valid
      status.setStatus("Verifying snapshot: " + snapshot.getName());
      verifier.verifySnapshot(this.workingDir, serverNames);

      // complete the snapshot, atomically moving from tmp to .snapshot dir.
      completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
      msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
      status.markComplete(msg);
      LOG.info(msg);
      metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
    } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
      status.abort(
          "Failed to complete snapshot "
              + snapshot.getName()
              + " on table "
              + snapshotTable
              + " because "
              + e.getMessage());
      String reason =
          "Failed taking snapshot "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " due to exception:"
              + e.getMessage();
      LOG.error(reason, e);
      ForeignException ee = new ForeignException(reason, e);
      monitor.receive(ee);
      // need to mark this completed to close off and allow cleanup to happen.
      cancel(reason);
    } finally {
      LOG.debug("Launching cleanup of working dir:" + workingDir);
      try {
        // if the working dir is still present, the snapshot has failed, so delete what is left
        // of it.
        if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
          LOG.error("Couldn't delete snapshot working directory:" + workingDir);
        }
      } catch (IOException e) {
        // best-effort cleanup: do not fail the handler, but keep the cause in the log
        LOG.error("Couldn't delete snapshot working directory:" + workingDir, e);
      }
      releaseTableLock();
    }
  }