Ejemplo n.º 1
0
 /**
  * Opens both daughter regions, each on its own {@code DaughterOpener} thread, and waits for
  * both to finish.
  *
  * <p>If the server is stopped or the services are stopping, nothing is opened; the decision is
  * only logged. Otherwise a journal phase is recorded for each daughter that opened without an
  * exception, the split is reported through {@code services} (when non-null), and both daughters
  * are added to the online regions.
  *
  * @param server Hosting server instance. Can be null when testing
  * @param services Used to online/offline regions. May be null, in which case nothing is reported
  *     and nothing is added to the online regions
  * @param a first daughter region
  * @param b second daughter region
  * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
  *     RegionServerServices, User)}. Raised when an opener failed, when the wait was interrupted
  *     (as an {@link InterruptedIOException}), or when the state transition report is rejected
  */
 @VisibleForTesting
 void openDaughters(final Server server, final RegionServerServices services, Region a, Region b)
     throws IOException {
   final boolean stopped = server != null && server.isStopped();
   final boolean stopping = services != null && services.isStopping();
   // TODO: Is this check needed here?
   if (stopped || stopping) {
     // Note the message names the second daughter before the first.
     StringBuilder msg = new StringBuilder("Not opening daughters ");
     msg.append(b.getRegionInfo().getRegionNameAsString());
     msg.append(" and ");
     msg.append(a.getRegionInfo().getRegionNameAsString());
     msg.append(" because stopping=").append(stopping);
     msg.append(", stopped=").append(stopped);
     LOG.info(msg.toString());
     return;
   }

   // Kick off both opens before waiting on either so they run in parallel.
   DaughterOpener openerA = new DaughterOpener(server, a);
   DaughterOpener openerB = new DaughterOpener(server, b);
   openerA.start();
   openerB.start();
   try {
     openerA.join();
     if (openerA.getException() == null) {
       transition(SplitTransactionPhase.OPENED_REGION_A);
     }
     openerB.join();
     if (openerB.getException() == null) {
       transition(SplitTransactionPhase.OPENED_REGION_B);
     }
   } catch (InterruptedException e) {
     // Surface the interruption as an IOException subtype, keeping the original as the cause.
     InterruptedIOException interrupted = new InterruptedIOException();
     interrupted.initCause(e);
     throw interrupted;
   }

   // Failures are reported only after both joins; a failure of A takes precedence over B.
   if (openerA.getException() != null) {
     throw new IOException("Failed " + openerA.getName(), openerA.getException());
   }
   if (openerB.getException() != null) {
     throw new IOException("Failed " + openerB.getName(), openerB.getException());
   }

   if (services == null) {
     return;
   }
   boolean reported =
       services.reportRegionStateTransition(
           TransitionCode.SPLIT, parent.getRegionInfo(), hri_a, hri_b);
   if (!reported) {
     throw new IOException(
         "Failed to report split region to master: "
             + parent.getRegionInfo().getShortNameToLog());
   }
   // Should add it to OnlineRegions
   services.addToOnlineRegions(b);
   services.addToOnlineRegions(a);
 }
Ejemplo n.º 2
0
  /**
   * Validates that the parent region can be split at {@code splitrow} and, if so, builds the
   * {@code HRegionInfo} of both daughters and records the {@code PREPARED} phase.
   *
   * @return {@code false} when the parent is not splittable, when no split row is set, or when
   *     the split row equals the parent's start key or is not contained in the parent;
   *     {@code true} otherwise
   * @throws IOException propagated from the calls made while preparing
   */
  @Override
  public boolean prepare() throws IOException {
    // Split key can be null if this region is unsplittable; i.e. has refs.
    if (!this.parent.isSplittable() || this.splitrow == null) {
      return false;
    }

    final HRegionInfo parentInfo = this.parent.getRegionInfo();
    parent.prepareToSplit();

    // The split row must lie strictly after the start key and inside the parent's key range.
    final byte[] firstKey = parentInfo.getStartKey();
    final byte[] lastKey = parentInfo.getEndKey();
    final boolean splitsAtStart = Bytes.equals(firstKey, splitrow);
    if (splitsAtStart || !this.parent.getRegionInfo().containsRow(splitrow)) {
      LOG.info(
          "Split row is not inside region key range or is equal to "
              + "startkey: "
              + Bytes.toStringBinary(this.splitrow));
      return false;
    }

    // Both daughters share the same region id.
    final long regionId = getDaughterRegionIdTimestamp(parentInfo);
    this.hri_a = new HRegionInfo(parentInfo.getTable(), firstKey, this.splitrow, false, regionId);
    this.hri_b = new HRegionInfo(parentInfo.getTable(), this.splitrow, lastKey, false, regionId);

    transition(SplitTransactionPhase.PREPARED);
    return true;
  }
Ejemplo n.º 3
0
 /**
  * Runs the split: creates the daughters, performs the steps that follow the point of no
  * return, and records the {@code COMPLETED} phase.
  *
  * @param server hosting server instance; stored on this transaction
  * @param services region server services; stored on this transaction
  * @param user user passed on to the daughter creation and post-PONR steps
  * @return the pair of daughter regions produced by {@code createDaughters}
  * @throws IOException if any step fails
  */
 @Override
 public PairOfSameType<Region> execute(
     final Server server, final RegionServerServices services, User user) throws IOException {
   this.server = server;
   this.rsServices = services;

   final PairOfSameType<Region> daughters = createDaughters(server, services, user);
   stepsAfterPONR(server, services, daughters, user);

   transition(SplitTransactionPhase.COMPLETED);
   return daughters;
 }
Ejemplo n.º 4
0
  /**
   * Steps performed once the split has passed the point of no return: runs the
   * {@code preSplitAfterPONR} coprocessor hook, opens both daughters, then runs the
   * {@code postSplit} hook bracketed by its two journal phases.
   *
   * @param server hosting server instance, handed to {@code openDaughters}
   * @param services region server services, handed to {@code openDaughters}
   * @param regions the two daughter regions
   * @param user user passed to the coprocessor hooks
   * @throws IOException if a hook, the daughter open, or a phase transition fails
   */
  @VisibleForTesting
  void stepsAfterPONR(
      final Server server,
      final RegionServerServices services,
      final PairOfSameType<Region> regions,
      User user)
      throws IOException {
    // Coprocessor callback (skipped when the parent has no coprocessor host)
    if (this.parent.getCoprocessorHost() != null) {
      parent.getCoprocessorHost().preSplitAfterPONR(user);
    }

    final Region daughterA = regions.getFirst();
    final Region daughterB = regions.getSecond();
    openDaughters(server, services, daughterA, daughterB);

    transition(SplitTransactionPhase.BEFORE_POST_SPLIT_HOOK);

    // Coprocessor callback
    if (parent.getCoprocessorHost() != null) {
      this.parent.getCoprocessorHost().postSplit(daughterA, daughterB, user);
    }

    transition(SplitTransactionPhase.AFTER_POST_SPLIT_HOOK);
  }
Ejemplo n.º 5
0
  /**
   * Undoes a failed split by replaying the journal from the newest entry to the oldest and
   * reverting each phase that left state behind.
   *
   * @param server hosting server instance; stored on this transaction
   * @param services region server services; stored on this transaction. May be null, in which
   *     case the master is not notified and the parent is not re-added to the online regions
   * @param user user passed to the rollback coprocessor hooks
   * @return {@code true} if every journal entry was processed; {@code false} if the
   *     {@code SPLIT_REVERTED} report was rejected or the journal contains {@code PONR}. In both
   *     {@code false} cases the method returns immediately, without running the
   *     {@code postRollBackSplit} hook
   * @throws IOException if a hook, a phase transition, or a file-system cleanup fails
   * @throws RuntimeException if the parent cannot be re-initialized or a journal entry has an
   *     unhandled phase
   */
  @Override
  public boolean rollback(final Server server, final RegionServerServices services, User user)
      throws IOException {
    this.server = server;
    this.rsServices = services;
    // Coprocessor callback
    if (this.parent.getCoprocessorHost() != null) {
      this.parent.getCoprocessorHost().preRollBackSplit(user);
    }

    // Start past the last entry and walk backwards so the most recent phase is undone first.
    for (ListIterator<JournalEntry> cursor = this.journal.listIterator(this.journal.size());
        cursor.hasPrevious(); ) {
      final JournalEntry entry = cursor.previous();
      final SplitTransactionPhase phase = entry.getPhase();

      transition(phase, true);

      switch (phase) {
        case SET_SPLITTING:
          if (services != null
              && !services.reportRegionStateTransition(
                  TransitionCode.SPLIT_REVERTED, parent.getRegionInfo(), hri_a, hri_b)) {
            return false;
          }
          break;

        case CREATE_SPLIT_DIR:
          this.parent.writestate.writesEnabled = true;
          this.parent.getRegionFileSystem().cleanupSplitsDir();
          break;

        case CLOSED_PARENT_REGION:
          try {
            // So, this returns a seqid but if we just closed and then reopened, we
            // should be ok. On close, we flushed using sequenceid obtained from
            // hosting regionserver so no need to propagate the sequenceid returned
            // out of initialize below up into regionserver as we normally do.
            // TODO: Verify.
            this.parent.initialize();
          } catch (IOException e) {
            LOG.error(
                "Failed rollbacking CLOSED_PARENT_REGION of region "
                    + parent.getRegionInfo().getRegionNameAsString(),
                e);
            throw new RuntimeException(e);
          }
          break;

        case STARTED_REGION_A_CREATION:
          this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
          break;

        case STARTED_REGION_B_CREATION:
          this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
          break;

        case OFFLINED_PARENT:
          if (services != null) {
            services.addToOnlineRegions(this.parent);
          }
          break;

        case PONR:
          // We got to the point-of-no-return so we need to just abort. Return
          // immediately.  Do not clean up created daughter regions.  They need
          // to be in place so we don't delete the parent region mistakenly.
          // See HBASE-3872.
          return false;

        // Phases that only mark progress; there is nothing to undo for them.
        case STARTED:
        case PREPARED:
        case BEFORE_PRE_SPLIT_HOOK:
        case AFTER_PRE_SPLIT_HOOK:
        case BEFORE_POST_SPLIT_HOOK:
        case AFTER_POST_SPLIT_HOOK:
        case OPENED_REGION_A:
        case OPENED_REGION_B:
        case COMPLETED:
          break;

        default:
          throw new RuntimeException("Unhandled journal entry: " + entry);
      }
    }

    // Coprocessor callback
    if (this.parent.getCoprocessorHost() != null) {
      this.parent.getCoprocessorHost().postRollBackSplit(user);
    }
    return true;
  }
Ejemplo n.º 6
0
  /**
   * Performs the split steps that precede the point of no return: asks for permission to split
   * via {@code services}, creates the splits directory, closes the parent, takes it out of the
   * online regions, splits the store files, and creates both daughter regions.
   *
   * <p>A journal phase is recorded around each step so that a failure can be rolled back.
   *
   * @param server hosting server instance; not read by this method
   * @param services used to report the state transition and to offline the parent. May be null,
   *     in which case the report and the removal from the online regions are both skipped
   * @param testing when {@code true}, the parent is not removed from the online regions
   * @return the pair of daughter regions (first, second)
   * @throws IOException if the {@code READY_TO_SPLIT} report is rejected, if closing the parent
   *     fails or the parent was already closed by another thread, or if any other step fails
   */
  @VisibleForTesting
  public PairOfSameType<Region> stepsBeforePONR(
      final Server server, final RegionServerServices services, boolean testing)
      throws IOException {
    if (services != null
        && !services.reportRegionStateTransition(
            TransitionCode.READY_TO_SPLIT, parent.getRegionInfo(), hri_a, hri_b)) {
      throw new IOException(
          "Failed to get ok from master to split "
              + parent.getRegionInfo().getRegionNameAsString());
    }

    transition(SplitTransactionPhase.SET_SPLITTING);

    this.parent.getRegionFileSystem().createSplitsDir();

    transition(SplitTransactionPhase.CREATE_SPLIT_DIR);

    Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
    Exception exceptionToThrow = null;
    try {
      hstoreFilesToSplit = this.parent.close(false);
    } catch (Exception e) {
      // Held back rather than rethrown here so the journal phase below is still recorded.
      exceptionToThrow = e;
    }
    if (exceptionToThrow == null && hstoreFilesToSplit == null) {
      // The region was closed by a concurrent thread.  We can't continue
      // with the split, instead we must just abandon the split.  If we
      // reopen or split this could cause problems because the region has
      // probably already been moved to a different server, or is in the
      // process of moving to a different server.
      exceptionToThrow = closedByOtherException;
    }
    // Journal the close unless another thread closed the region; this includes the case
    // where close() itself threw.
    if (exceptionToThrow != closedByOtherException) {
      transition(SplitTransactionPhase.CLOSED_PARENT_REGION);
    }
    if (exceptionToThrow != null) {
      if (exceptionToThrow instanceof IOException) throw (IOException) exceptionToThrow;
      throw new IOException(exceptionToThrow);
    }
    // services may be null (it is null-checked at the top of this method and in rollback);
    // without this guard a null services with testing == false raised a NullPointerException.
    if (!testing && services != null) {
      services.removeFromOnlineRegions(this.parent, null);
    }

    transition(SplitTransactionPhase.OFFLINED_PARENT);

    // TODO: If splitStoreFiles were multithreaded would we complete steps in
    // less elapsed time?  St.Ack 20100920
    //
    // splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
    // clean this up.
    Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);

    // Log to the journal that we are creating region A, the first daughter
    // region.  We could fail halfway through.  If we do, we could have left
    // stuff in fs that needs cleanup -- a storefile or two.  Thats why we
    // add entry to journal BEFORE rather than AFTER the change.

    transition(SplitTransactionPhase.STARTED_REGION_A_CREATION);

    // The reference file count is checked in the splits dir before the daughter is created
    // and again under the table dir afterwards.
    assertReferenceFileCount(
        expectedReferences.getFirst(), this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
    HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
    assertReferenceFileCount(
        expectedReferences.getFirst(),
        new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));

    // Ditto

    transition(SplitTransactionPhase.STARTED_REGION_B_CREATION);

    assertReferenceFileCount(
        expectedReferences.getSecond(), this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
    HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
    assertReferenceFileCount(
        expectedReferences.getSecond(),
        new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));

    return new PairOfSameType<Region>(a, b);
  }
Ejemplo n.º 7
0
  /**
   * Prepare the regions and region files.
   *
   * <p>Runs the pre-split coprocessor hooks, performs the steps that precede the point of no
   * return, validates any mutations supplied by the {@code preSplitBeforePONR} hook, records the
   * {@code PONR} phase, and finally reports {@code SPLIT_PONR} through {@code services}.
   *
   * @param server Hosting server instance. Can be null when testing; a null server is treated as
   *     the testing (no cluster) mode
   * @param services Used to online/offline regions and to report state transitions. May be null
   * @param user user passed to the coprocessor hooks
   * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server,
   *     RegionServerServices, User)}
   * @return Regions created
   */
  @VisibleForTesting
  PairOfSameType<Region> createDaughters(
      final Server server, final RegionServerServices services, User user) throws IOException {
    LOG.info("Starting split of region " + this.parent);
    final boolean serverStopped = server != null && server.isStopped();
    if (serverStopped || (services != null && services.isStopping())) {
      throw new IOException("Server is stopped or stopping");
    }
    assert !this.parent.lock.writeLock().isHeldByCurrentThread()
        : "Unsafe to hold write lock while performing RPCs";

    transition(SplitTransactionPhase.BEFORE_PRE_SPLIT_HOOK);

    // Coprocessor callback
    if (this.parent.getCoprocessorHost() != null) {
      // TODO: Remove one of these
      parent.getCoprocessorHost().preSplit(user);
      parent.getCoprocessorHost().preSplit(splitrow, user);
    }

    transition(SplitTransactionPhase.AFTER_PRE_SPLIT_HOOK);

    // If true, no cluster to write meta edits to or to update znodes in.
    final boolean testing =
        server == null || server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
    // Outside of testing mode the timeout may be overridden from the server configuration;
    // the current value serves as the default.
    if (!testing) {
      this.fileSplitTimeout =
          server
              .getConfiguration()
              .getLong("hbase.regionserver.fileSplitTimeout", this.fileSplitTimeout);
    }

    final PairOfSameType<Region> daughterRegions = stepsBeforePONR(server, services, testing);

    final List<Mutation> metaEntries = new ArrayList<Mutation>();
    if (this.parent.getCoprocessorHost() != null) {
      // A true return from the hook means the coprocessor is bypassing the split.
      if (parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries, user)) {
        throw new IOException(
            "Coprocessor bypassing region "
                + parent.getRegionInfo().getRegionNameAsString()
                + " split.");
      }
      // Every mutation handed back by the hook must carry a row key that parses as a
      // region name.
      try {
        for (Mutation entry : metaEntries) {
          HRegionInfo.parseRegionName(entry.getRow());
        }
      } catch (IOException e) {
        LOG.error(
            "Row key of mutation from coprossor is not parsable as region name."
                + "Mutations from coprocessor should only for hbase:meta table.");
        throw e;
      }
    }

    // This is the point of no return.  Adding subsequent edits to .META. as we
    // do below when we do the daughter opens adding each to .META. can fail in
    // various interesting ways the most interesting of which is a timeout
    // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
    // then subsequent failures need to crash out this regionserver; the
    // server shutdown processing should be able to fix-up the incomplete split.
    // The offlined parent will have the daughters as extra columns.  If
    // we leave the daughter regions in place and do not remove them when we
    // crash out, then they will have their references to the parent in place
    // still and the server shutdown fixup of .META. will point to these
    // regions.
    // The PONR journal entry is added before the parent is offlined in meta, so
    // that even if that update times out, the regionserver exits and the master's
    // ServerShutdownHandler fixes up the daughters and avoids data loss (See
    // HBase-4562).

    transition(SplitTransactionPhase.PONR);

    // Edit parent in meta.  Offlines parent region and adds splita and splitb
    // as an atomic update. See HBASE-7721. This update to META determines
    // whether the region is considered split or not in case of failures.
    // If it is successful, master will roll-forward, if not, master will rollback
    // and assign the parent region.
    if (services != null) {
      final boolean acknowledged =
          services.reportRegionStateTransition(
              TransitionCode.SPLIT_PONR, parent.getRegionInfo(), hri_a, hri_b);
      if (!acknowledged) {
        // Passed PONR, let SSH clean it up
        throw new IOException(
            "Failed to notify master that split passed PONR: "
                + parent.getRegionInfo().getRegionNameAsString());
      }
    }
    return daughterRegions;
  }
Ejemplo n.º 8
0
 /**
  * Convenience overload that moves the transaction to {@code nextPhase} by delegating to the
  * two-argument {@code transition} with the boolean flag set to {@code false}.
  *
  * @param nextPhase the phase to transition to
  * @throws IOException propagated from the two-argument overload
  */
 private void transition(final SplitTransactionPhase nextPhase) throws IOException {
   // NOTE(review): rollback() passes true for this flag, so it presumably marks a transition
   // made while rolling back -- confirm against the two-argument overload.
   final boolean duringRollback = false;
   transition(nextPhase, duringRollback);
 }