Ejemplo n.º 1
0
  /**
   * Starts a sub-procedure on the local member using the payload stored in the given znode.
   *
   * <p>No de-duplication is performed here: every call creates and submits a sub-procedure, so a
   * procedure znode that is reported more than once is started more than once. Preventing that is
   * left to the coordinator.
   *
   * <p>A watch is set on the procedure's abort znode first; when that znode already exists nothing
   * is started. ZooKeeper failures are reported to the member through
   * {@code controllerConnectionFailure}; malformed data or an illegal state is reported through
   * {@link #sendMemberAborted}.
   *
   * @param path full path to the znode for the procedure to start
   */
  private synchronized void startNewSubprocedure(String path) {
    LOG.debug("Found procedure znode: " + path);
    final String opName = ZKUtil.getNodeName(path);

    // Watch the abort znode before reading any data; an existing abort means we must not start.
    final String abortZNode = zkController.getAbortZNode(opName);
    boolean abortAlreadyPosted;
    try {
      abortAlreadyPosted = ZKUtil.watchAndCheckExists(zkController.getWatcher(), abortZNode);
    } catch (KeeperException e) {
      member.controllerConnectionFailure(
          "Failed to get the abort znode (" + abortZNode + ") for procedure :" + opName, e, opName);
      return;
    }
    if (abortAlreadyPosted) {
      LOG.debug("Not starting:" + opName + " because we already have an abort notification.");
      return;
    }

    // Stays null until createSubprocedure succeeds, so the abort paths below may see null.
    Subprocedure subproc = null;
    try {
      final byte[] raw = ZKUtil.getData(zkController.getWatcher(), path);
      final boolean hasPbMagic = ProtobufUtil.isPBMagicPrefix(raw);
      if (!hasPbMagic) {
        final String msg =
            "Data in for starting procuedure " + opName
                + " is illegally formatted (no pb magic). "
                + "Killing the procedure: " + Bytes.toString(raw);
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
      }
      LOG.debug("start proc data length is " + raw.length);
      // Drop the protobuf magic prefix; the member only receives the bytes that follow it.
      final byte[] payload = Arrays.copyOfRange(raw, ProtobufUtil.lengthOfPBMagic(), raw.length);
      LOG.debug("Found data for znode:" + path);
      subproc = member.createSubprocedure(opName, payload);
      member.submitSubprocedure(subproc);
    } catch (IllegalArgumentException iae) {
      LOG.error("Illegal argument exception", iae);
      sendMemberAborted(subproc, new ForeignException(getMemberName(), iae));
    } catch (IllegalStateException ise) {
      LOG.error("Illegal state exception ", ise);
      sendMemberAborted(subproc, new ForeignException(getMemberName(), ise));
    } catch (KeeperException e) {
      member.controllerConnectionFailure(
          "Failed to get data for new procedure:" + opName, e, opName);
    } catch (InterruptedException e) {
      member.controllerConnectionFailure(
          "Failed to get data for new procedure:" + opName, e, opName);
      // Restore the interrupt flag for whoever owns this thread.
      Thread.currentThread().interrupt();
    }
  }
Ejemplo n.º 2
0
  /**
   * Announces that this member has acquired the barrier for the given sub-procedure.
   *
   * <p>A znode named after this member is created under the procedure's acquire-barrier znode
   * (the procedure name is used as the barrier name). Afterwards a watch is placed on the
   * procedure's reached-barrier znode; if that znode already exists the reached-barrier handling
   * is triggered immediately, otherwise the watch is left in place.
   *
   * <p>A {@code KeeperException} is not propagated; it is reported to the member through
   * {@code controllerConnectionFailure}.
   *
   * @param sub the sub-procedure whose name identifies the barrier to join
   */
  @Override
  public void sendMemberAcquired(Subprocedure sub) throws IOException {
    final String procName = sub.getName();
    try {
      LOG.debug(
          "Member: '" + memberName + "' joining acquired barrier for procedure (" + procName
              + ") in zk");
      final String acquireBarrier = ZKProcedureUtil.getAcquireBarrierNode(zkController, procName);
      final String memberAcquiredNode = ZKUtil.joinZNode(acquireBarrier, memberName);
      ZKUtil.createAndFailSilent(zkController.getWatcher(), memberAcquiredNode);

      // Watch the reached-barrier znode for this procedure.
      final String reachedBarrier = zkController.getReachedBarrierNode(procName);
      LOG.debug("Watch for global barrier reached:" + reachedBarrier);
      final boolean alreadyReached =
          ZKUtil.watchAndCheckExists(zkController.getWatcher(), reachedBarrier);
      if (alreadyReached) {
        receivedReachedGlobalBarrier(reachedBarrier);
      }
    } catch (KeeperException e) {
      member.controllerConnectionFailure(
          "Failed to acquire barrier for procedure: " + procName + " and member: " + memberName,
          e,
          procName);
    }
  }
Ejemplo n.º 3
0
 /**
  * Lists the children of the acquired-barrier znode, leaving a watch for new children, and starts
  * a sub-procedure for every child found.
  *
  * <p>If the listing fails with a {@code KeeperException} the failure is reported to the member
  * through {@code controllerConnectionFailure} and nothing is started. A {@code null} listing is
  * treated as "no running procedures".
  */
 private void waitForNewProcedures() {
   // watch for new procedures that we need to start subprocedures for
   LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'");
   List<String> runningProcedures;
   try {
     runningProcedures =
         ZKUtil.listChildrenAndWatchForNewChildren(
             zkController.getWatcher(), zkController.getAcquiredBarrier());
   } catch (KeeperException e) {
     member.controllerConnectionFailure(
         "General failure when watching for new procedures", e, null);
     // The listing failed, so there is nothing to start; return here rather than falling through
     // to the "No running procedures." message, which would misreport the failure.
     return;
   }
   if (runningProcedures == null) {
     LOG.debug("No running procedures.");
     return;
   }
   for (String procName : runningProcedures) {
     // then read in the procedure information
     String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName);
     startNewSubprocedure(path);
   }
 }
Ejemplo n.º 4
0
 /**
  * Lists the children of the abort znode, leaving a watch for new children, and passes every
  * child found to {@link #abort(String)}.
  *
  * <p>A {@code KeeperException} is reported to the member through
  * {@code controllerConnectionFailure}.
  */
 private void watchForAbortedProcedures() {
   LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'");
   try {
     // this is the list of the currently aborted procedures
     List<String> abortedNodes =
         ZKUtil.listChildrenAndWatchForNewChildren(
             zkController.getWatcher(), zkController.getAbortZnode());
     if (abortedNodes == null) {
       // The listing may be null rather than empty (presumably when the abort znode does not
       // exist -- confirm against ZKUtil); iterating it directly would throw a
       // NullPointerException.
       LOG.debug("No aborted procedures.");
       return;
     }
     for (String node : abortedNodes) {
       String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node);
       abort(abortNode);
     }
   } catch (KeeperException e) {
     member.controllerConnectionFailure(
         "Failed to list children for abort node:" + zkController.getAbortZnode(), e, null);
   }
 }
Ejemplo n.º 5
0
 /**
  * Acknowledges a completed sub-procedure by creating a znode named after this member under the
  * procedure's reached-barrier znode, carrying {@code data} behind the protobuf magic prefix.
  *
  * <p>A {@code KeeperException} is not propagated; it is reported to the member through
  * {@code controllerConnectionFailure}.
  *
  * @param sub the sub-procedure that completed
  * @param data payload to store in the znode; {@code null} is stored as an empty payload
  */
 @Override
 public void sendMemberCompleted(Subprocedure sub, byte[] data) throws IOException {
   final String procName = sub.getName();
   LOG.debug(
       "Marking procedure  '" + procName + "' completed for member '" + memberName + "' in zk");
   final String reachedBarrier = zkController.getReachedBarrierNode(procName);
   final String joinPath = ZKUtil.joinZNode(reachedBarrier, memberName);
   // ProtobufUtil.prependPBMagic does not take care of null, so substitute an empty array
   final byte[] payload = (data != null) ? data : new byte[0];
   try {
     ZKUtil.createAndFailSilent(
         zkController.getWatcher(), joinPath, ProtobufUtil.prependPBMagic(payload));
   } catch (KeeperException e) {
     member.controllerConnectionFailure(
         "Failed to post zk node:" + joinPath + " to join procedure barrier.", e, procName);
   }
 }
Ejemplo n.º 6
0
  /**
   * Pass along the found abort notification to the listener.
   *
   * <p>The data of the abort znode is read and turned into a {@code ForeignException} that is
   * handed to {@code member.receiveAbortProcedure}:
   *
   * <ul>
   *   <li>missing or empty data is ignored and the member is not notified;
   *   <li>data without the protobuf magic prefix yields a {@code ForeignException} wrapping an
   *       {@code IllegalArgumentException};
   *   <li>data that cannot be deserialized yields a {@code ForeignException} wrapping the
   *       {@code InvalidProtocolBufferException};
   *   <li>otherwise the bytes after the magic prefix are deserialized.
   * </ul>
   *
   * <p>A {@code KeeperException} while reading is reported to the member through
   * {@code controllerConnectionFailure}; an {@code InterruptedException} is logged and the
   * thread's interrupt flag is restored.
   *
   * @param abortZNode full znode path to the failed procedure information
   */
  protected void abort(String abortZNode) {
    LOG.debug("Aborting procedure member for znode " + abortZNode);
    String opName = ZKUtil.getNodeName(abortZNode);
    try {
      byte[] data = ZKUtil.getData(zkController.getWatcher(), abortZNode);

      // figure out the data we need to pass
      ForeignException ee;
      try {
        if (data == null || data.length == 0) {
          // ignore
          return;
        } else if (!ProtobufUtil.isPBMagicPrefix(data)) {
          String msg =
              "Illegally formatted data in abort node for proc "
                  + opName
                  + ".  Killing the procedure.";
          LOG.error(msg);
          // we got a remote exception, but we can't describe it so just return exn from here
          ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg));
        } else {
          // strip the protobuf magic prefix before deserializing
          data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
          ee = ForeignException.deserialize(data);
        }
      } catch (InvalidProtocolBufferException e) {
        LOG.warn(
            "Got an error notification for op:"
                + opName
                + " but we can't read the information. Killing the procedure.");
        // we got a remote exception, but we can't describe it so just return exn from here
        ee = new ForeignException(getMemberName(), e);
      }

      this.member.receiveAbortProcedure(opName, ee);
    } catch (KeeperException e) {
      // Keep the two paths apart in the message; concatenating them directly produced a single
      // unreadable path.
      member.controllerConnectionFailure(
          "Failed to get data for abort znode:"
              + abortZNode
              + " (abort root znode: "
              + zkController.getAbortZnode()
              + ")",
          e,
          opName);
    } catch (InterruptedException e) {
      LOG.warn("abort already in progress", e);
      Thread.currentThread().interrupt();
    }
  }
Ejemplo n.º 7
0
 /**
  * Publishes an abort for the given sub-procedure by writing the serialized exception, behind
  * the protobuf magic prefix, to the procedure's abort znode.
  *
  * <p>When {@code sub} is {@code null} the exception is only logged and nothing is written. The
  * exception's own source is used for serialization, falling back to this member's name when
  * the exception has none. A {@code KeeperException} is reported to the member through
  * {@code controllerConnectionFailure} after the ZooKeeper tree under the base znode is logged.
  *
  * @param sub the sub-procedure being aborted; may be {@code null}
  * @param ee the cause of the abort
  */
 @Override
 public void sendMemberAborted(Subprocedure sub, ForeignException ee) {
   if (sub == null) {
     LOG.error("Failed due to null subprocedure", ee);
     return;
   }
   final String procName = sub.getName();
   LOG.debug("Aborting procedure (" + procName + ") in zk");
   final String procAbortZNode = zkController.getAbortZNode(procName);
   try {
     String source = ee.getSource();
     if (source == null) {
       source = memberName;
     }
     final byte[] serialized = ForeignException.serialize(source, ee);
     final byte[] errorInfo = ProtobufUtil.prependPBMagic(serialized);
     ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo);
     LOG.debug("Finished creating abort znode:" + procAbortZNode);
   } catch (KeeperException e) {
     // We may get this error if the zk state was already reset, but in that case we should
     // still get an error for that procedure anyway.
     zkController.logZKTree(zkController.getBaseZnode());
     member.controllerConnectionFailure(
         "Failed to post zk node:" + procAbortZNode + " to abort procedure", e, procName);
   }
 }