예제 #1
0
  /**
   * Creates an ephemeral sequential znode at {@code path} with open ACLs, retrying when the
   * failure looks transient.
   *
   * <p>Only CONNECTIONLOSS and OPERATIONTIMEOUT are retried. Any other KeeperException is
   * rethrown immediately, and a transient one is rethrown once the retry counter refuses another
   * attempt.
   *
   * @param path path handed to the underlying create call
   * @param data payload stored in the new znode
   * @return the value returned by the underlying create call
   * @throws Exception if the create fails for a non-transient reason or the retries run out
   */
  public String createEphemeralSequential(final String path, final byte[] data) throws Exception {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        return zk.create(path, data, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
      } catch (KeeperException failure) {
        // First classify the failure: anything that is not transient is fatal.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        LOG.warn("Possibly transient ZooKeeper exception: " + failure);
        if (!attempts.shouldRetry()) {
          LOG.error("ZooKeeper create failed after " + attempts.getMaxRetries() + " retries");
          throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #2
0
 /**
  * Tells whether the given exception is retry-able.
  *
  * <p>Only a KeeperException can qualify: its result code is converted to an int and handed to
  * {@code shouldRetry}. Any other throwable (including {@code null}) yields {@code false}.
  *
  * @param exception exception to check
  * @return {@code true} if the exception is a KeeperException whose code {@code shouldRetry}
  *     accepts, {@code false} otherwise
  */
 public static boolean isRetryException(Throwable exception) {
   if (!(exception instanceof KeeperException)) {
     return false;
   }
   int resultCode = ((KeeperException) exception).code().intValue();
   return shouldRetry(resultCode);
 }
예제 #3
0
  /**
   * Reads the data of the znode at {@code path}, retrying when the failure looks transient.
   *
   * <p>CONNECTIONLOSS and OPERATIONTIMEOUT are retried for as long as the retry counter allows;
   * every other KeeperException is rethrown at once.
   *
   * @param path znode to read
   * @param watcher watcher handed through to the underlying getData call
   * @param stat stat object handed through to the underlying getData call
   * @return whatever the underlying getData call returns
   * @throws KeeperException on a non-transient failure, or a transient one once retries run out
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public byte[] getData(String path, Watcher watcher, Stat stat)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        // Open question (DaeJin Choi, 120227): what about metadata of the data
        // (e.g. magic number and so on)?
        return zk.getData(path, watcher, stat);
      } catch (KeeperException failure) {
        // Anything that is not a transient condition is rethrown untouched.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        LOG.warn("Possibly transient ZooKeeper exception: " + failure);
        if (!attempts.shouldRetry()) {
          LOG.error("ZooKeeper getData failed after " + attempts.getMaxRetries() + " retries");
          throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #4
0
  /**
   * Creates a sequential znode at {@code path + identifier}, retrying on CONNECTIONLOSS,
   * SESSIONEXPIRED and OPERATIONTIMEOUT.
   *
   * <p>From the second attempt on, {@code findPreviousSequentialNode} is consulted before
   * creating again; a non-null result is returned as-is, because an earlier attempt may have
   * succeeded even though its response was lost.
   *
   * @param path path prefix; this instance's identifier is appended before creating
   * @param data payload for the new znode
   * @param acl ACL list handed to the underlying create call
   * @param createMode create mode handed to the underlying create call
   * @return the previously created node if one is found on a retry, otherwise the value returned
   *     by the underlying create call
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  private String createSequential(String path, byte[] data, List<ACL> acl, CreateMode createMode)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    final String nodePath = path + this.identifier;
    boolean attemptedBefore = false;
    for (;;) {
      try {
        if (attemptedBefore) {
          // A previous attempt may have gone through without us seeing the reply.
          String earlier = findPreviousSequentialNode(nodePath);
          if (earlier != null) {
            return earlier;
          }
        }
        attemptedBefore = true;
        return zk.create(nodePath, data, acl, createMode);
      } catch (KeeperException failure) {
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        retryOrThrow(attempts, failure, "create");
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #5
0
  /**
   * Lists the children of {@code path}, passing {@code true} as the watch argument, and retries
   * when the failure looks transient.
   *
   * <p>CONNECTIONLOSS and OPERATIONTIMEOUT are retried for as long as the retry counter allows;
   * every other KeeperException is rethrown at once.
   *
   * @param path znode whose children are listed
   * @return whatever the underlying getChildren call returns
   * @throws KeeperException on a non-transient failure, or a transient one once retries run out
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public List<String> getChildren(String path) throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        return zk.getChildren(path, true);
      } catch (KeeperException failure) {
        // Anything that is not a transient condition is rethrown untouched.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        LOG.warn("Possibly transient ZooKeeper exception: " + failure);
        if (!attempts.shouldRetry()) {
          LOG.error("ZooKeeper getChildren failed after " + attempts.getMaxRetries() + " retries");
          throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #6
0
  /**
   * Creates a persistent znode at {@code path} with open ACLs, retrying when the failure looks
   * transient.
   *
   * <p>CONNECTIONLOSS and OPERATIONTIMEOUT are retried for as long as the retry counter allows;
   * every other KeeperException is rethrown at once.
   *
   * @param path path of the znode to create
   * @param data payload stored in the new znode
   * @throws KeeperException on a non-transient failure, or a transient one once retries run out
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public void createPersistent(final String path, final byte[] data)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        zk.create(path, data, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        return;
      } catch (KeeperException failure) {
        // Anything that is not a transient condition is rethrown untouched.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        LOG.warn("Possibly transient ZooKeeper exception: " + failure);
        if (!attempts.shouldRetry()) {
          LOG.error("ZooKeeper create failed after " + attempts.getMaxRetries() + " retries");
          throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #7
0
  /**
   * Opens the Monitor's Log4j socket listener and advertises its host:port in ZooKeeper.
   *
   * <p>This is best-effort: any failure along the way is logged at info level and not propagated
   * to the caller.
   *
   * @param conf configuration for the instance; supplies the listener port
   * @param instanceId instanceId for the instance; used to build the ZooKeeper path
   * @param hostAddress address that the monitor process is bound to
   */
  public static void startLogListener(
      AccumuloConfiguration conf, String instanceId, String hostAddress) {
    try {
      final SocketServer listener = new SocketServer(conf.getPort(Property.MONITOR_LOG4J_PORT));

      // When '0' was configured, getLocalPort reports the ephemeral port actually in use.
      final String advertisedAddr = hostAddress + ":" + listener.getLocalPort();
      log.debug("Setting monitor log4j log-forwarding address to: " + advertisedAddr);

      final ZooReaderWriter zrw = ZooReaderWriter.getInstance();
      final String advertisementPath = ZooUtil.getRoot(instanceId) + Constants.ZMONITOR_LOG4J_ADDR;

      // Clear any leftover advertisement first, in case a previous session has not expired yet.
      try {
        zrw.delete(advertisementPath, -1);
      } catch (KeeperException ke) {
        // A node that is already gone is exactly what we want; everything else is a failure.
        boolean alreadyGone = KeeperException.Code.NONODE.equals(ke.code());
        if (!alreadyGone) {
          throw ke;
        }
      }

      zrw.putEphemeralData(advertisementPath, advertisedAddr.getBytes(UTF_8));

      new Daemon(listener).start();
    } catch (Throwable t) {
      log.info("Unable to start/advertise Log4j listener for log-forwarding to monitor", t);
    }
  }
 /**
  * Tries to own the task by writing an OWNED {@code SplitLogTask} for {@code server} into the
  * task's znode at the expected version.
  *
  * <p>The same call is used to heartbeat task progress, i.e. to rewrite OWNED over OWNED.
  *
  * <p>Every failure path (null Stat, KeeperException, interruption) bumps the
  * heartbeat-failed counter; success bumps the heartbeat counter. KeeperExceptions are only
  * logged when this is not the first attempt.
  *
  * @param isFirstTime whether this is the first attempt
  * @param zkw ZooKeeper watcher providing the recoverable ZooKeeper client
  * @param server name of the server claiming the task
  * @param task znode path of the task to own
  * @param mode recovery mode recorded in the OWNED task data
  * @param taskZKVersion version of the task znode that the write is conditioned on
  * @return the znode version reported by the successful write, otherwise
  *     {@code FAILED_TO_OWN_TASK}
  */
 protected static int attemptToOwnTask(
     boolean isFirstTime,
     ZooKeeperWatcher zkw,
     ServerName server,
     String task,
     RecoveryMode mode,
     int taskZKVersion) {
   try {
     SplitLogTask ownedTask = new SplitLogTask.Owned(server, mode);
     Stat written =
         zkw.getRecoverableZooKeeper().setData(task, ownedTask.toByteArray(), taskZKVersion);
     if (written != null) {
       int newVersion = written.getVersion();
       SplitLogCounters.tot_wkr_task_heartbeat.incrementAndGet();
       return newVersion;
     }
     LOG.warn("zk.setData() returned null for path " + task);
   } catch (KeeperException e) {
     if (!isFirstTime) {
       // Prefix the message with the code name for the two expected failure modes.
       String codeLabel = "";
       if (e.code().equals(KeeperException.Code.NONODE)) {
         codeLabel = "NONODE ";
       } else if (e.code().equals(KeeperException.Code.BADVERSION)) {
         codeLabel = "BADVERSION ";
       }
       LOG.warn(codeLabel + "failed to assert ownership for " + task, e);
     }
   } catch (InterruptedException e1) {
     LOG.warn(
         "Interrupted while trying to assert ownership of "
             + task
             + " "
             + StringUtils.stringifyException(e1));
     Thread.currentThread().interrupt();
   }
   // Shared tail for every failure path above.
   SplitLogCounters.tot_wkr_task_heartbeat_failed.incrementAndGet();
   return FAILED_TO_OWN_TASK;
 }
예제 #9
0
  /**
   * Creates a znode at {@code path} and reports the outcome as a result code instead of throwing.
   *
   * @param path path of the znode to create
   * @param data payload for the new znode
   * @param mode create mode handed to the underlying create call
   * @return {@code OK} on success, the KeeperException's own code when ZooKeeper rejects the
   *     request, or {@code SYSTEMERROR} for any other exception
   */
  protected KeeperException.Code createFromByte(String path, byte[] data, CreateMode mode) {
    try {
      zooKeeper.create(path, data, acl, mode);
      return KeeperException.Code.OK;
    } catch (KeeperException e) {
      return e.code();
    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        // The result code cannot express an interruption, so keep the thread's interrupt status
        // set for callers further up the stack instead of silently clearing it.
        Thread.currentThread().interrupt();
      }
      return KeeperException.Code.SYSTEMERROR;
    }
  }
예제 #10
0
 /**
  * Creates the znode at {@code path} with open ACLs unless it already exists.
  *
  * @param zooKeeper connection used for the exists check and the create
  * @param path path of the znode
  * @param value payload for the new znode
  * @param createMode create mode handed to the underlying create call
  * @return {@code true} only if this call created the node; {@code false} if it existed
  *     beforehand or the create reported NODEEXISTS
  * @throws KeeperException for any ZooKeeper failure other than NODEEXISTS on create
  * @throws InterruptedException propagated from the ZooKeeper calls
  */
 public static boolean createIfNotExists(
     ZooKeeperConnection zooKeeper, String path, byte[] value, CreateMode createMode)
     throws KeeperException, InterruptedException {
   if (zooKeeper.exists(path, false) != null) {
     return false;
   }
   try {
     zooKeeper.create(path, value, ZooDefs.Ids.OPEN_ACL_UNSAFE, createMode);
     return true;
   } catch (KeeperException e) {
     if (e.code() == KeeperException.Code.NODEEXISTS) {
       // The node appeared between the exists check and the create.
       return false;
     }
     throw e;
   }
 }
예제 #11
0
 /**
  * Builds the list of configured replication peers whose base znode currently exists.
  *
  * <p>For each configured peer, the cluster key is applied to a copy of this connection's
  * configuration, a peer object is built, and the znode named by
  * {@code HConstants.ZOOKEEPER_ZNODE_PARENT} in that configuration is checked for existence.
  * Peers that fail any step are logged and skipped.
  *
  * @return the valid peers (possibly empty), or {@code null} when no peer configs are listed
  */
 private List<ReplicationPeer> listValidReplicationPeers() {
   Map<String, ReplicationPeerConfig> peerConfigs = listPeerConfigs();
   if (peerConfigs == null || peerConfigs.isEmpty()) {
     return null;
   }
   List<ReplicationPeer> result = new ArrayList<ReplicationPeer>(peerConfigs.size());
   for (Entry<String, ReplicationPeerConfig> entry : peerConfigs.entrySet()) {
     String peerId = entry.getKey();
     String clusterKey = entry.getValue().getClusterKey();
     Configuration peerConf = new Configuration(this.connection.getConfiguration());
     try {
       ZKUtil.applyClusterKeyToConf(peerConf, clusterKey);
       Pair<ReplicationPeerConfig, Configuration> confPair =
           this.replicationPeers.getPeerConf(peerId);
       ReplicationPeer peer = new ReplicationPeerZKImpl(peerConf, peerId, confPair.getFirst());
       Stat baseNode =
           zkw.getRecoverableZooKeeper()
               .exists(peerConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT), null);
       if (baseNode != null) {
         result.add(peer);
       } else {
         LOG.info(peerId + ' ' + clusterKey + " is invalid now.");
       }
     } catch (ReplicationException e) {
       LOG.warn(
           "Failed to get valid replication peers. "
               + "Error connecting to peer cluster with peerId="
               + peerId);
       LOG.debug("Failure details to get valid replication peers.", e);
     } catch (KeeperException e) {
       LOG.warn(
           "Failed to get valid replication peers. KeeperException code=" + e.code().intValue());
       LOG.debug("Failure details to get valid replication peers.", e);
     } catch (InterruptedException e) {
       LOG.warn("Failed to get valid replication peers due to InterruptedException.");
       LOG.debug("Failure details to get valid replication peers.", e);
       // Preserve the interrupt status; the loop still moves on to the next peer.
       Thread.currentThread().interrupt();
     } catch (IOException e) {
       LOG.warn("Failed to get valid replication peers due to IOException.");
       LOG.debug("Failure details to get valid replication peers.", e);
     }
   }
   return result;
 }
예제 #12
0
  /**
   * Creates a non-sequential znode, retrying on CONNECTIONLOSS, SESSIONEXPIRED and
   * OPERATIONTIMEOUT.
   *
   * <p>NODEEXISTS is handled specially on a retry: the node may have been created by an earlier
   * attempt whose reply was lost, so its data is read back and compared with {@code data}. A
   * match counts as success; a mismatch, or NODEEXISTS on the very first attempt, is rethrown.
   *
   * @param path path of the znode to create
   * @param data payload for the new znode
   * @param acl ACL list handed to the underlying create call
   * @param createMode create mode handed to the underlying create call
   * @return the value returned by the underlying create call, or {@code path} when a retry finds
   *     the node already holding {@code data}
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper calls or the retry counter
   */
  private String createNonSequential(String path, byte[] data, List<ACL> acl, CreateMode createMode)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    boolean retrying = false; // Stays false until the first attempt has failed transiently.
    for (;;) {
      try {
        return zk.create(path, data, acl, createMode);
      } catch (KeeperException failure) {
        switch (failure.code()) {
          case NODEEXISTS: {
            if (!retrying) {
              LOG.info("Node " + path + " already exists and this is not a retry");
              throw failure;
            }
            // The connection was lost earlier, so our previous attempt may have created the
            // node; read it back and compare.
            byte[] stored = zk.getData(path, false, null);
            if (stored != null && Bytes.compareTo(stored, data) == 0) {
              // The node is ours: the earlier create went through.
              return path;
            }
            LOG.error(
                "Node "
                    + path
                    + " already exists with "
                    + Bytes.toStringBinary(stored)
                    + ", could not write "
                    + Bytes.toStringBinary(data));
            throw failure;
          }

          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            retryOrThrow(attempts, failure, "create");
            break;

          default:
            throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
        retrying = true;
      }
    }
  }
예제 #13
0
  /**
   * Creates a znode at {@code path} holding {@code data} encoded as UTF-8 and reports the outcome
   * as a result code instead of throwing.
   *
   * <p>The string is encoded with an explicit charset so that the stored bytes do not depend on
   * the default charset of whichever JVM happens to write them.
   *
   * @param path path of the znode to create
   * @param data text payload, or {@code null} to create the node with a null payload
   * @param mode create mode handed to the underlying create call
   * @return {@code OK} on success, the KeeperException's own code when ZooKeeper rejects the
   *     request, or {@code SYSTEMERROR} for any other exception
   */
  protected KeeperException.Code create(String path, String data, CreateMode mode) {
    try {
      byte[] byteData =
          (data == null) ? null : data.getBytes(java.nio.charset.StandardCharsets.UTF_8);
      zooKeeper.create(path, byteData, acl, mode);
      return KeeperException.Code.OK;
    } catch (KeeperException e) {
      return e.code();
    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        // The result code cannot express an interruption, so keep the thread's interrupt status
        // set for callers further up the stack instead of silently clearing it.
        Thread.currentThread().interrupt();
      }
      return KeeperException.Code.SYSTEMERROR;
    }
  }
예제 #14
0
  /**
   * Get metrics data for this service node (self) for current interval.
   *
   * <p>The export metadata is looked up under {@code clusterId/serviceId/<own path token>}. If
   * it exists but its data path has not been set yet, this call blocks once on the metadata
   * object's monitor until notified, then reads and deserializes the data from ZooKeeper.
   *
   * @param clusterId cluster the metrics belong to; also stamped onto the returned object
   * @param serviceId service the metrics belong to; also stamped onto the returned object
   * @return the metrics read from ZooKeeper, or {@code null} when no export metadata is
   *     registered for the key or ZooKeeper reports NONODE for the data path
   * @throws ReignException wrapping any other failure while reading or deserializing
   */
  public MetricsData getMyMetrics(String clusterId, String serviceId) {
    String key = clusterId + "/" + serviceId + "/" + getContext().getZkNodeId().getPathToken();
    ExportMeta exportMeta = exportPathMap.get(key);
    if (exportMeta == null) {
      logger.trace(
          "MetricsData not found:  data has not been exported:  clusterId={}; serviceId={}; exportMeta={}",
          clusterId,
          serviceId,
          exportMeta);
      return null;
    }
    if (exportMeta.dataPath == null) {
      logger.trace(
          "MetricsData not found:  waiting for data to be reported in ZK:  clusterId={}; serviceId={}; exportMeta.dataPath={}",
          clusterId,
          serviceId,
          exportMeta.dataPath);
      synchronized (exportMeta) {
        // Re-check while holding the monitor: the path may have been published (and the
        // notification already sent) between the unsynchronized check above and this point, in
        // which case waiting would block on a wake-up that never comes again.
        // NOTE(review): assumes the publisher sets dataPath before notifying on this same
        // monitor -- confirm against the exporting code.
        if (exportMeta.dataPath == null) {
          try {
            exportMeta.wait();
          } catch (InterruptedException e) {
            logger.warn("Interrupted while waiting:  " + e, e);
            // Do not swallow the interruption; leave the status set for the code that follows.
            Thread.currentThread().interrupt();
          }
        }
      }
    }

    try {
      logger.debug("Retrieving metrics:  path={}", exportMeta.dataPath);
      Stat stat = new Stat();
      byte[] bytes = getContext().getZkClient().getData(exportMeta.dataPath, true, stat);
      MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
      metricsData.setClusterId(clusterId);
      metricsData.setServiceId(serviceId);
      metricsData.setLastUpdatedTimestamp(stat.getMtime());
      return metricsData;
    } catch (KeeperException e) {
      if (e.code() == KeeperException.Code.NONODE) {
        // The data node is gone; report "no metrics" rather than an error.
        return null;
      }
      throw new ReignException(e);
    } catch (Exception e) {
      throw new ReignException(e);
    }
  }
예제 #15
0
 /**
  * Removes the user's subtree under {@code ZKUserPath} from ZooKeeper, clearing the local
  * ZooKeeper cache first. Both steps run while holding the cache's monitor.
  *
  * @param user name of the user to drop
  * @throws AccumuloSecurityException with {@code USER_DOESNT_EXIST} when ZooKeeper reports
  *     NONODE, or with {@code CONNECTION_ERROR} for any other KeeperException
  * @throws RuntimeException wrapping an InterruptedException; the thread's interrupt status is
  *     restored before it is thrown
  */
 @Override
 public void dropUser(String user) throws AccumuloSecurityException {
   try {
     synchronized (zooCache) {
       zooCache.clear();
       ZooReaderWriter.getInstance()
           .recursiveDelete(ZKUserPath + "/" + user, NodeMissingPolicy.FAIL);
     }
   } catch (InterruptedException e) {
     log.error("{}", e.getMessage(), e);
     // Wrapping in a RuntimeException hides the interruption from callers, so put the
     // interrupt status back before propagating.
     Thread.currentThread().interrupt();
     throw new RuntimeException(e);
   } catch (KeeperException e) {
     if (e.code().equals(KeeperException.Code.NONODE)) {
       throw new AccumuloSecurityException(user, SecurityErrorCode.USER_DOESNT_EXIST, e);
     }
     log.error("{}", e.getMessage(), e);
     throw new AccumuloSecurityException(user, SecurityErrorCode.CONNECTION_ERROR, e);
   }
 }
예제 #16
0
 /**
  * Creates a user from a password token by delegating to {@code constructUser} with the
  * password processed by {@code ZKSecurityTool.createPass}.
  *
  * @param principal name of the user to create
  * @param token authentication token; must be a {@code PasswordToken}
  * @throws AccumuloSecurityException with {@code INVALID_TOKEN} when the token is not a
  *     PasswordToken, {@code USER_EXISTS} when ZooKeeper reports NODEEXISTS,
  *     {@code CONNECTION_ERROR} for any other KeeperException, or
  *     {@code DEFAULT_SECURITY_ERROR} when an AccumuloException is raised
  * @throws RuntimeException wrapping an InterruptedException; the thread's interrupt status is
  *     restored before it is thrown
  */
 @Override
 public void createUser(String principal, AuthenticationToken token)
     throws AccumuloSecurityException {
   try {
     if (!(token instanceof PasswordToken)) {
       throw new AccumuloSecurityException(principal, SecurityErrorCode.INVALID_TOKEN);
     }
     PasswordToken pt = (PasswordToken) token;
     constructUser(principal, ZKSecurityTool.createPass(pt.getPassword()));
   } catch (KeeperException e) {
     if (e.code().equals(KeeperException.Code.NODEEXISTS)) {
       throw new AccumuloSecurityException(principal, SecurityErrorCode.USER_EXISTS, e);
     }
     throw new AccumuloSecurityException(principal, SecurityErrorCode.CONNECTION_ERROR, e);
   } catch (InterruptedException e) {
     log.error("{}", e.getMessage(), e);
     // Wrapping in a RuntimeException hides the interruption from callers, so put the
     // interrupt status back before propagating.
     Thread.currentThread().interrupt();
     throw new RuntimeException(e);
   } catch (AccumuloException e) {
     log.error("{}", e.getMessage(), e);
     throw new AccumuloSecurityException(principal, SecurityErrorCode.DEFAULT_SECURITY_ERROR, e);
   }
 }
예제 #17
0
  /**
   * Checks whether the znode at {@code path} exists. The check is idempotent, so
   * CONNECTIONLOSS, SESSIONEXPIRED and OPERATIONTIMEOUT are retried before giving up.
   *
   * @param path znode to check
   * @param watch watch flag handed through to the underlying exists call
   * @return the Stat returned by the underlying exists call
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public Stat exists(String path, boolean watch) throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        return zk.exists(path, watch);
      } catch (KeeperException failure) {
        // Only the three connection-level conditions are worth another attempt.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        retryOrThrow(attempts, failure, "exists");
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
 /**
  * Queues the request for FIFO processing, unless this processor is already finished.
  *
  * <p>Before queueing, the server is asked whether the session needs an upgrade; a non-null
  * upgrade request is queued ahead of the original one. A KeeperException from that check is
  * recorded on the request (turning its header, when present, into an error transaction), and
  * the request is still queued afterwards. An IOException is only logged.
  *
  * @param request request to enqueue
  */
 public void processRequest(Request request) {
   if (finished) {
     return;
   }

   Request sessionUpgrade = null;
   try {
     sessionUpgrade = zks.checkUpgradeSession(request);
   } catch (KeeperException ke) {
     if (request.getHdr() != null) {
       request.getHdr().setType(OpCode.error);
       request.setTxn(new ErrorTxn(ke.code().intValue()));
     }
     request.setException(ke);
     LOG.info("Error creating upgrade request", ke);
   } catch (IOException ie) {
     LOG.error("Unexpected error in upgrade", ie);
   }

   // The upgrade, if any, must be processed before the request that triggered it.
   if (sessionUpgrade != null) {
     queuedRequests.add(sessionUpgrade);
   }
   queuedRequests.add(request);
 }
예제 #19
0
 /**
  * Writes {@code data} (with metadata appended by {@code appendMetaData}) to the znode at
  * {@code path}, retrying on CONNECTIONLOSS, SESSIONEXPIRED and OPERATIONTIMEOUT.
  *
  * <p>setData is NOT idempotent: a retry can fail with BADVERSION precisely because an earlier
  * attempt succeeded. On a retry, BADVERSION therefore triggers a read-back; if the stored
  * bytes equal what this call tried to write, the write is treated as done and the Stat filled
  * in by that read is returned. Otherwise the BADVERSION exception is rethrown.
  *
  * @param path znode to write
  * @param data payload, before metadata is appended
  * @param version expected znode version handed to the underlying setData call
  * @return the Stat from the successful write, or from the verifying read-back
  * @throws KeeperException on a non-retried failure, a failed read-back, or whatever
  *     {@code retryOrThrow} raises
  * @throws InterruptedException propagated from the ZooKeeper calls or the retry counter
  */
 public Stat setData(String path, byte[] data, int version)
     throws KeeperException, InterruptedException {
   final RetryCounter attempts = retryCounterFactory.create();
   final byte[] payload = appendMetaData(data);
   boolean retrying = false;
   for (;;) {
     try {
       return zk.setData(path, payload, version);
     } catch (KeeperException failure) {
       switch (failure.code()) {
         case CONNECTIONLOSS:
         case SESSIONEXPIRED:
         case OPERATIONTIMEOUT:
           retryOrThrow(attempts, failure, "setData");
           break;

         case BADVERSION:
           if (retrying) {
             // Check whether an earlier attempt already wrote our payload. A KeeperException
             // from this read simply propagates: ZK is not reliable at this moment.
             Stat current = new Stat();
             byte[] stored = zk.getData(path, false, current);
             if (Bytes.compareTo(stored, payload) == 0) {
               // The bad version is the result of our own earlier, successful write.
               return current;
             }
           }
           // A verified (or first-attempt) bad version is a real failure.
           throw failure;

         default:
           throw failure;
       }
       attempts.sleepUntilNextRetry();
       attempts.useRetry();
       retrying = true;
     }
   }
 }
예제 #20
0
  /**
   * Lists the children of the znode at {@code path}. The call is idempotent, so CONNECTIONLOSS,
   * SESSIONEXPIRED and OPERATIONTIMEOUT are retried before giving up.
   *
   * @param path znode whose children are listed
   * @param watcher watcher handed through to the underlying getChildren call
   * @return list of children znodes as returned by the underlying call
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public List<String> getChildren(String path, Watcher watcher)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        return zk.getChildren(path, watcher);
      } catch (KeeperException failure) {
        // Only the three connection-level conditions are worth another attempt.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        retryOrThrow(attempts, failure, "getChildren");
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #21
0
  /**
   * Runs several operations as one ZooKeeper multi call, retrying on CONNECTIONLOSS,
   * SESSIONEXPIRED and OPERATIONTIMEOUT before giving up.
   *
   * <p>The operations are passed through {@code prepareZKMulti} once, and the prepared set is
   * reused for every attempt.
   *
   * @param ops operations to execute
   * @return the results returned by the underlying multi call
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public List<OpResult> multi(Iterable<Op> ops) throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    final Iterable<Op> preparedOps = prepareZKMulti(ops);
    for (;;) {
      try {
        return zk.multi(preparedOps);
      } catch (KeeperException failure) {
        // Only the three connection-level conditions are worth another attempt.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        retryOrThrow(attempts, failure, "multi");
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #22
0
  /**
   * Reads the data of the znode at {@code path} and passes it through {@code removeMetaData}
   * before returning it. The read is idempotent, so CONNECTIONLOSS, SESSIONEXPIRED and
   * OPERATIONTIMEOUT are retried before giving up.
   *
   * @param path znode to read
   * @param watch watch flag handed through to the underlying getData call
   * @param stat stat object handed through to the underlying getData call
   * @return the result of {@code removeMetaData} applied to the bytes read
   * @throws KeeperException on a non-retried failure, or whatever {@code retryOrThrow} raises
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   */
  public byte[] getData(String path, boolean watch, Stat stat)
      throws KeeperException, InterruptedException {
    final RetryCounter attempts = retryCounterFactory.create();
    for (;;) {
      try {
        return this.removeMetaData(zk.getData(path, watch, stat));
      } catch (KeeperException failure) {
        // Only the three connection-level conditions are worth another attempt.
        switch (failure.code()) {
          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            break;
          default:
            throw failure;
        }

        retryOrThrow(attempts, failure, "getData");
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
      }
    }
  }
예제 #23
0
  /**
   * Deletes the znode at {@code path}, retrying on CONNECTIONLOSS, SESSIONEXPIRED and
   * OPERATIONTIMEOUT.
   *
   * <p>NONODE is treated differently depending on the attempt: on a retry it is taken to mean
   * that an earlier attempt already removed the node, and the call returns normally; on the
   * first attempt it is logged and rethrown.
   *
   * @param path znode to delete
   * @param version expected znode version handed to the underlying delete call
   * @throws InterruptedException propagated from the ZooKeeper call or the retry counter
   * @throws KeeperException on a non-retried failure (including NONODE on the first attempt), or
   *     whatever {@code retryOrThrow} raises
   */
  public void delete(String path, int version) throws InterruptedException, KeeperException {
    final RetryCounter attempts = retryCounterFactory.create();
    boolean retrying = false; // Stays false until the first attempt has failed transiently.
    for (;;) {
      try {
        zk.delete(path, version);
        return;
      } catch (KeeperException failure) {
        switch (failure.code()) {
          case NONODE:
            if (!retrying) {
              LOG.warn("Node " + path + " already deleted, and this is not a retry");
              throw failure;
            }
            LOG.info(
                "Node " + path + " already deleted. Assuming that a previous attempt succeeded.");
            return;

          case CONNECTIONLOSS:
          case SESSIONEXPIRED:
          case OPERATIONTIMEOUT:
            retryOrThrow(attempts, failure, "delete");
            break;

          default:
            throw failure;
        }
        attempts.sleepUntilNextRetry();
        attempts.useRetry();
        retrying = true;
      }
    }
  }
  /**
   * This method will be called inside the ProcessRequestThread, which is a singleton, so there will
   * be a single thread calling this code.
   *
   * @param request
   */
  @SuppressWarnings("unchecked")
  protected void pRequest(Request request) throws RequestProcessorException {
    // LOG.info("Prep>>> cxid = " + request.cxid + " type = " +
    // request.type + " id = 0x" + Long.toHexString(request.sessionId));
    TxnHeader txnHeader = null;
    Record txn = null;
    try {
      switch (request.type) {
        case OpCode.create:
          txnHeader =
              new TxnHeader(
                  request.sessionId, request.cxid, zks.getNextZxid(), zks.getTime(), OpCode.create);
          zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          CreateRequest createRequest = new CreateRequest();
          ZooKeeperServer.byteBuffer2Record(request.request, createRequest);
          String path = createRequest.getPath();
          int lastSlash = path.lastIndexOf('/');
          if (lastSlash == -1 || path.indexOf('\0') != -1 || failCreate) {
            LOG.info(
                "Invalid path " + path + " with session 0x" + Long.toHexString(request.sessionId));
            throw new KeeperException.BadArgumentsException(path);
          }
          if (!fixupACL(request.authInfo, createRequest.getAcl())) {
            throw new KeeperException.InvalidACLException(path);
          }
          String parentPath = path.substring(0, lastSlash);
          ChangeRecord parentRecord = getRecordForPath(parentPath);

          checkACL(zks, parentRecord.acl, ZooDefs.Perms.CREATE, request.authInfo);
          int parentCVersion = parentRecord.stat.getCversion();
          CreateMode createMode = CreateMode.fromFlag(createRequest.getFlags());
          if (createMode.isSequential()) {
            path = path + String.format(Locale.ENGLISH, "%010d", parentCVersion);
          }
          try {
            PathUtils.validatePath(path);
          } catch (IllegalArgumentException ie) {
            LOG.info(
                "Invalid path " + path + " with session 0x" + Long.toHexString(request.sessionId));
            throw new KeeperException.BadArgumentsException(path);
          }
          try {
            if (getRecordForPath(path) != null) {
              throw new KeeperException.NodeExistsException(path);
            }
          } catch (KeeperException.NoNodeException e) {
            // ignore this one
          }
          boolean ephemeralParent = parentRecord.stat.getEphemeralOwner() != 0;
          if (ephemeralParent) {
            throw new KeeperException.NoChildrenForEphemeralsException(path);
          }
          txn =
              new CreateTxn(
                  path, createRequest.getData(), createRequest.getAcl(), createMode.isEphemeral());
          StatPersisted s = new StatPersisted();
          if (createMode.isEphemeral()) {
            s.setEphemeralOwner(request.sessionId);
          }
          parentRecord = parentRecord.duplicate(txnHeader.getZxid());
          parentRecord.childCount++;
          parentRecord.stat.setCversion(parentRecord.stat.getCversion() + 1);
          addChangeRecord(parentRecord);
          addChangeRecord(
              new ChangeRecord(txnHeader.getZxid(), path, s, 0, createRequest.getAcl()));

          break;
        case OpCode.delete:
          txnHeader =
              new TxnHeader(
                  request.sessionId, request.cxid, zks.getNextZxid(), zks.getTime(), OpCode.delete);
          zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          DeleteRequest deleteRequest = new DeleteRequest();
          ZooKeeperServer.byteBuffer2Record(request.request, deleteRequest);
          path = deleteRequest.getPath();
          lastSlash = path.lastIndexOf('/');
          if (lastSlash == -1
              || path.indexOf('\0') != -1
              || zks.getZKDatabase().isSpecialPath(path)) {
            throw new KeeperException.BadArgumentsException(path);
          }
          parentPath = path.substring(0, lastSlash);
          parentRecord = getRecordForPath(parentPath);
          ChangeRecord nodeRecord = getRecordForPath(path);
          checkACL(zks, parentRecord.acl, ZooDefs.Perms.DELETE, request.authInfo);
          int version = deleteRequest.getVersion();
          if (version != -1 && nodeRecord.stat.getVersion() != version) {
            throw new KeeperException.BadVersionException(path);
          }
          if (nodeRecord.childCount > 0) {
            throw new KeeperException.NotEmptyException(path);
          }
          txn = new DeleteTxn(path);
          parentRecord = parentRecord.duplicate(txnHeader.getZxid());
          parentRecord.childCount--;
          parentRecord.stat.setCversion(parentRecord.stat.getCversion() + 1);
          addChangeRecord(parentRecord);
          addChangeRecord(new ChangeRecord(txnHeader.getZxid(), path, null, -1, null));
          break;
        case OpCode.setData:
          txnHeader =
              new TxnHeader(
                  request.sessionId,
                  request.cxid,
                  zks.getNextZxid(),
                  zks.getTime(),
                  OpCode.setData);
          zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          SetDataRequest setDataRequest = new SetDataRequest();
          ZooKeeperServer.byteBuffer2Record(request.request, setDataRequest);
          path = setDataRequest.getPath();
          nodeRecord = getRecordForPath(path);
          checkACL(zks, nodeRecord.acl, ZooDefs.Perms.WRITE, request.authInfo);
          version = setDataRequest.getVersion();
          int currentVersion = nodeRecord.stat.getVersion();
          if (version != -1 && version != currentVersion) {
            throw new KeeperException.BadVersionException(path);
          }
          version = currentVersion + 1;
          txn = new SetDataTxn(path, setDataRequest.getData(), version);
          nodeRecord = nodeRecord.duplicate(txnHeader.getZxid());
          nodeRecord.stat.setVersion(version);
          addChangeRecord(nodeRecord);
          break;
        case OpCode.setACL:
          txnHeader =
              new TxnHeader(
                  request.sessionId, request.cxid, zks.getNextZxid(), zks.getTime(), OpCode.setACL);
          zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          SetACLRequest setAclRequest = new SetACLRequest();
          ZooKeeperServer.byteBuffer2Record(request.request, setAclRequest);
          path = setAclRequest.getPath();
          if (!fixupACL(request.authInfo, setAclRequest.getAcl())) {
            throw new KeeperException.InvalidACLException(path);
          }
          nodeRecord = getRecordForPath(path);
          checkACL(zks, nodeRecord.acl, ZooDefs.Perms.ADMIN, request.authInfo);
          version = setAclRequest.getVersion();
          currentVersion = nodeRecord.stat.getAversion();
          if (version != -1 && version != currentVersion) {
            throw new KeeperException.BadVersionException(path);
          }
          version = currentVersion + 1;
          txn = new SetACLTxn(path, setAclRequest.getAcl(), version);
          nodeRecord = nodeRecord.duplicate(txnHeader.getZxid());
          nodeRecord.stat.setAversion(version);
          addChangeRecord(nodeRecord);
          break;
        case OpCode.createSession:
          txnHeader =
              new TxnHeader(
                  request.sessionId,
                  request.cxid,
                  zks.getNextZxid(),
                  zks.getTime(),
                  OpCode.createSession);
          request.request.rewind();
          int to = request.request.getInt();
          txn = new CreateSessionTxn(to);
          request.request.rewind();
          zks.sessionTracker.addSession(request.sessionId, to);
          zks.setOwner(request.sessionId, request.getOwner());
          break;
        case OpCode.closeSession:
          txnHeader =
              new TxnHeader(
                  request.sessionId,
                  request.cxid,
                  zks.getNextZxid(),
                  zks.getTime(),
                  OpCode.closeSession);
          // We don't want to do this check since the session expiration thread
          // queues up this operation without being the session owner.
          // this request is the last of the session so it should be ok
          // zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          HashSet<String> es = zks.getZKDatabase().getEphemerals(request.sessionId);
          synchronized (zks.outstandingChanges) {
            for (ChangeRecord c : zks.outstandingChanges) {
              if (c.stat == null) {
                // Doing a delete
                es.remove(c.path);
              } else if (c.stat.getEphemeralOwner() == request.sessionId) {
                es.add(c.path);
              }
            }
            for (String path2Delete : es) {
              addChangeRecord(new ChangeRecord(txnHeader.getZxid(), path2Delete, null, 0, null));
            }

            zks.sessionTracker.setSessionClosing(request.sessionId);
          }

          LOG.info(
              "Processed session termination for sessionid: 0x"
                  + Long.toHexString(request.sessionId));
          break;
        case OpCode.sync:
        case OpCode.exists:
        case OpCode.getData:
        case OpCode.getACL:
        case OpCode.getChildren:
        case OpCode.getChildren2:
        case OpCode.ping:
        case OpCode.setWatches:
          zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
          break;
      }
    } catch (KeeperException e) {
      if (txnHeader != null) {
        txnHeader.setType(OpCode.error);
        txn = new ErrorTxn(e.code().intValue());
      }
      LOG.info(
          "Got user-level KeeperException when processing "
              + request.toString()
              + " Error Path:"
              + e.getPath()
              + " Error:"
              + e.getMessage());
      request.setException(e);
    } catch (Exception e) {
      // log at error level as we are returning a marshalling
      // error to the user
      LOG.error("Failed to process " + request, e);

      StringBuilder sb = new StringBuilder();
      ByteBuffer bb = request.request;
      if (bb != null) {
        bb.rewind();
        while (bb.hasRemaining()) {
          sb.append(Integer.toHexString(bb.get() & 0xff));
        }
      } else {
        sb.append("request buffer is null");
      }

      LOG.error("Dumping request buffer: 0x" + sb.toString());
      if (txnHeader != null) {
        txnHeader.setType(OpCode.error);
        txn = new ErrorTxn(Code.MARSHALLINGERROR.intValue());
      }
    }
    request.hdr = txnHeader;
    request.txn = txn;
    request.zxid = zks.getZxid();
    nextProcessor.processRequest(request);
  }
예제 #25
0
 /**
  * Reads and deserializes the metrics stored at a service-level or node-level metrics path.
  *
  * <p>The ZooKeeper path is built from {@code clusterId}, {@code serviceId} and, when it is not
  * {@code null}, {@code dataNode}. The read is issued with the watch flag set to {@code true},
  * and the node's modification time is copied into the returned object as its last-updated
  * timestamp.
  *
  * @param clusterId cluster token of the metrics path
  * @param serviceId service token of the metrics path
  * @param dataNode data node token, or {@code null} to read the service-level node
  * @return the deserialized metrics, or {@code null} if the node does not exist or its payload
  *     contains a property that {@code MetricsData} does not recognize
  * @throws ReignException on any other ZooKeeper or deserialization failure; the original
  *     exception is preserved as the cause
  */
 MetricsData getMetricsFromDataNode(String clusterId, String serviceId, String dataNode) {
   PathScheme pathScheme = getContext().getPathScheme();
   String dataPath = null;
   if (dataNode != null) {
     dataPath =
         pathScheme.getAbsolutePath(
             PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId, dataNode));
   } else {
     dataPath =
         pathScheme.getAbsolutePath(PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId));
   }

   // Stays null until getData(...) returns, so every handler below must tolerate a null payload.
   byte[] bytes = null;
   try {
     Stat stat = new Stat();
     bytes = getContext().getZkClient().getData(dataPath, true, stat);
     MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
     metricsData.setLastUpdatedTimestamp(stat.getMtime());
     return metricsData;
   } catch (KeeperException e) {
     // a missing node is an expected condition, not an error
     if (e.code() == KeeperException.Code.NONODE) {
       return null;
     }
     String message = describeDataNodeFailure(clusterId, serviceId, dataPath, bytes);
     logger.warn(message + ":  " + e, e);
     throw new ReignException(message, e);
   } catch (UnrecognizedPropertyException e) {
     // payload has a property MetricsData does not know: log it and report "no data"
     logger.warn(describeDataNodeFailure(clusterId, serviceId, dataPath, bytes) + ":  " + e, e);
     return null;
   } catch (Exception e) {
     String message = describeDataNodeFailure(clusterId, serviceId, dataPath, bytes);
     logger.warn(message + ":  " + e, e);
     throw new ReignException(message, e);
   }
 }

 /**
  * Builds the failure message shared by the log entries and the {@code ReignException}s raised
  * while reading a metrics data node.
  *
  * <p>The payload is rendered as the text {@code null} when it was never read, so that building
  * the message can never throw and hide the failure being reported.
  */
 private String describeDataNodeFailure(
     String clusterId, String serviceId, String dataPath, byte[] bytes) {
   String dataAsString = (bytes == null) ? "null" : new String(bytes, UTF_8);
   return "Error retrieving data node:  clusterId="
       + clusterId
       + "; serviceId="
       + serviceId
       + "; dataPath="
       + dataPath
       + "; dataAsString="
       + dataAsString;
 }
예제 #26
0
  /**
   * Answers a metrics request.
   *
   * <p>The request's resource is normalized by dropping one leading and one trailing {@code "/"}.
   * Depending on the request's meta value the call then
   *
   * <ul>
   *   <li>{@code "observe"}: registers a client observer for a two- or three-token resource;
   *   <li>{@code "observe-stop"}: removes the sender's observers for the resource's metrics path;
   *   <li>anything else: lists clusters (empty resource), services (one token) or nodes (two
   *       tokens with a trailing slash), or returns metrics data (two tokens without a trailing
   *       slash, or three tokens).
   * </ul>
   *
   * <p>A {@code NONODE} ZooKeeper error yields an empty list body; any other exception sets the
   * {@code ERROR_UNEXPECTED} status on the response. The response always carries the request id.
   *
   * @param requestMessage the incoming request
   * @return the response to send back, never {@code null}
   */
  @Override
  public ResponseMessage handleMessage(RequestMessage requestMessage) {
    ResponseMessage responseMessage = new SimpleResponseMessage();

    try {
      if (logger.isTraceEnabled()) {
        logger.trace(
            "Received message:  nodeId={}; request='{}:{}'",
            requestMessage.getSenderId(),
            requestMessage.getTargetService(),
            requestMessage.getBody());
      }

      ParsedRequestMessage parsed = new ParsedRequestMessage(requestMessage);

      // normalize the resource: drop one leading slash, then one trailing slash (remembered)
      String resource = parsed.getResource();
      if (resource.startsWith("/")) {
        resource = resource.substring(1);
      }
      final boolean endsWithSlash = resource.endsWith("/");
      if (endsWithSlash) {
        resource = resource.substring(0, resource.length() - 1);
      }

      if ("observe".equals(parsed.getMeta())) {
        responseMessage = new SimpleResponseMessage(ResponseStatus.OK);
        String[] tokens = getPathScheme().tokenizePath(resource);
        if (tokens.length == 2 || tokens.length == 3) {
          // service-level observers have no node id
          String nodeId = (tokens.length == 3) ? tokens[2] : null;
          this.observe(
              tokens[0],
              tokens[1],
              this.getClientObserver(parsed.getSenderId(), tokens[0], tokens[1], nodeId));
        } else {
          responseMessage.setComment("Observing not supported:  " + resource);
        }

      } else if ("observe-stop".equals(parsed.getMeta())) {
        responseMessage = new SimpleResponseMessage(ResponseStatus.OK);
        String observedPath = getPathScheme().getAbsolutePath(PathType.METRICS, resource);
        getContext()
            .getObserverManager()
            .removeByOwnerId(parsed.getSenderId().toString(), observedPath);

      } else if (resource.isEmpty()) {
        // no resource given: list available clusters
        String metricsRoot = getContext().getPathScheme().getAbsolutePath(PathType.METRICS);
        List<String> clusterList = getContext().getZkClient().getChildren(metricsRoot, false);
        responseMessage.setBody(clusterList);

      } else {
        String[] tokens = getPathScheme().tokenizePath(resource);
        switch (tokens.length) {
          case 1:
            {
              // <cluster>: list available services
              String clusterPath =
                  getContext().getPathScheme().getAbsolutePath(PathType.METRICS, tokens[0]);
              List<String> serviceList =
                  getContext().getZkClient().getChildren(clusterPath, false);
              responseMessage.setBody(serviceList);
              if (serviceList == null) {
                responseMessage.setComment("Not found:  " + resource);
              }
              break;
            }
          case 2:
            {
              if (endsWithSlash) {
                // <cluster>/<service>/: list available nodes for the service
                String servicePath =
                    getContext()
                        .getPathScheme()
                        .getAbsolutePath(PathType.METRICS, tokens[0], tokens[1]);
                List<String> nodeList =
                    getContext().getZkClient().getChildren(servicePath, false);
                responseMessage.setBody(nodeList);
                if (nodeList == null) {
                  responseMessage.setComment("Not found:  " + resource);
                }
              } else {
                // <cluster>/<service>: metrics data for the service
                MetricsData serviceMetrics = getMetricsFromDataNode(tokens[0], tokens[1], null);
                if (serviceMetrics != null) {
                  responseMessage.setBody(serviceMetrics);
                } else {
                  responseMessage.setComment("Not found:  " + resource);
                }
              }
              break;
            }
          case 3:
            {
              // <cluster>/<service>/<node>: metrics data for a single data node
              MetricsData nodeMetrics = getMetricsFromDataNode(tokens[0], tokens[1], tokens[2]);
              if (nodeMetrics != null) {
                responseMessage.setBody(nodeMetrics);
              } else {
                responseMessage.setComment("Not found:  " + resource);
              }
              break;
            }
          default:
            // any other token count leaves the default response untouched
            break;
        }
      }

    } catch (KeeperException e) {
      if (e.code() != KeeperException.Code.NONODE) {
        responseMessage.setStatus(ResponseStatus.ERROR_UNEXPECTED, String.valueOf(e));
      } else {
        // a missing node is reported as an empty listing
        responseMessage.setBody(Collections.emptyList());
      }

    } catch (Exception e) {
      logger.error(String.valueOf(e), e);
      responseMessage.setStatus(ResponseStatus.ERROR_UNEXPECTED, String.valueOf(e));
    }

    responseMessage.setId(requestMessage.getId());
    return responseMessage;
  }
예제 #27
0
    /**
     * Aggregates per-node metrics into one service-level metrics record for each service this
     * node is a member of.
     *
     * <p>For every cluster reported by the presence service (skipping clusters this node is not a
     * member of, and the framework cluster), the member services are processed in sorted order.
     * For each service the method tries to take a distributed lock and skips the service when
     * {@code tryLock()} returns false. With the lock held it reads the service's child data nodes,
     * groups the counters, gauges, histograms, meters and timers of every node that is still
     * within its interval by metric key, merges each group, and writes the merged record as JSON
     * to the service-level metrics path. The lock is unlocked and destroyed in a {@code finally}
     * block.
     *
     * <p>After the last member service of a cluster has been written, the thread sleeps while
     * still holding that service's lock.
     *
     * <p>Exceptions raised while processing a service are logged (a {@code NONODE}
     * {@code KeeperException} is ignored silently) and do not stop the remaining services.
     */
    @Override
    public void run() {
      long startTimeNanos = System.nanoTime();

      logger.trace("AggregationRunnable starting:  hashCode={}", this.hashCode());

      // look up the services and clients used below
      PresenceService presenceService = getContext().getService("presence");
      CoordinationService coordinationService = getContext().getService("coord");
      ZkClient zkClient = getContext().getZkClient();
      PathScheme pathScheme = getContext().getPathScheme();

      // list all clusters
      List<String> clusterIds = presenceService.getClusters();
      for (String clusterId : clusterIds) {

        // only proceed if in cluster, and never for the framework cluster
        if (!presenceService.isMemberOf(clusterId)
            || clusterId.equals(getContext().getPathScheme().getFrameworkClusterId())) {
          continue;
        }

        List<String> allServiceIds = presenceService.getServices(clusterId);
        List<String> memberServiceIds = new ArrayList<String>(allServiceIds.size());
        for (String serviceId : allServiceIds) {
          // only aggregate if node is in service
          if (presenceService.isMemberOf(clusterId, serviceId)) {
            memberServiceIds.add(serviceId);
          }
        }

        // go through member service list in deterministic order so
        // locks are acquired in the same order across
        // nodes
        Collections.sort(memberServiceIds);
        for (int i = 0; i < memberServiceIds.size(); i++) {
          // reference time used to decide whether a data node is still within its interval
          long currentTimestamp = System.currentTimeMillis();

          String serviceId = memberServiceIds.get(i);

          logger.trace("Finding data nodes:  clusterId={}; serviceId={}", clusterId, serviceId);

          // get lock for a service; skip the service when the lock is not acquired
          // (presumably because another node is aggregating it -- TODO confirm)
          DistributedLock lock =
              coordinationService.getLock("reign", "metrics-" + clusterId + "-" + serviceId);
          if (!lock.tryLock()) {
            continue;
          }
          try {

            // get all data nodes for a service
            String dataParentPath =
                pathScheme.getAbsolutePath(
                    PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId));
            List<String> dataNodes = zkClient.getChildren(dataParentPath, false);

            /** iterate through service data nodes and gather up data to aggregate */
            // one map per metric type: metric key -> values reported by the individual data nodes
            Map<String, List<CounterData>> counterMap =
                new HashMap<String, List<CounterData>>(dataNodes.size() + 1, 1.0f);
            Map<String, List<GaugeData>> gaugeMap =
                new HashMap<String, List<GaugeData>>(dataNodes.size() + 1, 1.0f);
            Map<String, List<HistogramData>> histogramMap =
                new HashMap<String, List<HistogramData>>(dataNodes.size() + 1, 1.0f);
            Map<String, List<MeterData>> meterMap =
                new HashMap<String, List<MeterData>>(dataNodes.size() + 1, 1.0f);
            Map<String, List<TimerData>> timerMap =
                new HashMap<String, List<TimerData>>(dataNodes.size() + 1, 1.0f);
            // dataNodeCount counts every child node; dataNodeInWindowCount only those aggregated
            int dataNodeCount = 0;
            int dataNodeInWindowCount = 0;
            // interval settings are taken from the last data node that was aggregated
            Integer intervalLength = null;
            TimeUnit intervalLengthUnit = null;
            for (String dataNode : dataNodes) {

              dataNodeCount++;

              logger.trace(
                  "Found data node:  clusterId={}; serviceId={}; nodeId={}",
                  clusterId,
                  serviceId,
                  dataNode);

              String dataPath = null;
              MetricsData metricsData = null;

              // dataPath is only used for the trace message further down
              dataPath =
                  pathScheme.getAbsolutePath(
                      PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId, dataNode));

              // a node that cannot be read is skipped rather than failing the whole service
              try {
                metricsData = getMetricsFromDataNode(clusterId, serviceId, dataNode);
                if (metricsData == null) {
                  continue;
                }
              } catch (Exception e) {
                logger.warn(
                    "Error trying to aggregate data directory for service:  clusterId="
                        + clusterId
                        + "; serviceId="
                        + serviceId
                        + ":  "
                        + e,
                    e);
                continue;
              }

              // skip data node if not within interval
              long millisToExpiry = millisToExpiry(metricsData, currentTimestamp);
              if (millisToExpiry <= 0) {
                continue;
              }

              intervalLength = metricsData.getIntervalLength();
              intervalLengthUnit = metricsData.getIntervalLengthUnit();

              // aggregate service stats for data nodes that are
              // within current rotation interval
              logger.trace(
                  "Aggregating data node:  path={}; millisToExpiry={}", dataPath, millisToExpiry);

              // increment node count
              dataNodeInWindowCount++;

              // counters
              Map<String, CounterData> counters = metricsData.getCounters();
              for (String key : counters.keySet()) {
                CounterData counter = counters.get(key);
                List<CounterData> counterList = counterMap.get(key);
                if (counterList == null) {
                  counterList = new ArrayList<CounterData>(dataNodes.size());
                  counterMap.put(key, counterList);
                }
                counterList.add(counter);
              }

              // gauges
              Map<String, GaugeData> gauges = metricsData.getGauges();
              for (String key : gauges.keySet()) {
                GaugeData gauge = gauges.get(key);
                List<GaugeData> gaugeList = gaugeMap.get(key);
                if (gaugeList == null) {
                  gaugeList = new ArrayList<GaugeData>(dataNodes.size());
                  gaugeMap.put(key, gaugeList);
                }
                gaugeList.add(gauge);
              }

              // histogram
              Map<String, HistogramData> histograms = metricsData.getHistograms();
              for (String key : histograms.keySet()) {
                HistogramData histogram = histograms.get(key);
                List<HistogramData> histogramList = histogramMap.get(key);
                if (histogramList == null) {
                  histogramList = new ArrayList<HistogramData>(dataNodes.size());
                  histogramMap.put(key, histogramList);
                }
                histogramList.add(histogram);
              }

              // meters
              Map<String, MeterData> meters = metricsData.getMeters();
              for (String key : meters.keySet()) {
                MeterData meter = meters.get(key);
                List<MeterData> meterList = meterMap.get(key);
                if (meterList == null) {
                  meterList = new ArrayList<MeterData>(dataNodes.size());
                  meterMap.put(key, meterList);
                }
                meterList.add(meter);
              }

              // timers
              // NOTE(review): the local below is named meterList but holds TimerData values
              Map<String, TimerData> timers = metricsData.getTimers();
              for (String key : timers.keySet()) {
                TimerData timer = timers.get(key);
                List<TimerData> meterList = timerMap.get(key);
                if (meterList == null) {
                  meterList = new ArrayList<TimerData>(dataNodes.size());
                  timerMap.put(key, meterList);
                }
                meterList.add(timer);
              }
            } // for dataNodes

            /** aggregate data and write to ZK * */
            MetricsData serviceMetricsData = new MetricsData();

            // counters
            Map<String, CounterData> counters =
                new HashMap<String, CounterData>(counterMap.size() + 1, 1.0f);
            for (String key : counterMap.keySet()) {
              List<CounterData> counterList = counterMap.get(key);
              // if (counterList.size() != dataNodeCount) {
              // logger.warn(
              // "counterList size does not match nodeCount:  counterList.size={}; nodeCount={}",
              // counterList.size(), dataNodeCount);
              // }
              CounterData counterData = CounterData.merge(counterList);
              counters.put(key, counterData);
            }
            serviceMetricsData.setCounters(counters);

            // gauges
            Map<String, GaugeData> gauges =
                new HashMap<String, GaugeData>(gaugeMap.size() + 1, 1.0f);
            for (String key : gaugeMap.keySet()) {
              List<GaugeData> gaugeList = gaugeMap.get(key);
              // if (gaugeList.size() != dataNodeCount) {
              // logger.warn(
              // "gaugeList size does not match nodeCount:  gaugeList.size={}; nodeCount={}",
              // gaugeList.size(), dataNodeCount);
              // }
              GaugeData gaugeData = GaugeData.merge(gaugeList);
              gauges.put(key, gaugeData);
            }
            serviceMetricsData.setGauges(gauges);

            // histograms
            Map<String, HistogramData> histograms =
                new HashMap<String, HistogramData>(histogramMap.size() + 1, 1.0f);
            for (String key : histogramMap.keySet()) {
              List<HistogramData> histogramList = histogramMap.get(key);
              // if (histogramList.size() != dataNodeCount) {
              // logger.warn(
              // "histogramList size does not match nodeCount:  histogramList.size={};
              // nodeCount={}",
              // histogramList.size(), dataNodeCount);
              // }
              HistogramData histogramData = HistogramData.merge(histogramList);
              histograms.put(key, histogramData);
            }
            serviceMetricsData.setHistograms(histograms);

            // meters
            Map<String, MeterData> meters =
                new HashMap<String, MeterData>(meterMap.size() + 1, 1.0f);
            for (String key : meterMap.keySet()) {
              List<MeterData> meterList = meterMap.get(key);
              // if (meterList.size() != dataNodeCount) {
              // logger.warn(
              // "meterList size does not match nodeCount:  meterList.size={}; nodeCount={}",
              // meterList.size(), dataNodeCount);
              // }
              MeterData meterData = MeterData.merge(meterList);
              meters.put(key, meterData);
            }
            serviceMetricsData.setMeters(meters);

            // timers
            Map<String, TimerData> timers =
                new HashMap<String, TimerData>(timerMap.size() + 1, 1.0f);
            for (String key : timerMap.keySet()) {
              List<TimerData> timerList = timerMap.get(key);
              // if (timerList.size() != dataNodeCount) {
              // logger.warn(
              // "timerList size does not match nodeCount:  timerList.size={}; nodeCount={}",
              // timerList.size(), dataNodeCount);
              // }
              TimerData timerData = TimerData.merge(timerList);
              timers.put(key, timerData);
            }
            serviceMetricsData.setTimers(timers);

            serviceMetricsData.setDataNodeCount(dataNodeCount);
            serviceMetricsData.setDataNodeInWindowCount(dataNodeInWindowCount);
            serviceMetricsData.setClusterId(clusterId);
            serviceMetricsData.setServiceId(serviceId);
            serviceMetricsData.setIntervalLength(intervalLength);
            serviceMetricsData.setIntervalLengthUnit(intervalLengthUnit);
            serviceMetricsData.setLastUpdatedTimestamp(System.currentTimeMillis());

            // write to ZK: the merged record goes to the service-level path as JSON
            String dataPath =
                pathScheme.getAbsolutePath(
                    PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId));
            String serviceMetricsDataString =
                JacksonUtil.getObjectMapper().writeValueAsString(serviceMetricsData);
            zkClientUtil.updatePath(
                getContext().getZkClient(),
                getContext().getPathScheme(),
                dataPath,
                serviceMetricsDataString.getBytes(UTF_8),
                getContext().getDefaultZkAclList(),
                CreateMode.PERSISTENT,
                -1);

            // sleep to hold lock before next interval so that
            // updates don't happen too frequently with
            // more nodes in service
            // (only done after the last member service of the current cluster)
            if (i == memberServiceIds.size() - 1) {
              try {
                // sleep for half of what is left of the update interval; if the work already
                // took longer than the interval, sleep for one full interval instead
                long elapsedMillis = (System.nanoTime() - startTimeNanos) / 1000000;
                long sleepIntervalMillis = (updateIntervalMillis - elapsedMillis) / 2;
                if (sleepIntervalMillis < 0) {
                  sleepIntervalMillis = updateIntervalMillis;
                }
                logger.debug(
                    "AggregationRunnable SLEEPING btw. services:  sleepIntervalMillis={}; memberServiceIds.size={}",
                    sleepIntervalMillis,
                    memberServiceIds.size());
                Thread.sleep(sleepIntervalMillis);

              } catch (InterruptedException e) {
                // NOTE(review): the interrupt status is not restored here, so the interruption
                // is not visible to the code that runs after this handler -- confirm intended
                logger.warn("Interrupted while sleeping at end of aggregation:  " + e, e);
              }
            }

          } catch (KeeperException e) {
            // a missing node is not reported; every other ZooKeeper error is logged
            if (e.code() != KeeperException.Code.NONODE) {
              logger.warn(
                  "Error trying to aggregate data directory for service:  clusterId="
                      + clusterId
                      + "; serviceId="
                      + serviceId
                      + ":  "
                      + e,
                  e);
            }
          } catch (Exception e) {
            logger.warn(
                "Error trying to aggregate data directory for service:  clusterId="
                    + clusterId
                    + "; serviceId="
                    + serviceId
                    + ":  "
                    + e,
                e);
          } finally {
            // always release the lock taken above, whether or not aggregation succeeded
            // NOTE(review): the trace label says "metrics-aggregation-..." while the lock name
            // passed to getLock(...) is "metrics-<clusterId>-<serviceId>"
            logger.trace("Releasing lock:  metrics-aggregation-{}-{}", clusterId, serviceId);
            lock.unlock();
            lock.destroy();
            logger.trace(
                "Released and destroyed lock:  metrics-aggregation-{}-{}", clusterId, serviceId);
          } // try
        } // for service

        // aggregated results were stored in ZK at service level inside the loop above
      } // for cluster
    } // run
예제 #28
0
  public void processRequest(Request request) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Processing request:: " + request);
    }
    // request.addRQRec(">final");
    long traceMask = ZooTrace.CLIENT_REQUEST_TRACE_MASK;
    if (request.type == OpCode.ping) {
      traceMask = ZooTrace.SERVER_PING_TRACE_MASK;
    }
    if (LOG.isTraceEnabled()) {
      ZooTrace.logRequest(LOG, traceMask, 'E', request, "");
    }
    ProcessTxnResult rc = null;
    synchronized (zks.outstandingChanges) {
      while (!zks.outstandingChanges.isEmpty()
          && zks.outstandingChanges.get(0).zxid <= request.zxid) {
        ChangeRecord cr = zks.outstandingChanges.remove(0);
        if (cr.zxid < request.zxid) {
          LOG.warn("Zxid outstanding " + cr.zxid + " is less than current " + request.zxid);
        }
        if (zks.outstandingChangesForPath.get(cr.path) == cr) {
          zks.outstandingChangesForPath.remove(cr.path);
        }
      }
      if (request.hdr != null) {
        TxnHeader hdr = request.hdr;
        Record txn = request.txn;

        rc = zks.processTxn(hdr, txn);
      }
      // do not add non quorum packets to the queue.
      if (Request.isQuorum(request.type)) {
        zks.getZKDatabase().addCommittedProposal(request);
      }
    }

    if (request.hdr != null && request.hdr.getType() == OpCode.closeSession) {
      ServerCnxnFactory scxn = zks.getServerCnxnFactory();
      // this might be possible since
      // we might just be playing diffs from the leader
      if (scxn != null && request.cnxn == null) {
        // calling this if we have the cnxn results in the client's
        // close session response being lost - we've already closed
        // the session/socket here before we can send the closeSession
        // in the switch block below
        scxn.closeSession(request.sessionId);
        return;
      }
    }

    if (request.cnxn == null) {
      return;
    }
    ServerCnxn cnxn = request.cnxn;

    String lastOp = "NA";
    zks.decInProcess();
    Code err = Code.OK;
    Record rsp = null;
    boolean closeSession = false;
    try {
      if (request.hdr != null && request.hdr.getType() == OpCode.error) {
        throw KeeperException.create(KeeperException.Code.get(((ErrorTxn) request.txn).getErr()));
      }

      KeeperException ke = request.getException();
      if (ke != null && request.type != OpCode.multi) {
        throw ke;
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("{}", request);
      }
      switch (request.type) {
        case OpCode.ping:
          {
            zks.serverStats().updateLatency(request.createTime);

            lastOp = "PING";
            cnxn.updateStatsForResponse(
                request.cxid, request.zxid, lastOp, request.createTime, System.currentTimeMillis());

            cnxn.sendResponse(
                new ReplyHeader(-2, zks.getZKDatabase().getDataTreeLastProcessedZxid(), 0),
                null,
                "response");
            return;
          }
        case OpCode.createSession:
          {
            zks.serverStats().updateLatency(request.createTime);

            lastOp = "SESS";
            cnxn.updateStatsForResponse(
                request.cxid, request.zxid, lastOp, request.createTime, System.currentTimeMillis());

            zks.finishSessionInit(request.cnxn, true);
            return;
          }
        case OpCode.multi:
          {
            lastOp = "MULT";
            rsp = new MultiResponse();

            for (ProcessTxnResult subTxnResult : rc.multiResult) {

              OpResult subResult;

              switch (subTxnResult.type) {
                case OpCode.check:
                  subResult = new CheckResult();
                  break;
                case OpCode.create:
                  subResult = new CreateResult(subTxnResult.path);
                  break;
                case OpCode.delete:
                  subResult = new DeleteResult();
                  break;
                case OpCode.setData:
                  subResult = new SetDataResult(subTxnResult.stat);
                  break;
                case OpCode.error:
                  subResult = new ErrorResult(subTxnResult.err);
                  break;
                default:
                  throw new IOException("Invalid type of op");
              }

              ((MultiResponse) rsp).add(subResult);
            }

            break;
          }
        case OpCode.create:
          {
            lastOp = "CREA";
            rsp = new CreateResponse(rc.path);
            err = Code.get(rc.err);
            break;
          }
        case OpCode.delete:
          {
            lastOp = "DELE";
            err = Code.get(rc.err);
            break;
          }
        case OpCode.setData:
          {
            lastOp = "SETD";
            rsp = new SetDataResponse(rc.stat);
            err = Code.get(rc.err);
            break;
          }
        case OpCode.setACL:
          {
            lastOp = "SETA";
            rsp = new SetACLResponse(rc.stat);
            err = Code.get(rc.err);
            break;
          }
        case OpCode.closeSession:
          {
            lastOp = "CLOS";
            closeSession = true;
            err = Code.get(rc.err);
            break;
          }
        case OpCode.sync:
          {
            lastOp = "SYNC";
            SyncRequest syncRequest = new SyncRequest();
            ByteBufferInputStream.byteBuffer2Record(request.request, syncRequest);
            rsp = new SyncResponse(syncRequest.getPath());
            break;
          }
        case OpCode.check:
          {
            lastOp = "CHEC";
            rsp = new SetDataResponse(rc.stat);
            err = Code.get(rc.err);
            break;
          }
        case OpCode.exists:
          {
            lastOp = "EXIS";
            // TODO we need to figure out the security requirement for this!
            ExistsRequest existsRequest = new ExistsRequest();
            ByteBufferInputStream.byteBuffer2Record(request.request, existsRequest);
            String path = existsRequest.getPath();
            if (path.indexOf('\0') != -1) {
              throw new KeeperException.BadArgumentsException();
            }
            Stat stat = zks.getZKDatabase().statNode(path, existsRequest.getWatch() ? cnxn : null);
            rsp = new ExistsResponse(stat);
            break;
          }
        case OpCode.getData:
          {
            lastOp = "GETD";
            GetDataRequest getDataRequest = new GetDataRequest();
            ByteBufferInputStream.byteBuffer2Record(request.request, getDataRequest);
            DataNode n = zks.getZKDatabase().getNode(getDataRequest.getPath());
            if (n == null) {
              throw new KeeperException.NoNodeException();
            }
            Long aclL;
            synchronized (n) {
              aclL = n.acl;
            }
            PrepRequestProcessor.checkACL(
                zks, zks.getZKDatabase().convertLong(aclL), ZooDefs.Perms.READ, request.authInfo);
            Stat stat = new Stat();
            byte b[] =
                zks.getZKDatabase()
                    .getData(
                        getDataRequest.getPath(), stat, getDataRequest.getWatch() ? cnxn : null);
            rsp = new GetDataResponse(b, stat);
            break;
          }
        case OpCode.setWatches:
          {
            lastOp = "SETW";
            SetWatches setWatches = new SetWatches();
            // XXX We really should NOT need this!!!!
            request.request.rewind();
            ByteBufferInputStream.byteBuffer2Record(request.request, setWatches);
            long relativeZxid = setWatches.getRelativeZxid();
            zks.getZKDatabase()
                .setWatches(
                    relativeZxid,
                    setWatches.getDataWatches(),
                    setWatches.getExistWatches(),
                    setWatches.getChildWatches(),
                    cnxn);
            break;
          }
        case OpCode.getACL:
          {
            lastOp = "GETA";
            GetACLRequest getACLRequest = new GetACLRequest();
            ByteBufferInputStream.byteBuffer2Record(request.request, getACLRequest);
            Stat stat = new Stat();
            List<ACL> acl = zks.getZKDatabase().getACL(getACLRequest.getPath(), stat);
            rsp = new GetACLResponse(acl, stat);
            break;
          }
        case OpCode.getChildren:
          {
            lastOp = "GETC";
            GetChildrenRequest getChildrenRequest = new GetChildrenRequest();
            ByteBufferInputStream.byteBuffer2Record(request.request, getChildrenRequest);
            DataNode n = zks.getZKDatabase().getNode(getChildrenRequest.getPath());
            if (n == null) {
              throw new KeeperException.NoNodeException();
            }
            Long aclG;
            synchronized (n) {
              aclG = n.acl;
            }
            PrepRequestProcessor.checkACL(
                zks, zks.getZKDatabase().convertLong(aclG), ZooDefs.Perms.READ, request.authInfo);
            List<String> children =
                zks.getZKDatabase()
                    .getChildren(
                        getChildrenRequest.getPath(),
                        null,
                        getChildrenRequest.getWatch() ? cnxn : null);
            rsp = new GetChildrenResponse(children);
            break;
          }
        case OpCode.getChildren2:
          {
            lastOp = "GETC";
            GetChildren2Request getChildren2Request = new GetChildren2Request();
            ByteBufferInputStream.byteBuffer2Record(request.request, getChildren2Request);
            Stat stat = new Stat();
            DataNode n = zks.getZKDatabase().getNode(getChildren2Request.getPath());
            if (n == null) {
              throw new KeeperException.NoNodeException();
            }
            Long aclG;
            synchronized (n) {
              aclG = n.acl;
            }
            PrepRequestProcessor.checkACL(
                zks, zks.getZKDatabase().convertLong(aclG), ZooDefs.Perms.READ, request.authInfo);
            List<String> children =
                zks.getZKDatabase()
                    .getChildren(
                        getChildren2Request.getPath(),
                        stat,
                        getChildren2Request.getWatch() ? cnxn : null);
            rsp = new GetChildren2Response(children, stat);
            break;
          }
      }
    } catch (SessionMovedException e) {
      // session moved is a connection level error, we need to tear
      // down the connection otw ZOOKEEPER-710 might happen
      // ie client on slow follower starts to renew session, fails
      // before this completes, then tries the fast follower (leader)
      // and is successful, however the initial renew is then
      // successfully fwd/processed by the leader and as a result
      // the client and leader disagree on where the client is most
      // recently attached (and therefore invalid SESSION MOVED generated)
      cnxn.sendCloseSession();
      return;
    } catch (KeeperException e) {
      err = e.code();
    } catch (Exception e) {
      // log at error level as we are returning a marshalling
      // error to the user
      LOG.error("Failed to process " + request, e);
      StringBuilder sb = new StringBuilder();
      ByteBuffer bb = request.request;
      bb.rewind();
      while (bb.hasRemaining()) {
        sb.append(Integer.toHexString(bb.get() & 0xff));
      }
      LOG.error("Dumping request buffer: 0x" + sb.toString());
      err = Code.MARSHALLINGERROR;
    }

    long lastZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
    ReplyHeader hdr = new ReplyHeader(request.cxid, lastZxid, err.intValue());

    zks.serverStats().updateLatency(request.createTime);
    cnxn.updateStatsForResponse(
        request.cxid, lastZxid, lastOp, request.createTime, System.currentTimeMillis());

    try {
      cnxn.sendResponse(hdr, rsp, "response");
      if (closeSession) {
        cnxn.sendCloseSession();
      }
    } catch (IOException e) {
      LOG.error("FIXMSG", e);
    }
  }
예제 #29
0
  /**
   * Applies one transaction to this tree and returns a summary of what was processed.
   *
   * <p>The client id, cxid, zxid and type are copied from {@code header} into the result, and
   * {@code lastProcessedZxid} is advanced when the header carries a larger zxid. The operation
   * type then selects which tree mutation is performed; operation types with no matching case
   * cause no mutation.
   *
   * <p>A {@link KeeperException} raised by the mutation is never propagated. It is logged at warn
   * level, except that while the tree is not yet {@code initialized} the NONODE and NODEEXISTS
   * codes are silently ignored (the original note attributes these to lazy snapshots).
   *
   * @param header transaction header supplying client id, cxid, zxid, type and time
   * @param txn transaction payload; cast to the concrete txn class matching the header's type
   * @return the populated result; {@code err} is 0 unless the transaction is an error txn
   */
  public ProcessTxnResult processTxn(TxnHeader header, Record txn) {
    final ProcessTxnResult result = new ProcessTxnResult();

    // Description of the operation in flight; only consumed by the failure log below.
    String description = "";
    try {
      result.clientId = header.getClientId();
      result.cxid = header.getCxid();
      result.zxid = header.getZxid();
      result.type = header.getType();
      result.err = 0;
      if (lastProcessedZxid < result.zxid) {
        lastProcessedZxid = result.zxid;
      }

      switch (header.getType()) {
        case OpCode.create:
          {
            final CreateTxn create = (CreateTxn) txn;
            final String path = create.getPath();
            description = "Create transaction for " + path;
            createNode(
                path,
                create.getData(),
                create.getAcl(),
                create.getEphemeral() ? header.getClientId() : 0,
                header.getZxid(),
                header.getTime());
            // Only reached when createNode did not throw.
            result.path = path;
            break;
          }
        case OpCode.delete:
          {
            final String path = ((DeleteTxn) txn).getPath();
            description = "Delete transaction for " + path;
            deleteNode(path, header.getZxid());
            break;
          }
        case OpCode.setData:
          {
            final SetDataTxn dataTxn = (SetDataTxn) txn;
            final String path = dataTxn.getPath();
            description = "Set data for  transaction for " + path;
            result.stat =
                setData(
                    path,
                    dataTxn.getData(),
                    dataTxn.getVersion(),
                    header.getZxid(),
                    header.getTime());
            break;
          }
        case OpCode.setACL:
          {
            final SetACLTxn aclTxn = (SetACLTxn) txn;
            final String path = aclTxn.getPath();
            description = "Set ACL for  transaction for " + path;
            result.stat = setACL(path, aclTxn.getAcl(), aclTxn.getVersion());
            break;
          }
        case OpCode.closeSession:
          {
            killSession(header.getClientId(), header.getZxid());
            break;
          }
        case OpCode.error:
          {
            // Error transactions only carry the error code to report back.
            result.err = ((ErrorTxn) txn).getErr();
            break;
          }
      }
    } catch (KeeperException e) {
      // These are expected errors since we take a lazy snapshot: before the tree is
      // initialized, a missing or already-existing node is not worth reporting.
      final boolean expected =
          !initialized && (e.code() == Code.NONODE || e.code() == Code.NODEEXISTS);
      if (!expected) {
        LOG.warn("Failed:" + description, e);
      }
    }
    return result;
  }
예제 #30
0
    /**
     * Runs one cleanup pass over the metrics data nodes stored in ZooKeeper.
     *
     * <p>For every cluster/service pair that passes the presence-service membership checks (the
     * framework cluster is skipped), a per-service distributed lock is tried. If it is obtained,
     * each child data node under the service's metrics path is examined and deleted when its
     * metrics cannot be read, when {@code millisToExpiry} reports it as expired, or when its
     * interval start is more than 24 hours old.
     *
     * <p>Failures are logged at warn level and never propagated from the guarded sections; a
     * {@link KeeperException} with code NONODE at the service level is ignored silently.
     */
    @Override
    public void run() {
      logger.trace("CleanerRunnable starting:  hashCode={}", this.hashCode());

      final PresenceService presence = getContext().getService("presence");
      final CoordinationService coordination = getContext().getService("coord");
      final ZkClient zk = getContext().getZkClient();
      final PathScheme paths = getContext().getPathScheme();

      // 86,400,000 ms is 24 hours; a sixth of that (4 hours) is the "last few hours" window
      // handed to millisToExpiry.
      final long maxAgeMillis = 86400000;
      final long recentWindowMillis = 86400000 / 6;

      // Walk every cluster known to the presence service.
      for (String clusterId : presence.getClusters()) {

        // Only proceed for clusters we are a member of ...
        if (!presence.isMemberOf(clusterId)) {
          continue;
        }
        // ... and never for the framework's own cluster.
        if (clusterId.equals(getContext().getPathScheme().getFrameworkClusterId())) {
          continue;
        }

        for (String serviceId : presence.getServices(clusterId)) {
          logger.trace(
              "Checking data nodes expiry:  clusterId={}; serviceId={}", clusterId, serviceId);

          // Only proceed for services we are a member of.
          if (!presence.isMemberOf(clusterId, serviceId)) {
            continue;
          }

          final long now = System.currentTimeMillis();

          // One lock per service; if the lock is not obtained, skip this service for this pass.
          // NOTE(review): on a failed tryLock() the lock is never destroy()ed, unlike the
          // success path below - confirm whether that leaks anything.
          final DistributedLock serviceLock =
              coordination.getLock("reign", "metrics-" + clusterId + "-" + serviceId);
          if (!serviceLock.tryLock()) {
            continue;
          }

          // Shared prefix of every warning emitted for this service.
          final String warnPrefix =
              "Error trying to clean up data directory for service:  clusterId="
                  + clusterId
                  + "; serviceId="
                  + serviceId
                  + "; dataPath=";

          // Path of the node currently being examined; stays null until the first node.
          String dataPath = null;
          try {
            final String parentPath =
                paths.getAbsolutePath(PathType.METRICS, paths.joinTokens(clusterId, serviceId));

            for (String dataNode : zk.getChildren(parentPath, false)) {
              try {
                logger.trace(
                    "Checking data node expiry:  clusterId={}; serviceId={}; nodeId={}",
                    clusterId,
                    serviceId,
                    dataNode);
                dataPath =
                    paths.getAbsolutePath(
                        PathType.METRICS, paths.joinTokens(clusterId, serviceId, dataNode));

                final MetricsData metrics = getMetricsFromDataNode(clusterId, serviceId, dataNode);
                if (metrics == null) {
                  logger.warn(
                      "Removing unrecognized/corrupted/deprecated data node:  path={}", dataPath);
                  zk.delete(dataPath, -1);
                } else {
                  // Keep the last few hours worth of data.
                  final long remainingMillis = millisToExpiry(metrics, now - recentWindowMillis);

                  // Anything whose interval started more than a day ago goes regardless.
                  final boolean tooOld = now - metrics.getIntervalStartTimestamp() > maxAgeMillis;

                  if (remainingMillis > 0 && !tooOld) {
                    logger.trace(
                        "Data node is not yet expired:  path={}; millisToExpiry={}",
                        dataPath,
                        remainingMillis);
                  } else {
                    logger.info(
                        "Removing expired data node:  path={}; millisToExpiry={}",
                        dataPath,
                        remainingMillis);
                    zk.delete(dataPath, -1);
                  }
                }
              } catch (Exception e) {
                // A failure on one node must not stop the remaining nodes from being checked.
                logger.warn(warnPrefix + dataPath + ":  " + e, e);
              }
            }

          } catch (KeeperException e) {
            // A NONODE code here is not reported; every other code is.
            if (e.code() != KeeperException.Code.NONODE) {
              logger.warn(warnPrefix + dataPath + ":  " + e, e);
            }
          } catch (Exception e) {
            logger.warn(warnPrefix + dataPath + ":  " + e, e);
          } finally {
            serviceLock.unlock();
            serviceLock.destroy();
          }
        } // services
      } // clusters
    } // run()