Example #1
0
    /**
     * The run method lives for the life of the JobTracker, and removes Jobs that are not still
     * running, but which finished a long time ago.
     */
    public void run() {
      while (shouldRun) {
        try {
          Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
        } catch (InterruptedException ie) {
        }

        synchronized (jobs) {
          synchronized (jobInitQueue) {
            synchronized (jobsByArrival) {
              for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
                String jobid = (String) it.next();
                JobInProgress job = (JobInProgress) jobs.get(jobid);

                if (job.getStatus().getRunState() != JobStatus.RUNNING
                    && job.getStatus().getRunState() != JobStatus.PREP
                    && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
                  it.remove();

                  jobInitQueue.remove(job);
                  jobsByArrival.remove(job);
                }
              }
            }
          }
        }
      }
    }
Example #2
0
    /**
     * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not
     * checked in for some time.
     */
    public void run() {
      while (shouldRun) {
        //
        // Thread runs periodically to check whether trackers should be expired.
        // The sleep interval must be no more than half the maximum expiry time
        // for a task tracker.
        //
        try {
          Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
        } catch (InterruptedException ie) {
        }

        //
        // Loop through all expired items in the queue
        //
        synchronized (taskTrackers) {
          synchronized (trackerExpiryQueue) {
            long now = System.currentTimeMillis();
            TaskTrackerStatus leastRecent = null;
            while ((trackerExpiryQueue.size() > 0)
                && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null)
                && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {

              // Remove profile from head of queue
              trackerExpiryQueue.remove(leastRecent);
              String trackerName = leastRecent.getTrackerName();

              // Figure out if last-seen time should be updated, or if tracker is dead
              TaskTrackerStatus newProfile =
                  (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
              // Items might leave the taskTracker set through other means; the
              // status stored in 'taskTrackers' might be null, which means the
              // tracker has already been destroyed.
              if (newProfile != null) {
                if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
                  // Remove completely
                  updateTaskTrackerStatus(trackerName, null);
                  lostTaskTracker(leastRecent.getTrackerName());
                } else {
                  // Update time by inserting latest profile
                  trackerExpiryQueue.add(newProfile);
                }
              }
            }
          }
        }
      }
    }
Example #3
0
 public static void startTracker(Configuration conf) throws IOException {
   if (tracker != null) throw new IOException("JobTracker already running.");
   while (true) {
     try {
       tracker = new JobTracker(conf);
       break;
     } catch (IOException e) {
       LOG.log(Level.WARNING, "Starting tracker", e);
     }
     try {
       Thread.sleep(1000);
     } catch (InterruptedException e) {
     }
   }
   tracker.offerService();
 }
Example #4
0
  private static String lock(String lock) {
    String realPath = "";
    String parent = "/lock";
    String lockName = parent + "/" + lock;

    logger.debug("Getting lock " + lockName);

    try {
      if (zkInstance.exists(parent, false) == null)
        zkInstance.create(parent, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.fromFlag(0));
    } catch (Exception E) {
      logger.error("Error creating lock node: " + E.toString());
      return null;
    }

    List<String> children = new LinkedList<String>();
    try {
      // List <ACL> ACLList = zkInstance.getACL(lockName, zkInstance.exists(lock, false));

      realPath =
          zkInstance.create(
              lockName, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
      // children = zkInstance.getChildren(realPath, false);
      checkLock:
      while (true) {
        children = zkInstance.getChildren(parent, false);
        for (String curChild : children) {
          String child = parent + "/" + curChild;
          // System.out.println(child + " " + realPath + " " +
          // Integer.toString(child.compareTo(realPath)));
          if (child.compareTo(realPath) < 0
              && child.length() == realPath.length()
              && curChild.startsWith(lock)) {
            // System.out.println(child + " cmp to " + realPath);
            Thread.sleep(300);
            continue checkLock;
          }
        }
        logger.info("Got lock " + lockName);
        return realPath;
      }
    } catch (Exception E) {
      logger.error("Exception while trying to get lock " + lockName + " :" + E.toString());
      E.printStackTrace();
      return null;
    }
  }
Example #5
0
  /**
   * Get a proxy connection to a remote server
   *
   * @param protocol protocol class
   * @param clientVersion client version
   * @param addr remote address
   * @param conf configuration to use
   * @param rpcTimeout timeout for each RPC
   * @param timeout time in milliseconds before giving up
   * @return the proxy
   * @throws IOException if the far end through a RemoteException
   */
  static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy(
      Class<T> protocol,
      long clientVersion,
      InetSocketAddress addr,
      Configuration conf,
      long timeout,
      int rpcTimeout)
      throws IOException {
    long startTime = System.currentTimeMillis();
    UserGroupInformation ugi = null;
    try {
      ugi = UserGroupInformation.login(conf);
    } catch (LoginException le) {
      throw new RuntimeException("Couldn't login!");
    }
    IOException ioe;
    while (true) {
      try {
        return getProtocolProxy(
            protocol,
            clientVersion,
            addr,
            ugi,
            conf,
            NetUtils.getDefaultSocketFactory(conf),
            rpcTimeout);
      } catch (ConnectException se) { // namenode has not been started
        LOG.info("Server at " + addr + " not available yet, Zzzzz...");
        ioe = se;
      } catch (SocketTimeoutException te) { // namenode is busy
        LOG.info("Problem connecting to server: " + addr);
        ioe = te;
      }
      // check if timed out
      if (System.currentTimeMillis() - timeout >= startTime) {
        throw ioe;
      }

      // wait for retry
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // IGNORE
      }
    }
  }
Example #6
0
  /**
   * Get a protocol proxy that contains a proxy connection to a remote server and a set of methods
   * that are supported by the server
   *
   * @param protocol protocol class
   * @param clientVersion client version
   * @param addr remote address
   * @param conf configuration to use
   * @param rpcTimeout timeout for each RPC
   * @param timeout time in milliseconds before giving up
   * @return the proxy
   * @throws IOException if the far end through a RemoteException
   */
  public static <T> ProtocolProxy<T> waitForProtocolProxy(
      Class<T> protocol,
      long clientVersion,
      InetSocketAddress addr,
      Configuration conf,
      int rpcTimeout,
      RetryPolicy connectionRetryPolicy,
      long timeout)
      throws IOException {
    long startTime = Time.now();
    IOException ioe;
    while (true) {
      try {
        return getProtocolProxy(
            protocol,
            clientVersion,
            addr,
            UserGroupInformation.getCurrentUser(),
            conf,
            NetUtils.getDefaultSocketFactory(conf),
            rpcTimeout,
            connectionRetryPolicy);
      } catch (ConnectException se) { // namenode has not been started
        LOG.info("Server at " + addr + " not available yet, Zzzzz...");
        ioe = se;
      } catch (SocketTimeoutException te) { // namenode is busy
        LOG.info("Problem connecting to server: " + addr);
        ioe = te;
      } catch (NoRouteToHostException nrthe) { // perhaps a VIP is failing over
        LOG.info("No route to host for server: " + addr);
        ioe = nrthe;
      }
      // check if timed out
      if (Time.now() - timeout >= startTime) {
        throw ioe;
      }

      // wait for retry
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // IGNORE
      }
    }
  }
Example #7
0
  //
  // The main work loop
  //
  public void run() {

    //
    // Poll the Namenode (once every 5 minutes) to find the size of the
    // pending edit log.
    //
    long period = 5 * 60; // 5 minutes
    long lastCheckpointTime = 0;
    if (checkpointPeriod < period) {
      period = checkpointPeriod;
    }

    while (shouldRun) {
      try {
        Thread.sleep(1000 * period);
      } catch (InterruptedException ie) {
        // do nothing
      }
      if (!shouldRun) {
        break;
      }
      try {
        long now = System.currentTimeMillis();

        long size = namenode.getEditLogSize();
        if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) {
          doCheckpoint();
          lastCheckpointTime = now;
        }
      } catch (IOException e) {
        LOG.error("Exception in doCheckpoint: ");
        LOG.error(StringUtils.stringifyException(e));
        e.printStackTrace();
        checkpointImage.imageDigest = null;
      } catch (Throwable e) {
        LOG.error("Throwable Exception in doCheckpoint: ");
        LOG.error(StringUtils.stringifyException(e));
        e.printStackTrace();
        Runtime.getRuntime().exit(-1);
      }
    }
  }