/** * The run method lives for the life of the JobTracker, and removes Jobs that are not still * running, but which finished a long time ago. */ public void run() { while (shouldRun) { try { Thread.sleep(RETIRE_JOB_CHECK_INTERVAL); } catch (InterruptedException ie) { } synchronized (jobs) { synchronized (jobInitQueue) { synchronized (jobsByArrival) { for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) { String jobid = (String) it.next(); JobInProgress job = (JobInProgress) jobs.get(jobid); if (job.getStatus().getRunState() != JobStatus.RUNNING && job.getStatus().getRunState() != JobStatus.PREP && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) { it.remove(); jobInitQueue.remove(job); jobsByArrival.remove(job); } } } } } } }
/** * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not * checked in for some time. */ public void run() { while (shouldRun) { // // Thread runs periodically to check whether trackers should be expired. // The sleep interval must be no more than half the maximum expiry time // for a task tracker. // try { Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3); } catch (InterruptedException ie) { } // // Loop through all expired items in the queue // synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { long now = System.currentTimeMillis(); TaskTrackerStatus leastRecent = null; while ((trackerExpiryQueue.size() > 0) && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null) && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) { // Remove profile from head of queue trackerExpiryQueue.remove(leastRecent); String trackerName = leastRecent.getTrackerName(); // Figure out if last-seen time should be updated, or if tracker is dead TaskTrackerStatus newProfile = (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName()); // Items might leave the taskTracker set through other means; the // status stored in 'taskTrackers' might be null, which means the // tracker has already been destroyed. if (newProfile != null) { if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) { // Remove completely updateTaskTrackerStatus(trackerName, null); lostTaskTracker(leastRecent.getTrackerName()); } else { // Update time by inserting latest profile trackerExpiryQueue.add(newProfile); } } } } } } }
public static void startTracker(Configuration conf) throws IOException { if (tracker != null) throw new IOException("JobTracker already running."); while (true) { try { tracker = new JobTracker(conf); break; } catch (IOException e) { LOG.log(Level.WARNING, "Starting tracker", e); } try { Thread.sleep(1000); } catch (InterruptedException e) { } } tracker.offerService(); }
private static String lock(String lock) { String realPath = ""; String parent = "/lock"; String lockName = parent + "/" + lock; logger.debug("Getting lock " + lockName); try { if (zkInstance.exists(parent, false) == null) zkInstance.create(parent, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.fromFlag(0)); } catch (Exception E) { logger.error("Error creating lock node: " + E.toString()); return null; } List<String> children = new LinkedList<String>(); try { // List <ACL> ACLList = zkInstance.getACL(lockName, zkInstance.exists(lock, false)); realPath = zkInstance.create( lockName, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL); // children = zkInstance.getChildren(realPath, false); checkLock: while (true) { children = zkInstance.getChildren(parent, false); for (String curChild : children) { String child = parent + "/" + curChild; // System.out.println(child + " " + realPath + " " + // Integer.toString(child.compareTo(realPath))); if (child.compareTo(realPath) < 0 && child.length() == realPath.length() && curChild.startsWith(lock)) { // System.out.println(child + " cmp to " + realPath); Thread.sleep(300); continue checkLock; } } logger.info("Got lock " + lockName); return realPath; } } catch (Exception E) { logger.error("Exception while trying to get lock " + lockName + " :" + E.toString()); E.printStackTrace(); return null; } }
/** * Get a proxy connection to a remote server * * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC * @param timeout time in milliseconds before giving up * @return the proxy * @throws IOException if the far end through a RemoteException */ static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy( Class<T> protocol, long clientVersion, InetSocketAddress addr, Configuration conf, long timeout, int rpcTimeout) throws IOException { long startTime = System.currentTimeMillis(); UserGroupInformation ugi = null; try { ugi = UserGroupInformation.login(conf); } catch (LoginException le) { throw new RuntimeException("Couldn't login!"); } IOException ioe; while (true) { try { return getProtocolProxy( protocol, clientVersion, addr, ugi, conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout); } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); ioe = se; } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); ioe = te; } // check if timed out if (System.currentTimeMillis() - timeout >= startTime) { throw ioe; } // wait for retry try { Thread.sleep(1000); } catch (InterruptedException ie) { // IGNORE } } }
/** * Get a protocol proxy that contains a proxy connection to a remote server and a set of methods * that are supported by the server * * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC * @param timeout time in milliseconds before giving up * @return the proxy * @throws IOException if the far end through a RemoteException */ public static <T> ProtocolProxy<T> waitForProtocolProxy( Class<T> protocol, long clientVersion, InetSocketAddress addr, Configuration conf, int rpcTimeout, RetryPolicy connectionRetryPolicy, long timeout) throws IOException { long startTime = Time.now(); IOException ioe; while (true) { try { return getProtocolProxy( protocol, clientVersion, addr, UserGroupInformation.getCurrentUser(), conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout, connectionRetryPolicy); } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); ioe = se; } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); ioe = te; } catch (NoRouteToHostException nrthe) { // perhaps a VIP is failing over LOG.info("No route to host for server: " + addr); ioe = nrthe; } // check if timed out if (Time.now() - timeout >= startTime) { throw ioe; } // wait for retry try { Thread.sleep(1000); } catch (InterruptedException ie) { // IGNORE } } }
// // The main work loop // public void run() { // // Poll the Namenode (once every 5 minutes) to find the size of the // pending edit log. // long period = 5 * 60; // 5 minutes long lastCheckpointTime = 0; if (checkpointPeriod < period) { period = checkpointPeriod; } while (shouldRun) { try { Thread.sleep(1000 * period); } catch (InterruptedException ie) { // do nothing } if (!shouldRun) { break; } try { long now = System.currentTimeMillis(); long size = namenode.getEditLogSize(); if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) { doCheckpoint(); lastCheckpointTime = now; } } catch (IOException e) { LOG.error("Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); checkpointImage.imageDigest = null; } catch (Throwable e) { LOG.error("Throwable Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); Runtime.getRuntime().exit(-1); } } }