/** Process incoming heartbeat messages from the task trackers. */
public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
  String trackerName = trackerStatus.getTrackerName();
  trackerStatus.setLastSeen(System.currentTimeMillis());

  synchronized (taskTrackers) {
    synchronized (trackerExpiryQueue) {
      boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
      if (initialContact) {
        // If it's first contact, then clear out any state hanging around
        if (seenBefore) {
          lostTaskTracker(trackerName);
        }
      } else {
        // If not first contact, there should be some record of the tracker
        if (!seenBefore) {
          return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
        }
      }

      if (initialContact) {
        trackerExpiryQueue.add(trackerStatus);
      }
    }
  }

  updateTaskStatuses(trackerStatus);
  return InterTrackerProtocol.TRACKERS_OK;
}
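// A hedged caller-side sketch, not from the original source: how a task
// tracker might react to the two heartbeat return codes.  "jobTracker",
// "status", and "initialContact" are assumed names on the tracker side.
// UNKNOWN_TASKTRACKER means the JobTracker has no record of this tracker
// (e.g. the JobTracker restarted), so the tracker must re-register by
// marking its next heartbeat as initial contact.
int result = jobTracker.emitHeartbeat(status, initialContact);
if (result == InterTrackerProtocol.UNKNOWN_TASKTRACKER) {
  initialContact = true;   // force re-registration on the next heartbeat
} else {
  initialContact = false;  // steady-state heartbeats from here on
}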
void removeTaskEntry(String taskid) {
  // taskid --> tracker
  String tracker = (String) taskidToTrackerMap.remove(taskid);

  // tracker --> taskid (guard against a task that was never registered,
  // since looking up a null key would fail on a sorted map)
  if (tracker != null) {
    TreeSet trackerSet = (TreeSet) trackerToTaskMap.get(tracker);
    if (trackerSet != null) {
      trackerSet.remove(taskid);
    }
  }

  // taskid --> TIP
  taskidToTIPMap.remove(taskid);
}
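// The entry methods above and below assume three lookup tables.  Their
// declarations are not part of this excerpt; the raw-type casts suggest
// pre-generics fields along these lines (a sketch, not the verified
// originals):
TreeMap taskidToTrackerMap = new TreeMap(); // taskid --> tracker name
TreeMap trackerToTaskMap = new TreeMap();   // tracker name --> TreeSet of taskids
TreeMap taskidToTIPMap = new TreeMap();     // taskid --> TaskInProgress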
private void checkRules() {
  outputMessage("\nChecking " + m_baseUrl, TEST_SUMMARY_MESSAGE);
  outputMessage("crawl depth: " + m_crawlDepth + "  crawl delay: " + m_crawlDelay + " ms.",
                PLAIN_MESSAGE);

  TreeSet crawlList = new TreeSet();
  TreeSet fetched = new TreeSet();
  // initialize with the baseUrl
  crawlList.add(m_baseUrl);
  depth_incl = new int[m_crawlDepth];
  depth_fetched = new int[m_crawlDepth];
  depth_parsed = new int[m_crawlDepth];
  long start_time = TimeBase.nowMs();

  for (int depth = 1; depth <= m_crawlDepth; depth++) {
    if (isInterrupted()) {
      return;
    }
    m_curDepth = depth;
    if (crawlList.isEmpty()) {
      outputMessage("\nNothing left to crawl, exiting after depth " + (depth - 1),
                    PLAIN_MESSAGE);
      break;
    }
    String[] urls = (String[]) crawlList.toArray(new String[0]);
    crawlList.clear();
    outputMessage("\nDepth " + depth, PLAIN_MESSAGE);
    for (int ix = 0; ix < urls.length; ix++) {
      if (isInterrupted()) {
        return;
      }
      pauseBeforeFetch();
      String urlstr = urls[ix];
      m_incls.clear();
      m_excls.clear();
      // crawl the page
      buildUrlSets(urlstr);
      fetched.add(urlstr);
      // output incl/excl results and queue the newly discovered
      // included urls for the next crawl depth
      crawlList.addAll(outputUrlResults(urlstr, m_incls, m_excls));
    }
  }
  long elapsed_time = TimeBase.nowMs() - start_time;
  outputSummary(m_baseUrl, fetched, crawlList, elapsed_time);
}
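// A minimal standalone sketch of the level-by-level frontier pattern used
// in checkRules(): drain the current depth's url set, queue only newly
// discovered urls for the next depth, and stop early when nothing is left.
// extractLinks() is a hypothetical stand-in for buildUrlSets() plus
// outputUrlResults(), assumed to return a Collection of urls; this is an
// illustration, not the original code.
static Set crawlByDepth(String startUrl, int maxDepth) {
  Set frontier = new TreeSet(Collections.singleton(startUrl));
  Set seen = new TreeSet(frontier);
  for (int depth = 1; depth <= maxDepth && !frontier.isEmpty(); depth++) {
    Set next = new TreeSet();
    for (Iterator it = frontier.iterator(); it.hasNext(); ) {
      String url = (String) it.next();
      for (Iterator links = extractLinks(url).iterator(); links.hasNext(); ) {
        String link = (String) links.next();
        if (seen.add(link)) {   // add() is true only for genuinely new urls
          next.add(link);
        }
      }
    }
    frontier = next;            // becomes the work list for depth + 1
  }
  return seen;                  // every url reached within maxDepth
}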
/**
 * We lost the task tracker!  All task-tracker structures have already been
 * updated.  Just process the contained tasks and any jobs that might be
 * affected.
 */
void lostTaskTracker(String trackerName) {
  LOG.info("Lost tracker '" + trackerName + "'");
  TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName);
  trackerToTaskMap.remove(trackerName);

  if (lostTasks != null) {
    for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);

      // Tell the job to fail the relevant task; skip entries whose TIP
      // mapping has already been removed
      if (tip != null) {
        JobInProgress job = tip.getJob();
        job.failedTask(tip, taskId, trackerName);
      }
    }
  }
}
/**
 * A tracker wants to know if any of its Tasks have been closed (because
 * the job completed, whether successfully or not).
 */
public synchronized String pollForTaskWithClosedJob(String taskTracker) {
  TreeSet taskIds = (TreeSet) trackerToTaskMap.get(taskTracker);
  if (taskIds != null) {
    for (Iterator it = taskIds.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
      if (tip.shouldCloseForClosedJob(taskId)) {
        // This is how the JobTracker ends a task at the TaskTracker.
        // It may be successfully completed, or may be killed in
        // mid-execution.
        return taskId;
      }
    }
  }
  return null;
}
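// A hypothetical tracker-side sketch, not from the original source: once
// per heartbeat cycle the tracker asks whether any of its tasks belongs to
// a closed job and, if so, shuts it down.  closeTask() and myTrackerName
// are assumed names; the lookup entry itself is removed separately via
// removeTaskEntry() once the close is reported back.
String taskId = jobTracker.pollForTaskWithClosedJob(myTrackerName);
if (taskId != null) {
  closeTask(taskId);   // stop the task, whether finished or killed mid-run
}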
///////////////////////////////////////////////////////
// Maintain lookup tables; called by JobInProgress
// and TaskInProgress
///////////////////////////////////////////////////////
void createTaskEntry(String taskid, String taskTracker, TaskInProgress tip) {
  LOG.info("Adding task '" + taskid + "' to tip " + tip.getTIPId()
           + ", for tracker '" + taskTracker + "'");

  // taskid --> tracker
  taskidToTrackerMap.put(taskid, taskTracker);

  // tracker --> taskid
  TreeSet taskset = (TreeSet) trackerToTaskMap.get(taskTracker);
  if (taskset == null) {
    taskset = new TreeSet();
    trackerToTaskMap.put(taskTracker, taskset);
  }
  taskset.add(taskid);

  // taskid --> TIP
  taskidToTIPMap.put(taskid, tip);
}
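// Hypothetical round trip, with illustrative ids: the three maps stay
// consistent because every createTaskEntry() is eventually balanced by a
// removeTaskEntry() when the task finishes, or by lostTaskTracker() when
// its tracker disappears.
createTaskEntry("task_0001_m_000001", "tracker_host1:50060", tip);
// ... task runs, then completes or is killed ...
removeTaskEntry("task_0001_m_000001");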
private Set outputUrlResults(String url, Set m_inclset, Set m_exclset) {
  Set new_incls = new TreeSet(CollectionUtils.subtract(m_inclset, m_reported));
  Set new_excls = new TreeSet(CollectionUtils.subtract(m_exclset, m_reported));

  if (!m_inclset.isEmpty()) {
    outputMessage("\nIncluded Urls: ("
                  + new_incls.size() + " new, "
                  + (m_inclset.size() - new_incls.size()) + " old)",
                  URL_SUMMARY_MESSAGE);
    depth_incl[m_curDepth - 1] += new_incls.size();
  }
  for (Iterator it = new_incls.iterator(); it.hasNext(); ) {
    outputMessage(it.next().toString(), PLAIN_MESSAGE);
  }

  if (!m_exclset.isEmpty()) {
    outputMessage("\nExcluded Urls: ("
                  + new_excls.size() + " new, "
                  + (m_exclset.size() - new_excls.size()) + " old)",
                  URL_SUMMARY_MESSAGE);
  }
  for (Iterator it = new_excls.iterator(); it.hasNext(); ) {
    outputMessage(it.next().toString(), PLAIN_MESSAGE);
  }

  m_reported.addAll(new_incls);
  m_reported.addAll(new_excls);

  if (m_outWriter != null) {
    try {
      m_outWriter.flush();
    } catch (IOException ex) {
      // ignore flush failures; output is best-effort
    }
  }
  return new_incls;
}
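// A worked example of the new-vs-old split above.  Commons Collections'
// CollectionUtils.subtract(a, b) returns the elements of a not matched in
// b, so urls already in m_reported drop out and only first-time urls are
// printed, counted, and returned for the next depth.
Set reported = new TreeSet(Arrays.asList(new String[] {"a", "b"}));
Set incls = new TreeSet(Arrays.asList(new String[] {"b", "c", "d"}));
Set newIncls = new TreeSet(CollectionUtils.subtract(incls, reported));
// newIncls now holds ["c", "d"]; "b" was already reported at an earlier depth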
private void outputSummary(String baseUrl, Set fetched, Set toCrawl, long elapsedTime) {
  int fetchCount = fetched.size();
  outputMessage("\n\nSummary for starting Url: " + baseUrl
                + " and depth: " + m_crawlDepth, TEST_SUMMARY_MESSAGE);
  outputMessage("\nUrls fetched: " + fetchCount
                + "    Urls extracted: " + m_extracted.size(), PLAIN_MESSAGE);

  outputMessage("\nDepth  Fetched  Parsed  New URLs", PLAIN_MESSAGE);
  PrintfFormat pf = new PrintfFormat("%5d  %7d  %6d  %8d");
  for (int depth = 1; depth <= m_crawlDepth; depth++) {
    Integer[] args = new Integer[] {
      new Integer(depth),
      new Integer(depth_fetched[depth - 1]),
      new Integer(depth_parsed[depth - 1]),
      new Integer(depth_incl[depth - 1]),
    };
    outputMessage(pf.sprintf(args), PLAIN_MESSAGE);
  }
  outputMessage("\nRemaining unfetched: " + toCrawl.size(), PLAIN_MESSAGE);

  long secs = elapsedTime / Constants.SECOND;
  long fetchRate = 0;
  if (secs > 0) {
    // pages per minute: fetchCount over elapsedTime ms, scaled by 60,000 ms/min
    fetchRate = fetchCount * 60 * Constants.SECOND / elapsedTime;
  }
  outputMessage("\nElapsed Time: " + secs + " secs."
                + "  Fetch Rate: " + fetchRate + " p/m", PLAIN_MESSAGE);
}
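// A quick worked check of the pages-per-minute arithmetic above, assuming
// Constants.SECOND is 1000 (one second in milliseconds): 120 pages fetched
// over 4 minutes.
long fetchCount = 120;
long elapsedTime = 240000;                              // 240 s, in ms
long fetchRate = fetchCount * 60 * 1000 / elapsedTime;
// fetchRate == 30 p/m, i.e. 120 pages / 4 minutes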