/** Process incoming heartbeat messages from the task trackers. */
public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
  String trackerName = trackerStatus.getTrackerName();
  trackerStatus.setLastSeen(System.currentTimeMillis());

  synchronized (taskTrackers) {
    synchronized (trackerExpiryQueue) {
      boolean seenBefore = updateTaskTrackerStatus(trackerName, trackerStatus);
      if (initialContact) {
        // If it's first contact, then clear out any state hanging around
        if (seenBefore) {
          lostTaskTracker(trackerName);
        }
      } else {
        // If not first contact, there should be some record of the tracker
        if (!seenBefore) {
          return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
        }
      }

      if (initialContact) {
        trackerExpiryQueue.add(trackerStatus);
      }
    }
  }

  updateTaskStatuses(trackerStatus);
  // LOG.info("Got heartbeat from " + trackerName);
  return InterTrackerProtocol.TRACKERS_OK;
}
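// Usage sketch (not from the source): a hypothetical task-tracker-side
// heartbeat loop, assuming 'jobTracker' is an RPC proxy implementing
// InterTrackerProtocol. The interval and control flow are illustrative only.
static void heartbeatLoop(InterTrackerProtocol jobTracker, TaskTrackerStatus status)
    throws InterruptedException {
  boolean initialContact = true;
  while (true) {
    int result = jobTracker.emitHeartbeat(status, initialContact);
    // If the JobTracker has no record of us (e.g. it restarted), the next
    // heartbeat must re-register as an initial contact.
    initialContact = (result == InterTrackerProtocol.UNKNOWN_TASKTRACKER);
    Thread.sleep(10 * 1000); // heartbeat interval; value illustrative
  }
}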
@Override
public int run(String[] args) throws Exception {
  if (args.length < 4) {
    writeUsage();
    return 1;
  }
  Path secretsPath = new Path(args[0]);
  Path saltFilePath = new Path(args[1]);
  Path inputPath = new Path(args[2]);
  Path outputPath = new Path(args[3]);

  // Make sure the salt file exists
  generateSaltIfNeeded(saltFilePath, secretsPath);

  // Configure the job
  Job job = configureJob(secretsPath, saltFilePath, inputPath, outputPath);

  // Run it
  long startTime = System.currentTimeMillis();
  job.submit();
  if (job.waitForCompletion(true)) {
    System.out.printf(
        "Done obfuscating - took %d seconds.\n",
        (System.currentTimeMillis() - startTime) / 1000);
  } else {
    System.err.printf("Job finished with errors: %s\n", job.getStatus().getFailureInfo());
    return 2;
  }
  return 0;
}
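// A minimal driver sketch for the tool above. The class name
// 'ObfuscationTool' is hypothetical; ToolRunner.run() is the standard
// Hadoop entry point that parses generic options (-D, -conf, ...) before
// delegating to run(String[]).
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new ObfuscationTool(), args);
  System.exit(exitCode);
}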
public void inject(Path crawlDb, Path urlDir) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: starting at " + sdf.format(start));
    LOG.info("Injector: crawlDb: " + crawlDb);
    LOG.info("Injector: urlDir: " + urlDir);
  }

  Path tempDir = new Path(
      getConf().get("mapred.temp.dir", ".")
          + "/inject-temp-"
          + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  // map text input file to a <url,CrawlDatum> file
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Converting injected urls to crawl db entries.");
  }
  JobConf sortJob = new NutchJob(getConf());
  sortJob.setJobName("inject " + urlDir);
  FileInputFormat.addInputPath(sortJob, urlDir);
  sortJob.setMapperClass(InjectMapper.class);
  FileOutputFormat.setOutputPath(sortJob, tempDir);
  sortJob.setOutputFormat(SequenceFileOutputFormat.class);
  sortJob.setOutputKeyClass(Text.class);
  sortJob.setOutputValueClass(CrawlDatum.class);
  sortJob.setLong("injector.current.time", System.currentTimeMillis());
  RunningJob mapJob = JobClient.runJob(sortJob);

  long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
  long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
  LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
  LOG.info("Injector: total number of urls injected after normalization and filtering: "
      + urlsInjected);

  // merge with existing crawl db
  if (LOG.isInfoEnabled()) {
    LOG.info("Injector: Merging injected urls into crawl db.");
  }
  JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
  FileInputFormat.addInputPath(mergeJob, tempDir);
  mergeJob.setReducerClass(InjectReducer.class);
  JobClient.runJob(mergeJob);
  CrawlDb.install(mergeJob, crawlDb);

  // clean up
  FileSystem fs = FileSystem.get(getConf());
  fs.delete(tempDir, true);

  long end = System.currentTimeMillis();
  LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
}
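// A usage sketch, assuming this method belongs to Nutch's Injector tool:
// inject a directory of seed-URL text files into a crawl db. The paths are
// illustrative.
Configuration conf = NutchConfiguration.create();
Injector injector = new Injector(conf);
injector.inject(new Path("crawl/crawldb"), new Path("urls"));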
private static void run(Callable c, boolean read, int size) {
  // Count all i/o time from here, including all retry overheads
  long start_io_ms = System.currentTimeMillis();
  while (true) {
    try {
      // Time the blocking i/o call itself, without counting repeats
      long start_ns = System.nanoTime();
      c.call();
      TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
      break;
      // Explicitly ignore the following exceptions, but fail on the
      // remaining IOExceptions
    } catch (EOFException e) {
      ignoreAndWait(e, false);
    } catch (SocketTimeoutException e) {
      ignoreAndWait(e, false);
    } catch (S3Exception e) {
      // Catch S3Exception before IOException. This is tricky code: we
      // support different HDFS versions, and newer versions declare
      // S3Exception as an IOException, but old versions (0.20.xxx)
      // declare it as a RuntimeException, so it must be caught first!
      ignoreAndWait(e, false);
    } catch (IOException e) {
      ignoreAndWait(e, true);
    } catch (Exception e) {
      throw Log.errRTExcept(e);
    }
  }
}
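// A hypothetical caller sketch: wrapping a blocking HDFS read in a raw
// Callable so run() can retry transient failures. 'fs', 'path', and 'buf'
// are assumed names, for illustration only.
static void readWithRetries(final FileSystem fs, final Path path, final byte[] buf) {
  run(new Callable() {
    public Object call() throws Exception {
      FSDataInputStream in = fs.open(path);
      try {
        in.readFully(0, buf); // the blocking call that run() times
      } finally {
        in.close();
      }
      return null;
    }
  }, true /* read */, buf.length);
}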
/** Implements basic throttling capabilities. */
public static class Throttler {
  double bytesPerSec;
  long lastTime = System.currentTimeMillis();

  public Throttler(double bytesPerSec) {
    this.bytesPerSec = bytesPerSec;
  }

  public void incrementAndThrottle(int bytes) {
    if (bytesPerSec < 1) { // no throttle at all
      return;
    }
    long currentTime = System.currentTimeMillis();
    long timeDiff = currentTime - lastTime;
    if (timeDiff == 0) {
      timeDiff = 1;
    }
    // Observed rate since the last call, scaled to bytes per second.
    double actualBytesPerSec = (bytes / (double) timeDiff) * 1000;
    if (actualBytesPerSec > this.bytesPerSec) {
      // Throttle: sleep long enough to bring the average rate back down
      // to the configured limit.
      double exceededByFactorOf = actualBytesPerSec / this.bytesPerSec;
      try {
        long mustSleep = (long) ((exceededByFactorOf - 1) * timeDiff);
        Thread.sleep(mustSleep);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the interrupt flag
      }
    }
    lastTime = System.currentTimeMillis();
  }
}
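// A minimal usage sketch for Throttler: cap a stream copy at roughly
// 1 MB/s. The stream setup is illustrative; any InputStream/OutputStream
// pair works.
static void throttledCopy(InputStream in, OutputStream out) throws IOException {
  Throttler throttler = new Throttler(1024 * 1024); // ~1 MB/s limit
  byte[] buffer = new byte[8192];
  int n;
  while ((n = in.read(buffer)) != -1) {
    out.write(buffer, 0, n);
    throttler.incrementAndThrottle(n); // sleeps when the rate is exceeded
  }
}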
/**
 * The run method lives for the life of the JobTracker, and removes Jobs that are no longer
 * running and finished a long time ago.
 */
public void run() {
  while (shouldRun) {
    try {
      Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
    } catch (InterruptedException ie) {
    }
    synchronized (jobs) {
      synchronized (jobInitQueue) {
        synchronized (jobsByArrival) {
          for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
            String jobid = (String) it.next();
            JobInProgress job = (JobInProgress) jobs.get(jobid);
            if (job.getStatus().getRunState() != JobStatus.RUNNING
                && job.getStatus().getRunState() != JobStatus.PREP
                && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
              it.remove();
              jobInitQueue.remove(job);
              jobsByArrival.remove(job);
            }
          }
        }
      }
    }
  }
}
public void configure(JobConf job) {
  this.jobConf = job;
  urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
  // Default re-fetch interval: 2592000 seconds = 30 days.
  interval = jobConf.getInt("db.fetch.interval.default", 2592000);
  filters = new URLFilters(jobConf);
  scfilters = new ScoringFilters(jobConf);
  scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
  curTime = job.getLong("injector.current.time", System.currentTimeMillis());
}
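// A sketch of how the keys read above might be set on the job before
// submission; 'conf' is assumed to be in scope, and the values shown are
// just the defaults, for illustration only.
JobConf job = new NutchJob(conf);
job.setInt("db.fetch.interval.default", 2592000); // 30 days, in seconds
job.setFloat("db.score.injected", 1.0f);
job.setLong("injector.current.time", System.currentTimeMillis());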
@Override
public int doWork() {
  try {
    setupProcedureStore();
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    Future<?>[] futures = new Future<?>[numThreads];
    // Start worker threads.
    long start = System.currentTimeMillis();
    for (int i = 0; i < numThreads; i++) {
      futures[i] = executor.submit(this.new Worker(start));
    }
    boolean failure = false;
    try {
      for (Future<?> future : futures) {
        long timeout = start + WORKER_THREADS_TIMEOUT_SEC * 1000 - System.currentTimeMillis();
        failure |= (future.get(timeout, TimeUnit.MILLISECONDS).equals(EXIT_FAILURE));
      }
    } catch (Exception e) {
      System.err.println("Exception in worker thread.");
      e.printStackTrace();
      return EXIT_FAILURE;
    }
    executor.shutdown();
    if (failure) {
      return EXIT_FAILURE;
    }
    long timeTaken = System.currentTimeMillis() - start;
    System.out.println("******************************************");
    System.out.println("Num threads    : " + numThreads);
    System.out.println("Num procedures : " + numProcs);
    System.out.println("Sync type      : " + syncType);
    System.out.println("Time taken     : " + (timeTaken / 1000.0f) + "sec");
    System.out.println("******************************************");
    return EXIT_SUCCESS;
  } catch (IOException e) {
    e.printStackTrace();
    return EXIT_FAILURE;
  } finally {
    tearDownProcedureStore();
  }
}
/**
 * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not
 * checked in for some time.
 */
public void run() {
  while (shouldRun) {
    //
    // Thread runs periodically to check whether trackers should be expired.
    // The sleep interval must be no more than half the maximum expiry time
    // for a task tracker.
    //
    try {
      Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
    } catch (InterruptedException ie) {
    }

    //
    // Loop through all expired items in the queue
    //
    synchronized (taskTrackers) {
      synchronized (trackerExpiryQueue) {
        long now = System.currentTimeMillis();
        TaskTrackerStatus leastRecent = null;
        while ((trackerExpiryQueue.size() > 0)
            && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null)
            && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) {

          // Remove profile from head of queue
          trackerExpiryQueue.remove(leastRecent);
          String trackerName = leastRecent.getTrackerName();

          // Figure out if last-seen time should be updated, or if tracker is dead
          TaskTrackerStatus newProfile =
              (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName());
          // Items might leave the taskTracker set through other means; the
          // status stored in 'taskTrackers' might be null, which means the
          // tracker has already been destroyed.
          if (newProfile != null) {
            if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) {
              // Remove completely
              updateTaskTrackerStatus(trackerName, null);
              lostTaskTracker(leastRecent.getTrackerName());
            } else {
              // Update time by inserting latest profile
              trackerExpiryQueue.add(newProfile);
            }
          }
        }
      }
    }
  }
}
/** Start the JobTracker process, listen on the indicated port */
JobTracker(Configuration conf) throws IOException {
  //
  // Grab some static constants
  //
  maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
  RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
  RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
  TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
  PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
  MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

  // This is a directory of temporary submission files. We delete it
  // on startup, and can delete any files that we're done with
  this.conf = conf;
  JobConf jobConf = new JobConf(conf);
  this.systemDir = jobConf.getSystemDir();
  this.fs = FileSystem.get(conf);
  FileUtil.fullyDelete(fs, systemDir);
  fs.mkdirs(systemDir);

  // Same with 'localDir' except it's always on the local disk.
  jobConf.deleteLocalFiles(SUBDIR);

  // Set ports, start RPC servers, etc.
  InetSocketAddress addr = getAddress(conf);
  this.localMachine = addr.getHostName();
  this.port = addr.getPort();
  this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
  this.interTrackerServer.start();
  Properties p = System.getProperties();
  for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
    String key = (String) it.next();
    String val = p.getProperty(key);
    LOG.info("Property '" + key + "' is " + val);
  }

  this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
  this.infoServer = new JobTrackerInfoServer(this, infoPort);
  this.infoServer.start();

  this.startTime = System.currentTimeMillis();

  new Thread(this.expireTrackers).start();
  new Thread(this.retireJobs).start();
  new Thread(this.initJobs).start();
}
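// A minimal startup sketch: constructing the JobTracker above starts its
// RPC server, info server, and the expiry/retire/init background threads.
// Direct construction is illustrative; the real daemon entry point may
// wrap this in a main() that never exits.
Configuration conf = new Configuration();
conf.setInt("mapred.job.tracker.info.port", 50030);
JobTracker tracker = new JobTracker(conf); // returns after setup; threads keep running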
// TODO: Can also collect #procs, time taken by each thread to measure fairness.
@Override
public Integer call() throws IOException {
  while (true) {
    if (workersFailed.get()) {
      return EXIT_FAILURE;
    }
    long procId = procIds.getAndIncrement();
    if (procId >= numProcs) {
      break;
    }
    if (procId != 0 && procId % 10000 == 0) {
      long ms = System.currentTimeMillis() - start;
      System.out.println("Wrote " + procId + " procedures in " + StringUtils.humanTimeDiff(ms));
    }
    try {
      if (procId > 0 && procId % numProcsPerWal == 0) {
        store.rollWriterForTesting();
        System.out.println(
            "Starting new log : " + store.getActiveLogs().get(store.getActiveLogs().size() - 1));
      }
    } catch (IOException ioe) {
      // Ask other threads to quit too.
      workersFailed.set(true);
      System.err.println("Exception when rolling log file. Current procId = " + procId);
      ioe.printStackTrace();
      return EXIT_FAILURE;
    }
    ProcedureTestingUtility.TestProcedure proc =
        new ProcedureTestingUtility.TestProcedure(procId);
    proc.setData(serializedState);
    store.insert(proc, null);
    store.update(proc);
  }
  return EXIT_SUCCESS;
}