  /**
   * The main loop of the simulation. First call init() to get objects ready, then go into the
   * main loop, where {@link SimulatorEvent}s are removed from the {@link SimulatorEventQueue} and
   * handled, and new {@link SimulatorEvent}s are created and inserted into the
   * {@link SimulatorEventQueue}.
   *
   * @throws IOException
   * @throws InterruptedException
   */
  void run() throws IOException, InterruptedException {
    init();

    for (SimulatorEvent next = queue.get();
        next != null && next.getTimeStamp() < terminateTime && !shutdown;
        next = queue.get()) {
      currentTime = next.getTimeStamp();
      assert (currentTime == queue.getCurrentTime());
      SimulatorEventListener listener = next.getListener();
      List<SimulatorEvent> response = listener.accept(next);
      queue.addAll(response);
    }

    summary(System.out);
  }
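  // Illustrative sketch (not part of the original source): the listener contract that the event
  // loop in run() relies on. Every scheduled SimulatorEvent carries a listener; accept() handles
  // the event and returns the follow-up events to schedule. The names ExampleListener and
  // ExampleTickEvent below are hypothetical, and other listener methods (e.g. init) are omitted;
  // the real listeners are SimulatorJobTracker, SimulatorTaskTracker, SimulatorJobClient, etc.
  //
  //   class ExampleListener implements SimulatorEventListener {
  //     @Override
  //     public List<SimulatorEvent> accept(SimulatorEvent event) {
  //       // Handle the event at event.getTimeStamp(), then schedule follow-up events strictly
  //       // in the future so the loop keeps making progress toward terminateTime.
  //       return Collections.singletonList(
  //           new ExampleTickEvent(this, event.getTimeStamp() + 1000));
  //     }
  //   }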
  /**
   * Start simulated task trackers based on topology.
   *
   * @param cluster the cluster topology.
   * @param jobConf configuration object.
   * @param now time stamp when the simulator is started; {@link SimulatorTaskTracker}s are
   *     started spread uniformly at random in [now, now + startDuration).
   * @return time stamp by which the entire cluster is booted up and all task trackers are sending
   *     heartbeats at their steady-state rate.
   */
  long startTaskTrackers(ClusterStory cluster, JobConf jobConf, long now) {
    // port assigned to TTs, incremented by 1 for each TT
    int port = 10000;
    int numTaskTrackers = 0;
    Random random =
        new Random(RandomSeedGenerator.getSeed("forStartTaskTrackers()", masterRandomSeed));
    final int startDuration =
        jobConf.getInt("mumak.cluster.startup.duration", DEFAULT_CLUSTER_STARTUP_DURATION);
    for (MachineNode node : cluster.getMachines()) {
      jobConf.set("mumak.tasktracker.host.name", node.getName());
      jobConf.set(
          "mumak.tasktracker.tracker.name",
          "tracker_" + node.getName() + ":localhost/127.0.0.1:" + port);
      long subRandomSeed =
          RandomSeedGenerator.getSeed("forTaskTracker" + numTaskTrackers, masterRandomSeed);
      jobConf.setLong("mumak.tasktracker.random.seed", subRandomSeed);
      numTaskTrackers++;
      port++;
      SimulatorTaskTracker tt = new SimulatorTaskTracker(jt, jobConf);
      long firstHeartbeat = now + random.nextInt(startDuration);
      queue.addAll(tt.init(firstHeartbeat));
    }

    // Within startDuration plus one heartbeat interval of the full cluster, each TT has started
    // up and has been told on its 2nd heartbeat to beat at a rate corresponding to the steady
    // state of the cluster.
    long clusterSteady = now + startDuration + jt.getNextHeartbeatInterval();
    return clusterSteady;
  }
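  // Illustrative sketch (not part of the original source): for a hypothetical machine node named
  // "host1.rack0" and the first assigned port, the loop above leaves the following per-tracker
  // properties in jobConf before constructing the SimulatorTaskTracker:
  //
  //   mumak.tasktracker.host.name    = host1.rack0
  //   mumak.tasktracker.tracker.name = tracker_host1.rack0:localhost/127.0.0.1:10000
  //   mumak.tasktracker.random.seed  = RandomSeedGenerator.getSeed("forTaskTracker0", masterRandomSeed)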
  /**
   * Collect the set of leaf queues from the JobTracker and, for each of them, create a simulated
   * thread that performs the same check as JobInitializationPoller.JobInitializationThread in the
   * Capacity Scheduler.
   *
   * @param now the time stamp at which the threads are initialized.
   * @throws IOException
   */
  private void startSimulatorThreadsCapSched(long now) throws IOException {
    Set<String> queueNames = jt.getQueueManager().getLeafQueueNames();
    TaskScheduler taskScheduler = jt.getTaskScheduler();
    threadSet = new HashSet<SimulatorCSJobInitializationThread>();
    // We create a different thread for each queue and hold a reference to each of them.
    for (String jobQueue : queueNames) {
      SimulatorCSJobInitializationThread capThread =
          new SimulatorCSJobInitializationThread(taskScheduler, jobQueue);
      threadSet.add(capThread);
      queue.addAll(capThread.init(now));
    }
  }
  /**
   * Called when a job is completed. Insert a {@link JobCompleteEvent} into the
   * {@link SimulatorEventQueue}. This event will be picked up by {@link SimulatorJobClient},
   * which will in turn decide whether the simulation is done.
   *
   * @param jobStatus final status of a job, SUCCEEDED or FAILED
   * @param timestamp time stamp when the job is completed
   */
  void markCompletedJob(JobStatus jobStatus, long timestamp) {
    queue.add(new JobCompleteEvent(jc, timestamp, jobStatus, this));
  }
  /**
   * Run after the main loop.
   *
   * @param out stream to output information about the simulation
   */
  void summary(PrintStream out) {
    out.println("Done, total events processed: " + queue.getEventCount());
  }
  /**
   * Initialize components in the simulation. The JobConf is created separately and passed to
   * init().
   *
   * @param jobConf the configuration for the JobTracker.
   * @throws InterruptedException
   * @throws IOException if trace or topology files cannot be opened.
   */
  @SuppressWarnings("deprecation")
  void init(JobConf jobConf) throws InterruptedException, IOException {
    FileSystem lfs = FileSystem.getLocal(getConf());
    Path logPath = new Path(System.getProperty("hadoop.log.dir")).makeQualified(lfs);
    jobConf.set("mapred.system.dir", logPath.toString());
    jobConf.set("hadoop.job.history.location", (new Path(logPath, "history").toString()));

    // Start time for the virtual clock.
    // Possible improvement: set the default value to something more meaningful based on the
    // first job.
    long now = getTimeProperty(jobConf, "mumak.start.time", System.currentTimeMillis());

    jt = SimulatorJobTracker.startTracker(jobConf, now, this);
    jt.offerService();

    masterRandomSeed = jobConf.getLong("mumak.random.seed", System.nanoTime());

    // Max map/reduce tasks per node.
    int maxMaps =
        getConf()
            .getInt("mapred.tasktracker.map.tasks.maximum", SimulatorTaskTracker.DEFAULT_MAP_SLOTS);
    int maxReduces =
        getConf()
            .getInt(
                "mapred.tasktracker.reduce.tasks.maximum",
                SimulatorTaskTracker.DEFAULT_REDUCE_SLOTS);

    MachineNode defaultNode =
        new MachineNode.Builder("default", 2)
            .setMapSlots(maxMaps)
            .setReduceSlots(maxReduces)
            .build();

    LoggedNetworkTopology topology =
        new ClusterTopologyReader(new Path(topologyFile), jobConf).get();
    // Set the static mapping before removing numeric IP hosts.
    setStaticMapping(topology);
    if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
      removeIpHosts(topology);
    }
    ZombieCluster cluster = new ZombieCluster(topology, defaultNode);

    // Create TTs based on topology.json.
    long firstJobStartTime = startTaskTrackers(cluster, jobConf, now);

    long subRandomSeed =
        RandomSeedGenerator.getSeed("forSimulatorJobStoryProducer", masterRandomSeed);
    JobStoryProducer jobStoryProducer =
        new SimulatorJobStoryProducer(
            new Path(traceFile), cluster, firstJobStartTime, jobConf, subRandomSeed);

    final SimulatorJobSubmissionPolicy submissionPolicy =
        SimulatorJobSubmissionPolicy.getPolicy(jobConf);

    jc = new SimulatorJobClient(jt, jobStoryProducer, submissionPolicy);
    queue.addAll(jc.init(firstJobStartTime));

    // If the task scheduler is CapacityTaskScheduler, start off the JobInitialization threads
    // too. Comparing against the class-name constant keeps this null-safe when the property is
    // unset.
    if (CapacityTaskScheduler.class.getName()
        .equals(jobConf.get("mapred.jobtracker.taskScheduler"))) {
      LOG.info("CapacityScheduler used: starting simulatorThreads");
      startSimulatorThreadsCapSched(now);
    }
    terminateTime = getTimeProperty(jobConf, "mumak.terminate.time", Long.MAX_VALUE);
  }
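  // Illustrative sketch (not part of the original source): how a driver might set the
  // configuration properties that init() reads. The property names are the ones used above; the
  // values and setter choices are made-up examples.
  //
  //   JobConf jobConf = new JobConf(getConf());
  //   jobConf.setLong("mumak.start.time", 0L);                      // virtual-clock start
  //   jobConf.setLong("mumak.terminate.time", 24L * 3600 * 1000);   // stop after one simulated day
  //   jobConf.setLong("mumak.random.seed", 42L);                    // reproducible runs
  //   jobConf.setInt("mumak.cluster.startup.duration", 100 * 1000); // spread for TT start times
  //   jobConf.set("mapred.jobtracker.taskScheduler",
  //       CapacityTaskScheduler.class.getName());                   // optional: capacity scheduler
  //   init(jobConf);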