/**
 * Renew the lease on the job by starting the Job Scheduler lease expire timer with an interval
 * of <TT>Constants.LEASE_EXPIRE_INTERVAL</TT>.
 *
 * @param theJobScheduler Job Scheduler that is calling this method. The argument is not read by
 *     this method.
 * @exception IOException Thrown if an I/O error occurred.
 */
public synchronized void renewLease(JobSchedulerRef theJobScheduler) throws IOException {
  // (Re)arm the scheduler expire timer for one lease expire interval.
  this.mySchedulerExpireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);
}
/** * Report that a backend process is ready to commence executing the job. * * @param theJobBackend Job Backend that is calling this method. * @param rank Rank of the job backend process. * @param middlewareAddress Host/port to which the job backend process is listening for middleware * messages. * @param worldAddress Host/port to which the job backend process is listening for the world * communicator. * @param frontendAddress Host/port to which the job backend process is listening for the frontend * communicator, or null if the frontend communicator does not exist. * @exception IOException Thrown if an I/O error occurred. */ public synchronized void backendReady( JobBackendRef theJobBackend, int rank, InetSocketAddress middlewareAddress, InetSocketAddress worldAddress, InetSocketAddress frontendAddress) throws IOException { // Verify that rank is in range. if (0 > rank || rank >= Np) { terminateCancelJob("Illegal \"backend ready\" message, rank=" + rank); } // Verify that this backend has not started already. ProcessInfo processinfo = myProcessInfo[rank]; if (processinfo.state != ProcessInfo.State.NOT_STARTED) { terminateCancelJob("Unexpected \"backend ready\" message, rank=" + rank); } // Record information in job backend process info record. processinfo.state = ProcessInfo.State.RUNNING; processinfo.backend = theJobBackend; processinfo.middlewareAddress = middlewareAddress; processinfo.worldAddress = worldAddress; processinfo.frontendAddress = frontendAddress; myProcessMap.put(theJobBackend, processinfo); // Record channel group addresses. myMiddlewareAddress[rank] = middlewareAddress; myWorldAddress[rank] = worldAddress; if (hasFrontendComm) { myFrontendAddress[rank] = frontendAddress; } // Increase count of running processes. ++myRunningCount; // If all job backend processes have reported ready, commence job. if (myRunningCount == Np) { // Start job timer if necessary. 
int jobtime = PJProperties.getPjJobTime(); if (jobtime > 0) { myJobTimer.start(jobtime * 1000L); } // Get the system properties. Properties props = System.getProperties(); // Send "commence job" message to each job backend, with system // property "pj.nt" set to the proper number of CPUs. for (ProcessInfo info : myProcessMap.values()) { props.setProperty("pj.nt", "" + info.Nt); info.backend.commenceJob( /*theJobFrontend */ this, /*middlewareAddress*/ myMiddlewareAddress, /*worldAddress */ myWorldAddress, /*frontendAddress */ myFrontendAddress, /*properties */ props, /*mainClassName */ myMainClassName, /*args */ myArgs); } } }
/**
 * Construct a new job frontend object. The job frontend object will contact the Job Scheduler
 * Daemon specified by the <TT>"pj.host"</TT> and <TT>"pj.port"</TT> Java system properties. See
 * class {@linkplain benchmarks.detinfer.pj.edu.ritpj.PJProperties} for further information.
 *
 * <p>In order, the constructor records its arguments, registers a JVM shutdown hook that calls
 * <TT>shutdown()</TT>, starts a daemon lease timer thread, creates the Job Scheduler lease
 * timers, the job timer and one process info record (with renew and expire timers) per rank,
 * allocates the channel groups and address arrays, creates the frontend file writer and reader,
 * connects to the Job Scheduler Daemon, starts the Job Scheduler lease timers and finally sends
 * the job request to the Job Scheduler.
 *
 * @param username User name.
 * @param Nn Number of backend nodes (&gt;= 1).
 * @param Np Number of processes (&gt;= 1).
 * @param Nt Number of CPUs per process (&gt;= 0). 0 means "all CPUs."
 * @param hasFrontendComm True if the job has the frontend communicator, false if it doesn't.
 * @param mainClassName Main class name.
 * @param args Command line arguments.
 * @exception JobSchedulerException (subclass of IOException) Thrown if the job frontend object
 *     could not contact the Job Scheduler Daemon.
 * @exception IOException Thrown if an I/O error occurred.
 */
public JobFrontend(
    String username,
    int Nn,
    int Np,
    int Nt,
    boolean hasFrontendComm,
    String mainClassName,
    String[] args)
    throws IOException {
  // Remember the job parameters.
  this.username = username;
  this.Nn = Nn;
  this.Np = Np;
  this.Nt = Nt;
  this.hasFrontendComm = hasFrontendComm;
  this.myMainClassName = mainClassName;
  this.myArgs = args;

  // Arrange for shutdown() to run when the JVM exits.
  Runtime.getRuntime()
      .addShutdownHook(
          new Thread() {
            public void run() {
              shutdown();
            }
          });

  // Bring up the (daemon) thread that drives every timer created below.
  myLeaseTimerThread = new TimerThread();
  myLeaseTimerThread.setDaemon(true);
  myLeaseTimerThread.start();

  // Job Scheduler lease timers. Anything thrown by a timeout handler is caught and discarded.
  mySchedulerRenewTimer =
      myLeaseTimerThread.createTimer(
          new TimerTask() {
            public void action(Timer t) {
              try {
                schedulerRenewTimeout();
              } catch (Throwable ignored) {
              }
            }
          });
  mySchedulerExpireTimer =
      myLeaseTimerThread.createTimer(
          new TimerTask() {
            public void action(Timer t) {
              try {
                schedulerExpireTimeout();
              } catch (Throwable ignored) {
              }
            }
          });

  // Job timer; same discard-on-error policy as the lease timers.
  myJobTimer =
      myLeaseTimerThread.createTimer(
          new TimerTask() {
            public void action(Timer t) {
              try {
                jobTimeout();
              } catch (Throwable ignored) {
              }
            }
          });

  // One process info record per rank, each in state NOT_STARTED and each with its own renew
  // and expire timers bound to that rank.
  myProcessInfo = new ProcessInfo[Np];
  for (int index = 0; index < Np; index++) {
    // Anonymous classes can only capture a final local, hence the copy.
    final int rank = index;
    ProcessInfo record =
        new ProcessInfo(
            /*state            */ ProcessInfo.State.NOT_STARTED,
            /*name             */ null,
            /*rank             */ rank,
            /*backend          */ null,
            /*middlewareAddress*/ null,
            /*worldAddress     */ null,
            /*frontendAddress  */ null,
            /*renewTimer       */ myLeaseTimerThread.createTimer(
                new TimerTask() {
                  public void action(Timer t) {
                    try {
                      backendRenewTimeout(rank);
                    } catch (Throwable ignored) {
                    }
                  }
                }),
            /*expireTimer      */ myLeaseTimerThread.createTimer(
                new TimerTask() {
                  public void action(Timer t) {
                    try {
                      backendExpireTimeout(rank);
                    } catch (Throwable ignored) {
                    }
                  }
                }),
            /*Nt               */ 0);
    myProcessInfo[rank] = record;
  }

  // Middleware channel group plus its address array, which has Np + 1 slots.
  // NOTE(review): the extra slot is presumably for the frontend's own address -- confirm.
  myMiddlewareChannelGroup = new ChannelGroup();
  myMiddlewareAddress = new InetSocketAddress[Np + 1];

  // World communicator address array, one slot per process.
  myWorldAddress = new InetSocketAddress[Np];

  // Frontend communicator channel group and address array, only when the job has one.
  if (hasFrontendComm) {
    myFrontendChannelGroup = new ChannelGroup();
    myFrontendAddress = new InetSocketAddress[Np + 1];
  }

  // Frontend file writer and reader.
  myFrontendFileWriter = new FrontendFileWriter(this);
  myFrontendFileReader = new FrontendFileReader(this);

  // Connect to the Job Scheduler Daemon and wrap the channel in a proxy. The address is
  // declared outside the try block so that it can be reported in the failure message.
  InetSocketAddress schedulerAddress = null;
  Channel schedulerChannel = null;
  try {
    schedulerAddress =
        new InetSocketAddress(PJProperties.getPjHost(), PJProperties.getPjPort());
    schedulerChannel = myMiddlewareChannelGroup.connect(schedulerAddress);
  } catch (IOException cause) {
    throw new JobSchedulerException(
        "JobFrontend(): Cannot contact Job Scheduler Daemon at " + schedulerAddress, cause);
  }
  myJobScheduler = new JobSchedulerProxy(myMiddlewareChannelGroup, schedulerChannel);

  // Start the Job Scheduler lease timers: the renew timer is given the renew interval twice
  // (two-argument start), the expire timer the expire interval once.
  mySchedulerRenewTimer.start(Constants.LEASE_RENEW_INTERVAL, Constants.LEASE_RENEW_INTERVAL);
  mySchedulerExpireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);

  // Kick off the job!
  myJobScheduler.requestJob(this, username, Nn, Np, Nt);
}