Exemplo n.º 1
0
  /**
   * Handle expiry of the job timer. When the job timer is triggered, {@code continueRun} is set
   * to false; if the frontend is still in the RUNNING state it is moved to TERMINATE_CANCEL_JOB,
   * the cancel message is recorded and printed on standard error, and the JVM is exited with
   * status 1.
   *
   * @exception IOException Thrown if an I/O error occurred.
   */
  private void jobTimeout() throws IOException {
    boolean exitRequested = false;

    synchronized (this) {
      if (myJobTimer.isTriggered()) {
        continueRun = false;

        // Only a job that is still running gets canceled by the timeout.
        exitRequested = (myState == State.RUNNING);
        if (exitRequested) {
          myState = State.TERMINATE_CANCEL_JOB;
          myCancelMessage = "Job exceeded maximum running time";
          System.err.println(myCancelMessage);
        }
      }
    }

    // System.exit() is called only after the monitor has been released;
    // holding the lock here could deadlock this thread (the timer thread)
    // against the shutdown hook thread.
    if (exitRequested) {
      System.exit(1);
    }
  }
Exemplo n.º 2
0
  /**
   * Shut down this Job Frontend. All lease timers are stopped, the Job Backends and the Job
   * Scheduler are told whether the job finished or was canceled (depending on the current state),
   * and the state is then set to TERMINATING. Notification failures are ignored.
   */
  private void shutdown() {
    synchronized (this) {
      // Halt the Job Scheduler lease timers, then every backend's lease timers.
      mySchedulerRenewTimer.stop();
      mySchedulerExpireTimer.stop();
      for (ProcessInfo info : myProcessInfo) {
        info.renewTimer.stop();
        info.expireTimer.stop();
      }

      // A non-null cancel message while the state is still RUNNING means the
      // user canceled the job (e.g., by hitting CTRL-C).
      boolean canceledWhileRunning = myState == State.RUNNING && myCancelMessage != null;
      if (canceledWhileRunning) {
        myState = State.TERMINATE_CANCEL_JOB;
      }

      // Notify the Job Backends first, then the Job Scheduler.
      switch (myState) {
        case RUNNING:
          // Normal completion: send "job finished" messages.
          for (ProcessInfo info : myProcessInfo) {
            if (info.backend == null) {
              continue;
            }
            try {
              info.backend.jobFinished(this);
            } catch (IOException ignored) {
              // Best effort during shutdown: a failed notification is dropped.
            }
          }
          if (myJobScheduler != null) {
            try {
              myJobScheduler.jobFinished(this);
            } catch (IOException ignored) {
              // Best effort during shutdown: a failed notification is dropped.
            }
          }
          break;

        case TERMINATE_CANCEL_JOB:
          // Cancellation: send "cancel job" messages, skipping backends that
          // never connected or are already marked FAILED.
          for (ProcessInfo info : myProcessInfo) {
            if (info.backend == null || info.state == ProcessInfo.State.FAILED) {
              continue;
            }
            try {
              info.backend.cancelJob(this, myCancelMessage);
            } catch (IOException ignored) {
              // Best effort during shutdown: a failed notification is dropped.
            }
          }
          if (myJobScheduler != null) {
            try {
              myJobScheduler.cancelJob(this, myCancelMessage);
            } catch (IOException ignored) {
              // Best effort during shutdown: a failed notification is dropped.
            }
          }
          break;

        case TERMINATING:
          // Already terminating: send nothing.
          break;
      }

      // From here on the frontend is terminating.
      myState = State.TERMINATING;
    }

    // All proxies, channels, and channel groups will close when the process
    // exits.
  }
Exemplo n.º 3
0
 /**
  * Handle expiry of the Job Scheduler's lease renewal timer. If the timer is triggered, the Job
  * Scheduler is asked to renew the lease for this job frontend; otherwise nothing happens.
  *
  * @exception IOException Thrown if an I/O error occurred.
  */
 private synchronized void schedulerRenewTimeout() throws IOException {
   // Nothing to do unless the renewal timer is actually triggered.
   if (!mySchedulerRenewTimer.isTriggered()) {
     return;
   }

   myJobScheduler.renewLease(this);
 }
Exemplo n.º 4
0
  /**
   * Report that a backend process is ready to commence executing the job.
   *
   * <p>The message is rejected (the job is canceled via {@code terminateCancelJob} and no state is
   * recorded) when {@code rank} is outside {@code 0 .. Np-1} or when the backend of that rank is
   * not in the NOT_STARTED state. Otherwise the backend's addresses are recorded and the count of
   * running processes is incremented; once that count reaches {@code Np}, the job timer is started
   * (if a positive job time is configured) and a "commence job" message is sent to every backend.
   *
   * @param theJobBackend Job Backend that is calling this method.
   * @param rank Rank of the job backend process.
   * @param middlewareAddress Host/port to which the job backend process is listening for middleware
   *     messages.
   * @param worldAddress Host/port to which the job backend process is listening for the world
   *     communicator.
   * @param frontendAddress Host/port to which the job backend process is listening for the frontend
   *     communicator, or null if the frontend communicator does not exist.
   * @exception IOException Thrown if an I/O error occurred.
   */
  public synchronized void backendReady(
      JobBackendRef theJobBackend,
      int rank,
      InetSocketAddress middlewareAddress,
      InetSocketAddress worldAddress,
      InetSocketAddress frontendAddress)
      throws IOException {
    // Verify that rank is in range. Return afterwards: if terminateCancelJob()
    // comes back to us, indexing myProcessInfo with this rank would throw
    // ArrayIndexOutOfBoundsException.
    if (rank < 0 || rank >= Np) {
      terminateCancelJob("Illegal \"backend ready\" message, rank=" + rank);
      return;
    }

    // Verify that this backend has not started already. Return afterwards so a
    // duplicate message can neither overwrite the existing record nor bump the
    // running-process count a second time.
    ProcessInfo processinfo = myProcessInfo[rank];
    if (processinfo.state != ProcessInfo.State.NOT_STARTED) {
      terminateCancelJob("Unexpected \"backend ready\" message, rank=" + rank);
      return;
    }

    // Record information in job backend process info record.
    processinfo.state = ProcessInfo.State.RUNNING;
    processinfo.backend = theJobBackend;
    processinfo.middlewareAddress = middlewareAddress;
    processinfo.worldAddress = worldAddress;
    processinfo.frontendAddress = frontendAddress;
    myProcessMap.put(theJobBackend, processinfo);

    // Record channel group addresses. The frontend address array is only
    // written when the job has a frontend communicator.
    myMiddlewareAddress[rank] = middlewareAddress;
    myWorldAddress[rank] = worldAddress;
    if (hasFrontendComm) {
      myFrontendAddress[rank] = frontendAddress;
    }

    // Increase count of running processes.
    ++myRunningCount;

    // If all job backend processes have reported ready, commence job.
    if (myRunningCount == Np) {
      // Start job timer if necessary; the configured job time is multiplied
      // by 1000 before being handed to the timer.
      int jobtime = PJProperties.getPjJobTime();
      if (jobtime > 0) {
        myJobTimer.start(jobtime * 1000L);
      }

      // Get the system properties.
      // NOTE(review): System.getProperties() returns the JVM-wide Properties
      // object, so the setProperty() calls below also change "pj.nt" for this
      // process -- confirm that side effect is intended.
      Properties props = System.getProperties();

      // Send "commence job" message to each job backend, with system
      // property "pj.nt" set to the proper number of CPUs.
      for (ProcessInfo info : myProcessMap.values()) {
        props.setProperty("pj.nt", "" + info.Nt);
        info.backend.commenceJob(
            /*theJobFrontend   */ this,
            /*middlewareAddress*/ myMiddlewareAddress,
            /*worldAddress     */ myWorldAddress,
            /*frontendAddress  */ myFrontendAddress,
            /*properties       */ props,
            /*mainClassName    */ myMainClassName,
            /*args             */ myArgs);
      }
    }
  }
Exemplo n.º 5
0
 /**
  * Renew the lease on the job. The Job Scheduler lease expiration timer is (re)started with an
  * interval of {@code Constants.LEASE_EXPIRE_INTERVAL}.
  *
  * @param theJobScheduler Job Scheduler that is calling this method. It is not used by the method
  *     body.
  * @exception IOException Thrown if an I/O error occurred.
  */
 public synchronized void renewLease(JobSchedulerRef theJobScheduler) throws IOException {
   // Push the expiration deadline out by one full lease-expire interval.
   mySchedulerExpireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);
 }
Exemplo n.º 6
0
  /**
   * Construct a new job frontend object. The job frontend object will contact the Job Scheduler
   * Daemon specified by the {@code "pj.host"} and {@code "pj.port"} Java system properties. See
   * class {@linkplain benchmarks.detinfer.pj.edu.ritpj.PJProperties} for further information.
   *
   * <p>Construction registers a JVM shutdown hook that calls {@code shutdown()}, starts a daemon
   * timer thread, creates the lease and job timers, allocates the per-process records and address
   * arrays, connects to the Job Scheduler, starts the Job Scheduler lease timers, and finally
   * requests the job.
   *
   * @param username User name.
   * @param Nn Number of backend nodes (&gt;= 1).
   * @param Np Number of processes (&gt;= 1).
   * @param Nt Number of CPUs per process (&gt;= 0). 0 means "all CPUs."
   * @param hasFrontendComm True if the job has the frontend communicator, false if it doesn't.
   * @param mainClassName Main class name.
   * @param args Command line arguments.
   * @exception JobSchedulerException (subclass of IOException) Thrown if the job frontend object
   *     could not contact the Job Scheduler Daemon.
   * @exception IOException Thrown if an I/O error occurred.
   */
  public JobFrontend(
      String username,
      int Nn,
      int Np,
      int Nt,
      boolean hasFrontendComm,
      String mainClassName,
      String[] args)
      throws IOException {
    // Remember the constructor arguments.
    this.username = username;
    this.Nn = Nn;
    this.Np = Np;
    this.Nt = Nt;
    this.hasFrontendComm = hasFrontendComm;
    this.myMainClassName = mainClassName;
    this.myArgs = args;

    // Register the shutdown hook; it runs shutdown() when the JVM exits.
    Thread shutdownHook =
        new Thread() {
          public void run() {
            shutdown();
          }
        };
    Runtime.getRuntime().addShutdownHook(shutdownHook);

    // Create and start the daemon thread that drives all timers below.
    myLeaseTimerThread = new TimerThread();
    myLeaseTimerThread.setDaemon(true);
    myLeaseTimerThread.start();

    // Job Scheduler lease renewal timer.
    TimerTask schedulerRenewTask =
        new TimerTask() {
          public void action(Timer timer) {
            try {
              schedulerRenewTimeout();
            } catch (Throwable ignored) {
              // Anything thrown by the timeout handler is discarded.
            }
          }
        };
    mySchedulerRenewTimer = myLeaseTimerThread.createTimer(schedulerRenewTask);

    // Job Scheduler lease expiration timer.
    TimerTask schedulerExpireTask =
        new TimerTask() {
          public void action(Timer timer) {
            try {
              schedulerExpireTimeout();
            } catch (Throwable ignored) {
              // Anything thrown by the timeout handler is discarded.
            }
          }
        };
    mySchedulerExpireTimer = myLeaseTimerThread.createTimer(schedulerExpireTask);

    // Job timer.
    TimerTask jobTimeoutTask =
        new TimerTask() {
          public void action(Timer timer) {
            try {
              jobTimeout();
            } catch (Throwable ignored) {
              // Anything thrown by the timeout handler is discarded.
            }
          }
        };
    myJobTimer = myLeaseTimerThread.createTimer(jobTimeoutTask);

    // One process info record per backend rank, each with its own lease
    // renewal and lease expiration timers.
    myProcessInfo = new ProcessInfo[Np];
    for (int i = 0; i < Np; ++i) {
      // The anonymous tasks capture the rank, so it has to be final.
      final int rank = i;

      TimerTask backendRenewTask =
          new TimerTask() {
            public void action(Timer timer) {
              try {
                backendRenewTimeout(rank);
              } catch (Throwable ignored) {
                // Anything thrown by the timeout handler is discarded.
              }
            }
          };
      TimerTask backendExpireTask =
          new TimerTask() {
            public void action(Timer timer) {
              try {
                backendExpireTimeout(rank);
              } catch (Throwable ignored) {
                // Anything thrown by the timeout handler is discarded.
              }
            }
          };

      myProcessInfo[rank] =
          new ProcessInfo(
              /*state            */ ProcessInfo.State.NOT_STARTED,
              /*name             */ null,
              /*rank             */ rank,
              /*backend          */ null,
              /*middlewareAddress*/ null,
              /*worldAddress     */ null,
              /*frontendAddress  */ null,
              /*renewTimer       */ myLeaseTimerThread.createTimer(backendRenewTask),
              /*expireTimer      */ myLeaseTimerThread.createTimer(backendExpireTask),
              /*Nt               */ 0);
    }

    // Middleware channel group and its address array (Np + 1 entries).
    myMiddlewareChannelGroup = new ChannelGroup();
    myMiddlewareAddress = new InetSocketAddress[Np + 1];

    // World communicator address array (Np entries).
    myWorldAddress = new InetSocketAddress[Np];

    // Frontend communicator channel group and address array, only when the
    // job has a frontend communicator.
    if (hasFrontendComm) {
      myFrontendChannelGroup = new ChannelGroup();
      myFrontendAddress = new InetSocketAddress[Np + 1];
    }

    // Frontend file writer and reader.
    myFrontendFileWriter = new FrontendFileWriter(this);
    myFrontendFileReader = new FrontendFileReader(this);

    // Connect to the Job Scheduler Daemon and wrap the channel in a proxy.
    // The address is declared outside the try so the error message can
    // report it.
    InetSocketAddress schedulerAddress = null;
    Channel schedulerChannel = null;
    try {
      schedulerAddress =
          new InetSocketAddress(PJProperties.getPjHost(), PJProperties.getPjPort());
      schedulerChannel = myMiddlewareChannelGroup.connect(schedulerAddress);
    } catch (IOException cause) {
      throw new JobSchedulerException(
          "JobFrontend(): Cannot contact Job Scheduler Daemon at " + schedulerAddress, cause);
    }
    myJobScheduler = new JobSchedulerProxy(myMiddlewareChannelGroup, schedulerChannel);

    // Start the Job Scheduler lease timers.
    mySchedulerRenewTimer.start(Constants.LEASE_RENEW_INTERVAL, Constants.LEASE_RENEW_INTERVAL);
    mySchedulerExpireTimer.start(Constants.LEASE_EXPIRE_INTERVAL);

    // Kick off the job!
    myJobScheduler.requestJob(this, username, Nn, Np, Nt);
  }