예제 #1
0
  /**
   * Recovers from worker connections that the master receive port reports as lost.
   *
   * <p>For every lost connection whose worker is not already marked finished, the first job in
   * {@code schedJobs} assigned to that node is removed from the scheduled set, put back on
   * {@code bot.tasks}, and the worker is marked finished at the current wall-clock time. At most
   * one job is re-queued per lost connection.
   */
  @Override
  protected void handleLostConnections() {
    for (SendPortIdentifier lost : masterRP.lostConnections()) {
      System.out.println("lost connection with " + lost.ibisIdentifier().location().toString());
      String cluster = lost.ibisIdentifier().location().getParent().toString();
      String node = lost.ibisIdentifier().location().getLevel(0);

      if (workers.get(cluster).get(node).isFinished()) {
        continue;
      }

      // Locate the job first and mutate schedJobs only after the loop, so the map is never
      // modified while it is being iterated. node.equals(..) tolerates jobs whose node has not
      // been set yet.
      Job orphaned = null;
      for (Job j : schedJobs.values()) {
        if (node.equals(j.getNode())) {
          orphaned = j;
          break;
        }
      }
      if (orphaned == null) {
        continue;
      }

      schedJobs.remove(orphaned.getJobID());
      /*begin hpdc tests*/
      if (node.contains("slow")) {
        // Scale the duration argument by 3/2 before the job is re-queued.
        // NOTE(review): presumably this undoes the 2/3 scaling applied at dispatch - confirm.
        orphaned.args[0] = Long.toString(3 * Long.parseLong(orphaned.args[0]) / 2);
      }
      /*end hpdc tests*/
      bot.tasks.add(orphaned);
      workers.get(cluster).get(node).workerFinished(System.currentTimeMillis());

      System.err.println(
          "Node "
              + node
              + " in cluster "
              + cluster
              + " failed during execution of job "
              + orphaned.jobID);
    }
  }
  /**
   * Computes the value to optimize for a single iteration of an optimizing job by aggregating
   * every stat tracker the iteration recorded for the configured statistic and taking the
   * aggregate's summary value.
   *
   * @param iteration The job iteration for which to retrieve the value.
   * @return The summary value of the aggregated stat trackers for the statistic to optimize.
   * @throws SLAMDException If the iteration has no stat trackers for the statistic to optimize.
   */
  @Override()
  public double getIterationOptimizationValue(Job iteration) throws SLAMDException {
    StatTracker[] matching = iteration.getStatTrackers(optimizeStat);
    boolean hasData = (matching != null) && (matching.length > 0);
    if (!hasData) {
      throw new SLAMDException(
          "The provided optimizing job iteration did not include any values for the statistic "
              + "to optimize, \""
              + optimizeStat
              + "\".");
    }

    // Fold all trackers into a fresh instance of the same tracker type.
    StatTracker combined = matching[0].newInstance();
    combined.aggregate(matching);
    double result = combined.getSummaryValue();

    String message =
        "SingleStatisticWithReplicationLatencyOptimizationAlgorithm.getIterationOptimizationValue("
            + iteration.getJobID()
            + ") returning "
            + result;
    iteration.slamdServer.logMessage(Constants.LOG_LEVEL_JOB_DEBUG, message);

    return result;
  }
예제 #3
0
  /**
   * Records a job result reported by a worker and picks the next job for that worker.
   *
   * <p>The job is removed from {@code schedJobs}, its runtime is added to the reporting worker's
   * statistics, and the job is stored in {@code doneJobs}. The next job is taken from the head of
   * the reporting host's own schedule; when that schedule is empty the result of
   * {@code sayGB(from)} is returned instead.
   *
   * @param received the result sent by the worker
   * @param from identifier of the worker that sent the result
   * @return the next job for the worker, or whatever {@code sayGB(from)} returns when the host
   *     has nothing left to run
   */
  @Override
  protected Job handleJobResult(JobResult received, IbisIdentifier from) {
    String hostKey = from.location().toString();
    String cluster = from.location().getParent().toString();

    System.err.println(hostKey + " " + received.getStats().getRuntime());

    /* assumes jobs don't need to be replicated on the same cluster, except on failure */
    Job doneJob = schedJobs.remove(received.getJobID());

    workers
        .get(cluster)
        .get(from.location().getLevel(0))
        .addJobStats(received.getStats().getRuntime());

    if (doneJob != null) {
      doneJobs.put(doneJob.getJobID(), doneJob);
    } else {
      // The job is no longer in schedJobs (e.g. it was re-queued after a lost connection, or the
      // result was delivered twice). Do not dereference null; keep serving the worker.
      System.err.println(
          "Result for job "
              + received.getJobID()
              + " from "
              + hostKey
              + " does not match any scheduled job; not recording it as done");
    }

    if (hosts.get(hostKey).schedJobs.size() == 0) {
      return sayGB(from);
    }

    Job nextJob = hosts.get(hostKey).schedJobs.remove(0);
    nextJob.startTime = System.nanoTime();

    return nextJob;
  }
  /**
   * Decides whether the given iteration is the best one observed so far for the optimizing job.
   *
   * <p>An iteration without acceptable replication latency is never the best. Otherwise its
   * optimization value is compared with the best value seen so far: the first non-NaN value is
   * always accepted, and later values must beat the current best by more than the configured
   * minimum fractional improvement, in the direction selected by the optimization type. Whenever
   * the iteration is accepted, the stored best value is replaced by the iteration's value.
   *
   * @param iteration The job iteration for which to make the determination.
   * @return <CODE>true</CODE> if the provided iteration is the best one seen so far for the
   *     optimizing job, or <CODE>false</CODE> if not.
   * @throws SLAMDException If a problem occurs that prevents a valid determination from being made.
   *     If this exception is thrown, then the optimizing job will stop immediately with no further
   *     iterations.
   */
  @Override()
  public boolean isBestIterationSoFar(Job iteration) throws SLAMDException {
    SLAMDServer slamdServer = iteration.slamdServer;

    if (!isAcceptableReplicationLatency(iteration)) {
      logBestIterationDecision(
          slamdServer,
          iteration,
          "false because the iteration does not have acceptable replication latency data.");
      return false;
    }

    double candidate = getIterationOptimizationValue(iteration);

    // The first usable (non-NaN) value becomes the baseline unconditionally.
    if (Double.isNaN(bestValueSoFar) && (!Double.isNaN(candidate))) {
      bestValueSoFar = candidate;
      logBestIterationDecision(
          slamdServer,
          iteration,
          "true because iteration value "
              + candidate
              + " is not NaN but current best is NaN.");
      return true;
    }

    switch (optimizeType) {
      case OPTIMIZE_TYPE_MAXIMIZE:
        if (candidate > bestValueSoFar) {
          if (candidate > bestValueSoFar + bestValueSoFar * minPctImprovement) {
            // Log with the previous best before it is overwritten.
            logBestIterationDecision(
                slamdServer,
                iteration,
                "true because iteration value "
                    + candidate
                    + " is greater than previous best value "
                    + bestValueSoFar
                    + " by at least "
                    + (minPctImprovement * 100)
                    + "%.");
            bestValueSoFar = candidate;
            return true;
          }

          logBestIterationDecision(
              slamdServer,
              iteration,
              "false because iteration value "
                  + candidate
                  + " is greater than previous best value "
                  + bestValueSoFar
                  + " but the margin of improvement is less than "
                  + (minPctImprovement * 100)
                  + "%.");
          return false;
        }

        logBestIterationDecision(
            slamdServer,
            iteration,
            "false because iteration value "
                + candidate
                + " is less than previous best value "
                + bestValueSoFar);
        return false;

      case OPTIMIZE_TYPE_MINIMIZE:
        if (candidate < bestValueSoFar) {
          if (candidate < bestValueSoFar - bestValueSoFar * minPctImprovement) {
            // Log with the previous best before it is overwritten.
            logBestIterationDecision(
                slamdServer,
                iteration,
                "true because iteration value "
                    + candidate
                    + " is less than previous best value "
                    + bestValueSoFar
                    + " by at least "
                    + (minPctImprovement * 100)
                    + "%.");
            bestValueSoFar = candidate;
            return true;
          }

          logBestIterationDecision(
              slamdServer,
              iteration,
              "false because iteration value "
                  + candidate
                  + " is less than previous best value "
                  + bestValueSoFar
                  + " but the margin of improvement is less than "
                  + (minPctImprovement * 100)
                  + "%.");
          return false;
        }

        logBestIterationDecision(
            slamdServer,
            iteration,
            "false because iteration value "
                + candidate
                + " is greater than previous best value "
                + bestValueSoFar);
        return false;

      default:
        logBestIterationDecision(
            slamdServer,
            iteration,
            "false because an unknown optimization type of "
                + optimizeType
                + " is being used.");
        return false;
    }
  }

  /**
   * Writes one job-debug log line describing the outcome of {@code isBestIterationSoFar}.
   *
   * @param slamdServer The server whose log receives the message.
   * @param iteration The iteration being judged; its job ID is embedded in the message.
   * @param outcome The text that follows "returning " in the log line.
   */
  private void logBestIterationDecision(SLAMDServer slamdServer, Job iteration, String outcome) {
    slamdServer.logMessage(
        Constants.LOG_LEVEL_JOB_DEBUG,
        "SingleStatisticWithReplicationLatencyOptimizationAlgorithm.isBestIterationSoFar("
            + iteration.getJobID()
            + ") returning "
            + outcome);
  }
  /**
   * Indicates whether the provided job iteration has an acceptable CPU utilization.
   *
   * <p>Every stat tracker reported by the VMStat resource monitor is inspected. Stacked-value
   * trackers for overall CPU utilization are checked against the configured component (user,
   * system, or user + system "busy" time); integer-value trackers are checked when their name
   * matches the stat for the configured component. The first tracker whose average exceeds
   * {@code maxUtilization} makes the iteration unacceptable.
   *
   * @param iteration The iteration for which to make the determination.
   * @return <CODE>true</CODE> if the CPU utilization for the provided iteration is acceptable, or
   *     <CODE>false</CODE> if not.
   * @throws SLAMDException If the provided iteration does not include sufficient CPU utilization
   *     data to make the determination.
   */
  private boolean isAcceptableCPUUtilization(Job iteration) throws SLAMDException {
    SLAMDServer slamdServer = iteration.slamdServer;
    boolean utilizationFound = false;

    String className = VMStatResourceMonitor.class.getName();
    ResourceMonitorStatTracker[] monitorTrackers =
        iteration.getResourceMonitorStatTrackersForClass(className);
    for (int i = 0; i < monitorTrackers.length; i++) {
      StatTracker tracker = monitorTrackers[i].getStatTracker();
      String name = tracker.getDisplayName();

      if ((tracker instanceof StackedValueTracker)
          && name.endsWith(VMStatResourceMonitor.STAT_TRACKER_CPU_UTILIZATION)) {
        utilizationFound = true;
        StackedValueTracker utilizationTracker = (StackedValueTracker) tracker;
        double userTime =
            utilizationTracker.getAverageValue(VMStatResourceMonitor.UTILIZATION_CATEGORY_USER);
        double systemTime =
            utilizationTracker.getAverageValue(VMStatResourceMonitor.UTILIZATION_CATEGORY_SYSTEM);
        double busyTime = userTime + systemTime;

        switch (utilizationComponent) {
          case UTILIZATION_COMPONENT_USER_TIME:
            if (userTime > maxUtilization) {
              slamdServer.logMessage(
                  Constants.LOG_LEVEL_JOB_DEBUG,
                  "SingleStatisticWithCPUUtilization"
                      + "OptimizationAlgorithm.isAcceptableCPU"
                      + "Utilization("
                      + iteration.getJobID()
                      + ") returning false because user time of "
                      + userTime
                      + " for stat "
                      + tracker.getDisplayName()
                      + " exceeded the maximum allowed of "
                      + maxUtilization);
              return false;
            }
            break;
          case UTILIZATION_COMPONENT_SYSTEM_TIME:
            if (systemTime > maxUtilization) {
              slamdServer.logMessage(
                  Constants.LOG_LEVEL_JOB_DEBUG,
                  "SingleStatisticWithCPUUtilization"
                      + "OptimizationAlgorithm.isAcceptableCPU"
                      + "Utilization("
                      + iteration.getJobID()
                      + ") returning false because system time "
                      + "of "
                      + systemTime
                      + " for stat "
                      + tracker.getDisplayName()
                      + " exceeded the maximum allowed of "
                      + maxUtilization);
              return false;
            }
            break;
          case UTILIZATION_COMPONENT_BUSY_TIME:
            if (busyTime > maxUtilization) {
              slamdServer.logMessage(
                  Constants.LOG_LEVEL_JOB_DEBUG,
                  "SingleStatisticWithCPUUtilization"
                      + "OptimizationAlgorithm.isAcceptableCPU"
                      + "Utilization("
                      + iteration.getJobID()
                      + ") returning false because busy time of "
                      + busyTime
                      + " for stat "
                      + tracker.getDisplayName()
                      + " exceeded the maximum allowed of "
                      + maxUtilization);
              return false;
            }
            break;
          default:
            slamdServer.logMessage(
                Constants.LOG_LEVEL_JOB_DEBUG,
                "SingleStatisticWithCPUUtilization"
                    + "OptimizationAlgorithm.isAcceptableCPU"
                    + "Utilization("
                    + iteration.getJobID()
                    + ") returning false because an unknown "
                    + "utilization component of "
                    + utilizationComponent
                    + " is in use.");
            return false;
        }
      } else if ((tracker instanceof IntegerValueTracker)
          // The component/name alternatives are grouped so the instanceof guard applies to all of
          // them; without the grouping, && binds tighter than || and a non-integer tracker with a
          // matching name would reach the cast below and fail with ClassCastException.
          && (((utilizationComponent == UTILIZATION_COMPONENT_USER_TIME)
                  && name.endsWith(VMStatResourceMonitor.STAT_TRACKER_CPU_USER))
              || ((utilizationComponent == UTILIZATION_COMPONENT_SYSTEM_TIME)
                  && name.endsWith(VMStatResourceMonitor.STAT_TRACKER_CPU_SYSTEM))
              || ((utilizationComponent == UTILIZATION_COMPONENT_BUSY_TIME)
                  && name.endsWith(VMStatResourceMonitor.STAT_TRACKER_CPU_BUSY)))) {
        utilizationFound = true;
        double value = ((IntegerValueTracker) tracker).getAverageValue();
        if (value > maxUtilization) {
          slamdServer.logMessage(
              Constants.LOG_LEVEL_JOB_DEBUG,
              "SingleStatisticWithCPUUtilization"
                  + "OptimizationAlgorithm.isAcceptableCPU"
                  + "Utilization("
                  + iteration.getJobID()
                  + ") returning false because value of "
                  + value
                  + " for stat "
                  + tracker.getDisplayName()
                  + " exceeded the maximum allowed of "
                  + maxUtilization);
          return false;
        }
      }
    }

    // No tracker matched at all: the caller cannot tell "acceptable" from "unknown".
    if (!utilizationFound) {
      throw new SLAMDException(
          "The provided job iteration did not include " + "any CPU utilization data.");
    }

    slamdServer.logMessage(
        Constants.LOG_LEVEL_JOB_DEBUG,
        "SingleStatisticWithCPUUtilizationOptimization"
            + "Algorithm.isAcceptableCPUUtilization("
            + iteration.getJobID()
            + ") returning true.");
    return true;
  }
예제 #6
0
  /**
   * Master loop: registers workers, precomputes a static schedule, hands each worker its first
   * job, then keeps exchanging job results for next jobs until {@code areWeDone()} reports true.
   *
   * <p>Phases, in order:
   *
   * <ol>
   *   <li>Derive the receive timeout from {@code BoTRunner.INITIAL_TIMEOUT_PERCENT} and
   *       {@code bot.deadline}.
   *   <li>Block until {@code hosts} holds {@code maxWorkers} entries, adding one {@code Host} per
   *       sender location of each received message.
   *   <li>Empty {@code bot.tasks} into per-host schedules: on every round each remaining job gets
   *       its smallest achievable completion time over all hosts, and the job whose smallest
   *       completion time is the largest is assigned to its best host.
   *   <li>Send every host the job returned by {@code handleJobRequest}.
   *   <li>Receive {@code JobResult}s and reply with the job returned by {@code handleJobResult}.
   * </ol>
   */
  public void run() {
    // NOTE(review): the factor 60000 suggests bot.deadline is in minutes and timeout in
    // milliseconds - confirm against the receive(timeout) contract.
    timeout = (long) (BoTRunner.INITIAL_TIMEOUT_PERCENT * bot.deadline * 60000);
    System.err.println("Timeout is now " + timeout);

    /*first receive requests from all workers*/
    // Failures in this loop are only printed; the loop keeps waiting until maxWorkers distinct
    // locations have been registered in hosts.
    while (hosts.size() != maxWorkers) {
      ReadMessage rm;
      try {

        rm = masterRP.receive(timeout);

        // The payload is read but not otherwise used in this phase; only the sender matters.
        Object received = rm.readObject();
        IbisIdentifier from = rm.origin().ibisIdentifier();
        rm.finish();

        // Keyed by the full location string, so a repeated request from the same location
        // replaces the previous Host entry rather than adding a new one.
        hosts.put(from.location().toString(), new Host(from));

        String cluster = from.location().getParent().toString();

        /*DEBUG*/
        System.err.println(
            "job request from node "
                + from.location().toString()
                + " in cluster "
                + cluster
                + "; number of hosts is now "
                + hosts.size());

      } catch (IOException e) {
        // Reported and ignored; the enclosing loop retries the receive.
        e.printStackTrace();
      } catch (ClassNotFoundException e) {
        // Reported and ignored; the enclosing loop retries the receive.
        e.printStackTrace();
      }
    }

    /*then precompute schedule*/
    // Each pass of the outer loop places exactly one job.
    while (bot.tasks.size() != 0) {
      // mct: largest per-job minimum completion time seen in this pass.
      long mct = Long.MIN_VALUE;
      String bestHost = "";
      Job schedJob = null;
      for (Job j : bot.tasks) {
        // mctj: smallest completion time job j can get on any host.
        long mctj = Long.MAX_VALUE;
        String bestHostJ = "";
        // args[0] is parsed as the job's duration estimate.
        long et = Long.parseLong(j.args[0]);
        for (Host host : hosts.values()) {
          // Hosts whose node name contains "slow" are charged 2/3 of the estimate.
          if (host.node.contains("slow")) {
            if (mctj > host.EAT + 2 * et / 3) {
              mctj = host.EAT + 2 * et / 3;
              bestHostJ = host.node;
            }
          } else {
            if (mctj > host.EAT + et) {
              mctj = host.EAT + et;
              bestHostJ = host.node;
            }
          }
        }
        if (mct < mctj) {
          mct = mctj;
          bestHost = bestHostJ;
          schedJob = j;
        }
      }
      // NOTE(review): bestHost is a host.node value but is used as a key into hosts, which is
      // populated with from.location().toString() keys - confirm the two are the same string.
      hosts.get(bestHost).addJob(schedJob);
      schedJobs.put(schedJob.jobID, schedJob);
      bot.tasks.remove(schedJob);
      System.out.println(
          "Job "
              + schedJob.jobID
              + " with et: "
              + schedJob.args[0]
              + " was scheduled on machine "
              + bestHost
              + "; EAT is now "
              + hosts.get(bestHost).EAT);
    }

    // meat: the largest EAT over all hosts after scheduling.
    long meat = Long.MIN_VALUE;
    for (Host host : hosts.values()) {
      if (host.EAT > meat) meat = host.EAT;
    }
    // Printed as minutes and seconds, i.e. the value is treated as a number of seconds here.
    System.out.println("Longest run should be: " + meat / 60 + "m" + meat % 60 + "s");

    actualStartTime = System.currentTimeMillis();

    /*send first job to each worker*/
    for (Host host : hosts.values()) {
      /*begin for hpdc tests*/

      Job nextJob = handleJobRequest(host.from);

      nextJob.setNode(host.from.location().getLevel(0));

      // For a real job that has not been submitted yet, scale args[0] to 2/3 when the parent
      // location string is exactly "slow", then mark the job as submitted.
      if ((!(nextJob instanceof NoJob)) && (nextJob.submitted != true)) {
        long sleep = Long.parseLong(nextJob.args[0]);
        if (host.from.location().getParent().toString().compareTo("slow") == 0) {
          nextJob.args[0] = new Long(2 * sleep / 3).toString();
        }
        nextJob.submitted = true;
      }
      /*end for hpdc tests*/

      // A fresh send port is created, used for one message and closed for every reply.
      SendPort workReplyPort;
      try {
        workReplyPort = myIbis.createSendPort(masterReplyPortType);

        workReplyPort.connect(host.from, "worker");

        WriteMessage wm = workReplyPort.newMessage();
        wm.writeObject(nextJob);
        wm.finish();
        workReplyPort.close();
      } catch (IOException e) {
        // Reported and ignored; the remaining hosts are still served.
        e.printStackTrace();
      }
    }

    boolean undone = true;

    // Result/next-job exchange; runs until areWeDone() returns true.
    while (undone) {
      try {

        ReadMessage rm = masterRP.receive(timeout);

        Object received = rm.readObject();
        IbisIdentifier from = rm.origin().ibisIdentifier();
        rm.finish();

        Job nextJob = null;

        // Only JobResult messages are expected in this phase; anything else aborts the loop
        // with an unchecked exception.
        if (received instanceof JobResult) {
          nextJob = handleJobResult((JobResult) received, from);
        } else {
          throw new RuntimeException("received " + "an object which is not JobResult:" + received);
        }

        nextJob.setNode(from.location().getLevel(0));

        /*begin for hpdc tests*/
        // Same 2/3 scaling as in the first dispatch, applied when the parent location string is
        // exactly "slow".
        if (!(nextJob instanceof NoJob)) {
          long sleep = Long.parseLong(nextJob.args[0]);
          if (from.location().getParent().toString().compareTo("slow") == 0) {
            nextJob.args[0] = new Long(2 * sleep / 3).toString();
          }
        }
        /*end for hpdc tests*/

        SendPort workReplyPort = myIbis.createSendPort(masterReplyPortType);
        workReplyPort.connect(from, "worker");

        WriteMessage wm = workReplyPort.newMessage();
        wm.writeObject(nextJob);
        wm.finish();
        workReplyPort.close();

        undone = !areWeDone();

      } catch (ReceiveTimedOutException rtoe) {
        // Nothing arrived within the timeout; re-evaluate the termination condition.
        System.err.println("I timed out!");
        undone = !areWeDone();

      } catch (ConnectionFailedException cfe) {
        // The reply could not be delivered: put the first scheduled job assigned to that node
        // back on bot.tasks and mark the worker finished. The undone flag is left unchanged on
        // this path.
        String cluster = cfe.ibisIdentifier().location().getParent().toString();
        String node = cfe.ibisIdentifier().location().getLevel(0);
        for (Job j : schedJobs.values())
          if (j.getNode().compareTo(node) == 0) {
            // Removing from schedJobs inside its own iteration is only safe because of the
            // break below, which ends the iteration right after the removal.
            schedJobs.remove(j.getJobID());
            /*begin hpdc tests*/
            if (j.getNode().contains("slow")) {
              j.args[0] = new Long(3 * Long.parseLong(j.args[0]) / 2).toString();
            }
            /*end hpdc tests*/
            bot.tasks.add(j);
            workers.get(cluster).get(j.getNode()).workerFinished(System.currentTimeMillis());

            System.err.println(
                "Node "
                    + cfe.ibisIdentifier().location().toString()
                    + " failed before receiving job "
                    + j.jobID);
            break;
          }
      } catch (IOException ioe) {
        ioe.printStackTrace();
        undone = !areWeDone();
      } catch (ClassNotFoundException e) {
        // Reported and ignored; the undone flag is left unchanged on this path.
        e.printStackTrace();
      }
    }
  }
예제 #7
0
  /**
   * Validates a Job and, when every check passes, appends it to the master JobList.
   *
   * <p>The checks run in a fixed order and the first one that fails aborts the call with an
   * exception whose message describes that check. The method therefore never returns
   * {@code false}: it either returns {@code true} after adding the job or throws.
   *
   * @param theJob the Job to potentially add to Schedule's List of Jobs.
   * @return true once theJob has been added.
   * @throws IllegalArgumentException if theJob is null or fails any validation check.
   */
  public boolean receiveJob(Job theJob) throws IllegalArgumentException {
    // A missing job cannot be attributed to any specific rule, so it gets the generic message.
    if (theJob == null) {
      throw new IllegalArgumentException(
          "Error: Job data is invalid for unknown reasons. Please logout and try again.");
    }

    // Cap on the total number of pending jobs.
    boolean pendingLimitOk = new BusinessRule1().test(myJobList);
    if (!pendingLimitOk) {
      throw new IllegalArgumentException(
          "Sorry, but the limit of 30 pending jobs has already been reached.");
    }

    // Cap on the number of jobs in the week surrounding this job.
    boolean weeklyLimitOk = new BusinessRule2().test(theJob, myJobList);
    if (!weeklyLimitOk) {
      throw new IllegalArgumentException(
          "Sorry, but the limit of 5 jobs has already been reached for the week that "
              + "this job was scheduled.");
    }

    // Maximum job duration.
    boolean durationOk = new BusinessRule4().test(theJob);
    if (!durationOk) {
      throw new IllegalArgumentException("Sorry, but a job cannot last any longer than two days.");
    }

    // The job must not lie in the past.
    boolean notInPast = new BusinessRule5().pastTest(theJob);
    if (!notInPast) {
      throw new IllegalArgumentException(
          "Sorry but the date you entered for this job has already passed.");
    }

    // The job must not lie too far in the future.
    boolean notTooFarAhead = new BusinessRule5().futureTest(theJob);
    if (!notTooFarAhead) {
      throw new IllegalArgumentException(
          "Sorry but the date you entered is too far into the future. "
              + "\nAll jobs must be scheduled within the next 3 months.");
    }

    // Start and end dates must not be swapped.
    if (theJob.getStartDate().after(theJob.getEndDate())) {
      throw new IllegalArgumentException(
          "Sorry, but the Start Date must come before or on the same date as the End Date.");
    }

    // The job ID must lie within [0, Job.MAX_NUM_JOBS].
    if (theJob.getJobID() < 0 || theJob.getJobID() > Job.MAX_NUM_JOBS) {
      throw new IllegalArgumentException("Error: Invalid Job ID. Please logout and try again.");
    }

    // A new job must carry a volunteer list, and that list must still be empty.
    if (theJob.getVolunteerList() == null) {
      throw new IllegalArgumentException(
          "Error: Null Volunteer List. Please logout and try again.");
    }
    if (!theJob.getVolunteerList().isEmpty()) {
      throw new IllegalArgumentException(
          "Error: Non-empty Volunteer List. Please logout and try again.");
    }

    // At least one volunteer grade must still have room.
    boolean anyRoom = theJob.hasLightRoom() || theJob.hasMediumRoom() || theJob.hasHeavyRoom();
    if (!anyRoom) {
      throw new IllegalArgumentException(
          "Sorry, but a slot in at least one Volunteer Grade must be available.");
    }

    // No grade may have more current volunteers than its maximum.
    if (theJob.getLightCurrent() > theJob.getLightMax()
        || theJob.getMediumCurrent() > theJob.getMediumMax()
        || theJob.getHeavyCurrent() > theJob.getHeavyMax()) {
      throw new IllegalArgumentException(
          "Sorry, but the number of slots for a Volunteer Grade cannot be negative.");
    }

    // The job must reference both a park and a park manager.
    if (theJob.getPark() == null) {
      throw new IllegalArgumentException("Error: Null Park. Please logout and try again.");
    }
    if (theJob.getManager() == null) {
      throw new IllegalArgumentException("Error: Null ParkManager. Please logout and try again.");
    }

    // Every check passed: add the job through the editable master list.
    List<Job> editableJobList = myJobList.getJobList();
    editableJobList.add(theJob);
    return true;
  }