Пример #1
0
  /**
   * Records a job result reported by a worker and selects the next job for that worker.
   *
   * <p>The finished job is removed from {@code schedJobs} and stored in {@code doneJobs}, the
   * reported runtime is added to the statistics of the reporting worker, and the first job queued
   * on the reporting host is handed out with its start time stamped. When the host has no queued
   * jobs left, the result of {@code sayGB(from)} is returned instead.
   *
   * @param received the result message sent by the worker
   * @param from identifier of the worker that sent the result
   * @return the next job queued for the reporting host, or the result of {@code sayGB(from)} when
   *     that queue is empty
   */
  @Override
  protected Job handleJobResult(JobResult received, IbisIdentifier from) {
    String hostKey = from.location().toString();
    String cluster = from.location().getParent().toString();

    System.err.println(hostKey + " " + received.getStats().getRuntime());

    /* assumes jobs don't need to be replicated on the same cluster, except on failure */
    Job doneJob = schedJobs.remove(received.getJobID());

    workers
        .get(cluster)
        .get(from.location().getLevel(0))
        .addJobStats(received.getStats().getRuntime());

    if (doneJob != null) {
      doneJobs.put(doneJob.getJobID(), doneJob);
    } else {
      // The job is no longer tracked as scheduled (e.g. it was already reported or was removed
      // after a failure); dereferencing it would abort the caller with a NullPointerException.
      System.err.println(
          "Ignoring result for job " + received.getJobID() + " which is not scheduled anymore");
    }

    if (hosts.get(hostKey).schedJobs.size() == 0) {
      return sayGB(from);
    }

    Job nextJob = hosts.get(hostKey).schedJobs.remove(0);
    nextJob.startTime = System.nanoTime();

    return nextJob;
  }
Пример #2
0
  /**
   * Serves the first job request of a worker.
   *
   * <p>A fresh {@code WorkerStats} entry is registered for the node under its cluster, then the
   * first job queued on the requesting host is returned with its start time stamped. If the host
   * has nothing queued, the worker is dismissed through {@code sayGB(from)}.
   *
   * @param from identifier of the requesting worker
   * @return the first queued job of the host, or the result of {@code sayGB(from)} when the queue
   *     is empty
   */
  @Override
  protected Job handleJobRequest(IbisIdentifier from) {
    final String hostKey = from.location().toString();
    final String clusterName = from.location().getParent().toString();
    final String nodeName = from.location().getLevel(0);

    /*DEBUG*/
    System.err.println(
        "served first job request from node " + hostKey + " in cluster " + clusterName);

    workers
        .get(clusterName)
        .put(nodeName, new WorkerStats(nodeName, System.currentTimeMillis(), from));

    /* release unnecessary workers: a host without queued jobs is dismissed immediately */
    if (hosts.get(hostKey).schedJobs.size() == 0) {
      return sayGB(from);
    }

    Job firstJob = hosts.get(hostKey).schedJobs.remove(0);

    /*
     * Pending jobs are timed on the master side, so the measured duration includes the latency
     * to the worker. This matters for the convergence speed of the histogram when the job size
     * is comparable to that latency.
     */
    firstJob.startTime = System.nanoTime();
    return firstJob;
  }
Пример #3
0
  /**
   * Dismisses a worker: logs the farewell, marks the worker's statistics entry as finished with
   * the current wall-clock time and produces the job object that is sent as the goodbye.
   *
   * @param to identifier of the worker being dismissed
   * @return a new {@code NoJob} instance
   */
  private Job sayGB(IbisIdentifier to) {
    final String farewell =
        "We say goodbye to " + to.location().toString() + " from " + this.getClass().getName();
    System.err.println(farewell);

    final String clusterName = to.location().getParent().toString();
    final String nodeName = to.location().getLevel(0);

    workers.get(clusterName).get(nodeName).workerFinished(System.currentTimeMillis());

    return new NoJob();
  }
Пример #4
0
  /**
   * Sends a resolve request for {@code toResolve} and blocks until {@code ibis} reports the
   * identifier as resolved.
   *
   * <p>The request is keyed by {@code toResolve.hashCode()}. If another request with the same key
   * is still registered in {@code resolveQueue}, this call first waits until that entry is gone.
   * Interrupts received while waiting do not abort the call; the interrupt status is restored
   * before the method returns so callers can still observe it.
   *
   * @param toResolve the identifier to resolve; also used as the monitor the caller waits on
   * @param ibisName name used in debug messages only
   * @throws IOException if connecting, sending the request or disconnecting fails
   */
  public void resolve(IbisIdentifier toResolve, String ibisName) throws IOException {
    if (logger.isDebugEnabled()) {
      logger.debug("Making Resolve Request for: " + ibisName);
    }
    final Integer id = Integer.valueOf(toResolve.hashCode());

    // Re-asserting the interrupt inside the loops would make every following wait() throw
    // immediately, so the interrupt is remembered here and restored once on the way out.
    boolean interrupted = false;
    try {
      synchronized (resolveQueue) {
        // Make sure we don't collide
        while (resolveQueue.get(id) != null) {
          try {
            resolveQueue.wait();
          } catch (InterruptedException e) {
            interrupted = true;
          }
        }
        resolveQueue.put(id, toResolve);
      }
      synchronized (toResolve) {
        this.requestPort.connect(toResolve, resolvePortName);
        if (logger.isDebugEnabled()) {
          logger.debug("Sending Request for: " + ibisName);
        }
        WriteMessage writeMessage = this.requestPort.newMessage();
        writeMessage.writeByte(OPP_REQUEST);
        writeMessage.writeInt(toResolve.hashCode());
        if (logger.isDebugEnabled()) {
          logger.debug("Finishing Request for: " + ibisName);
        }
        writeMessage.finish();
        if (logger.isDebugEnabled()) {
          logger.debug("Disconnecting Request for: " + ibisName);
        }
        this.requestPort.disconnect(toResolve, resolvePortName);

        while (!ibis.isResolved(toResolve)) {
          try {
            // Wait for the resolution to finish.
            if (logger.isDebugEnabled()) {
              logger.debug("Waiting For Resolution For: " + ibisName + " on: " + this);
            }
            toResolve.wait();
          } catch (InterruptedException e) {
            interrupted = true;
            if (logger.isDebugEnabled()) {
              logger.debug("Interrupted while waiting for resolution for: " + ibisName);
            }
          }
        }
        if (logger.isDebugEnabled()) {
          logger.debug("Resolution Complete for: " + ibisName);
        }
      }
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
Пример #5
0
  /**
   * Decides whether a user-supplied "maybe dead" report for the given ibis should be acted upon.
   *
   * <p>The report is rejected when the ibis is not found in {@code members}, or when the member's
   * recorded time ({@code getTime()}) is newer than the current time minus
   * {@code heartbeatInterval}. Otherwise the member's time is refreshed via {@code updateTime()}
   * and the report is accepted.
   *
   * @param ibisIdentifier the ibis being reported
   * @return {@code true} if the report must be passed on, {@code false} if it is to be dropped
   */
  public synchronized boolean mustReportMaybeDead(ibis.ipl.IbisIdentifier ibisIdentifier) {
    final Member reported = members.get(ibisIdentifier.name());

    if (reported != null) {
      final boolean reportedRecently =
          reported.getTime() > (System.currentTimeMillis() - heartbeatInterval);

      if (!reportedRecently) {
        // accepted: remember when this member was last reported
        reported.updateTime();
        return true;
      }

      logger.debug(
          "user reporting member "
              + ibisIdentifier
              + "  recently reported already, skiping this time");
      return false;
    }

    logger.debug(
        "user reporting ibis " + ibisIdentifier + "  which is not in pool, not reporting");
    return false;
  }
Пример #6
0
  /**
   * Master loop of the scheduler.
   *
   * <ol>
   *   <li>Waits until {@code maxWorkers} hosts have sent a request and registers each in
   *       {@code hosts}.
   *   <li>Precomputes the schedule: repeatedly, for every remaining task the host giving the
   *       smallest completion time ({@code host.EAT} plus the task's execution time) is
   *       determined, and the task whose smallest completion time is the largest is assigned to
   *       its host. Hosts whose node name contains "slow" are charged 2/3 of the execution time.
   *   <li>Sends the first job to every host.
   *   <li>Processes job results, replying to each with the next job, until {@code areWeDone()}
   *       reports completion.
   * </ol>
   *
   * <p>When a reply cannot be delivered because the connection to the worker failed, the job
   * assigned to that node is removed from {@code schedJobs} and added back to {@code bot.tasks}.
   */
  public void run() {
    timeout = (long) (BoTRunner.INITIAL_TIMEOUT_PERCENT * bot.deadline * 60000);
    System.err.println("Timeout is now " + timeout);

    /*first receive requests from all workers*/
    while (hosts.size() != maxWorkers) {
      try {
        ReadMessage rm = masterRP.receive(timeout);

        // the payload of the request is read but not used
        rm.readObject();
        IbisIdentifier from = rm.origin().ibisIdentifier();
        rm.finish();

        hosts.put(from.location().toString(), new Host(from));

        String cluster = from.location().getParent().toString();

        /*DEBUG*/
        System.err.println(
            "job request from node "
                + from.location().toString()
                + " in cluster "
                + cluster
                + "; number of hosts is now "
                + hosts.size());

      } catch (IOException e) {
        e.printStackTrace();
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
      }
    }

    /*then precompute schedule*/
    while (bot.tasks.size() != 0) {
      long mct = Long.MIN_VALUE;
      String bestHost = "";
      Job schedJob = null;
      for (Job j : bot.tasks) {
        long mctj = Long.MAX_VALUE;
        String bestHostJ = "";
        long et = Long.parseLong(j.args[0]);
        for (Host host : hosts.values()) {
          // "slow" hosts are charged two thirds of the nominal execution time
          long cost = host.node.contains("slow") ? 2 * et / 3 : et;
          if (mctj > host.EAT + cost) {
            mctj = host.EAT + cost;
            bestHostJ = host.node;
          }
        }
        if (mct < mctj) {
          mct = mctj;
          bestHost = bestHostJ;
          schedJob = j;
        }
      }
      hosts.get(bestHost).addJob(schedJob);
      schedJobs.put(schedJob.jobID, schedJob);
      bot.tasks.remove(schedJob);
      System.out.println(
          "Job "
              + schedJob.jobID
              + " with et: "
              + schedJob.args[0]
              + " was scheduled on machine "
              + bestHost
              + "; EAT is now "
              + hosts.get(bestHost).EAT);
    }

    long meat = Long.MIN_VALUE;
    for (Host host : hosts.values()) {
      if (host.EAT > meat) {
        meat = host.EAT;
      }
    }
    System.out.println("Longest run should be: " + meat / 60 + "m" + meat % 60 + "s");

    actualStartTime = System.currentTimeMillis();

    /*send first job to each worker*/
    for (Host host : hosts.values()) {
      /*begin for hpdc tests*/

      Job nextJob = handleJobRequest(host.from);

      nextJob.setNode(host.from.location().getLevel(0));

      if (!(nextJob instanceof NoJob) && !nextJob.submitted) {
        long sleep = Long.parseLong(nextJob.args[0]);
        if ("slow".equals(host.from.location().getParent().toString())) {
          nextJob.args[0] = Long.toString(2 * sleep / 3);
        }
        nextJob.submitted = true;
      }
      /*end for hpdc tests*/

      try {
        sendJobTo(host.from, nextJob);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    boolean undone = true;

    while (undone) {
      try {

        ReadMessage rm = masterRP.receive(timeout);

        Object received = rm.readObject();
        IbisIdentifier from = rm.origin().ibisIdentifier();
        rm.finish();

        if (!(received instanceof JobResult)) {
          throw new RuntimeException("received an object which is not JobResult:" + received);
        }
        Job nextJob = handleJobResult((JobResult) received, from);

        nextJob.setNode(from.location().getLevel(0));

        /*begin for hpdc tests*/
        if (!(nextJob instanceof NoJob)) {
          long sleep = Long.parseLong(nextJob.args[0]);
          if ("slow".equals(from.location().getParent().toString())) {
            nextJob.args[0] = Long.toString(2 * sleep / 3);
          }
        }
        /*end for hpdc tests*/

        sendJobTo(from, nextJob);

        undone = !areWeDone();

      } catch (ReceiveTimedOutException rtoe) {
        System.err.println("I timed out!");
        undone = !areWeDone();

      } catch (ConnectionFailedException cfe) {
        String cluster = cfe.ibisIdentifier().location().getParent().toString();
        String node = cfe.ibisIdentifier().location().getLevel(0);

        // schedJobs also holds jobs that were never dispatched; in this method setNode is only
        // called at dispatch time, so the comparison must tolerate a missing node.
        Job failedJob = null;
        for (Job j : schedJobs.values()) {
          if (node.equals(j.getNode())) {
            failedJob = j;
            break;
          }
        }

        if (failedJob != null) {
          schedJobs.remove(failedJob.getJobID());
          /*begin hpdc tests*/
          if (failedJob.getNode().contains("slow")) {
            // undo the 2/3 scaling applied when the job was sent
            failedJob.args[0] = Long.toString(3 * Long.parseLong(failedJob.args[0]) / 2);
          }
          /*end hpdc tests*/
          bot.tasks.add(failedJob);
          workers
              .get(cluster)
              .get(failedJob.getNode())
              .workerFinished(System.currentTimeMillis());

          System.err.println(
              "Node "
                  + cfe.ibisIdentifier().location().toString()
                  + " failed before receiving job "
                  + failedJob.jobID);
        }
      } catch (IOException ioe) {
        ioe.printStackTrace();
        undone = !areWeDone();
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Sends {@code job} to the "worker" receive port of {@code worker} over a freshly created send
   * port, and closes that port whether or not the delivery succeeded.
   *
   * @throws IOException if creating the port, connecting or writing fails, or if closing fails
   *     after a successful delivery
   */
  private void sendJobTo(IbisIdentifier worker, Job job) throws IOException {
    SendPort workReplyPort = myIbis.createSendPort(masterReplyPortType);
    boolean delivered = false;
    try {
      workReplyPort.connect(worker, "worker");

      WriteMessage wm = workReplyPort.newMessage();
      wm.writeObject(job);
      wm.finish();
      delivered = true;
    } finally {
      if (delivered) {
        workReplyPort.close();
      } else {
        try {
          workReplyPort.close();
        } catch (IOException ignored) {
          // the delivery failure is already propagating; a close failure must not replace it,
          // otherwise the caller could no longer tell a failed connection from other I/O errors
        }
      }
    }
  }
Пример #7
0
 /**
  * Reply-handler loop: receives messages on {@code replyListenPort} until an {@code OPP_QUIT}
  * message arrives.
  *
  * <p>For an {@code OPP_REPLY} the resolved identifier is handed to {@code ibis.resolved(...)},
  * the matching pending request is removed from {@code resolveQueue}, and the threads waiting on
  * the pending identifier are notified. Messages with an unknown operation byte are finished and
  * ignored. I/O and deserialization errors are logged and the loop continues.
  */
 public void run() {
   waitForId();
   do {
     try {
       if (logger.isDebugEnabled()) {
         logger.debug("ReplyHandler running for: " + ibisName);
       }
       ReadMessage readMessage = replyListenPort.receive();
       if (logger.isDebugEnabled()) {
         logger.debug("ReplyHandler read message for: " + ibisName);
       }
       byte operation = readMessage.readByte();
       switch (operation) {
         case OPP_REPLY:
           if (logger.isDebugEnabled()) {
             logger.debug("Received Reply For: " + ibisName);
           }
           int requestHash = readMessage.readInt();
           MultiIbisIdentifier id = (MultiIbisIdentifier) readMessage.readObject();
           readMessage.finish();
           if (logger.isDebugEnabled()) {
             logger.debug("Setting Resolved for: " + ibisName + " id: " + id);
           }
           ibis.resolved(id);
           if (logger.isDebugEnabled()) {
             logger.debug("Locking for: " + ibisName);
           }
           synchronized (resolveQueue) {
             IbisIdentifier toResolve = resolveQueue.remove(Integer.valueOf(requestHash));
             // An entry was (possibly) freed: wake threads blocked in resolveQueue.wait() so
             // they can re-check whether their key is available now.
             resolveQueue.notifyAll();
             if (toResolve == null) {
               // No pending request for this key; synchronizing on null would throw and end
               // this handler thread.
               logger.error(
                   "Received reply for unknown request " + requestHash + " for: " + ibisName);
             } else {
               synchronized (toResolve) {
                 if (logger.isDebugEnabled()) {
                   logger.debug("Notifying for resolution: " + ibisName + " on: " + this);
                 }
                 toResolve.notifyAll();
               }
             }
           }
           break;
         case OPP_QUIT:
           if (logger.isDebugEnabled()) {
             logger.debug("Resolver quitting for: " + ibisName);
           }
           readMessage.finish();
           quit = true;
           break;
         default:
           if (logger.isDebugEnabled()) {
             logger.debug("Unknown request for: " + ibisName);
           }
           readMessage.finish();
           break;
       }
     } catch (IOException e) {
       // TODO What do we do here now?
       logger.error("Got IOException while resolving: " + e, e);
     } catch (ClassNotFoundException e) {
       // TODO What do we do here now?
       logger.error("Got ClassNotFoundException while resolving: " + e, e);
     }
   } while (!quit);
 }