@Override protected Job handleJobResult(JobResult received, IbisIdentifier from) { // TODO Auto-generated method stub String cluster = from.location().getParent().toString(); System.err.println(from.location().toString() + " " + received.getStats().getRuntime()); /* assumes jobs don't need to be replicated on the same cluster, except on failure */ Job doneJob = schedJobs.remove(received.getJobID()); workers .get(cluster) .get(from.location().getLevel(0)) .addJobStats(received.getStats().getRuntime()); /*create category if it doesn't exist yet * upper duration since we pay in discrete increments of priced time unit*/ doneJobs.put(doneJob.getJobID(), doneJob); if (hosts.get(from.location().toString()).schedJobs.size() == 0) return sayGB(from); Job nextJob = hosts.get(from.location().toString()).schedJobs.remove(0); nextJob.startTime = System.nanoTime(); return nextJob; }
@Override protected Job handleJobRequest(IbisIdentifier from) { String cluster = from.location().getParent().toString(); String node = from.location().getLevel(0); /*DEBUG*/ System.err.println( "served first job request from node " + from.location().toString() + " in cluster " + cluster); workers.get(cluster).put(node, new WorkerStats(node, System.currentTimeMillis(), from)); /*release unnecessary workers*/ if (hosts.get(from.location().toString()).schedJobs.size() == 0) return sayGB(from); Job nextJob = hosts.get(from.location().toString()).schedJobs.remove(0); /*the fact that pending jobs are timed from master side (hence including the latency to the worker) should * be mentioned and should also have some impact on the convergence speed of the histogram in those cases where * the job size is somewhat equal to this latency. * */ nextJob.startTime = System.nanoTime(); // sJobs.put(nextJob.jobID, nextJob); /* might be the case that even here I return sayGB() */ return nextJob; }
/**
 * Dismisses a worker: logs the farewell, marks the worker's statistics entry as finished at
 * the current wall-clock time and produces the job object that is sent in place of real work.
 *
 * @param to identifier of the worker being released
 * @return a new {@code NoJob}
 */
private Job sayGB(IbisIdentifier to) {
    final String farewell =
        "We say goodbye to " + to.location().toString() + " from " + this.getClass().getName();
    System.err.println(farewell);

    final String clusterName = to.location().getParent().toString();
    final String nodeName = to.location().getLevel(0);

    workers
        .get(clusterName)
        .get(nodeName)
        .workerFinished(System.currentTimeMillis());

    return new NoJob();
}
public void resolve(IbisIdentifier toResolve, String ibisName) throws IOException { if (logger.isDebugEnabled()) { logger.debug("Making Resolve Request for: " + ibisName); } Integer id = new Integer(toResolve.hashCode()); synchronized (resolveQueue) { // Make sure we don't collide while (resolveQueue.get(id) != null) { try { resolveQueue.wait(); } catch (InterruptedException e) { // Ignored } } resolveQueue.put(id, toResolve); } synchronized (toResolve) { this.requestPort.connect(toResolve, resolvePortName); if (logger.isDebugEnabled()) { logger.debug("Sending Request for: " + ibisName); } WriteMessage writeMessage = this.requestPort.newMessage(); writeMessage.writeByte(OPP_REQUEST); writeMessage.writeInt(toResolve.hashCode()); if (logger.isDebugEnabled()) { logger.debug("Finishing Request for: " + ibisName); } writeMessage.finish(); if (logger.isDebugEnabled()) { logger.debug("Disconnecting Request for: " + ibisName); } this.requestPort.disconnect(toResolve, resolvePortName); while (!ibis.isResolved(toResolve)) { try { // Wait for the resolution to finish. if (logger.isDebugEnabled()) { logger.debug("Waiting For Resolution For: " + ibisName + " on: " + this); } toResolve.wait(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (logger.isDebugEnabled()) { logger.debug("Resolution Complete for: " + ibisName); } } }
/**
 * Decides whether a "maybe dead" report from the user about the given ibis should be passed
 * on.
 *
 * <p>A report is suppressed when the ibis is not a member of the pool, or when the member's
 * recorded time lies within the last {@code heartbeatInterval} milliseconds. Otherwise the
 * member's time is updated and the report goes through.
 *
 * @param ibisIdentifier the ibis the user suspects to be dead
 * @return {@code true} if the report must be forwarded, {@code false} if it is dropped
 */
public synchronized boolean mustReportMaybeDead(ibis.ipl.IbisIdentifier ibisIdentifier) {
    final Member suspect = members.get(ibisIdentifier.name());

    if (suspect != null) {
        final boolean reportedRecently =
            suspect.getTime() > (System.currentTimeMillis() - heartbeatInterval);

        if (!reportedRecently) {
            suspect.updateTime();
            return true;
        }

        logger.debug(
            "user reporting member "
                + ibisIdentifier
                + " recently reported already, skiping this time");
        return false;
    }

    logger.debug(
        "user reporting ibis " + ibisIdentifier + " which is not in pool, not reporting");
    return false;
}
public void run() { // TODO Auto-generated method stub timeout = (long) (BoTRunner.INITIAL_TIMEOUT_PERCENT * bot.deadline * 60000); System.err.println("Timeout is now " + timeout); /*first receive requests from all workers*/ while (hosts.size() != maxWorkers) { ReadMessage rm; try { rm = masterRP.receive(timeout); Object received = rm.readObject(); IbisIdentifier from = rm.origin().ibisIdentifier(); rm.finish(); hosts.put(from.location().toString(), new Host(from)); String cluster = from.location().getParent().toString(); /*DEBUG*/ System.err.println( "job request from node " + from.location().toString() + " in cluster " + cluster + "; number of hosts is now " + hosts.size()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /*then precompute schedule*/ while (bot.tasks.size() != 0) { long mct = Long.MIN_VALUE; String bestHost = ""; Job schedJob = null; for (Job j : bot.tasks) { long mctj = Long.MAX_VALUE; String bestHostJ = ""; long et = Long.parseLong(j.args[0]); for (Host host : hosts.values()) { if (host.node.contains("slow")) { if (mctj > host.EAT + 2 * et / 3) { mctj = host.EAT + 2 * et / 3; bestHostJ = host.node; } } else { if (mctj > host.EAT + et) { mctj = host.EAT + et; bestHostJ = host.node; } } } if (mct < mctj) { mct = mctj; bestHost = bestHostJ; schedJob = j; } } hosts.get(bestHost).addJob(schedJob); schedJobs.put(schedJob.jobID, schedJob); bot.tasks.remove(schedJob); System.out.println( "Job " + schedJob.jobID + " with et: " + schedJob.args[0] + " was scheduled on machine " + bestHost + "; EAT is now " + hosts.get(bestHost).EAT); } long meat = Long.MIN_VALUE; for (Host host : hosts.values()) { if (host.EAT > meat) meat = host.EAT; } System.out.println("Longest run should be: " + meat / 60 + "m" + meat % 60 + "s"); actualStartTime = System.currentTimeMillis(); /*send first job to each worker*/ for (Host host : 
hosts.values()) { /*begin for hpdc tests*/ Job nextJob = handleJobRequest(host.from); nextJob.setNode(host.from.location().getLevel(0)); if ((!(nextJob instanceof NoJob)) && (nextJob.submitted != true)) { long sleep = Long.parseLong(nextJob.args[0]); if (host.from.location().getParent().toString().compareTo("slow") == 0) { nextJob.args[0] = new Long(2 * sleep / 3).toString(); } nextJob.submitted = true; } /*end for hpdc tests*/ SendPort workReplyPort; try { workReplyPort = myIbis.createSendPort(masterReplyPortType); workReplyPort.connect(host.from, "worker"); WriteMessage wm = workReplyPort.newMessage(); wm.writeObject(nextJob); wm.finish(); workReplyPort.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } boolean undone = true; while (undone) { try { ReadMessage rm = masterRP.receive(timeout); Object received = rm.readObject(); IbisIdentifier from = rm.origin().ibisIdentifier(); rm.finish(); Job nextJob = null; if (received instanceof JobResult) { nextJob = handleJobResult((JobResult) received, from); } else { throw new RuntimeException("received " + "an object which is not JobResult:" + received); } nextJob.setNode(from.location().getLevel(0)); /*begin for hpdc tests*/ if (!(nextJob instanceof NoJob)) { long sleep = Long.parseLong(nextJob.args[0]); if (from.location().getParent().toString().compareTo("slow") == 0) { nextJob.args[0] = new Long(2 * sleep / 3).toString(); } } /*end for hpdc tests*/ SendPort workReplyPort = myIbis.createSendPort(masterReplyPortType); workReplyPort.connect(from, "worker"); WriteMessage wm = workReplyPort.newMessage(); wm.writeObject(nextJob); wm.finish(); workReplyPort.close(); undone = !areWeDone(); } catch (ReceiveTimedOutException rtoe) { System.err.println("I timed out!"); undone = !areWeDone(); } catch (ConnectionFailedException cfe) { String cluster = cfe.ibisIdentifier().location().getParent().toString(); String node = cfe.ibisIdentifier().location().getLevel(0); for (Job j : 
schedJobs.values()) if (j.getNode().compareTo(node) == 0) { schedJobs.remove(j.getJobID()); /*begin hpdc tests*/ if (j.getNode().contains("slow")) { j.args[0] = new Long(3 * Long.parseLong(j.args[0]) / 2).toString(); } /*end hpdc tests*/ bot.tasks.add(j); workers.get(cluster).get(j.getNode()).workerFinished(System.currentTimeMillis()); System.err.println( "Node " + cfe.ibisIdentifier().location().toString() + " failed before receiving job " + j.jobID); break; } } catch (IOException ioe) { ioe.printStackTrace(); undone = !areWeDone(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
public void run() { waitForId(); do { try { if (logger.isDebugEnabled()) { logger.debug("ReplyHandler running for: " + ibisName); } ReadMessage readMessage; readMessage = replyListenPort.receive(); if (logger.isDebugEnabled()) { logger.debug("ReplyHandler read message for: " + ibisName); } byte operation = readMessage.readByte(); switch (operation) { case OPP_REPLY: if (logger.isDebugEnabled()) { logger.debug("Received Reply For: " + ibisName); } int hashCode = readMessage.readInt(); MultiIbisIdentifier id = (MultiIbisIdentifier) readMessage.readObject(); readMessage.finish(); if (logger.isDebugEnabled()) { logger.debug("Setting Resolved for: " + ibisName + " id: " + id); } ibis.resolved(id); if (logger.isDebugEnabled()) { logger.debug("Locking for: " + ibisName); } synchronized (resolveQueue) { IbisIdentifier toResolve = resolveQueue.remove(new Integer(hashCode)); synchronized (toResolve) { if (logger.isDebugEnabled()) { logger.debug("Notifying for resolution: " + ibisName + " on: " + this); } if (logger.isDebugEnabled()) { logger.debug("Notifying for resolution: " + ibisName + " on: " + this); } toResolve.notifyAll(); } } break; case OPP_QUIT: if (logger.isDebugEnabled()) { logger.debug("Resolver quitting for: " + ibisName); } readMessage.finish(); quit = true; break; default: if (logger.isDebugEnabled()) { logger.debug("Unknown request for: " + ibisName); } readMessage.finish(); break; } } catch (IOException e) { // TODO What do we do here now? logger.error("Got IOException while resolving: " + e); e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO What do we do here now? logger.error("Got ClassNotFoundException while resolving: " + e); e.printStackTrace(); } } while (!quit); }