/** * Attempts to allocate a slice of the given type for the given job. The method first attempts to * allocate this slice by finding a physical host which exactly matches the given instance type. * If this attempt failed, it tries to allocate the slice by partitioning the resources of a more * powerful host. * * @param jobID the ID of the job the slice shall be allocated for * @param instanceType the instance type of the requested slice * @return the allocated slice or <code>null</code> if no such slice could be allocated */ private AllocatedSlice getSliceOfType(final JobID jobID, final InstanceType instanceType) { AllocatedSlice slice = null; // Try to match the instance type without slicing first for (final ClusterInstance host : this.registeredHosts.values()) { if (host.getType().equals(instanceType)) { slice = host.createSlice(instanceType, jobID); if (slice != null) { break; } } } // Use slicing now if necessary if (slice == null) { for (final ClusterInstance host : this.registeredHosts.values()) { slice = host.createSlice(instanceType, jobID); if (slice != null) { break; } } } return slice; }
@Override public synchronized void releaseAllocatedResource( final JobID jobID, final Configuration conf, final AllocatedResource allocatedResource) throws InstanceException { // release the instance from the host final ClusterInstance clusterInstance = (ClusterInstance) allocatedResource.getInstance(); final AllocatedSlice removedSlice = clusterInstance.removeAllocatedSlice(allocatedResource.getAllocationID()); // remove the local association between instance and job final List<AllocatedSlice> slicesOfJob = this.slicesOfJobs.get(jobID); if (slicesOfJob == null) { LOG.error("Cannot find allocated slice to release allocated slice for job " + jobID); return; } slicesOfJob.remove(removedSlice); // Clean up if (slicesOfJob.isEmpty()) { this.slicesOfJobs.remove(jobID); } // Check pending requests checkPendingRequests(); }
@Override public synchronized void reportHeartBeat( final InstanceConnectionInfo instanceConnectionInfo, final HardwareDescription hardwareDescription) { ClusterInstance host = registeredHosts.get(instanceConnectionInfo); // check whether we have discovered a new host if (host == null) { host = createNewHost(instanceConnectionInfo, hardwareDescription); if (host == null) { LOG.error( "Could not create a new host object for incoming heart-beat. " + "Probably the configuration file is lacking some entries."); return; } this.registeredHosts.put(instanceConnectionInfo, host); LOG.info("New number of registered hosts is " + this.registeredHosts.size()); // Update the list of instance type descriptions updateInstaceTypeDescriptionMap(); // Check if a pending request can be fulfilled by the new host checkPendingRequests(); } host.reportHeartBeat(); }
/** Updates the list of instance type descriptions based on the currently registered hosts. */ private void updateInstaceTypeDescriptionMap() { // this.registeredHosts.values().iterator() this.instanceTypeDescriptionMap.clear(); final List<InstanceTypeDescription> instanceTypeDescriptionList = new ArrayList<InstanceTypeDescription>(); // initialize array which stores the availability counter for each instance type final int[] numberOfInstances = new int[this.availableInstanceTypes.length]; for (int i = 0; i < numberOfInstances.length; i++) { numberOfInstances[i] = 0; } // Shuffle through instance types for (int i = 0; i < this.availableInstanceTypes.length; i++) { final InstanceType currentInstanceType = this.availableInstanceTypes[i]; int numberOfMatchingInstances = 0; int minNumberOfCPUCores = Integer.MAX_VALUE; long minSizeOfPhysicalMemory = Long.MAX_VALUE; long minSizeOfFreeMemory = Long.MAX_VALUE; final Iterator<ClusterInstance> it = this.registeredHosts.values().iterator(); while (it.hasNext()) { final ClusterInstance clusterInstance = it.next(); if (clusterInstance.getType().equals(currentInstanceType)) { ++numberOfMatchingInstances; final HardwareDescription hardwareDescription = clusterInstance.getHardwareDescription(); minNumberOfCPUCores = Math.min(minNumberOfCPUCores, hardwareDescription.getNumberOfCPUCores()); minSizeOfPhysicalMemory = Math.min(minSizeOfPhysicalMemory, hardwareDescription.getSizeOfPhysicalMemory()); minSizeOfFreeMemory = Math.min(minSizeOfFreeMemory, hardwareDescription.getSizeOfFreeMemory()); } } // Update number of instances int highestAccommodationNumber = -1; int highestAccommodationIndex = -1; for (int j = 0; j < this.availableInstanceTypes.length; j++) { final int accommodationNumber = canBeAccommodated(j, i); // LOG.debug(this.availableInstanceTypes[j].getIdentifier() + " fits into " // + this.availableInstanceTypes[i].getIdentifier() + " " + accommodationNumber + " times"); if (accommodationNumber > 0) { numberOfInstances[j] += numberOfMatchingInstances * accommodationNumber; if (accommodationNumber > highestAccommodationNumber) { highestAccommodationNumber = accommodationNumber; highestAccommodationIndex = j; } } } // Calculate hardware description HardwareDescription pessimisticHardwareDescription = null; if (minNumberOfCPUCores < Integer.MAX_VALUE && minSizeOfPhysicalMemory < Long.MAX_VALUE && minSizeOfFreeMemory < Long.MAX_VALUE) { pessimisticHardwareDescription = HardwareDescriptionFactory.construct( minNumberOfCPUCores, minSizeOfPhysicalMemory, minSizeOfFreeMemory); } else { if (highestAccommodationIndex < i) { // Since highestAccommodationIndex smaller than my index, the // target instance must be more powerful final InstanceTypeDescription descriptionOfLargerInstanceType = instanceTypeDescriptionList.get(highestAccommodationIndex); if (descriptionOfLargerInstanceType.getHardwareDescription() != null) { final HardwareDescription hardwareDescriptionOfLargerInstanceType = descriptionOfLargerInstanceType.getHardwareDescription(); final int numCores = hardwareDescriptionOfLargerInstanceType.getNumberOfCPUCores() / highestAccommodationNumber; final long physMem = hardwareDescriptionOfLargerInstanceType.getSizeOfPhysicalMemory() / highestAccommodationNumber; final long freeMem = hardwareDescriptionOfLargerInstanceType.getSizeOfFreeMemory() / highestAccommodationNumber; pessimisticHardwareDescription = HardwareDescriptionFactory.construct(numCores, physMem, freeMem); } } } instanceTypeDescriptionList.add( InstanceTypeDescriptionFactory.construct( currentInstanceType, pessimisticHardwareDescription, numberOfInstances[i])); } final Iterator<InstanceTypeDescription> it = instanceTypeDescriptionList.iterator(); while (it.hasNext()) { final InstanceTypeDescription itd = it.next(); this.instanceTypeDescriptionMap.put(itd.getInstanceType(), itd); } }
@Override public void run() { synchronized (ClusterManager.this) { final List<Map.Entry<InstanceConnectionInfo, ClusterInstance>> hostsToRemove = new ArrayList<Map.Entry<InstanceConnectionInfo, ClusterInstance>>(); final Map<JobID, List<AllocatedResource>> staleResources = new HashMap<JobID, List<AllocatedResource>>(); // check all hosts whether they did not send heat-beat messages. for (Map.Entry<InstanceConnectionInfo, ClusterInstance> entry : registeredHosts.entrySet()) { final ClusterInstance host = entry.getValue(); if (!host.isStillAlive(cleanUpInterval)) { // this host has not sent the heat-beat messages // -> we terminate all instances running on this host and notify the jobs final List<AllocatedSlice> removedSlices = host.removeAllAllocatedSlices(); for (AllocatedSlice removedSlice : removedSlices) { final JobID jobID = removedSlice.getJobID(); final List<AllocatedSlice> slicesOfJob = slicesOfJobs.get(jobID); if (slicesOfJob == null) { LOG.error("Cannot find allocated slices for job with ID + " + jobID); continue; } slicesOfJob.remove(removedSlice); // Clean up if (slicesOfJob.isEmpty()) { slicesOfJobs.remove(jobID); } List<AllocatedResource> staleResourcesOfJob = staleResources.get(removedSlice.getJobID()); if (staleResourcesOfJob == null) { staleResourcesOfJob = new ArrayList<AllocatedResource>(); staleResources.put(removedSlice.getJobID(), staleResourcesOfJob); } staleResourcesOfJob.add( new AllocatedResource( removedSlice.getHostingInstance(), removedSlice.getType(), removedSlice.getAllocationID())); } hostsToRemove.add(entry); } } registeredHosts.entrySet().removeAll(hostsToRemove); updateInstaceTypeDescriptionMap(); final Iterator<Map.Entry<JobID, List<AllocatedResource>>> it = staleResources.entrySet().iterator(); while (it.hasNext()) { final Map.Entry<JobID, List<AllocatedResource>> entry = it.next(); if (instanceListener != null) { instanceListener.allocatedResourcesDied(entry.getKey(), entry.getValue()); } } } }