public Long migrate(final HaWorkVO work) { long vmId = work.getInstanceId(); long srcHostId = work.getHostId(); try { work.setStep(Step.Migrating); _haDao.update(work.getId(), work); VMInstanceVO vm = _instanceDao.findById(vmId); // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner // as its an emergency. boolean result = false; try { _itMgr.migrateAway(vm.getUuid(), srcHostId, null); } catch (InsufficientServerCapacityException e) { s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner"); _itMgr.migrateAway(vm.getUuid(), srcHostId, _haPlanners.get(0)); } return null; } catch (InsufficientServerCapacityException e) { s_logger.warn("Insufficient capacity for migrating a VM."); _resourceMgr.maintenanceFailed(srcHostId); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } }
protected Long destroyVM(HaWorkVO work) { final VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId()); s_logger.info("Destroying " + vm.toString()); try { if (vm.getState() != State.Destroyed) { s_logger.info("VM is no longer in Destroyed state " + vm.toString()); return null; } if (vm.getHostId() != null) { if (_itMgr.destroy(vm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Successfully destroy " + vm); return null; } s_logger.debug("Stop for " + vm + " was unsuccessful."); } else { if (s_logger.isDebugEnabled()) { s_logger.debug(vm + " has already been stopped"); } return null; } } catch (final AgentUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { s_logger.debug("operation timed out: " + e.getMessage()); } catch (ConcurrentOperationException e) { s_logger.debug("concurrent operation: " + e.getMessage()); } work.setTimesTried(work.getTimesTried() + 1); return (System.currentTimeMillis() >> 10) + _stopRetryInterval; }
@Override public void deployVirtualMachine( String reservationId, String caller, Map<VirtualMachineProfile.Param, Object> params) throws InsufficientCapacityException, ResourceUnavailableException { // grab the VM Id and destination using the reservationId. VMReservationVO vmReservation = _reservationDao.findByReservationId(reservationId); long vmId = vmReservation.getVmId(); VMInstanceVO vm = _vmDao.findById(vmId); // Pass it down Long poolId = null; Map<Long, Long> storage = vmReservation.getVolumeReservation(); if (storage != null) { List<Long> volIdList = new ArrayList<Long>(storage.keySet()); if (volIdList != null && !volIdList.isEmpty()) { poolId = storage.get(volIdList.get(0)); } } DataCenterDeployment reservedPlan = new DataCenterDeployment( vm.getDataCenterId(), vmReservation.getPodId(), vmReservation.getClusterId(), vmReservation.getHostId(), null, null); try { VMInstanceVO vmDeployed = _itMgr.start( vm, params, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId()), reservedPlan); } catch (Exception ex) { // Retry the deployment without using the reservation plan DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), null, null, null, null, null); _itMgr.start( vm, params, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId()), plan); } }
@Override public NicTO getNicTO( final VirtualRouter router, final Long networkId, final String broadcastUri) { final NicProfile nicProfile = _networkModel.getNicProfile(router, networkId, broadcastUri); return _itMgr.toNicTO(nicProfile, router.getHypervisorType()); }
protected DomainRouterVO start( DomainRouterVO router, final User user, final Account caller, final Map<Param, Object> params, final DeploymentPlan planToDeploy) throws StorageUnavailableException, InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Starting router " + router); try { _itMgr.advanceStart(router.getUuid(), params, planToDeploy, null); } catch (final OperationTimedoutException e) { throw new ResourceUnavailableException( "Starting router " + router + " failed! " + e.toString(), DataCenter.class, router.getDataCenterId()); } if (router.isStopPending()) { s_logger.info( "Clear the stop pending flag of router " + router.getHostName() + " after start router successfully!"); router.setStopPending(false); router = _routerDao.persist(router); } // We don't want the failure of VPN Connection affect the status of // router, so we try to make connection // only after router start successfully final Long vpcId = router.getVpcId(); if (vpcId != null) { _s2sVpnMgr.reconnectDisconnectedVpnByVpc(vpcId); } return _routerDao.findById(router.getId()); }
@Override public boolean stopvirtualmachine(VMEntityVO vmEntityVO, String caller) throws ResourceUnavailableException { VMInstanceVO vm = _vmDao.findByUuid(vmEntityVO.getUuid()); return _itMgr.stop( vm, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId())); }
@Override public boolean destroyVirtualMachine(VMEntityVO vmEntityVO, String caller) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException { VMInstanceVO vm = _vmDao.findByUuid(vmEntityVO.getUuid()); return _itMgr.destroy( vm, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId())); }
@Override public boolean destroySecStorageVm(long vmId) { SecondaryStorageVmVO ssvm = _secStorageVmDao.findById(vmId); try { return _itMgr.expunge(ssvm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { s_logger.warn("Unable to expunge " + ssvm, e); return false; } }
private DomainRouterVO start( DomainRouterVO elbVm, User user, Account caller, Map<Param, Object> params) throws StorageUnavailableException, InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Starting ELB VM " + elbVm); if (_itMgr.start(elbVm, params, user, caller) != null) { return _routerDao.findById(elbVm.getId()); } else { return null; } }
@Override public boolean stopSecStorageVm(long secStorageVmId) { SecondaryStorageVmVO secStorageVm = _secStorageVmDao.findById(secStorageVmId); if (secStorageVm == null) { String msg = "Stopping secondary storage vm failed: secondary storage vm " + secStorageVmId + " no longer exists"; if (s_logger.isDebugEnabled()) { s_logger.debug(msg); } return false; } try { if (secStorageVm.getHostId() != null) { GlobalLock secStorageVmLock = GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVm.getId())); try { if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) { try { boolean result = _itMgr.stop( secStorageVm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); if (result) {} return result; } finally { secStorageVmLock.unlock(); } } else { String msg = "Unable to acquire secondary storage vm lock : " + secStorageVm.toString(); s_logger.debug(msg); return false; } } finally { secStorageVmLock.releaseRef(); } } // vm was already stopped, return true return true; } catch (ResourceUnavailableException e) { if (s_logger.isDebugEnabled()) { s_logger.debug( "Stopping secondary storage vm " + secStorageVm.getHostName() + " faled : exception " + e.toString()); } return false; } }
private DomainRouterVO stop(DomainRouterVO elbVm, boolean forced, User user, Account caller) throws ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Stopping ELB vm " + elbVm); try { if (_itMgr.advanceStop(elbVm, forced, user, caller)) { return _routerDao.findById(elbVm.getId()); } else { return null; } } catch (OperationTimedoutException e) { throw new CloudRuntimeException("Unable to stop " + elbVm, e); } }
@Override public boolean configure(String name, Map<String, Object> params) throws ConfigurationException { if (s_logger.isInfoEnabled()) { s_logger.info("Start configuring AgentBasedConsoleProxyManager"); } _name = name; ComponentLocator locator = ComponentLocator.getCurrentLocator(); ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); if (configDao == null) { throw new ConfigurationException("Unable to get the configuration dao."); } Map<String, String> configs = configDao.getConfiguration("management-server", params); String value = configs.get("consoleproxy.url.port"); if (value != null) { _consoleProxyUrlPort = NumbersUtil.parseInt(value, ConsoleProxyManager.DEFAULT_PROXY_URL_PORT); } value = configs.get("consoleproxy.port"); if (value != null) { _consoleProxyPort = NumbersUtil.parseInt(value, ConsoleProxyManager.DEFAULT_PROXY_VNC_PORT); } value = configs.get("consoleproxy.sslEnabled"); if (value != null && value.equalsIgnoreCase("true")) { _sslEnabled = true; } _instance = configs.get("instance.name"); _consoleProxyUrlDomain = configs.get("consoleproxy.url.domain"); _listener = new ConsoleProxyListener(this); _agentMgr.registerForHostEvents(_listener, true, true, false); _itMgr.registerGuru(VirtualMachine.Type.ConsoleProxy, this); if (s_logger.isInfoEnabled()) { s_logger.info( "AgentBasedConsoleProxyManager has been configured. SSL enabled: " + _sslEnabled); } return true; }
@Override public VirtualRouter destroyRouter( final long routerId, final Account caller, final Long callerUserId) throws ResourceUnavailableException, ConcurrentOperationException { if (s_logger.isDebugEnabled()) { s_logger.debug("Attempting to destroy router " + routerId); } final DomainRouterVO router = _routerDao.findById(routerId); if (router == null) { return null; } _accountMgr.checkAccess(caller, null, true, router); _itMgr.expunge(router.getUuid()); _routerDao.remove(router.getId()); return router; }
@Override public SecondaryStorageVmVO startSecStorageVm(long secStorageVmId) { try { SecondaryStorageVmVO secStorageVm = _secStorageVmDao.findById(secStorageVmId); Account systemAcct = _accountMgr.getSystemAccount(); User systemUser = _accountMgr.getSystemUser(); return _itMgr.start(secStorageVm, null, systemUser, systemAcct); } catch (StorageUnavailableException e) { s_logger.warn("Exception while trying to start secondary storage vm", e); return null; } catch (InsufficientCapacityException e) { s_logger.warn("Exception while trying to start secondary storage vm", e); return null; } catch (ResourceUnavailableException e) { s_logger.warn("Exception while trying to start secondary storage vm", e); return null; } catch (Exception e) { s_logger.warn("Exception while trying to start secondary storage vm", e); return null; } }
@Override public void reallocateRouterNetworks( final RouterDeploymentDefinition routerDeploymentDefinition, final VirtualRouter router, final VMTemplateVO template, final HypervisorType hType) throws ConcurrentOperationException, InsufficientCapacityException { final ServiceOfferingVO routerOffering = _serviceOfferingDao.findById(routerDeploymentDefinition.getServiceOfferingId()); final LinkedHashMap<Network, List<? extends NicProfile>> networks = configureDefaultNics(routerDeploymentDefinition); _itMgr.allocate( router.getInstanceName(), template, routerOffering, networks, routerDeploymentDefinition.getPlan(), hType); }
void garbageCollectUnusedElbVms() { List<DomainRouterVO> unusedElbVms = _elbVmMapDao.listUnusedElbVms(); if (unusedElbVms != null && unusedElbVms.size() > 0) s_logger.info("Found " + unusedElbVms.size() + " unused ELB vms"); Set<Long> currentGcCandidates = new HashSet<Long>(); for (DomainRouterVO elbVm : unusedElbVms) { currentGcCandidates.add(elbVm.getId()); } _gcCandidateElbVmIds.retainAll(currentGcCandidates); currentGcCandidates.removeAll(_gcCandidateElbVmIds); User user = _accountService.getSystemUser(); for (Long elbVmId : _gcCandidateElbVmIds) { DomainRouterVO elbVm = _routerDao.findById(elbVmId); boolean gceed = false; try { s_logger.info("Attempting to stop ELB VM: " + elbVm); stop(elbVm, true, user, _systemAcct); gceed = true; } catch (ConcurrentOperationException e) { s_logger.warn("Unable to stop unused ELB vm " + elbVm + " due to ", e); } catch (ResourceUnavailableException e) { s_logger.warn("Unable to stop unused ELB vm " + elbVm + " due to ", e); continue; } if (gceed) { try { s_logger.info("Attempting to destroy ELB VM: " + elbVm); _itMgr.expunge(elbVm, user, _systemAcct); } catch (ResourceUnavailableException e) { s_logger.warn("Unable to destroy unused ELB vm " + elbVm + " due to ", e); gceed = false; } } if (!gceed) { currentGcCandidates.add(elbVm.getId()); } } _gcCandidateElbVmIds = currentGcCandidates; }
public Long migrate(final HaWorkVO work) { long vmId = work.getInstanceId(); long srcHostId = work.getHostId(); try { work.setStep(Step.Migrating); _haDao.update(work.getId(), work); if (!_itMgr.migrateAway(work.getType(), vmId, srcHostId)) { s_logger.warn("Unable to migrate vm from " + srcHostId); _resourceMgr.maintenanceFailed(srcHostId); } return null; } catch (InsufficientServerCapacityException e) { s_logger.warn("Insufficient capacity for migrating a VM."); _resourceMgr.maintenanceFailed(srcHostId); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } catch (VirtualMachineMigrationException e) { s_logger.warn("Looks like VM is still starting, we need to retry migrating the VM later."); _resourceMgr.maintenanceFailed(srcHostId); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } }
@Override public void scheduleRestart(VMInstanceVO vm, boolean investigate) { Long hostId = vm.getHostId(); if (hostId == null) { try { s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm); _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } return; } if (vm.getHypervisorType() == HypervisorType.VMware) { s_logger.info("Skip HA for VMware VM " + vm.getInstanceName()); return; } if (!investigate) { if (s_logger.isDebugEnabled()) { s_logger.debug( "VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); } short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_SSVM; } if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterIdToDeployIn() + ", pod id:" + vm.getPodIdToDeployIn(); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } } try { _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId()); int maxRetries = 0; for (HaWorkVO item : items) { if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) { maxRetries = item.getTimesTried(); break; } } HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, vm.getUpdated()); _haDao.persist(work); if (s_logger.isInfoEnabled()) { s_logger.info("Schedule vm for HA: " + vm); } wakeupWorkers(); }
@Override public boolean configure(String name, Map<String, Object> params) throws ConfigurationException { if (s_logger.isInfoEnabled()) { s_logger.info("Start configuring secondary storage vm manager : " + name); } _name = name; ComponentLocator locator = ComponentLocator.getCurrentLocator(); ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); if (configDao == null) { throw new ConfigurationException("Unable to get the configuration dao."); } Map<String, String> configs = configDao.getConfiguration("management-server", params); _secStorageVmRamSize = NumbersUtil.parseInt(configs.get("secstorage.vm.ram.size"), DEFAULT_SS_VM_RAMSIZE); _secStorageVmCpuMHz = NumbersUtil.parseInt(configs.get("secstorage.vm.cpu.mhz"), DEFAULT_SS_VM_CPUMHZ); String useServiceVM = configDao.getValue("secondary.storage.vm"); boolean _useServiceVM = false; if ("true".equalsIgnoreCase(useServiceVM)) { _useServiceVM = true; } String sslcopy = configDao.getValue("secstorage.encrypt.copy"); if ("true".equalsIgnoreCase(sslcopy)) { _useSSlCopy = true; } _allowedInternalSites = configDao.getValue("secstorage.allowed.internal.sites"); String value = configs.get("secstorage.capacityscan.interval"); _capacityScanInterval = NumbersUtil.parseLong(value, DEFAULT_CAPACITY_SCAN_INTERVAL); _instance = configs.get("instance.name"); if (_instance == null) { _instance = "DEFAULT"; } Map<String, String> agentMgrConfigs = configDao.getConfiguration("AgentManager", params); _mgmt_host = agentMgrConfigs.get("host"); if (_mgmt_host == null) { s_logger.warn( "Critical warning! Please configure your management server host address right after you have started your management server and then restart it, otherwise you won't have access to secondary storage"); } value = agentMgrConfigs.get("port"); _mgmt_port = NumbersUtil.parseInt(value, 8250); _listener = new SecondaryStorageListener(this, _agentMgr); _agentMgr.registerForHostEvents(_listener, true, false, true); _itMgr.registerGuru(VirtualMachine.Type.SecondaryStorageVm, this); _useLocalStorage = Boolean.parseBoolean(configs.get(Config.SystemVMUseLocalStorage.key())); _serviceOffering = new ServiceOfferingVO( "System Offering For Secondary Storage VM", 1, _secStorageVmRamSize, _secStorageVmCpuMHz, null, null, false, null, _useLocalStorage, true, null, true, VirtualMachine.Type.SecondaryStorageVm, true); _serviceOffering.setUniqueName("Cloud.com-SecondaryStorage"); _serviceOffering = _offeringDao.persistSystemServiceOffering(_serviceOffering); // this can sometimes happen, if DB is manually or programmatically manipulated if (_serviceOffering == null) { String msg = "Data integrity problem : System Offering For Secondary Storage VM has been removed?"; s_logger.error(msg); throw new ConfigurationException(msg); } if (_useServiceVM) { _loadScanner = new SystemVmLoadScanner<Long>(this); _loadScanner.initScan(STARTUP_DELAY, _capacityScanInterval); } if (s_logger.isInfoEnabled()) { s_logger.info("Secondary storage vm Manager is configured."); } return true; }
@Override public boolean configure(String name, Map<String, Object> params) throws ConfigurationException { final Map<String, String> configs = _configDao.getConfiguration("AgentManager", params); _systemAcct = _accountService.getSystemAccount(); _instance = configs.get("instance.name"); if (_instance == null) { _instance = "VM"; } _mgmtCidr = _configDao.getValue(Config.ManagementNetwork.key()); _mgmtHost = _configDao.getValue(Config.ManagementHostIPAdr.key()); boolean useLocalStorage = Boolean.parseBoolean(configs.get(Config.SystemVMUseLocalStorage.key())); _elasticLbVmRamSize = NumbersUtil.parseInt( configs.get(Config.ElasticLoadBalancerVmMemory.key()), DEFAULT_ELB_VM_RAMSIZE); _elasticLbvmCpuMHz = NumbersUtil.parseInt( configs.get(Config.ElasticLoadBalancerVmCpuMhz.key()), DEFAULT_ELB_VM_CPU_MHZ); _elasticLbvmNumCpu = NumbersUtil.parseInt(configs.get(Config.ElasticLoadBalancerVmNumVcpu.key()), 1); _elasticLbVmOffering = new ServiceOfferingVO( "System Offering For Elastic LB VM", _elasticLbvmNumCpu, _elasticLbVmRamSize, _elasticLbvmCpuMHz, 0, 0, true, null, useLocalStorage, true, null, true, VirtualMachine.Type.ElasticLoadBalancerVm, true); _elasticLbVmOffering.setUniqueName(ServiceOffering.elbVmDefaultOffUniqueName); _elasticLbVmOffering = _serviceOfferingDao.persistSystemServiceOffering(_elasticLbVmOffering); String enabled = _configDao.getValue(Config.ElasticLoadBalancerEnabled.key()); _enabled = (enabled == null) ? false : Boolean.parseBoolean(enabled); s_logger.info("Elastic Load balancer enabled: " + _enabled); if (_enabled) { String traffType = _configDao.getValue(Config.ElasticLoadBalancerNetwork.key()); if ("guest".equalsIgnoreCase(traffType)) { _frontendTrafficType = TrafficType.Guest; } else if ("public".equalsIgnoreCase(traffType)) { _frontendTrafficType = TrafficType.Public; } else throw new ConfigurationException( "ELB: Traffic type for front end of load balancer has to be guest or public; found : " + traffType); s_logger.info("ELB: Elastic Load Balancer: will balance on " + traffType); int gcIntervalMinutes = NumbersUtil.parseInt(configs.get(Config.ElasticLoadBalancerVmGcInterval.key()), 5); if (gcIntervalMinutes < 5) gcIntervalMinutes = 5; s_logger.info( "ELB: Elastic Load Balancer: scheduling GC to run every " + gcIntervalMinutes + " minutes"); _gcThreadPool = Executors.newScheduledThreadPool(1, new NamedThreadFactory("ELBVM-GC")); _gcThreadPool.scheduleAtFixedRate( new CleanupThread(), gcIntervalMinutes, gcIntervalMinutes, TimeUnit.MINUTES); _itMgr.registerGuru(VirtualMachine.Type.ElasticLoadBalancerVm, this); } return true; }
public DomainRouterVO deployELBVm( Network guestNetwork, DeployDestination dest, Account owner, Map<Param, Object> params) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { long dcId = dest.getDataCenter().getId(); // lock guest network Long guestNetworkId = guestNetwork.getId(); guestNetwork = _networkDao.acquireInLockTable(guestNetworkId); if (guestNetwork == null) { throw new ConcurrentOperationException("Unable to acquire network lock: " + guestNetworkId); } try { if (_networkModel.isNetworkSystem(guestNetwork) || guestNetwork.getGuestType() == Network.GuestType.Shared) { owner = _accountService.getSystemAccount(); } if (s_logger.isDebugEnabled()) { s_logger.debug( "Starting a ELB vm for network configurations: " + guestNetwork + " in " + dest); } assert guestNetwork.getState() == Network.State.Implemented || guestNetwork.getState() == Network.State.Setup || guestNetwork.getState() == Network.State.Implementing : "Network is not yet fully implemented: " + guestNetwork; DataCenterDeployment plan = null; DomainRouterVO elbVm = null; plan = new DataCenterDeployment(dcId, dest.getPod().getId(), null, null, null, null); if (elbVm == null) { long id = _routerDao.getNextInSequence(Long.class, "id"); if (s_logger.isDebugEnabled()) { s_logger.debug("Creating the ELB vm " + id); } List<? extends NetworkOffering> offerings = _networkModel.getSystemAccountNetworkOfferings(NetworkOffering.SystemControlNetwork); NetworkOffering controlOffering = offerings.get(0); NetworkVO controlConfig = _networkMgr.setupNetwork(_systemAcct, controlOffering, plan, null, null, false).get(0); List<Pair<NetworkVO, NicProfile>> networks = new ArrayList<Pair<NetworkVO, NicProfile>>(2); NicProfile guestNic = new NicProfile(); guestNic.setDefaultNic(true); networks.add(new Pair<NetworkVO, NicProfile>(controlConfig, null)); networks.add(new Pair<NetworkVO, NicProfile>((NetworkVO) guestNetwork, guestNic)); VMTemplateVO template = _templateDao.findSystemVMTemplate(dcId); String typeString = "ElasticLoadBalancerVm"; Long physicalNetworkId = _networkModel.getPhysicalNetworkId(guestNetwork); PhysicalNetworkServiceProvider provider = _physicalProviderDao.findByServiceProvider(physicalNetworkId, typeString); if (provider == null) { throw new CloudRuntimeException( "Cannot find service provider " + typeString + " in physical network " + physicalNetworkId); } VirtualRouterProvider vrProvider = _vrProviderDao.findByNspIdAndType( provider.getId(), VirtualRouterProviderType.ElasticLoadBalancerVm); if (vrProvider == null) { throw new CloudRuntimeException( "Cannot find virtual router provider " + typeString + " as service provider " + provider.getId()); } elbVm = new DomainRouterVO( id, _elasticLbVmOffering.getId(), vrProvider.getId(), VirtualMachineName.getSystemVmName(id, _instance, _elbVmNamePrefix), template.getId(), template.getHypervisorType(), template.getGuestOSId(), owner.getDomainId(), owner.getId(), false, 0, false, RedundantState.UNKNOWN, _elasticLbVmOffering.getOfferHA(), false, VirtualMachine.Type.ElasticLoadBalancerVm, null); elbVm.setRole(Role.LB); elbVm = _itMgr.allocate(elbVm, template, _elasticLbVmOffering, networks, plan, null, owner); // TODO: create usage stats } State state = elbVm.getState(); if (state != State.Running) { elbVm = this.start( elbVm, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); } return elbVm; } finally { _networkDao.releaseFromLockTable(guestNetworkId); } }
protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException { VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId()); if (vm == null) { s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work); work.setStep(Step.Done); return null; } s_logger.info("Stopping " + vm); try { if (work.getWorkType() == WorkType.Stop) { if (_itMgr.advanceStop( vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Successfully stopped " + vm); return null; } } else if (work.getWorkType() == WorkType.CheckStop) { if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { s_logger.info( vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); return null; } if (_itMgr.advanceStop( vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Stop for " + vm + " was successful"); return null; } } else if (work.getWorkType() == WorkType.ForceStop) { if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { s_logger.info( vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); return null; } if (_itMgr.advanceStop( vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Stop for " + vm + " was successful"); return null; } } else { assert false : "Who decided there's other steps but didn't modify the guy who does the work?"; } } catch (final ResourceUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { s_logger.debug("operation timed out: " + e.getMessage()); } work.setTimesTried(work.getTimesTried() + 1); if (s_logger.isDebugEnabled()) { s_logger.debug("Stop was unsuccessful. Rescheduling"); } return (System.currentTimeMillis() >> 10) + _stopRetryInterval; }
protected Long restart(final HaWorkVO work) { final long vmId = work.getInstanceId(); final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType()); if (mgr == null) { s_logger.warn( "Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId); return null; } VMInstanceVO vm = mgr.get(vmId); if (vm == null) { s_logger.info("Unable to find vm: " + vmId); return null; } s_logger.info("HA on " + vm.toString()); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { s_logger.info( "VM " + vm.toString() + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime()); return null; } final HostVO host = _hostDao.findById(work.getHostId()); DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); HostPodVO podVO = _podDao.findById(host.getPodId()); String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } Boolean alive = null; if (work.getStep() == Step.Investigating) { if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId()); if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) { _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null); } return null; } Enumeration<Investigator> en = _investigators.enumeration(); Investigator investigator = null; while (en.hasMoreElements()) { investigator = en.nextElement(); alive = investigator.isVmAlive(vm, host); if (alive != null) { s_logger.debug( investigator.getName() + " found VM " + vm.getName() + "to be alive? " + alive); break; } } if (alive != null && alive) { s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName()); if (host.getStatus() == Status.Up) { compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false); return null; } else { s_logger.debug("Rescheduling because the host is not up but the vm is alive"); return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; } } boolean fenced = false; if (alive == null || !alive) { fenced = true; s_logger.debug("Fencing off VM that we don't know the state of"); Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration(); while (enfb.hasMoreElements()) { final FenceBuilder fb = enfb.nextElement(); Boolean result = fb.fenceOff(vm, host); if (result != null && !result) { fenced = false; } } } if (alive == null && !fenced) { s_logger.debug("We were unable to fence off the VM " + vm.toString()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } mgr.completeStopCommand(vm); work.setStep(Step.Scheduled); _haDao.update(work.getId(), work); } // send an alert for VMs that stop unexpectedly _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vmId + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + hostDesc + "] stopped unexpectedly."); vm = mgr.get(vm.getId()); if (!_forceHA && !vm.isHaEnabled()) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } return null; // VM doesn't require HA } if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM can not restart on another server."); } return null; } if (work.getTimesTried() > _maxRetries) { s_logger.warn("Retried to max times so deleting: " + vmId); return null; } try { VMInstanceVO started = mgr.start(vm.getId(), 0); if (started != null) { s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId()); return null; } if (s_logger.isDebugEnabled()) { s_logger.debug( "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); } vm = mgr.get(vm.getId()); work.setUpdateTime(vm.getUpdated()); work.setPreviousState(vm.getState()); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } catch (final InsufficientCapacityException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } catch (final StorageUnavailableException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } catch (ConcurrentOperationException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } catch (ExecutionException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } }
protected Map<String, Object> createSecStorageVmInstance( long dataCenterId, SecondaryStorageVm.Role role) { HostVO secHost = _hostDao.findSecondaryStorageHost(dataCenterId); if (secHost == null) { String msg = "No secondary storage available in zone " + dataCenterId + ", cannot create secondary storage vm"; s_logger.warn(msg); throw new CloudRuntimeException(msg); } long id = _secStorageVmDao.getNextInSequence(Long.class, "id"); String name = VirtualMachineName.getSystemVmName(id, _instance, "s").intern(); Account systemAcct = _accountMgr.getSystemAccount(); DataCenterDeployment plan = new DataCenterDeployment(dataCenterId); DataCenter dc = _dcDao.findById(plan.getDataCenterId()); List<NetworkOfferingVO> defaultOffering = _networkMgr.getSystemAccountNetworkOfferings(NetworkOfferingVO.SystemPublicNetwork); if (dc.getNetworkType() == NetworkType.Basic || dc.isSecurityGroupEnabled()) { defaultOffering = _networkMgr.getSystemAccountNetworkOfferings(NetworkOfferingVO.SystemGuestNetwork); } List<NetworkOfferingVO> offerings = _networkMgr.getSystemAccountNetworkOfferings( NetworkOfferingVO.SystemControlNetwork, NetworkOfferingVO.SystemManagementNetwork); List<Pair<NetworkVO, NicProfile>> networks = new ArrayList<Pair<NetworkVO, NicProfile>>(offerings.size() + 1); NicProfile defaultNic = new NicProfile(); defaultNic.setDefaultNic(true); defaultNic.setDeviceId(2); try { networks.add( new Pair<NetworkVO, NicProfile>( _networkMgr .setupNetwork(systemAcct, defaultOffering.get(0), plan, null, null, false, false) .get(0), defaultNic)); for (NetworkOfferingVO offering : offerings) { networks.add( new Pair<NetworkVO, NicProfile>( _networkMgr .setupNetwork(systemAcct, offering, plan, null, null, false, false) .get(0), null)); } } catch (ConcurrentOperationException e) { s_logger.info("Unable to setup due to concurrent operation. " + e); return new HashMap<String, Object>(); } VMTemplateVO template = _templateDao.findSystemVMTemplate(dataCenterId); if (template == null) { s_logger.debug("Can't find a template to start"); throw new CloudRuntimeException("Insufficient capacity exception"); } SecondaryStorageVmVO secStorageVm = new SecondaryStorageVmVO( id, _serviceOffering.getId(), name, template.getId(), template.getHypervisorType(), template.getGuestOSId(), dataCenterId, systemAcct.getDomainId(), systemAcct.getId(), role, _serviceOffering.getOfferHA()); try { secStorageVm = _itMgr.allocate( secStorageVm, template, _serviceOffering, networks, plan, null, systemAcct); } catch (InsufficientCapacityException e) { s_logger.warn("InsufficientCapacity", e); throw new CloudRuntimeException("Insufficient capacity exception", e); } Map<String, Object> context = new HashMap<String, Object>(); context.put("secStorageVmId", secStorageVm.getId()); return context; }
/** * compareState does as its name suggests and compares the states between management server and * agent. It returns whether something should be cleaned up */ protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync) { State agentState = info.state; final String agentName = info.name; final State serverState = vm.getState(); final String serverName = vm.getName(); Command command = null; if (s_logger.isDebugEnabled()) { s_logger.debug( "VM " + serverName + ": server state = " + serverState.toString() + " and agent state = " + agentState.toString()); } if (agentState == State.Error) { agentState = State.Stopped; short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } HostPodVO podVO = _podDao.findById(vm.getPodId()); DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId()); HostVO hostVO = _hostDao.findById(vm.getHostId()); String hostDesc = "name: " + hostVO.getName() + " (id:" + hostVO.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped on host " + hostDesc + " due to storage failure", "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped due to storage failure."); } if (serverState == State.Migrating) { s_logger.debug("Skipping vm in migrating state: " + vm.toString()); return null; } if (agentState == serverState) { if (s_logger.isDebugEnabled()) { s_logger.debug("Both states are " + agentState.toString() + " for " + serverName); } assert (agentState == State.Stopped || agentState == State.Running) : "If the states we send up is changed, this must be changed."; _itMgr.stateTransitTo( vm, agentState == State.Stopped ? VirtualMachine.Event.AgentReportStopped : VirtualMachine.Event.AgentReportRunning, vm.getHostId()); if (agentState == State.Stopped) { s_logger.debug("State matches but the agent said stopped so let's send a cleanup anyways."); return info.mgr.cleanup(vm, agentName); } return null; } if (agentState == State.Stopped) { // This state means the VM on the agent was detected previously // and now is gone. This is slightly different than if the VM // was never completed but we still send down a Stop Command // to ensure there's cleanup. if (serverState == State.Running) { // Our records showed that it should be running so let's restart it. vm = info.mgr.get(vm.getId()); scheduleRestart(vm, false); command = info.mgr.cleanup(vm, agentName); } else if (serverState == State.Stopping) { if (fullSync) { s_logger.debug("VM is in stopping state on full sync. Updating the status to stopped"); vm = info.mgr.get(vm.getId()); info.mgr.completeStopCommand(vm); command = info.mgr.cleanup(vm, agentName); } else { s_logger.debug("Ignoring VM in stopping mode: " + vm.getName()); } } else if (serverState == State.Starting) { s_logger.debug("Ignoring VM in starting mode: " + vm.getName()); } else { s_logger.debug("Sending cleanup to a stopped vm: " + agentName); _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null); command = info.mgr.cleanup(vm, agentName); } } else if (agentState == State.Running) { if (serverState == State.Starting) { if (fullSync) { s_logger.debug("VM state is starting on full sync so updating it to running"); vm = info.mgr.get(vm.getId()); info.mgr.completeStartCommand(vm); } } else if (serverState == State.Stopping) { if (fullSync) { s_logger.debug("VM state is in stopping on fullsync so resend stop."); vm = info.mgr.get(vm.getId()); info.mgr.completeStopCommand(vm); command = info.mgr.cleanup(vm, agentName); } else { s_logger.debug("VM is in stopping state so no action."); } } else if (serverState == State.Destroyed || serverState == State.Stopped || serverState == State.Expunging) { s_logger.debug("VM state is in stopped so stopping it on the agent"); vm = info.mgr.get(vm.getId()); command = info.mgr.cleanup(vm, agentName); } else { _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId()); } } /*else if (agentState == State.Unknown) { if (serverState == State.Running) { if (fullSync) { vm = info.handler.get(vm.getId()); } scheduleRestart(vm, false); } else if (serverState == State.Starting) { if (fullSync) { vm = info.handler.get(vm.getId()); } scheduleRestart(vm, false); } else if (serverState == State.Stopping) { if (fullSync) { s_logger.debug("VM state is stopping in full sync. Resending stop"); command = info.handler.cleanup(vm, agentName); } } }*/ return command; }
protected Long restart(HaWorkVO work) { List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId()); if (items.size() > 0) { StringBuilder str = new StringBuilder( "Cancelling this work item because newer ones have been scheduled. Work Ids = ["); for (HaWorkVO item : items) { str.append(item.getId()).append(", "); } str.delete(str.length() - 2, str.length()).append("]"); s_logger.info(str.toString()); return null; } items = _haDao.listRunningHaWorkForVm(work.getInstanceId()); if (items.size() > 0) { StringBuilder str = new StringBuilder( "Waiting because there's HA work being executed on an item currently. Work Ids =["); for (HaWorkVO item : items) { str.append(item.getId()).append(", "); } str.delete(str.length() - 2, str.length()).append("]"); s_logger.info(str.toString()); return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; } long vmId = work.getInstanceId(); VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId()); if (vm == null) { s_logger.info("Unable to find vm: " + vmId); return null; } s_logger.info("HA on " + vm); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { s_logger.info( "VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime()); return null; } short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_SSVM; } HostVO host = _hostDao.findById(work.getHostId()); boolean isHostRemoved = false; if (host == null) { host = _hostDao.findByIdIncludingRemoved(work.getHostId()); if (host != null) { s_logger.debug( "VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed"); isHostRemoved = true; } } DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); HostPodVO podVO = _podDao.findById(host.getPodId()); String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); Boolean alive = null; if (work.getStep() == Step.Investigating) { if (!isHostRemoved) { if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId()); return null; } Enumeration<Investigator> en = _investigators.enumeration(); Investigator investigator = null; while (en.hasMoreElements()) { investigator = en.nextElement(); alive = investigator.isVmAlive(vm, host); s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive); if (alive != null) { break; } } boolean fenced = false; if (alive == null) { s_logger.debug("Fencing off VM that we don't know the state of"); Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration(); while (enfb.hasMoreElements()) { FenceBuilder fb = enfb.nextElement(); Boolean result = fb.fenceOff(vm, host); s_logger.info("Fencer " + fb.getName() + " returned " + result); if (result != null && result) { fenced = true; break; } } } else if (!alive) { fenced = true; } else { s_logger.debug( "VM " + vm.getHostName() + " is found to be alive by " + investigator.getName()); if (host.getStatus() == Status.Up) { s_logger.info(vm + " is alive and host is up. No need to restart it."); return null; } else { s_logger.debug("Rescheduling because the host is not up but the vm is alive"); return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; } } if (!fenced) { s_logger.debug("We were unable to fence off the VM " + vm); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } try { _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } work.setStep(Step.Scheduled); _haDao.update(work.getId(), work); } else { s_logger.debug( "How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways"); try { _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } } vm = _itMgr.findByIdAndType(vm.getType(), vm.getId()); if (!_forceHA && !vm.isHaEnabled()) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } return null; // VM doesn't require HA } if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM can not restart on another server."); } return null; } if (work.getTimesTried() > _maxRetries) { s_logger.warn("Retried to max times so deleting: " + vmId); return null; } try { VMInstanceVO started = _itMgr.advanceStart( vm, new HashMap<VirtualMachineProfile.Param, Object>(), _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); if (started != null) { s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId()); return null; } if (s_logger.isDebugEnabled()) { s_logger.debug( "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); } } catch (final InsufficientCapacityException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (final ResourceUnavailableException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (ConcurrentOperationException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (OperationTimedoutException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } vm = _itMgr.findByIdAndType(vm.getType(), vm.getId()); work.setUpdateTime(vm.getUpdated()); work.setPreviousState(vm.getState()); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; }
public Long migrate(final HaWorkVO work) { final long vmId = work.getInstanceId(); final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType()); VMInstanceVO vm = mgr.get(vmId); if (vm == null || vm.getRemoved() != null) { s_logger.debug("Unable to find the vm " + vmId); return null; } s_logger.info("Migrating vm: " + vm.toString()); if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { s_logger.info("VM is not longer running on the current hostId"); return null; } short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE; } HostVO fromHost = _hostDao.findById(vm.getHostId()); String fromHostName = ((fromHost == null) ? "unknown" : fromHost.getName()); HostVO toHost = null; if (work.getStep() == Step.Scheduled) { if (vm.getState() != State.Running) { s_logger.info( "VM's state is not ready for migration. " + vm.toString() + " State is " + vm.getState().toString()); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } DataCenterVO dcVO = _dcDao.findById(fromHost.getDataCenterId()); HostPodVO podVO = _podDao.findById(fromHost.getPodId()); try { toHost = mgr.prepareForMigration(vm); if (toHost == null) { if (s_logger.isDebugEnabled()) { s_logger.debug("Unable to find a host for migrating vm " + vmId); } _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Unable to find a suitable host"); } } catch (final InsufficientCapacityException e) { s_logger.warn("Unable to mgirate due to insufficient capacity " + vm.toString()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Insufficient capacity"); } catch (final StorageUnavailableException e) { s_logger.warn("Storage is unavailable: " + vm.toString()); _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Storage is gone."); } if (toHost == null) { _agentMgr.maintenanceFailed(vm.getHostId()); return null; } if (s_logger.isDebugEnabled()) { s_logger.debug("Migrating from " + work.getHostId() + " to " + toHost.getId()); } work.setStep(Step.Migrating); work.setHostId(toHost.getId()); _haDao.update(work.getId(), work); } if (work.getStep() == Step.Migrating) { vm = mgr.get(vmId); // let's see if anything has changed. boolean migrated = false; if (vm == null || vm.getRemoved() != null || vm.getHostId() == null || !_itMgr.stateTransitTo(vm, Event.MigrationRequested, vm.getHostId())) { s_logger.info("Migration cancelled because state has changed: " + vm.toString()); } else { try { boolean isWindows = _guestOSCategoryDao .findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId()) .getName() .equalsIgnoreCase("Windows"); MigrateCommand cmd = new MigrateCommand(vm.getInstanceName(), toHost.getPrivateIpAddress(), isWindows); Answer answer = _agentMgr.send(fromHost.getId(), cmd); if (answer != null && answer.getResult()) { migrated = true; _storageMgr.unshare(vm, fromHost); work.setStep(Step.Investigating); _haDao.update(work.getId(), work); } } catch (final AgentUnavailableException e) { s_logger.debug("host became unavailable"); } catch (final OperationTimedoutException e) { s_logger.debug("operation timed out"); if (e.isActive()) { scheduleRestart(vm, true); } } } if (!migrated) { s_logger.info("Migration was unsuccessful. Cleaning up: " + vm.toString()); DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId()); HostPodVO podVO = _podDao.findById(vm.getPodId()); _alertMgr.sendAlert( alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHost.getName() + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Migrate Command failed. Please check logs."); _itMgr.stateTransitTo(vm, Event.MigrationFailedOnSource, toHost.getId()); _agentMgr.maintenanceFailed(vm.getHostId()); Command cleanup = mgr.cleanup(vm, null); _agentMgr.easySend(toHost.getId(), cleanup); _storageMgr.unshare(vm, toHost); return null; } } if (toHost == null) { toHost = _hostDao.findById(work.getHostId()); } DataCenterVO dcVO = _dcDao.findById(toHost.getDataCenterId()); HostPodVO podVO = _podDao.findById(toHost.getPodId()); try { if (!mgr.completeMigration(vm, toHost)) { _alertMgr.sendAlert( alertType, toHost.getDataCenterId(), toHost.getPodId(), "Unable to migrate " + vmId + " to host " + toHost.getName() + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Migration not completed"); s_logger.warn("Unable to complete migration: " + vm.toString()); } else { s_logger.info("Migration is complete: " + vm.toString()); } return null; } catch (final AgentUnavailableException e) { s_logger.warn("Agent is unavailable for " + vm.toString()); } catch (final OperationTimedoutException e) { s_logger.warn("Operation timed outfor " + vm.toString()); } _itMgr.stateTransitTo(vm, Event.MigrationFailedOnDest, toHost.getId()); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; }