@Override public void scheduleStop(VMInstanceVO vm, long hostId, WorkType type) { assert (type == WorkType.CheckStop || type == WorkType.ForceStop || type == WorkType.Stop); if (_haDao.hasBeenScheduled(vm.getId(), type)) { s_logger.info("There's already a job scheduled to stop " + vm); return; } HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { s_logger.debug("Scheduled " + work); } wakeupWorkers(); }
@Override public boolean stop() { _stopped = true; wakeupWorkers(); _executor.shutdown(); return true; }
@Override public boolean scheduleMigration(final VMInstanceVO vm) { final HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); wakeupWorkers(); return true; }
@Override public void scheduleDestroy(VMInstanceVO vm, long hostId) { final HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { s_logger.debug("Scheduled " + work.toString()); } wakeupWorkers(); }
@Override public void scheduleStop(final VMInstanceVO vm, long hostId, boolean verifyHost) { if (_haDao.hasBeenScheduled(vm.getId(), verifyHost ? WorkType.CheckStop : WorkType.Stop)) { s_logger.info("There's already a job scheduled to stop " + vm.toString()); return; } final HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), verifyHost ? WorkType.CheckStop : WorkType.Stop, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { s_logger.debug("Scheduled " + work.toString() + " verifyHost = " + verifyHost); } wakeupWorkers(); }
@Override public void scheduleRestart(VMInstanceVO vm, boolean investigate) { Long hostId = vm.getHostId(); if (hostId == null) { try { s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm); _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } return; } if (vm.getHypervisorType() == HypervisorType.VMware) { s_logger.info("Skip HA for VMware VM " + vm.getInstanceName()); return; } if (!investigate) { if (s_logger.isDebugEnabled()) { s_logger.debug( "VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); } short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_SSVM; } if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterIdToDeployIn() + ", pod id:" + vm.getPodIdToDeployIn(); _alertMgr.sendAlert( alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } } try { _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId()); int maxRetries = 0; for (HaWorkVO item : items) { if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) { maxRetries = item.getTimesTried(); break; } } HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, vm.getUpdated()); _haDao.persist(work); if (s_logger.isInfoEnabled()) { s_logger.info("Schedule vm for HA: " + vm); } wakeupWorkers(); }
@Override public void scheduleRestart(VMInstanceVO vm, final boolean investigate) { Long hostId = vm.getHostId(); VirtualMachineGuru<VMInstanceVO> mgr = findManager(vm.getType()); vm = mgr.get(vm.getId()); if (!investigate) { if (s_logger.isDebugEnabled()) { s_logger.debug( "VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); } short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } if (!(_forceHA || vm.isHaEnabled())) { _alertMgr.sendAlert( alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + vm.getHostId(), "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } } mgr.completeStopCommand(vm); } final List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId()); int maxRetries = 0; for (final HaWorkVO item : items) { if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) { maxRetries = item.getTimesTried(); break; } } final HaWorkVO work = new HaWorkVO( vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, vm.getUpdated()); _haDao.persist(work); if (s_logger.isInfoEnabled()) { s_logger.info("Schedule vm for HA: " + vm.toString()); } wakeupWorkers(); }