public Map<Long, List<String>> getVmIdIpListMap() {
    Map<Long, List<String>> vmIdIpsMap = new HashMap<Long, List<String>>();
    if (vmIdIpMap != null && !vmIdIpMap.isEmpty()) {
        Collection<?> idIpsCollection = vmIdIpMap.values();
        for (Object idIps : idIpsCollection) {
            @SuppressWarnings("unchecked")
            HashMap<String, String> idIpsMap = (HashMap<String, String>) idIps;
            String vmId = idIpsMap.get("vmid");
            String vmIp = idIpsMap.get("vmip");
            VirtualMachine lbvm = _entityMgr.findByUuid(VirtualMachine.class, vmId);
            if (lbvm == null) {
                throw new InvalidParameterValueException("Unable to find virtual machine ID: " + vmId);
            }
            // check whether the given IP is a valid IP address
            if (vmIp == null || !NetUtils.isValidIp(vmIp)) {
                throw new InvalidParameterValueException(
                    "Invalid ip address " + vmIp + " passed in vmidipmap for vmid " + vmId);
            }
            Long longVmId = lbvm.getId();
            List<String> ipsList = vmIdIpsMap.get(longVmId);
            if (ipsList == null) {
                ipsList = new ArrayList<String>();
            }
            ipsList.add(vmIp);
            vmIdIpsMap.put(longVmId, ipsList);
        }
    }
    return vmIdIpsMap;
}
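/**
 * Security group state-transition hook: reacts to VM lifecycle transitions
 * (start, stop, migrate) by delegating to the corresponding handler so the
 * VM's security group rules can be (re)programmed. Returns false when the
 * transition itself failed, in which case nothing is done.
 */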
@Override
public boolean postStateTransitionEvent(
        State oldState, Event event, State newState, VirtualMachine vm, boolean status, Long oldHostId) {
    if (!status) {
        return false;
    }
    if (VirtualMachine.State.isVmStarted(oldState, event, newState)) {
        if (s_logger.isTraceEnabled()) {
            s_logger.trace("Security Group Mgr: handling start of vm id " + vm.getId());
        }
        handleVmStarted((VMInstanceVO) vm);
    } else if (VirtualMachine.State.isVmStopped(oldState, event, newState)) {
        if (s_logger.isTraceEnabled()) {
            s_logger.trace("Security Group Mgr: handling stop of vm id " + vm.getId());
        }
        handleVmStopped((VMInstanceVO) vm);
    } else if (VirtualMachine.State.isVmMigrated(oldState, event, newState)) {
        if (s_logger.isTraceEnabled()) {
            s_logger.trace("Security Group Mgr: handling migration of vm id " + vm.getId());
        }
        handleVmMigrated((VMInstanceVO) vm);
    }
    return true;
}
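/**
 * A VM is considered security-group enabled when at least one of its nics is
 * SG-enabled and the VM is not on VMware. (Judging from the explicit
 * hypervisor check below, VMware appears not to be served by this security
 * group path; that reading is an inference from the code, not documented here.)
 */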
@Override
public boolean isVmSecurityGroupEnabled(Long vmId) {
    VirtualMachine vm = _vmDao.findByIdIncludingRemoved(vmId);
    if (vm == null) {
        // Guard against an unknown id; the original code would have thrown an NPE here.
        return false;
    }
    List<NicProfile> nics = _networkMgr.getNicProfiles(vm);
    for (NicProfile nic : nics) {
        if (nic.isSecurityGroupEnabled() && vm.getHypervisorType() != HypervisorType.VMware) {
            return true;
        }
    }
    return false;
}
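/**
 * Investigator for user VMs: asks the virtual router(s) on each guest network
 * to ping the VM's private IP. Returns TRUE as soon as any ping succeeds and
 * null when aliveness cannot be determined (testUserVM only ever yields TRUE
 * or null, so FALSE is never returned from this path).
 */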
@Override
public Boolean isVmAlive(VirtualMachine vm, Host host) {
    if (vm.getType() != VirtualMachine.Type.User) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Not a user VM, unable to determine state of " + vm + ", returning null");
        }
        return null;
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Testing if " + vm + " is alive");
    }
    // To verify that the VM is alive, we ask the domR (router) to ping the VM's private IP.
    UserVmVO userVm = _userVmDao.findById(vm.getId());
    List<? extends Nic> nics = _networkMgr.getNicsForTraffic(userVm.getId(), TrafficType.Guest);
    for (Nic nic : nics) {
        if (nic.getIp4Address() == null) {
            continue;
        }
        List<VirtualRouter> routers = _vnaMgr.getRoutersForNetwork(nic.getNetworkId());
        if (routers == null || routers.isEmpty()) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Unable to find a router in network " + nic.getNetworkId() + " to ping " + vm);
            }
            continue;
        }
        Boolean result = null;
        for (VirtualRouter router : routers) {
            result = testUserVM(vm, nic, router);
            if (result != null) {
                break;
            }
        }
        if (result == null) {
            continue;
        }
        return result;
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Returning null since we're unable to determine state of " + vm);
    }
    return null;
}
@Override
public Boolean fenceOff(VirtualMachine vm, Host host) {
    if (host.getHypervisorType() != HypervisorType.KVM && host.getHypervisorType() != HypervisorType.LXC) {
        s_logger.warn("Don't know how to fence non-KVM/LXC hosts: " + host.getHypervisorType());
        return null;
    }
    List<HostVO> hosts = _resourceMgr.listAllHostsInCluster(host.getClusterId());
    FenceCommand fence = new FenceCommand(vm, host);
    int i = 0;
    for (HostVO h : hosts) {
        if (h.getHypervisorType() == HypervisorType.KVM || h.getHypervisorType() == HypervisorType.LXC) {
            if (h.getStatus() != Status.Up) {
                continue;
            }
            if (h.getId() == host.getId()) {
                continue;
            }
            // Only count hosts we actually ask (the original also counted the fenced host itself).
            i++;
            FenceAnswer answer;
            try {
                answer = (FenceAnswer) _agentMgr.send(h.getId(), fence);
            } catch (AgentUnavailableException | OperationTimedoutException e) {
                s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
                continue;
            }
            if (answer != null && answer.getResult()) {
                return true;
            }
        }
    }
    _alertMgr.sendAlert(
            AlertManager.AlertType.ALERT_TYPE_HOST,
            host.getDataCenterId(),
            host.getPodId(),
            "Unable to fence off host: " + host.getId(),
            "Fencing off host " + host.getId() + " did not succeed after asking " + i + " hosts. "
                + "Check agent logs for more information.");
    s_logger.error("Unable to fence off " + vm.toString() + " on " + host.toString());
    return false;
}
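/**
 * Pings a user VM's private IP from its virtual router by sending a
 * PingTestCommand through a host agent. For XenServer/KVM the command goes to
 * the router's own host; otherwise any host in the router's pod is tried.
 * Returns TRUE on a successful ping, null when no host could confirm.
 */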
private Boolean testUserVM(VirtualMachine vm, Nic nic, VirtualRouter router) {
    String privateIp = nic.getIp4Address();
    String routerPrivateIp = router.getPrivateIpAddress();
    List<Long> otherHosts = new ArrayList<Long>();
    if (vm.getHypervisorType() == HypervisorType.XenServer || vm.getHypervisorType() == HypervisorType.KVM) {
        otherHosts.add(router.getHostId());
    } else {
        otherHosts = findHostByPod(router.getPodIdToDeployIn(), null);
    }
    for (Long hostId : otherHosts) {
        try {
            Answer pingTestAnswer = _agentMgr.easySend(hostId, new PingTestCommand(routerPrivateIp, privateIp));
            if (pingTestAnswer != null && pingTestAnswer.getResult()) {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("User VM " + vm.getHostName() + "'s IP address " + privateIp
                        + " was successfully pinged from the virtual router " + router.getHostName()
                        + ", returning that the VM is alive");
                }
                return Boolean.TRUE;
            }
        } catch (Exception e) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Couldn't ping through host " + hostId + " due to exception", e);
            }
        }
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(vm + " could not be pinged, returning that its state is unknown");
    }
    return null;
}
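/**
 * HA work handler that finishes the destruction of a VM. Returns null when the
 * work item is complete or no longer applicable; otherwise returns the next
 * retry time in the scheduler's ticks of roughly one second each
 * (System.currentTimeMillis() >> 10).
 */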
protected Long destroyVM(HaWorkVO work) {
    final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    s_logger.info("Destroying " + vm.toString());
    try {
        if (vm.getState() != State.Destroyed) {
            s_logger.info("VM is no longer in Destroyed state " + vm.toString());
            return null;
        }
        if (vm.getHostId() != null) {
            _itMgr.destroy(vm.getUuid());
            s_logger.info("Successfully destroyed " + vm);
            return null;
        } else {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug(vm + " has already been stopped");
            }
            return null;
        }
    } catch (final AgentUnavailableException e) {
        s_logger.debug("Agent is not available: " + e.getMessage());
    } catch (OperationTimedoutException e) {
        s_logger.debug("Operation timed out: " + e.getMessage());
    } catch (ConcurrentOperationException e) {
        s_logger.debug("Concurrent operation: " + e.getMessage());
    }
    work.setTimesTried(work.getTimesTried() + 1);
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
}
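/**
 * Capacity bookkeeping hook: on each VM state transition, releases used or
 * reserved CPU/RAM on the host the VM is leaving and allocates capacity on the
 * host it is starting on or migrating to. The opaque argument carries a pair of
 * host ids; only its first element (the host id before the transition) is used here.
 */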
@Override
public boolean postStateTransitionEvent(
        State oldState, Event event, State newState, VirtualMachine vm, boolean status, Object opaque) {
    if (!status) {
        return false;
    }
    @SuppressWarnings("unchecked")
    Pair<Long, Long> hosts = (Pair<Long, Long>) opaque;
    Long oldHostId = hosts.first();
    s_logger.debug("VM state transitioned from: " + oldState + " to " + newState + " with event: " + event
        + ". VM's original host id: " + vm.getLastHostId() + ", new host id: " + vm.getHostId()
        + ", host id before state transition: " + oldHostId);
    if (oldState == State.Starting) {
        if (newState != State.Running) {
            releaseVmCapacity(vm, false, false, oldHostId);
        }
    } else if (oldState == State.Running) {
        if (event == Event.AgentReportStopped) {
            releaseVmCapacity(vm, false, true, oldHostId);
        } else if (event == Event.AgentReportMigrated) {
            releaseVmCapacity(vm, false, false, oldHostId);
        }
    } else if (oldState == State.Migrating) {
        if (event == Event.AgentReportStopped) {
            /* Release capacity from both the original host and the destination host */
            releaseVmCapacity(vm, false, false, vm.getLastHostId());
            releaseVmCapacity(vm, false, false, oldHostId);
        } else if (event == Event.OperationFailed) {
            /* Release from the destination host */
            releaseVmCapacity(vm, false, false, oldHostId);
        } else if (event == Event.OperationSucceeded) {
            releaseVmCapacity(vm, false, false, vm.getLastHostId());
        }
    } else if (oldState == State.Stopping) {
        if (event == Event.OperationSucceeded) {
            releaseVmCapacity(vm, false, true, oldHostId);
        } else if (event == Event.AgentReportStopped) {
            releaseVmCapacity(vm, false, false, oldHostId);
        } else if (event == Event.AgentReportMigrated) {
            releaseVmCapacity(vm, false, false, oldHostId);
        }
    } else if (oldState == State.Stopped) {
        if (event == Event.DestroyRequested || event == Event.ExpungeOperation) {
            releaseVmCapacity(vm, true, false, vm.getLastHostId());
        } else if (event == Event.AgentReportMigrated) {
            releaseVmCapacity(vm, false, false, oldHostId);
        }
    }
    if ((newState == State.Starting || newState == State.Migrating || event == Event.AgentReportMigrated)
            && vm.getHostId() != null) {
        boolean fromLastHost = false;
        // Compare the boxed Longs with equals(); == would compare object references.
        if (vm.getLastHostId() != null && vm.getLastHostId().equals(vm.getHostId())) {
            s_logger.debug("VM starting again on the last host it was stopped on");
            fromLastHost = true;
        }
        allocateVmCapacity(vm, fromLastHost);
    }
    return true;
}
@DB
@Override
public void allocateVmCapacity(VirtualMachine vm, boolean fromLastHost) {
    long hostId = vm.getHostId();
    HostVO host = _hostDao.findById(hostId);
    long clusterId = host.getClusterId();
    float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
    float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());
    ServiceOfferingVO svo = _offeringsDao.findById(vm.getServiceOfferingId());
    CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_CPU);
    CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_MEMORY);
    if (capacityCpu == null || capacityMem == null || svo == null) {
        return;
    }
    int cpu = (int) (svo.getCpu() * svo.getSpeed());
    long ram = svo.getRamSize() * 1024L * 1024L;
    Transaction txn = Transaction.currentTxn();
    try {
        txn.start();
        capacityCpu = _capacityDao.lockRow(capacityCpu.getId(), true);
        capacityMem = _capacityDao.lockRow(capacityMem.getId(), true);
        long usedCpu = capacityCpu.getUsedCapacity();
        long usedMem = capacityMem.getUsedCapacity();
        long reservedCpu = capacityCpu.getReservedCapacity();
        long reservedMem = capacityMem.getReservedCapacity();
        long actualTotalCpu = capacityCpu.getTotalCapacity();
        long actualTotalMem = capacityMem.getTotalCapacity();
        long totalCpu = (long) (actualTotalCpu * cpuOvercommitRatio);
        long totalMem = (long) (actualTotalMem * memoryOvercommitRatio);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Host's actual total CPU: " + actualTotalCpu + " and CPU after applying overprovisioning: " + totalCpu);
        }
        long freeCpu = totalCpu - (reservedCpu + usedCpu);
        long freeMem = totalMem - (reservedMem + usedMem);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("We are allocating VM, increasing the used capacity of this host: " + hostId);
            s_logger.debug("Current used CPU: " + usedCpu + ", free CPU: " + freeCpu + ", requested CPU: " + cpu);
            s_logger.debug("Current used RAM: " + usedMem + ", free RAM: " + freeMem + ", requested RAM: " + ram);
        }
        capacityCpu.setUsedCapacity(usedCpu + cpu);
        capacityMem.setUsedCapacity(usedMem + ram);
        if (fromLastHost) {
            /* Allocate from reserved capacity */
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("We are allocating the VM to its last host again, so adjusting the reserved capacity if it is not less than required");
                s_logger.debug("Reserved CPU: " + reservedCpu + ", requested CPU: " + cpu);
                s_logger.debug("Reserved RAM: " + reservedMem + ", requested RAM: " + ram);
            }
            if (reservedCpu >= cpu && reservedMem >= ram) {
                capacityCpu.setReservedCapacity(reservedCpu - cpu);
                capacityMem.setReservedCapacity(reservedMem - ram);
            }
        } else {
            /* Allocate from free capacity */
            if (!((reservedCpu + usedCpu + cpu <= totalCpu) && (reservedMem + usedMem + ram <= totalMem))) {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Host doesn't seem to have enough free capacity, but increasing the used capacity anyway, since the VM is already starting on this host");
                }
            }
        }
        s_logger.debug("CPU STATS after allocation: for host: " + hostId + ", old used: " + usedCpu
            + ", old reserved: " + reservedCpu + ", actual total: " + actualTotalCpu
            + ", total with overprovisioning: " + totalCpu + "; new used: " + capacityCpu.getUsedCapacity()
            + ", reserved: " + capacityCpu.getReservedCapacity() + "; requested cpu: " + cpu
            + ", alloc_from_last: " + fromLastHost);
        s_logger.debug("RAM STATS after allocation: for host: " + hostId + ", old used: " + usedMem
            + ", old reserved: " + reservedMem + ", total: " + totalMem
            + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity()
            + "; requested mem: " + ram + ", alloc_from_last: " + fromLastHost);
        _capacityDao.update(capacityCpu.getId(), capacityCpu);
        _capacityDao.update(capacityMem.getId(), capacityMem);
        txn.commit();
    } catch (Exception e) {
        // Don't swallow the failure silently; log it before rolling back.
        s_logger.warn("Failed to allocate VM capacity on host " + hostId, e);
        txn.rollback();
    }
}
@DB
@Override
public boolean releaseVmCapacity(VirtualMachine vm, boolean moveFromReserved, boolean moveToReserved, Long hostId) {
    ServiceOfferingVO svo = _offeringsDao.findById(vm.getServiceOfferingId());
    CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_CPU);
    CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_MEMORY);
    Long clusterId = null;
    if (hostId != null) {
        HostVO host = _hostDao.findById(hostId);
        clusterId = host.getClusterId();
    }
    if (capacityCpu == null || capacityMemory == null || svo == null) {
        return false;
    }
    Transaction txn = Transaction.currentTxn();
    try {
        txn.start();
        capacityCpu = _capacityDao.lockRow(capacityCpu.getId(), true);
        capacityMemory = _capacityDao.lockRow(capacityMemory.getId(), true);
        long usedCpu = capacityCpu.getUsedCapacity();
        long usedMem = capacityMemory.getUsedCapacity();
        long reservedCpu = capacityCpu.getReservedCapacity();
        long reservedMem = capacityMemory.getReservedCapacity();
        long actualTotalCpu = capacityCpu.getTotalCapacity();
        float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
        float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());
        int vmCPU = (int) (svo.getCpu() * svo.getSpeed());
        long vmMem = svo.getRamSize() * 1024L * 1024L;
        long actualTotalMem = capacityMemory.getTotalCapacity();
        long totalMem = (long) (actualTotalMem * memoryOvercommitRatio);
        long totalCpu = (long) (actualTotalCpu * cpuOvercommitRatio);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Host's actual total CPU: " + actualTotalCpu + " and CPU after applying overprovisioning: " + totalCpu);
            s_logger.debug("Host's actual total RAM: " + actualTotalMem + " and RAM after applying overprovisioning: " + totalMem);
        }
        if (!moveFromReserved) {
            /* Release the resource from used capacity */
            if (usedCpu >= vmCPU) {
                capacityCpu.setUsedCapacity(usedCpu - vmCPU);
            }
            if (usedMem >= vmMem) {
                capacityMemory.setUsedCapacity(usedMem - vmMem);
            }
            if (moveToReserved) {
                if (reservedCpu + vmCPU <= totalCpu) {
                    capacityCpu.setReservedCapacity(reservedCpu + vmCPU);
                }
                if (reservedMem + vmMem <= totalMem) {
                    capacityMemory.setReservedCapacity(reservedMem + vmMem);
                }
            }
        } else {
            /* Release the resource from reserved capacity */
            if (reservedCpu >= vmCPU) {
                capacityCpu.setReservedCapacity(reservedCpu - vmCPU);
            }
            if (reservedMem >= vmMem) {
                capacityMemory.setReservedCapacity(reservedMem - vmMem);
            }
        }
        s_logger.debug("Release CPU from host: " + hostId + ", old used: " + usedCpu + ", reserved: " + reservedCpu
            + ", actual total: " + actualTotalCpu + ", total with overprovisioning: " + totalCpu
            + "; new used: " + capacityCpu.getUsedCapacity() + ", reserved: " + capacityCpu.getReservedCapacity()
            + "; moveFromReserved: " + moveFromReserved + ", moveToReserved: " + moveToReserved);
        s_logger.debug("Release RAM from host: " + hostId + ", old used: " + usedMem + ", reserved: " + reservedMem
            + ", total: " + totalMem + "; new used: " + capacityMemory.getUsedCapacity()
            + ", reserved: " + capacityMemory.getReservedCapacity()
            + "; moveFromReserved: " + moveFromReserved + ", moveToReserved: " + moveToReserved);
        _capacityDao.update(capacityCpu.getId(), capacityCpu);
        _capacityDao.update(capacityMemory.getId(), capacityMemory);
        txn.commit();
        return true;
    } catch (Exception e) {
        s_logger.debug("Failed to release VM capacity, due to " + e.getMessage());
        txn.rollback();
        return false;
    }
}
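/**
 * Investigator for system VMs: pings the VM's management-network IP from other
 * hosts in the same pod. TRUE means the VM answered; FALSE means the VM did
 * not answer but its host did (so the VM is presumed down); null means there
 * is not enough information to decide.
 */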
@Override
public Boolean isVmAlive(VirtualMachine vm, Host host) {
    if (!vm.getType().isUsedBySystem()) {
        s_logger.debug("Not a system VM, unable to determine state of " + vm + ", returning null");
        // The original logged "returning null" but fell through; actually return here.
        return null;
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Testing if " + vm + " is alive");
    }
    if (vm.getHostId() == null) {
        s_logger.debug("There's no host id for " + vm);
        return null;
    }
    HostVO vmHost = _hostDao.findById(vm.getHostId());
    if (vmHost == null) {
        s_logger.debug("Unable to retrieve the host by using id " + vm.getHostId());
        return null;
    }
    List<? extends Nic> nics = _networkMgr.getNicsForTraffic(vm.getId(), TrafficType.Management);
    if (nics.size() == 0) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Unable to find a management nic, cannot ping this system VM, unable to determine state of " + vm + ", returning null");
        }
        return null;
    }
    for (Nic nic : nics) {
        if (nic.getIp4Address() == null) {
            continue;
        }
        // Find other hosts in the pod and use them to ping the system VM's management IP.
        List<Long> otherHosts = findHostByPod(vmHost.getPodId(), vm.getHostId());
        for (Long otherHost : otherHosts) {
            Status vmState = testIpAddress(otherHost, nic.getIp4Address());
            if (vmState == null) {
                // Can't get information from that host; try the next one.
                continue;
            }
            if (vmState == Status.Up) {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Successfully pinged VM's private IP (" + vm.getPrivateIpAddress() + "), returning that the VM is up");
                }
                return Boolean.TRUE;
            } else if (vmState == Status.Down) {
                // We can't ping the VM directly. If we can ping the host, then report the VM down.
                // If we can't ping the host either, we don't have enough information.
                Status vmHostState = testIpAddress(otherHost, vmHost.getPrivateIpAddress());
                if ((vmHostState != null) && (vmHostState == Status.Up)) {
                    if (s_logger.isDebugEnabled()) {
                        s_logger.debug("Successfully pinged VM's host IP (" + vmHost.getPrivateIpAddress() + "), but could not ping the VM, returning that the VM is down");
                    }
                    return Boolean.FALSE;
                }
            }
        }
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Unable to determine state of " + vm + ", returning null");
    }
    return null;
}
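/**
 * Tears down OVS GRE tunnels when the given VM is the last VM of its account
 * on its host: first destroys the account's tunnels on that host, then asks
 * every peer host that terminates a tunnel to it to do the same.
 */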
@Override
public void CheckAndDestroyTunnel(VirtualMachine vm) {
    if (!_isEnabled) {
        return;
    }
    List<UserVmVO> userVms = _userVmDao.listByAccountIdAndHostId(vm.getAccountId(), vm.getHostId());
    if (vm.getType() == VirtualMachine.Type.User) {
        if (userVms.size() > 1) {
            return;
        }
        List<DomainRouterVO> routers = _routerDao.findBy(vm.getAccountId(), vm.getDataCenterIdToDeployIn());
        for (DomainRouterVO router : routers) {
            // Compare the boxed host ids with equals(); == would compare references.
            if (router.getHostId() != null && router.getHostId().equals(vm.getHostId())) {
                return;
            }
        }
    } else if (vm.getType() == VirtualMachine.Type.DomainRouter && userVms.size() != 0) {
        return;
    }
    try {
        /* We are the last one on the host, so destroy all tunnels of our account */
        Command cmd = new OvsDestroyTunnelCommand(vm.getAccountId(), "[]");
        Answer ans = _agentMgr.send(vm.getHostId(), cmd);
        handleDestroyTunnelAnswer(ans, vm.getHostId(), 0, vm.getAccountId());
        /* Then ask the hosts that have peer tunnels with us to destroy them */
        List<OvsTunnelAccountVO> peers = _tunnelAccountDao.listByToAccount(vm.getHostId(), vm.getAccountId());
        for (OvsTunnelAccountVO p : peers) {
            cmd = new OvsDestroyTunnelCommand(p.getAccount(), p.getPortName());
            ans = _agentMgr.send(p.getFrom(), cmd);
            handleDestroyTunnelAnswer(ans, p.getFrom(), p.getTo(), p.getAccount());
        }
    } catch (Exception e) {
        s_logger.warn(String.format("Destroy tunnel(account:%1$s, hostId:%2$s) failed", vm.getAccountId(), vm.getHostId()), e);
    }
}
@DB
protected void CheckAndCreateTunnel(VirtualMachine instance, DeployDestination dest) {
    if (!_isEnabled) {
        return;
    }
    if (instance.getType() != VirtualMachine.Type.User && instance.getType() != VirtualMachine.Type.DomainRouter) {
        return;
    }
    long hostId = dest.getHost().getId();
    long accountId = instance.getAccountId();
    List<UserVmVO> vms = _userVmDao.listByAccountId(accountId);
    List<DomainRouterVO> routers = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn());
    List<VMInstanceVO> ins = new ArrayList<VMInstanceVO>();
    if (vms != null) {
        ins.addAll(vms);
    }
    if (routers.size() != 0) {
        ins.addAll(routers);
    }
    List<Pair<Long, Integer>> toHosts = new ArrayList<Pair<Long, Integer>>();
    List<Pair<Long, Integer>> fromHosts = new ArrayList<Pair<Long, Integer>>();
    int key;
    for (VMInstanceVO v : ins) {
        Long rh = v.getHostId();
        if (rh == null || rh.longValue() == hostId) {
            continue;
        }
        OvsTunnelAccountVO ta = _tunnelAccountDao.getByFromToAccount(hostId, rh.longValue(), accountId);
        if (ta == null) {
            key = getGreKey(hostId, rh.longValue(), accountId);
            if (key == -1) {
                s_logger.warn(String.format("Cannot get GRE key for from=%1$s to=%2$s accountId=%3$s, tunnel create failed", hostId, rh.longValue(), accountId));
                continue;
            }
            Pair<Long, Integer> p = new Pair<Long, Integer>(rh, Integer.valueOf(key));
            if (!toHosts.contains(p)) {
                toHosts.add(p);
            }
        }
        ta = _tunnelAccountDao.getByFromToAccount(rh.longValue(), hostId, accountId);
        if (ta == null) {
            key = getGreKey(rh.longValue(), hostId, accountId);
            if (key == -1) {
                s_logger.warn(String.format("Cannot get GRE key for from=%1$s to=%2$s accountId=%3$s, tunnel create failed", rh.longValue(), hostId, accountId));
                continue;
            }
            Pair<Long, Integer> p = new Pair<Long, Integer>(rh, Integer.valueOf(key));
            if (!fromHosts.contains(p)) {
                fromHosts.add(p);
            }
        }
    }
    try {
        String myIp = dest.getHost().getPrivateIpAddress();
        for (Pair<Long, Integer> i : toHosts) {
            HostVO rHost = _hostDao.findById(i.first());
            Commands cmds = new Commands(new OvsCreateTunnelCommand(rHost.getPrivateIpAddress(), i.second().toString(), Long.valueOf(hostId), i.first(), accountId, myIp));
            s_logger.debug("Ask host " + hostId + " to create gre tunnel to " + i.first());
            Answer[] answers = _agentMgr.send(hostId, cmds);
            handleCreateTunnelAnswer(answers);
        }
        for (Pair<Long, Integer> i : fromHosts) {
            HostVO rHost = _hostDao.findById(i.first());
            Commands cmds = new Commands(new OvsCreateTunnelCommand(myIp, i.second().toString(), i.first(), Long.valueOf(hostId), accountId, rHost.getPrivateIpAddress()));
            s_logger.debug("Ask host " + i.first() + " to create gre tunnel to " + hostId);
            Answer[] answers = _agentMgr.send(i.first(), cmds);
            handleCreateTunnelAnswer(answers);
        }
    } catch (Exception e) {
        s_logger.debug("Ovs tunnel creation failed", e);
    }
}
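/**
 * HA work handler for the Stop/CheckStop/ForceStop work types. CheckStop and
 * ForceStop first verify the VM is still in the state the work item was
 * scheduled against. Returns null when done; otherwise a retry time in the
 * scheduler's ticks of roughly one second each.
 */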
protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work);
        work.setStep(Step.Done);
        return null;
    }
    s_logger.info("Stopping " + vm);
    try {
        if (work.getWorkType() == WorkType.Stop) {
            _itMgr.advanceStop(vm.getUuid(), false);
            s_logger.info("Successfully stopped " + vm);
            return null;
        } else if (work.getWorkType() == WorkType.CheckStop || work.getWorkType() == WorkType.ForceStop) {
            if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime()
                    || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) {
                s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId()
                    + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none")
                    + " State: " + vm.getState());
                return null;
            }
            // CheckStop and ForceStop differ only in the force flag passed to advanceStop.
            _itMgr.advanceStop(vm.getUuid(), work.getWorkType() == WorkType.ForceStop);
            s_logger.info("Stop for " + vm + " was successful");
            return null;
        } else {
            assert false : "Who decided there's other steps but didn't modify the guy who does the work?";
        }
    } catch (final ResourceUnavailableException e) {
        s_logger.debug("Agent is not available: " + e.getMessage());
    } catch (OperationTimedoutException e) {
        s_logger.debug("Operation timed out: " + e.getMessage());
    }
    work.setTimesTried(work.getTimesTried() + 1);
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Stop was unsuccessful. Rescheduling");
    }
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
}
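/**
 * HA work handler that restarts a VM after its host failed: it first asks the
 * investigators whether the VM is actually alive, fences it off when its state
 * is unknown, force-stops it, and finally tries to start it again (falling
 * back to the HA planner on insufficient capacity). Returns null when no retry
 * is needed, otherwise the next retry time in the scheduler's ticks of roughly
 * one second each.
 */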
protected Long restart(HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
        StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled. Work Ids = [");
        for (HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return null;
    }
    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
        StringBuilder str = new StringBuilder("Waiting because there's HA work being executed on an item currently. Work Ids = [");
        for (HaWorkVO item : items) {
            str.append(item.getId()).append(", ");
        }
        str.delete(str.length() - 2, str.length()).append("]");
        s_logger.info(str.toString());
        return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }
    long vmId = work.getInstanceId();
    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
        s_logger.info("Unable to find vm: " + vmId);
        return null;
    }
    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
        s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState()
            + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated()
            + " previous updated = " + work.getUpdateTime());
        return null;
    }
    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
        alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
    }
    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
        host = _hostDao.findByIdIncludingRemoved(work.getHostId());
        if (host != null) {
            s_logger.debug("VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed");
            isHostRemoved = true;
        }
    }
    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc = "name: " + host.getName() + " (id: " + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
        if (!isHostRemoved) {
            if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
                s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
                return null;
            }
            Investigator investigator = null;
            for (Investigator it : investigators) {
                investigator = it;
                alive = investigator.isVmAlive(vm, host);
                s_logger.info(investigator.getName() + " found " + vm + " to be alive? " + alive);
                if (alive != null) {
                    break;
                }
            }
            boolean fenced = false;
            if (alive == null) {
                s_logger.debug("Fencing off VM that we don't know the state of");
                for (FenceBuilder fb : fenceBuilders) {
                    Boolean result = fb.fenceOff(vm, host);
                    s_logger.info("Fencer " + fb.getName() + " returned " + result);
                    if (result != null && result) {
                        fenced = true;
                        break;
                    }
                }
            } else if (!alive) {
                fenced = true;
            } else {
                s_logger.debug("VM " + vm.getHostName() + " is found to be alive by " + investigator.getName());
                if (host.getStatus() == Status.Up) {
                    s_logger.info(vm + " is alive and host is up. No need to restart it.");
                    return null;
                } else {
                    s_logger.debug("Rescheduling because the host is not up but the vm is alive");
                    return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
                }
            }
            if (!fenced) {
                s_logger.debug("We were unable to fence off the VM " + vm);
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
                    "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
                    "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
                return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
            }
            try {
                _itMgr.advanceStop(vm.getUuid(), true);
            } catch (ResourceUnavailableException | OperationTimedoutException | ConcurrentOperationException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            }
            work.setStep(Step.Scheduled);
            _haDao.update(work.getId(), work);
        } else {
            s_logger.debug("How come that HA step is Investigating and the host is removed? Calling forced stop on the VM anyway");
            try {
                _itMgr.advanceStop(vm.getUuid(), true);
            } catch (ResourceUnavailableException | OperationTimedoutException | ConcurrentOperationException e) {
                assert false : "How do we hit this when force is true?";
                throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
            }
        }
    }
    vm = _itMgr.findById(vm.getId());
    if (!_forceHA && !vm.isHaEnabled()) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM is not HA enabled so we're done.");
        }
        return null; // VM doesn't require HA
    }
    if (!volumeMgr.canVmRestartOnAnotherServer(vm.getId())) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM cannot restart on another server.");
        }
        return null;
    }
    if (work.getTimesTried() > _maxRetries) {
        s_logger.warn("Retried max times, so deleting work item for VM: " + vmId);
        return null;
    }
    try {
        HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<VirtualMachineProfile.Param, Object>();
        if (_haTag != null) {
            params.put(VirtualMachineProfile.Param.HaTag, _haTag);
        }
        try {
            // First try starting the VM with its original planner; if that doesn't
            // succeed, send the HA planner since this is an emergency.
            _itMgr.advanceStart(vm.getUuid(), params, null);
        } catch (InsufficientCapacityException e) {
            s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
            _itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0));
        }
        VMInstanceVO started = _instanceDao.findById(vm.getId());
        if (started != null && started.getState() == VirtualMachine.State.Running) {
            s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
            return null;
        }
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
        }
    } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
            "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
            "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (final ResourceUnavailableException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
            "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
            "The storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (ConcurrentOperationException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
            "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
            "A concurrent operation prevented restarting VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    } catch (OperationTimedoutException e) {
        s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
            "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
            "The operation timed out while trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc);
    }
    vm = _itMgr.findById(vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
}