public Map<Long, List<String>> getVmIdIpListMap() {
    Map<Long, List<String>> vmIdIpsMap = new HashMap<Long, List<String>>();
    if (vmIdIpMap != null && !vmIdIpMap.isEmpty()) {
      Collection idIpsCollection = vmIdIpMap.values();
      Iterator iter = idIpsCollection.iterator();
      while (iter.hasNext()) {
        HashMap<String, String> idIpsMap = (HashMap<String, String>) iter.next();
        String vmId = idIpsMap.get("vmid");
        String vmIp = idIpsMap.get("vmip");

        VirtualMachine lbvm = _entityMgr.findByUuid(VirtualMachine.class, vmId);
        if (lbvm == null) {
          throw new InvalidParameterValueException("Unable to find virtual machine ID: " + vmId);
        }

        // check wether the given ip is valid ip or not
        if (vmIp == null || !NetUtils.isValidIp(vmIp)) {
          throw new InvalidParameterValueException(
              "Invalid ip address " + vmIp + " passed in vmidipmap for " + "vmid " + vmId);
        }
        Long longVmId = lbvm.getId();

        List<String> ipsList = null;
        if (vmIdIpsMap.containsKey(longVmId)) {
          ipsList = vmIdIpsMap.get(longVmId);
        } else {
          ipsList = new ArrayList<String>();
        }
        ipsList.add(vmIp);
        vmIdIpsMap.put(longVmId, ipsList);
      }
    }

    return vmIdIpsMap;
  }
  @Override
  public boolean postStateTransitionEvent(
      State oldState,
      Event event,
      State newState,
      VirtualMachine vm,
      boolean status,
      Long oldHostId) {
    if (!status) {
      return false;
    }

    if (VirtualMachine.State.isVmStarted(oldState, event, newState)) {
      if (s_logger.isTraceEnabled()) {
        s_logger.trace("Security Group Mgr: handling start of vm id" + vm.getId());
      }
      handleVmStarted((VMInstanceVO) vm);
    } else if (VirtualMachine.State.isVmStopped(oldState, event, newState)) {
      if (s_logger.isTraceEnabled()) {
        s_logger.trace("Security Group Mgr: handling stop of vm id" + vm.getId());
      }
      handleVmStopped((VMInstanceVO) vm);
    } else if (VirtualMachine.State.isVmMigrated(oldState, event, newState)) {
      if (s_logger.isTraceEnabled()) {
        s_logger.trace("Security Group Mgr: handling migration of vm id" + vm.getId());
      }
      handleVmMigrated((VMInstanceVO) vm);
    }

    return true;
  }
 @Override
 public boolean isVmSecurityGroupEnabled(Long vmId) {
   VirtualMachine vm = _vmDao.findByIdIncludingRemoved(vmId);
   List<NicProfile> nics = _networkMgr.getNicProfiles(vm);
   for (NicProfile nic : nics) {
     if (nic.isSecurityGroupEnabled() && vm.getHypervisorType() != HypervisorType.VMware) {
       return true;
     }
   }
   return false;
 }
  @Override
  public Boolean isVmAlive(VirtualMachine vm, Host host) {
    if (vm.getType() != VirtualMachine.Type.User) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Not a User Vm, unable to determine state of " + vm + " returning null");
      }
      return null;
    }

    if (s_logger.isDebugEnabled()) {
      s_logger.debug("testing if " + vm + " is alive");
    }
    // to verify that the VM is alive, we ask the domR (router) to ping the VM (private IP)
    UserVmVO userVm = _userVmDao.findById(vm.getId());

    List<? extends Nic> nics = _networkMgr.getNicsForTraffic(userVm.getId(), TrafficType.Guest);

    for (Nic nic : nics) {
      if (nic.getIp4Address() == null) {
        continue;
      }

      List<VirtualRouter> routers = _vnaMgr.getRoutersForNetwork(nic.getNetworkId());
      if (routers == null || routers.isEmpty()) {
        if (s_logger.isDebugEnabled()) {
          s_logger.debug(
              "Unable to find a router in network " + nic.getNetworkId() + " to ping " + vm);
        }
        continue;
      }

      Boolean result = null;
      for (VirtualRouter router : routers) {
        result = testUserVM(vm, nic, router);
        if (result != null) {
          break;
        }
      }

      if (result == null) {
        continue;
      }

      return result;
    }

    if (s_logger.isDebugEnabled()) {
      s_logger.debug("Returning null since we're unable to determine state of " + vm);
    }
    return null;
  }
Example #5
0
  @Override
  public Boolean fenceOff(VirtualMachine vm, Host host) {
    if (host.getHypervisorType() != HypervisorType.KVM
        && host.getHypervisorType() != HypervisorType.LXC) {
      s_logger.warn("Don't know how to fence non kvm hosts " + host.getHypervisorType());
      return null;
    }

    List<HostVO> hosts = _resourceMgr.listAllHostsInCluster(host.getClusterId());
    FenceCommand fence = new FenceCommand(vm, host);

    int i = 0;
    for (HostVO h : hosts) {
      if (h.getHypervisorType() == HypervisorType.KVM
          || h.getHypervisorType() == HypervisorType.LXC) {
        if (h.getStatus() != Status.Up) {
          continue;
        }

        i++;

        if (h.getId() == host.getId()) {
          continue;
        }
        FenceAnswer answer;
        try {
          answer = (FenceAnswer) _agentMgr.send(h.getId(), fence);
        } catch (AgentUnavailableException e) {
          s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
          continue;
        } catch (OperationTimedoutException e) {
          s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
          continue;
        }
        if (answer != null && answer.getResult()) {
          return true;
        }
      }
    }

    _alertMgr.sendAlert(
        AlertManager.AlertType.ALERT_TYPE_HOST,
        host.getDataCenterId(),
        host.getPodId(),
        "Unable to fence off host: " + host.getId(),
        "Fencing off host "
            + host.getId()
            + " did not succeed after asking "
            + i
            + " hosts. "
            + "Check Agent logs for more information.");

    s_logger.error("Unable to fence off " + vm.toString() + " on " + host.toString());

    return false;
  }
  private Boolean testUserVM(VirtualMachine vm, Nic nic, VirtualRouter router) {
    String privateIp = nic.getIp4Address();
    String routerPrivateIp = router.getPrivateIpAddress();

    List<Long> otherHosts = new ArrayList<Long>();
    if (vm.getHypervisorType() == HypervisorType.XenServer
        || vm.getHypervisorType() == HypervisorType.KVM) {
      otherHosts.add(router.getHostId());
    } else {
      otherHosts = findHostByPod(router.getPodIdToDeployIn(), null);
    }
    for (Long hostId : otherHosts) {
      try {
        Answer pingTestAnswer =
            _agentMgr.easySend(hostId, new PingTestCommand(routerPrivateIp, privateIp));
        if (pingTestAnswer != null && pingTestAnswer.getResult()) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug(
                "user vm's "
                    + vm.getHostName()
                    + " ip address "
                    + privateIp
                    + "  has been successfully pinged from the Virtual Router "
                    + router.getHostName()
                    + ", returning that vm is alive");
          }
          return Boolean.TRUE;
        }
      } catch (Exception e) {
        if (s_logger.isDebugEnabled()) {
          s_logger.debug("Couldn't reach due to", e);
        }
        continue;
      }
    }
    if (s_logger.isDebugEnabled()) {
      s_logger.debug(vm + " could not be pinged, returning that it is unknown");
    }
    return null;
  }
  protected Long destroyVM(HaWorkVO work) {
    final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    s_logger.info("Destroying " + vm.toString());
    try {
      if (vm.getState() != State.Destroyed) {
        s_logger.info("VM is no longer in Destroyed state " + vm.toString());
        return null;
      }

      if (vm.getHostId() != null) {
        _itMgr.destroy(vm.getUuid());
        s_logger.info("Successfully destroy " + vm);
        return null;
      } else {
        if (s_logger.isDebugEnabled()) {
          s_logger.debug(vm + " has already been stopped");
        }
        return null;
      }
    } catch (final AgentUnavailableException e) {
      s_logger.debug("Agnet is not available" + e.getMessage());
    } catch (OperationTimedoutException e) {
      s_logger.debug("operation timed out: " + e.getMessage());
    } catch (ConcurrentOperationException e) {
      s_logger.debug("concurrent operation: " + e.getMessage());
    }

    work.setTimesTried(work.getTimesTried() + 1);
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
  }
  @Override
  public boolean postStateTransitionEvent(
      State oldState,
      Event event,
      State newState,
      VirtualMachine vm,
      boolean status,
      Object opaque) {
    if (!status) {
      return false;
    }
    @SuppressWarnings("unchecked")
    Pair<Long, Long> hosts = (Pair<Long, Long>) opaque;
    Long oldHostId = hosts.first();

    s_logger.debug(
        "VM state transitted from :"
            + oldState
            + " to "
            + newState
            + " with event: "
            + event
            + "vm's original host id: "
            + vm.getLastHostId()
            + " new host id: "
            + vm.getHostId()
            + " host id before state transition: "
            + oldHostId);

    if (oldState == State.Starting) {
      if (newState != State.Running) {
        releaseVmCapacity(vm, false, false, oldHostId);
      }
    } else if (oldState == State.Running) {
      if (event == Event.AgentReportStopped) {
        releaseVmCapacity(vm, false, true, oldHostId);
      } else if (event == Event.AgentReportMigrated) {
        releaseVmCapacity(vm, false, false, oldHostId);
      }
    } else if (oldState == State.Migrating) {
      if (event == Event.AgentReportStopped) {
        /* Release capacity from original host */
        releaseVmCapacity(vm, false, false, vm.getLastHostId());
        releaseVmCapacity(vm, false, false, oldHostId);
      } else if (event == Event.OperationFailed) {
        /* Release from dest host */
        releaseVmCapacity(vm, false, false, oldHostId);
      } else if (event == Event.OperationSucceeded) {
        releaseVmCapacity(vm, false, false, vm.getLastHostId());
      }
    } else if (oldState == State.Stopping) {
      if (event == Event.OperationSucceeded) {
        releaseVmCapacity(vm, false, true, oldHostId);
      } else if (event == Event.AgentReportStopped) {
        releaseVmCapacity(vm, false, false, oldHostId);
      } else if (event == Event.AgentReportMigrated) {
        releaseVmCapacity(vm, false, false, oldHostId);
      }
    } else if (oldState == State.Stopped) {
      if (event == Event.DestroyRequested || event == Event.ExpungeOperation) {
        releaseVmCapacity(vm, true, false, vm.getLastHostId());
      } else if (event == Event.AgentReportMigrated) {
        releaseVmCapacity(vm, false, false, oldHostId);
      }
    }

    if ((newState == State.Starting
            || newState == State.Migrating
            || event == Event.AgentReportMigrated)
        && vm.getHostId() != null) {
      boolean fromLastHost = false;
      if (vm.getLastHostId() == vm.getHostId()) {
        s_logger.debug("VM starting again on the last host it was stopped on");
        fromLastHost = true;
      }
      allocateVmCapacity(vm, fromLastHost);
    }

    return true;
  }
  @DB
  @Override
  public void allocateVmCapacity(VirtualMachine vm, boolean fromLastHost) {

    long hostId = vm.getHostId();
    HostVO host = _hostDao.findById(hostId);
    long clusterId = host.getClusterId();
    float cpuOvercommitRatio =
        Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
    float memoryOvercommitRatio =
        Float.parseFloat(
            _clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());

    ServiceOfferingVO svo = _offeringsDao.findById(vm.getServiceOfferingId());

    CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_CPU);
    CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_MEMORY);

    if (capacityCpu == null || capacityMem == null || svo == null) {
      return;
    }

    int cpu = (int) (svo.getCpu() * svo.getSpeed());
    long ram = (long) (svo.getRamSize() * 1024L * 1024L);

    Transaction txn = Transaction.currentTxn();

    try {
      txn.start();
      capacityCpu = _capacityDao.lockRow(capacityCpu.getId(), true);
      capacityMem = _capacityDao.lockRow(capacityMem.getId(), true);

      long usedCpu = capacityCpu.getUsedCapacity();
      long usedMem = capacityMem.getUsedCapacity();
      long reservedCpu = capacityCpu.getReservedCapacity();
      long reservedMem = capacityMem.getReservedCapacity();
      long actualTotalCpu = capacityCpu.getTotalCapacity();
      long actualTotalMem = capacityMem.getTotalCapacity();
      long totalCpu = (long) (actualTotalCpu * cpuOvercommitRatio);
      long totalMem = (long) (actualTotalMem * memoryOvercommitRatio);
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Hosts's actual total CPU: "
                + actualTotalCpu
                + " and CPU after applying overprovisioning: "
                + totalCpu);
      }

      long freeCpu = totalCpu - (reservedCpu + usedCpu);
      long freeMem = totalMem - (reservedMem + usedMem);

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("We are allocating VM, increasing the used capacity of this host:" + hostId);
        s_logger.debug(
            "Current Used CPU: " + usedCpu + " , Free CPU:" + freeCpu + " ,Requested CPU: " + cpu);
        s_logger.debug(
            "Current Used RAM: " + usedMem + " , Free RAM:" + freeMem + " ,Requested RAM: " + ram);
      }
      capacityCpu.setUsedCapacity(usedCpu + cpu);
      capacityMem.setUsedCapacity(usedMem + ram);

      if (fromLastHost) {
        /* alloc from reserved */
        if (s_logger.isDebugEnabled()) {
          s_logger.debug(
              "We are allocating VM to the last host again, so adjusting the reserved capacity if it is not less than required");
          s_logger.debug("Reserved CPU: " + reservedCpu + " , Requested CPU: " + cpu);
          s_logger.debug("Reserved RAM: " + reservedMem + " , Requested RAM: " + ram);
        }
        if (reservedCpu >= cpu && reservedMem >= ram) {
          capacityCpu.setReservedCapacity(reservedCpu - cpu);
          capacityMem.setReservedCapacity(reservedMem - ram);
        }
      } else {
        /* alloc from free resource */
        if (!((reservedCpu + usedCpu + cpu <= totalCpu)
            && (reservedMem + usedMem + ram <= totalMem))) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug(
                "Host doesnt seem to have enough free capacity, but increasing the used capacity anyways, since the VM is already starting on this host ");
          }
        }
      }

      s_logger.debug(
          "CPU STATS after allocation: for host: "
              + hostId
              + ", old used: "
              + usedCpu
              + ", old reserved: "
              + reservedCpu
              + ", actual total: "
              + actualTotalCpu
              + ", total with overprovisioning: "
              + totalCpu
              + "; new used:"
              + capacityCpu.getUsedCapacity()
              + ", reserved:"
              + capacityCpu.getReservedCapacity()
              + "; requested cpu:"
              + cpu
              + ",alloc_from_last:"
              + fromLastHost);

      s_logger.debug(
          "RAM STATS after allocation: for host: "
              + hostId
              + ", old used: "
              + usedMem
              + ", old reserved: "
              + reservedMem
              + ", total: "
              + totalMem
              + "; new used: "
              + capacityMem.getUsedCapacity()
              + ", reserved: "
              + capacityMem.getReservedCapacity()
              + "; requested mem: "
              + ram
              + ",alloc_from_last:"
              + fromLastHost);

      _capacityDao.update(capacityCpu.getId(), capacityCpu);
      _capacityDao.update(capacityMem.getId(), capacityMem);
      txn.commit();
    } catch (Exception e) {
      txn.rollback();
      return;
    }
  }
  @DB
  @Override
  public boolean releaseVmCapacity(
      VirtualMachine vm, boolean moveFromReserved, boolean moveToReservered, Long hostId) {
    ServiceOfferingVO svo = _offeringsDao.findById(vm.getServiceOfferingId());
    CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_CPU);
    CapacityVO capacityMemory =
        _capacityDao.findByHostIdType(hostId, CapacityVO.CAPACITY_TYPE_MEMORY);
    Long clusterId = null;
    if (hostId != null) {
      HostVO host = _hostDao.findById(hostId);
      clusterId = host.getClusterId();
    }
    if (capacityCpu == null || capacityMemory == null || svo == null) {
      return false;
    }

    Transaction txn = Transaction.currentTxn();
    try {
      txn.start();

      capacityCpu = _capacityDao.lockRow(capacityCpu.getId(), true);
      capacityMemory = _capacityDao.lockRow(capacityMemory.getId(), true);

      long usedCpu = capacityCpu.getUsedCapacity();
      long usedMem = capacityMemory.getUsedCapacity();
      long reservedCpu = capacityCpu.getReservedCapacity();
      long reservedMem = capacityMemory.getReservedCapacity();
      long actualTotalCpu = capacityCpu.getTotalCapacity();
      float cpuOvercommitRatio =
          Float.parseFloat(
              _clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
      float memoryOvercommitRatio =
          Float.parseFloat(
              _clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());
      int vmCPU = (int) (svo.getCpu() * svo.getSpeed());
      long vmMem = (long) (svo.getRamSize() * 1024L * 1024L);
      long actualTotalMem = capacityMemory.getTotalCapacity();
      long totalMem = (long) (actualTotalMem * memoryOvercommitRatio);
      long totalCpu = (long) (actualTotalCpu * cpuOvercommitRatio);
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Hosts's actual total CPU: "
                + actualTotalCpu
                + " and CPU after applying overprovisioning: "
                + totalCpu);
        s_logger.debug(
            "Hosts's actual total RAM: "
                + actualTotalMem
                + " and RAM after applying overprovisioning: "
                + totalMem);
      }

      if (!moveFromReserved) {
        /* move resource from used */
        if (usedCpu >= vmCPU) {
          capacityCpu.setUsedCapacity(usedCpu - vmCPU);
        }
        if (usedMem >= vmMem) {
          capacityMemory.setUsedCapacity(usedMem - vmMem);
        }

        if (moveToReservered) {
          if (reservedCpu + vmCPU <= totalCpu) {
            capacityCpu.setReservedCapacity(reservedCpu + vmCPU);
          }
          if (reservedMem + vmMem <= totalMem) {
            capacityMemory.setReservedCapacity(reservedMem + vmMem);
          }
        }
      } else {
        if (reservedCpu >= vmCPU) {
          capacityCpu.setReservedCapacity(reservedCpu - vmCPU);
        }
        if (reservedMem >= vmMem) {
          capacityMemory.setReservedCapacity(reservedMem - vmMem);
        }
      }

      s_logger.debug(
          "release cpu from host: "
              + hostId
              + ", old used: "
              + usedCpu
              + ",reserved: "
              + reservedCpu
              + ", actual total: "
              + actualTotalCpu
              + ", total with overprovisioning: "
              + totalCpu
              + "; new used: "
              + capacityCpu.getUsedCapacity()
              + ",reserved:"
              + capacityCpu.getReservedCapacity()
              + "; movedfromreserved: "
              + moveFromReserved
              + ",moveToReservered"
              + moveToReservered);

      s_logger.debug(
          "release mem from host: "
              + hostId
              + ", old used: "
              + usedMem
              + ",reserved: "
              + reservedMem
              + ", total: "
              + totalMem
              + "; new used: "
              + capacityMemory.getUsedCapacity()
              + ",reserved:"
              + capacityMemory.getReservedCapacity()
              + "; movedfromreserved: "
              + moveFromReserved
              + ",moveToReservered"
              + moveToReservered);

      _capacityDao.update(capacityCpu.getId(), capacityCpu);
      _capacityDao.update(capacityMemory.getId(), capacityMemory);
      txn.commit();
      return true;
    } catch (Exception e) {
      s_logger.debug("Failed to transit vm's state, due to " + e.getMessage());
      txn.rollback();
      return false;
    }
  }
  @Override
  public Boolean isVmAlive(VirtualMachine vm, Host host) {
    if (!vm.getType().isUsedBySystem()) {
      s_logger.debug("Not a System Vm, unable to determine state of " + vm + " returning null");
    }

    if (s_logger.isDebugEnabled()) {
      s_logger.debug("Testing if " + vm + " is alive");
    }

    if (vm.getHostId() == null) {
      s_logger.debug("There's no host id for " + vm);
      return null;
    }

    HostVO vmHost = _hostDao.findById(vm.getHostId());
    if (vmHost == null) {
      s_logger.debug("Unable to retrieve the host by using id " + vm.getHostId());
      return null;
    }

    List<? extends Nic> nics = _networkMgr.getNicsForTraffic(vm.getId(), TrafficType.Management);
    if (nics.size() == 0) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Unable to find a management nic, cannot ping this system VM, unable to determine state of "
                + vm
                + " returning null");
      }
      return null;
    }

    for (Nic nic : nics) {
      if (nic.getIp4Address() == null) {
        continue;
      }
      // get the data center IP address, find a host on the pod, use that host to ping the data
      // center IP address
      List<Long> otherHosts = findHostByPod(vmHost.getPodId(), vm.getHostId());
      for (Long otherHost : otherHosts) {
        Status vmState = testIpAddress(otherHost, nic.getIp4Address());
        if (vmState == null) {
          // can't get information from that host, try the next one
          continue;
        }
        if (vmState == Status.Up) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug(
                "successfully pinged vm's private IP ("
                    + vm.getPrivateIpAddress()
                    + "), returning that the VM is up");
          }
          return Boolean.TRUE;
        } else if (vmState == Status.Down) {
          // We can't ping the VM directly...if we can ping the host, then report the VM down.
          // If we can't ping the host, then we don't have enough information.
          Status vmHostState = testIpAddress(otherHost, vmHost.getPrivateIpAddress());
          if ((vmHostState != null) && (vmHostState == Status.Up)) {
            if (s_logger.isDebugEnabled()) {
              s_logger.debug(
                  "successfully pinged vm's host IP ("
                      + vmHost.getPrivateIpAddress()
                      + "), but could not ping VM, returning that the VM is down");
            }
            return Boolean.FALSE;
          }
        }
      }
    }

    if (s_logger.isDebugEnabled()) {
      s_logger.debug("unable to determine state of " + vm + " returning null");
    }
    return null;
  }
  @Override
  public void CheckAndDestroyTunnel(VirtualMachine vm) {
    if (!_isEnabled) {
      return;
    }

    List<UserVmVO> userVms = _userVmDao.listByAccountIdAndHostId(vm.getAccountId(), vm.getHostId());
    if (vm.getType() == VirtualMachine.Type.User) {
      if (userVms.size() > 1) {
        return;
      }

      List<DomainRouterVO> routers =
          _routerDao.findBy(vm.getAccountId(), vm.getDataCenterIdToDeployIn());
      for (DomainRouterVO router : routers) {
        if (router.getHostId() == vm.getHostId()) {
          return;
        }
      }
    } else if (vm.getType() == VirtualMachine.Type.DomainRouter && userVms.size() != 0) {
      return;
    }

    try {
      /* Now we are last one on host, destroy all tunnels of my account */
      Command cmd = new OvsDestroyTunnelCommand(vm.getAccountId(), "[]");
      Answer ans = _agentMgr.send(vm.getHostId(), cmd);
      handleDestroyTunnelAnswer(ans, vm.getHostId(), 0, vm.getAccountId());

      /* Then ask hosts have peer tunnel with me to destroy them */
      List<OvsTunnelAccountVO> peers =
          _tunnelAccountDao.listByToAccount(vm.getHostId(), vm.getAccountId());
      for (OvsTunnelAccountVO p : peers) {
        cmd = new OvsDestroyTunnelCommand(p.getAccount(), p.getPortName());
        ans = _agentMgr.send(p.getFrom(), cmd);
        handleDestroyTunnelAnswer(ans, p.getFrom(), p.getTo(), p.getAccount());
      }
    } catch (Exception e) {
      s_logger.warn(
          String.format(
              "Destroy tunnel(account:%1$s, hostId:%2$s) failed",
              vm.getAccountId(), vm.getHostId()),
          e);
    }
  }
  @DB
  protected void CheckAndCreateTunnel(VirtualMachine instance, DeployDestination dest) {
    if (!_isEnabled) {
      return;
    }

    if (instance.getType() != VirtualMachine.Type.User
        && instance.getType() != VirtualMachine.Type.DomainRouter) {
      return;
    }

    long hostId = dest.getHost().getId();
    long accountId = instance.getAccountId();
    List<UserVmVO> vms = _userVmDao.listByAccountId(accountId);
    List<DomainRouterVO> routers =
        _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn());
    List<VMInstanceVO> ins = new ArrayList<VMInstanceVO>();
    if (vms != null) {
      ins.addAll(vms);
    }
    if (routers.size() != 0) {
      ins.addAll(routers);
    }
    List<Pair<Long, Integer>> toHosts = new ArrayList<Pair<Long, Integer>>();
    List<Pair<Long, Integer>> fromHosts = new ArrayList<Pair<Long, Integer>>();
    int key;

    for (VMInstanceVO v : ins) {
      Long rh = v.getHostId();
      if (rh == null || rh.longValue() == hostId) {
        continue;
      }

      OvsTunnelAccountVO ta =
          _tunnelAccountDao.getByFromToAccount(hostId, rh.longValue(), accountId);
      if (ta == null) {
        key = getGreKey(hostId, rh.longValue(), accountId);
        if (key == -1) {
          s_logger.warn(
              String.format(
                  "Cannot get GRE key for from=%1$s to=%2$s accountId=%3$s, tunnel create failed",
                  hostId, rh.longValue(), accountId));
          continue;
        }

        Pair<Long, Integer> p = new Pair<Long, Integer>(rh, Integer.valueOf(key));
        if (!toHosts.contains(p)) {
          toHosts.add(p);
        }
      }

      ta = _tunnelAccountDao.getByFromToAccount(rh.longValue(), hostId, accountId);
      if (ta == null) {
        key = getGreKey(rh.longValue(), hostId, accountId);
        if (key == -1) {
          s_logger.warn(
              String.format(
                  "Cannot get GRE key for from=%1$s to=%2$s accountId=%3$s, tunnel create failed",
                  rh.longValue(), hostId, accountId));
          continue;
        }

        Pair<Long, Integer> p = new Pair<Long, Integer>(rh, Integer.valueOf(key));
        if (!fromHosts.contains(p)) {
          fromHosts.add(p);
        }
      }
    }

    try {
      String myIp = dest.getHost().getPrivateIpAddress();
      for (Pair<Long, Integer> i : toHosts) {
        HostVO rHost = _hostDao.findById(i.first());
        Commands cmds =
            new Commands(
                new OvsCreateTunnelCommand(
                    rHost.getPrivateIpAddress(),
                    i.second().toString(),
                    Long.valueOf(hostId),
                    i.first(),
                    accountId,
                    myIp));
        s_logger.debug("Ask host " + hostId + " to create gre tunnel to " + i.first());
        Answer[] answers = _agentMgr.send(hostId, cmds);
        handleCreateTunnelAnswer(answers);
      }

      for (Pair<Long, Integer> i : fromHosts) {
        HostVO rHost = _hostDao.findById(i.first());
        Commands cmd2s =
            new Commands(
                new OvsCreateTunnelCommand(
                    myIp,
                    i.second().toString(),
                    i.first(),
                    Long.valueOf(hostId),
                    accountId,
                    rHost.getPrivateIpAddress()));
        s_logger.debug("Ask host " + i.first() + " to create gre tunnel to " + hostId);
        Answer[] answers = _agentMgr.send(i.first(), cmd2s);
        handleCreateTunnelAnswer(answers);
      }
    } catch (Exception e) {
      s_logger.debug("Ovs Tunnel network created tunnel failed", e);
    }
  }
  protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
      s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work);
      work.setStep(Step.Done);
      return null;
    }
    s_logger.info("Stopping " + vm);
    try {
      if (work.getWorkType() == WorkType.Stop) {
        _itMgr.advanceStop(vm.getUuid(), false);
        s_logger.info("Successfully stopped " + vm);
        return null;
      } else if (work.getWorkType() == WorkType.CheckStop) {
        if ((vm.getState() != work.getPreviousState())
            || vm.getUpdated() != work.getUpdateTime()
            || vm.getHostId() == null
            || vm.getHostId().longValue() != work.getHostId()) {
          s_logger.info(
              vm
                  + " is different now.  Scheduled Host: "
                  + work.getHostId()
                  + " Current Host: "
                  + (vm.getHostId() != null ? vm.getHostId() : "none")
                  + " State: "
                  + vm.getState());
          return null;
        }
        _itMgr.advanceStop(vm.getUuid(), false);
        s_logger.info("Stop for " + vm + " was successful");
        return null;
      } else if (work.getWorkType() == WorkType.ForceStop) {
        if ((vm.getState() != work.getPreviousState())
            || vm.getUpdated() != work.getUpdateTime()
            || vm.getHostId() == null
            || vm.getHostId().longValue() != work.getHostId()) {
          s_logger.info(
              vm
                  + " is different now.  Scheduled Host: "
                  + work.getHostId()
                  + " Current Host: "
                  + (vm.getHostId() != null ? vm.getHostId() : "none")
                  + " State: "
                  + vm.getState());
          return null;
        }
        _itMgr.advanceStop(vm.getUuid(), true);
        s_logger.info("Stop for " + vm + " was successful");
        return null;
      } else {
        assert false
            : "Who decided there's other steps but didn't modify the guy who does the work?";
      }
    } catch (final ResourceUnavailableException e) {
      s_logger.debug("Agnet is not available" + e.getMessage());
    } catch (OperationTimedoutException e) {
      s_logger.debug("operation timed out: " + e.getMessage());
    }

    work.setTimesTried(work.getTimesTried() + 1);
    if (s_logger.isDebugEnabled()) {
      s_logger.debug("Stop was unsuccessful.  Rescheduling");
    }
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
  }
  protected Long restart(HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return null;
    }

    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Waiting because there's HA work being executed on an item currently.  Work Ids =[");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }

    long vmId = work.getInstanceId();

    VirtualMachine vm = _itMgr.findById(work.getInstanceId());
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
      alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
    }

    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
      host = _hostDao.findByIdIncludingRemoved(work.getHostId());
      if (host != null) {
        s_logger.debug(
            "VM "
                + vm.toString()
                + " is now no longer on host "
                + work.getHostId()
                + " as the host is removed");
        isHostRemoved = true;
      }
    }

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (!isHostRemoved) {
        if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
          s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
          return null;
        }

        Investigator investigator = null;
        for (Investigator it : investigators) {
          investigator = it;
          alive = investigator.isVmAlive(vm, host);
          s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive);
          if (alive != null) {
            break;
          }
        }

        boolean fenced = false;
        if (alive == null) {
          s_logger.debug("Fencing off VM that we don't know the state of");
          for (FenceBuilder fb : fenceBuilders) {
            Boolean result = fb.fenceOff(vm, host);
            s_logger.info("Fencer " + fb.getName() + " returned " + result);
            if (result != null && result) {
              fenced = true;
              break;
            }
          }

        } else if (!alive) {
          fenced = true;
        } else {
          s_logger.debug(
              "VM " + vm.getHostName() + " is found to be alive by " + investigator.getName());
          if (host.getStatus() == Status.Up) {
            s_logger.info(vm + " is alive and host is up. No need to restart it.");
            return null;
          } else {
            s_logger.debug("Rescheduling because the host is not up but the vm is alive");
            return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
          }
        }

        if (!fenced) {
          s_logger.debug("We were unable to fence off the VM " + vm);
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterId(),
              vm.getPodIdToDeployIn(),
              "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
              "Insufficient capacity to restart VM, name: "
                  + vm.getHostName()
                  + ", id: "
                  + vmId
                  + " which was running on host "
                  + hostDesc);
          return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
        }

        try {
          _itMgr.advanceStop(vm.getUuid(), true);
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }

        work.setStep(Step.Scheduled);
        _haDao.update(work.getId(), work);
      } else {
        s_logger.debug(
            "How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
        try {
          _itMgr.advanceStop(vm.getUuid(), true);
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
      }
    }

    vm = _itMgr.findById(vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!volumeMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      HashMap<VirtualMachineProfile.Param, Object> params =
          new HashMap<VirtualMachineProfile.Param, Object>();
      if (_haTag != null) {
        params.put(VirtualMachineProfile.Param.HaTag, _haTag);
      }

      try {
        // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner
        // as its an emergency.
        _itMgr.advanceStart(vm.getUuid(), params, null);
      } catch (InsufficientCapacityException e) {
        s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
        _itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0));
      }

      VMInstanceVO started = _instanceDao.findById(vm.getId());
      if (started != null && started.getState() == VirtualMachine.State.Running) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (final ResourceUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (OperationTimedoutException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    }
    vm = _itMgr.findById(vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
  }