public Long migrate(final HaWorkVO work) {
    long vmId = work.getInstanceId();

    long srcHostId = work.getHostId();
    try {
      work.setStep(Step.Migrating);
      _haDao.update(work.getId(), work);

      VMInstanceVO vm = _instanceDao.findById(vmId);
      // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner
      // as its an emergency.
      boolean result = false;
      try {
        _itMgr.migrateAway(vm.getUuid(), srcHostId, null);
      } catch (InsufficientServerCapacityException e) {
        s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner");
        _itMgr.migrateAway(vm.getUuid(), srcHostId, _haPlanners.get(0));
      }
      return null;
    } catch (InsufficientServerCapacityException e) {
      s_logger.warn("Insufficient capacity for migrating a VM.");
      _resourceMgr.maintenanceFailed(srcHostId);
      return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
    }
  }
  protected Long destroyVM(HaWorkVO work) {
    final VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId());
    s_logger.info("Destroying " + vm.toString());
    try {
      if (vm.getState() != State.Destroyed) {
        s_logger.info("VM is no longer in Destroyed state " + vm.toString());
        return null;
      }

      if (vm.getHostId() != null) {
        if (_itMgr.destroy(vm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) {
          s_logger.info("Successfully destroy " + vm);
          return null;
        }
        s_logger.debug("Stop for " + vm + " was unsuccessful.");
      } else {
        if (s_logger.isDebugEnabled()) {
          s_logger.debug(vm + " has already been stopped");
        }
        return null;
      }
    } catch (final AgentUnavailableException e) {
      s_logger.debug("Agnet is not available" + e.getMessage());
    } catch (OperationTimedoutException e) {
      s_logger.debug("operation timed out: " + e.getMessage());
    } catch (ConcurrentOperationException e) {
      s_logger.debug("concurrent operation: " + e.getMessage());
    }

    work.setTimesTried(work.getTimesTried() + 1);
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
  }
  @Override
  public void deployVirtualMachine(
      String reservationId, String caller, Map<VirtualMachineProfile.Param, Object> params)
      throws InsufficientCapacityException, ResourceUnavailableException {
    // grab the VM Id and destination using the reservationId.

    VMReservationVO vmReservation = _reservationDao.findByReservationId(reservationId);
    long vmId = vmReservation.getVmId();

    VMInstanceVO vm = _vmDao.findById(vmId);
    // Pass it down
    Long poolId = null;
    Map<Long, Long> storage = vmReservation.getVolumeReservation();
    if (storage != null) {
      List<Long> volIdList = new ArrayList<Long>(storage.keySet());
      if (volIdList != null && !volIdList.isEmpty()) {
        poolId = storage.get(volIdList.get(0));
      }
    }

    DataCenterDeployment reservedPlan =
        new DataCenterDeployment(
            vm.getDataCenterId(),
            vmReservation.getPodId(),
            vmReservation.getClusterId(),
            vmReservation.getHostId(),
            null,
            null);
    try {
      VMInstanceVO vmDeployed =
          _itMgr.start(
              vm,
              params,
              _userDao.findById(new Long(caller)),
              _accountDao.findById(vm.getAccountId()),
              reservedPlan);
    } catch (Exception ex) {
      // Retry the deployment without using the reservation plan
      DataCenterDeployment plan =
          new DataCenterDeployment(vm.getDataCenterId(), null, null, null, null, null);
      _itMgr.start(
          vm,
          params,
          _userDao.findById(new Long(caller)),
          _accountDao.findById(vm.getAccountId()),
          plan);
    }
  }
Example #4
0
  @Override
  public NicTO getNicTO(
      final VirtualRouter router, final Long networkId, final String broadcastUri) {
    final NicProfile nicProfile = _networkModel.getNicProfile(router, networkId, broadcastUri);

    return _itMgr.toNicTO(nicProfile, router.getHypervisorType());
  }
Example #5
0
 protected DomainRouterVO start(
     DomainRouterVO router,
     final User user,
     final Account caller,
     final Map<Param, Object> params,
     final DeploymentPlan planToDeploy)
     throws StorageUnavailableException, InsufficientCapacityException,
         ConcurrentOperationException, ResourceUnavailableException {
   s_logger.debug("Starting router " + router);
   try {
     _itMgr.advanceStart(router.getUuid(), params, planToDeploy, null);
   } catch (final OperationTimedoutException e) {
     throw new ResourceUnavailableException(
         "Starting router " + router + " failed! " + e.toString(),
         DataCenter.class,
         router.getDataCenterId());
   }
   if (router.isStopPending()) {
     s_logger.info(
         "Clear the stop pending flag of router "
             + router.getHostName()
             + " after start router successfully!");
     router.setStopPending(false);
     router = _routerDao.persist(router);
   }
   // We don't want the failure of VPN Connection affect the status of
   // router, so we try to make connection
   // only after router start successfully
   final Long vpcId = router.getVpcId();
   if (vpcId != null) {
     _s2sVpnMgr.reconnectDisconnectedVpnByVpc(vpcId);
   }
   return _routerDao.findById(router.getId());
 }
  @Override
  public boolean stopvirtualmachine(VMEntityVO vmEntityVO, String caller)
      throws ResourceUnavailableException {

    VMInstanceVO vm = _vmDao.findByUuid(vmEntityVO.getUuid());
    return _itMgr.stop(
        vm, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId()));
  }
  @Override
  public boolean destroyVirtualMachine(VMEntityVO vmEntityVO, String caller)
      throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException {

    VMInstanceVO vm = _vmDao.findByUuid(vmEntityVO.getUuid());
    return _itMgr.destroy(
        vm, _userDao.findById(new Long(caller)), _accountDao.findById(vm.getAccountId()));
  }
  @Override
  public boolean destroySecStorageVm(long vmId) {
    SecondaryStorageVmVO ssvm = _secStorageVmDao.findById(vmId);

    try {
      return _itMgr.expunge(ssvm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
    } catch (ResourceUnavailableException e) {
      s_logger.warn("Unable to expunge " + ssvm, e);
      return false;
    }
  }
 private DomainRouterVO start(
     DomainRouterVO elbVm, User user, Account caller, Map<Param, Object> params)
     throws StorageUnavailableException, InsufficientCapacityException,
         ConcurrentOperationException, ResourceUnavailableException {
   s_logger.debug("Starting ELB VM " + elbVm);
   if (_itMgr.start(elbVm, params, user, caller) != null) {
     return _routerDao.findById(elbVm.getId());
   } else {
     return null;
   }
 }
  @Override
  public boolean stopSecStorageVm(long secStorageVmId) {
    SecondaryStorageVmVO secStorageVm = _secStorageVmDao.findById(secStorageVmId);
    if (secStorageVm == null) {
      String msg =
          "Stopping secondary storage vm failed: secondary storage vm "
              + secStorageVmId
              + " no longer exists";
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(msg);
      }
      return false;
    }
    try {
      if (secStorageVm.getHostId() != null) {
        GlobalLock secStorageVmLock =
            GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVm.getId()));
        try {
          if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
            try {
              boolean result =
                  _itMgr.stop(
                      secStorageVm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
              if (result) {}

              return result;
            } finally {
              secStorageVmLock.unlock();
            }
          } else {
            String msg = "Unable to acquire secondary storage vm lock : " + secStorageVm.toString();
            s_logger.debug(msg);
            return false;
          }
        } finally {
          secStorageVmLock.releaseRef();
        }
      }

      // vm was already stopped, return true
      return true;
    } catch (ResourceUnavailableException e) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Stopping secondary storage vm "
                + secStorageVm.getHostName()
                + " faled : exception "
                + e.toString());
      }
      return false;
    }
  }
 private DomainRouterVO stop(DomainRouterVO elbVm, boolean forced, User user, Account caller)
     throws ConcurrentOperationException, ResourceUnavailableException {
   s_logger.debug("Stopping ELB vm " + elbVm);
   try {
     if (_itMgr.advanceStop(elbVm, forced, user, caller)) {
       return _routerDao.findById(elbVm.getId());
     } else {
       return null;
     }
   } catch (OperationTimedoutException e) {
     throw new CloudRuntimeException("Unable to stop " + elbVm, e);
   }
 }
  @Override
  public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {

    if (s_logger.isInfoEnabled()) {
      s_logger.info("Start configuring AgentBasedConsoleProxyManager");
    }

    _name = name;

    ComponentLocator locator = ComponentLocator.getCurrentLocator();
    ConfigurationDao configDao = locator.getDao(ConfigurationDao.class);
    if (configDao == null) {
      throw new ConfigurationException("Unable to get the configuration dao.");
    }

    Map<String, String> configs = configDao.getConfiguration("management-server", params);
    String value = configs.get("consoleproxy.url.port");
    if (value != null) {
      _consoleProxyUrlPort =
          NumbersUtil.parseInt(value, ConsoleProxyManager.DEFAULT_PROXY_URL_PORT);
    }

    value = configs.get("consoleproxy.port");
    if (value != null) {
      _consoleProxyPort = NumbersUtil.parseInt(value, ConsoleProxyManager.DEFAULT_PROXY_VNC_PORT);
    }

    value = configs.get("consoleproxy.sslEnabled");
    if (value != null && value.equalsIgnoreCase("true")) {
      _sslEnabled = true;
    }

    _instance = configs.get("instance.name");

    _consoleProxyUrlDomain = configs.get("consoleproxy.url.domain");

    _listener = new ConsoleProxyListener(this);
    _agentMgr.registerForHostEvents(_listener, true, true, false);

    _itMgr.registerGuru(VirtualMachine.Type.ConsoleProxy, this);

    if (s_logger.isInfoEnabled()) {
      s_logger.info(
          "AgentBasedConsoleProxyManager has been configured. SSL enabled: " + _sslEnabled);
    }
    return true;
  }
Example #13
0
  @Override
  public VirtualRouter destroyRouter(
      final long routerId, final Account caller, final Long callerUserId)
      throws ResourceUnavailableException, ConcurrentOperationException {

    if (s_logger.isDebugEnabled()) {
      s_logger.debug("Attempting to destroy router " + routerId);
    }

    final DomainRouterVO router = _routerDao.findById(routerId);
    if (router == null) {
      return null;
    }

    _accountMgr.checkAccess(caller, null, true, router);

    _itMgr.expunge(router.getUuid());
    _routerDao.remove(router.getId());
    return router;
  }
 @Override
 public SecondaryStorageVmVO startSecStorageVm(long secStorageVmId) {
   try {
     SecondaryStorageVmVO secStorageVm = _secStorageVmDao.findById(secStorageVmId);
     Account systemAcct = _accountMgr.getSystemAccount();
     User systemUser = _accountMgr.getSystemUser();
     return _itMgr.start(secStorageVm, null, systemUser, systemAcct);
   } catch (StorageUnavailableException e) {
     s_logger.warn("Exception while trying to start secondary storage vm", e);
     return null;
   } catch (InsufficientCapacityException e) {
     s_logger.warn("Exception while trying to start secondary storage vm", e);
     return null;
   } catch (ResourceUnavailableException e) {
     s_logger.warn("Exception while trying to start secondary storage vm", e);
     return null;
   } catch (Exception e) {
     s_logger.warn("Exception while trying to start secondary storage vm", e);
     return null;
   }
 }
Example #15
0
  @Override
  public void reallocateRouterNetworks(
      final RouterDeploymentDefinition routerDeploymentDefinition,
      final VirtualRouter router,
      final VMTemplateVO template,
      final HypervisorType hType)
      throws ConcurrentOperationException, InsufficientCapacityException {
    final ServiceOfferingVO routerOffering =
        _serviceOfferingDao.findById(routerDeploymentDefinition.getServiceOfferingId());

    final LinkedHashMap<Network, List<? extends NicProfile>> networks =
        configureDefaultNics(routerDeploymentDefinition);

    _itMgr.allocate(
        router.getInstanceName(),
        template,
        routerOffering,
        networks,
        routerDeploymentDefinition.getPlan(),
        hType);
  }
  void garbageCollectUnusedElbVms() {
    List<DomainRouterVO> unusedElbVms = _elbVmMapDao.listUnusedElbVms();
    if (unusedElbVms != null && unusedElbVms.size() > 0)
      s_logger.info("Found " + unusedElbVms.size() + " unused ELB vms");
    Set<Long> currentGcCandidates = new HashSet<Long>();
    for (DomainRouterVO elbVm : unusedElbVms) {
      currentGcCandidates.add(elbVm.getId());
    }
    _gcCandidateElbVmIds.retainAll(currentGcCandidates);
    currentGcCandidates.removeAll(_gcCandidateElbVmIds);
    User user = _accountService.getSystemUser();
    for (Long elbVmId : _gcCandidateElbVmIds) {
      DomainRouterVO elbVm = _routerDao.findById(elbVmId);
      boolean gceed = false;

      try {
        s_logger.info("Attempting to stop ELB VM: " + elbVm);
        stop(elbVm, true, user, _systemAcct);
        gceed = true;
      } catch (ConcurrentOperationException e) {
        s_logger.warn("Unable to stop unused ELB vm " + elbVm + " due to ", e);
      } catch (ResourceUnavailableException e) {
        s_logger.warn("Unable to stop unused ELB vm " + elbVm + " due to ", e);
        continue;
      }
      if (gceed) {
        try {
          s_logger.info("Attempting to destroy ELB VM: " + elbVm);
          _itMgr.expunge(elbVm, user, _systemAcct);
        } catch (ResourceUnavailableException e) {
          s_logger.warn("Unable to destroy unused ELB vm " + elbVm + " due to ", e);
          gceed = false;
        }
      }
      if (!gceed) {
        currentGcCandidates.add(elbVm.getId());
      }
    }
    _gcCandidateElbVmIds = currentGcCandidates;
  }
  public Long migrate(final HaWorkVO work) {
    long vmId = work.getInstanceId();

    long srcHostId = work.getHostId();
    try {
      work.setStep(Step.Migrating);
      _haDao.update(work.getId(), work);

      if (!_itMgr.migrateAway(work.getType(), vmId, srcHostId)) {
        s_logger.warn("Unable to migrate vm from " + srcHostId);
        _resourceMgr.maintenanceFailed(srcHostId);
      }
      return null;
    } catch (InsufficientServerCapacityException e) {
      s_logger.warn("Insufficient capacity for migrating a VM.");
      _resourceMgr.maintenanceFailed(srcHostId);
      return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
    } catch (VirtualMachineMigrationException e) {
      s_logger.warn("Looks like VM is still starting, we need to retry migrating the VM later.");
      _resourceMgr.maintenanceFailed(srcHostId);
      return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
    }
  }
  @Override
  public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
    Long hostId = vm.getHostId();
    if (hostId == null) {
      try {
        s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm);
        _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
      } catch (ResourceUnavailableException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (OperationTimedoutException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (ConcurrentOperationException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      }
      return;
    }

    if (vm.getHypervisorType() == HypervisorType.VMware) {
      s_logger.info("Skip HA for VMware VM " + vm.getInstanceName());
      return;
    }

    if (!investigate) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
      }

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_SSVM;
      }

      if (!(_forceHA || vm.isHaEnabled())) {
        String hostDesc =
            "id:"
                + vm.getHostId()
                + ", availability zone id:"
                + vm.getDataCenterIdToDeployIn()
                + ", pod id:"
                + vm.getPodIdToDeployIn();
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterIdToDeployIn(),
            vm.getPodIdToDeployIn(),
            "VM (name: "
                + vm.getHostName()
                + ", id: "
                + vm.getId()
                + ") stopped unexpectedly on host "
                + hostDesc,
            "Virtual Machine "
                + vm.getHostName()
                + " (id: "
                + vm.getId()
                + ") running on host ["
                + vm.getHostId()
                + "] stopped unexpectedly.");

        if (s_logger.isDebugEnabled()) {
          s_logger.debug("VM is not HA enabled so we're done.");
        }
      }

      try {
        _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
      } catch (ResourceUnavailableException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (OperationTimedoutException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (ConcurrentOperationException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      }
    }

    List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
    int maxRetries = 0;
    for (HaWorkVO item : items) {
      if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
        maxRetries = item.getTimesTried();
        break;
      }
    }

    HaWorkVO work =
        new HaWorkVO(
            vm.getId(),
            vm.getType(),
            WorkType.HA,
            investigate ? Step.Investigating : Step.Scheduled,
            hostId,
            vm.getState(),
            maxRetries + 1,
            vm.getUpdated());
    _haDao.persist(work);

    if (s_logger.isInfoEnabled()) {
      s_logger.info("Schedule vm for HA:  " + vm);
    }

    wakeupWorkers();
  }
  @Override
  public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
    if (s_logger.isInfoEnabled()) {
      s_logger.info("Start configuring secondary storage vm manager : " + name);
    }

    _name = name;

    ComponentLocator locator = ComponentLocator.getCurrentLocator();
    ConfigurationDao configDao = locator.getDao(ConfigurationDao.class);
    if (configDao == null) {
      throw new ConfigurationException("Unable to get the configuration dao.");
    }

    Map<String, String> configs = configDao.getConfiguration("management-server", params);

    _secStorageVmRamSize =
        NumbersUtil.parseInt(configs.get("secstorage.vm.ram.size"), DEFAULT_SS_VM_RAMSIZE);
    _secStorageVmCpuMHz =
        NumbersUtil.parseInt(configs.get("secstorage.vm.cpu.mhz"), DEFAULT_SS_VM_CPUMHZ);
    String useServiceVM = configDao.getValue("secondary.storage.vm");
    boolean _useServiceVM = false;
    if ("true".equalsIgnoreCase(useServiceVM)) {
      _useServiceVM = true;
    }

    String sslcopy = configDao.getValue("secstorage.encrypt.copy");
    if ("true".equalsIgnoreCase(sslcopy)) {
      _useSSlCopy = true;
    }

    _allowedInternalSites = configDao.getValue("secstorage.allowed.internal.sites");

    String value = configs.get("secstorage.capacityscan.interval");
    _capacityScanInterval = NumbersUtil.parseLong(value, DEFAULT_CAPACITY_SCAN_INTERVAL);

    _instance = configs.get("instance.name");
    if (_instance == null) {
      _instance = "DEFAULT";
    }

    Map<String, String> agentMgrConfigs = configDao.getConfiguration("AgentManager", params);
    _mgmt_host = agentMgrConfigs.get("host");
    if (_mgmt_host == null) {
      s_logger.warn(
          "Critical warning! Please configure your management server host address right after you have started your management server and then restart it, otherwise you won't have access to secondary storage");
    }

    value = agentMgrConfigs.get("port");
    _mgmt_port = NumbersUtil.parseInt(value, 8250);

    _listener = new SecondaryStorageListener(this, _agentMgr);
    _agentMgr.registerForHostEvents(_listener, true, false, true);

    _itMgr.registerGuru(VirtualMachine.Type.SecondaryStorageVm, this);

    _useLocalStorage = Boolean.parseBoolean(configs.get(Config.SystemVMUseLocalStorage.key()));
    _serviceOffering =
        new ServiceOfferingVO(
            "System Offering For Secondary Storage VM",
            1,
            _secStorageVmRamSize,
            _secStorageVmCpuMHz,
            null,
            null,
            false,
            null,
            _useLocalStorage,
            true,
            null,
            true,
            VirtualMachine.Type.SecondaryStorageVm,
            true);
    _serviceOffering.setUniqueName("Cloud.com-SecondaryStorage");
    _serviceOffering = _offeringDao.persistSystemServiceOffering(_serviceOffering);

    // this can sometimes happen, if DB is manually or programmatically manipulated
    if (_serviceOffering == null) {
      String msg =
          "Data integrity problem : System Offering For Secondary Storage VM has been removed?";
      s_logger.error(msg);
      throw new ConfigurationException(msg);
    }

    if (_useServiceVM) {
      _loadScanner = new SystemVmLoadScanner<Long>(this);
      _loadScanner.initScan(STARTUP_DELAY, _capacityScanInterval);
    }
    if (s_logger.isInfoEnabled()) {
      s_logger.info("Secondary storage vm Manager is configured.");
    }
    return true;
  }
  @Override
  public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
    final Map<String, String> configs = _configDao.getConfiguration("AgentManager", params);
    _systemAcct = _accountService.getSystemAccount();
    _instance = configs.get("instance.name");
    if (_instance == null) {
      _instance = "VM";
    }
    _mgmtCidr = _configDao.getValue(Config.ManagementNetwork.key());
    _mgmtHost = _configDao.getValue(Config.ManagementHostIPAdr.key());

    boolean useLocalStorage =
        Boolean.parseBoolean(configs.get(Config.SystemVMUseLocalStorage.key()));

    _elasticLbVmRamSize =
        NumbersUtil.parseInt(
            configs.get(Config.ElasticLoadBalancerVmMemory.key()), DEFAULT_ELB_VM_RAMSIZE);
    _elasticLbvmCpuMHz =
        NumbersUtil.parseInt(
            configs.get(Config.ElasticLoadBalancerVmCpuMhz.key()), DEFAULT_ELB_VM_CPU_MHZ);
    _elasticLbvmNumCpu =
        NumbersUtil.parseInt(configs.get(Config.ElasticLoadBalancerVmNumVcpu.key()), 1);
    _elasticLbVmOffering =
        new ServiceOfferingVO(
            "System Offering For Elastic LB VM",
            _elasticLbvmNumCpu,
            _elasticLbVmRamSize,
            _elasticLbvmCpuMHz,
            0,
            0,
            true,
            null,
            useLocalStorage,
            true,
            null,
            true,
            VirtualMachine.Type.ElasticLoadBalancerVm,
            true);
    _elasticLbVmOffering.setUniqueName(ServiceOffering.elbVmDefaultOffUniqueName);
    _elasticLbVmOffering = _serviceOfferingDao.persistSystemServiceOffering(_elasticLbVmOffering);

    String enabled = _configDao.getValue(Config.ElasticLoadBalancerEnabled.key());
    _enabled = (enabled == null) ? false : Boolean.parseBoolean(enabled);
    s_logger.info("Elastic Load balancer enabled: " + _enabled);
    if (_enabled) {
      String traffType = _configDao.getValue(Config.ElasticLoadBalancerNetwork.key());
      if ("guest".equalsIgnoreCase(traffType)) {
        _frontendTrafficType = TrafficType.Guest;
      } else if ("public".equalsIgnoreCase(traffType)) {
        _frontendTrafficType = TrafficType.Public;
      } else
        throw new ConfigurationException(
            "ELB: Traffic type for front end of load balancer has to be guest or public; found : "
                + traffType);
      s_logger.info("ELB: Elastic Load Balancer: will balance on " + traffType);
      int gcIntervalMinutes =
          NumbersUtil.parseInt(configs.get(Config.ElasticLoadBalancerVmGcInterval.key()), 5);
      if (gcIntervalMinutes < 5) gcIntervalMinutes = 5;
      s_logger.info(
          "ELB: Elastic Load Balancer: scheduling GC to run every "
              + gcIntervalMinutes
              + " minutes");
      _gcThreadPool = Executors.newScheduledThreadPool(1, new NamedThreadFactory("ELBVM-GC"));
      _gcThreadPool.scheduleAtFixedRate(
          new CleanupThread(), gcIntervalMinutes, gcIntervalMinutes, TimeUnit.MINUTES);
      _itMgr.registerGuru(VirtualMachine.Type.ElasticLoadBalancerVm, this);
    }

    return true;
  }
  public DomainRouterVO deployELBVm(
      Network guestNetwork, DeployDestination dest, Account owner, Map<Param, Object> params)
      throws ConcurrentOperationException, ResourceUnavailableException,
          InsufficientCapacityException {
    long dcId = dest.getDataCenter().getId();

    // lock guest network
    Long guestNetworkId = guestNetwork.getId();
    guestNetwork = _networkDao.acquireInLockTable(guestNetworkId);

    if (guestNetwork == null) {
      throw new ConcurrentOperationException("Unable to acquire network lock: " + guestNetworkId);
    }

    try {

      if (_networkModel.isNetworkSystem(guestNetwork)
          || guestNetwork.getGuestType() == Network.GuestType.Shared) {
        owner = _accountService.getSystemAccount();
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Starting a ELB vm for network configurations: " + guestNetwork + " in " + dest);
      }
      assert guestNetwork.getState() == Network.State.Implemented
              || guestNetwork.getState() == Network.State.Setup
              || guestNetwork.getState() == Network.State.Implementing
          : "Network is not yet fully implemented: " + guestNetwork;

      DataCenterDeployment plan = null;
      DomainRouterVO elbVm = null;

      plan = new DataCenterDeployment(dcId, dest.getPod().getId(), null, null, null, null);

      if (elbVm == null) {
        long id = _routerDao.getNextInSequence(Long.class, "id");
        if (s_logger.isDebugEnabled()) {
          s_logger.debug("Creating the ELB vm " + id);
        }

        List<? extends NetworkOffering> offerings =
            _networkModel.getSystemAccountNetworkOfferings(NetworkOffering.SystemControlNetwork);
        NetworkOffering controlOffering = offerings.get(0);
        NetworkVO controlConfig =
            _networkMgr.setupNetwork(_systemAcct, controlOffering, plan, null, null, false).get(0);

        List<Pair<NetworkVO, NicProfile>> networks = new ArrayList<Pair<NetworkVO, NicProfile>>(2);
        NicProfile guestNic = new NicProfile();
        guestNic.setDefaultNic(true);
        networks.add(new Pair<NetworkVO, NicProfile>(controlConfig, null));
        networks.add(new Pair<NetworkVO, NicProfile>((NetworkVO) guestNetwork, guestNic));

        VMTemplateVO template = _templateDao.findSystemVMTemplate(dcId);

        String typeString = "ElasticLoadBalancerVm";
        Long physicalNetworkId = _networkModel.getPhysicalNetworkId(guestNetwork);
        PhysicalNetworkServiceProvider provider =
            _physicalProviderDao.findByServiceProvider(physicalNetworkId, typeString);
        if (provider == null) {
          throw new CloudRuntimeException(
              "Cannot find service provider "
                  + typeString
                  + " in physical network "
                  + physicalNetworkId);
        }
        VirtualRouterProvider vrProvider =
            _vrProviderDao.findByNspIdAndType(
                provider.getId(), VirtualRouterProviderType.ElasticLoadBalancerVm);
        if (vrProvider == null) {
          throw new CloudRuntimeException(
              "Cannot find virtual router provider "
                  + typeString
                  + " as service provider "
                  + provider.getId());
        }

        elbVm =
            new DomainRouterVO(
                id,
                _elasticLbVmOffering.getId(),
                vrProvider.getId(),
                VirtualMachineName.getSystemVmName(id, _instance, _elbVmNamePrefix),
                template.getId(),
                template.getHypervisorType(),
                template.getGuestOSId(),
                owner.getDomainId(),
                owner.getId(),
                false,
                0,
                false,
                RedundantState.UNKNOWN,
                _elasticLbVmOffering.getOfferHA(),
                false,
                VirtualMachine.Type.ElasticLoadBalancerVm,
                null);
        elbVm.setRole(Role.LB);
        elbVm = _itMgr.allocate(elbVm, template, _elasticLbVmOffering, networks, plan, null, owner);
        // TODO: create usage stats
      }

      State state = elbVm.getState();
      if (state != State.Running) {
        elbVm =
            this.start(
                elbVm, _accountService.getSystemUser(), _accountService.getSystemAccount(), params);
      }

      return elbVm;
    } finally {
      _networkDao.releaseFromLockTable(guestNetworkId);
    }
  }
  protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException {
    VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId());
    if (vm == null) {
      s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work);
      work.setStep(Step.Done);
      return null;
    }
    s_logger.info("Stopping " + vm);
    try {
      if (work.getWorkType() == WorkType.Stop) {
        if (_itMgr.advanceStop(
            vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) {
          s_logger.info("Successfully stopped " + vm);
          return null;
        }
      } else if (work.getWorkType() == WorkType.CheckStop) {
        if ((vm.getState() != work.getPreviousState())
            || vm.getUpdated() != work.getUpdateTime()
            || vm.getHostId() == null
            || vm.getHostId().longValue() != work.getHostId()) {
          s_logger.info(
              vm
                  + " is different now.  Scheduled Host: "
                  + work.getHostId()
                  + " Current Host: "
                  + (vm.getHostId() != null ? vm.getHostId() : "none")
                  + " State: "
                  + vm.getState());
          return null;
        }
        if (_itMgr.advanceStop(
            vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) {
          s_logger.info("Stop for " + vm + " was successful");
          return null;
        }
      } else if (work.getWorkType() == WorkType.ForceStop) {
        if ((vm.getState() != work.getPreviousState())
            || vm.getUpdated() != work.getUpdateTime()
            || vm.getHostId() == null
            || vm.getHostId().longValue() != work.getHostId()) {
          s_logger.info(
              vm
                  + " is different now.  Scheduled Host: "
                  + work.getHostId()
                  + " Current Host: "
                  + (vm.getHostId() != null ? vm.getHostId() : "none")
                  + " State: "
                  + vm.getState());
          return null;
        }
        if (_itMgr.advanceStop(
            vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) {
          s_logger.info("Stop for " + vm + " was successful");
          return null;
        }
      } else {
        assert false
            : "Who decided there's other steps but didn't modify the guy who does the work?";
      }
    } catch (final ResourceUnavailableException e) {
      s_logger.debug("Agnet is not available" + e.getMessage());
    } catch (OperationTimedoutException e) {
      s_logger.debug("operation timed out: " + e.getMessage());
    }

    work.setTimesTried(work.getTimesTried() + 1);
    if (s_logger.isDebugEnabled()) {
      s_logger.debug("Stop was unsuccessful.  Rescheduling");
    }
    return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
  }
  protected Long restart(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
    if (mgr == null) {
      s_logger.warn(
          "Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId);
      return null;
    }

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm.toString());
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm.toString()
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    final HostVO host = _hostDao.findById(work.getHostId());

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    }

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
        s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
        if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) {
          _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null);
        }
        return null;
      }

      Enumeration<Investigator> en = _investigators.enumeration();
      Investigator investigator = null;
      while (en.hasMoreElements()) {
        investigator = en.nextElement();
        alive = investigator.isVmAlive(vm, host);
        if (alive != null) {
          s_logger.debug(
              investigator.getName() + " found VM " + vm.getName() + "to be alive? " + alive);
          break;
        }
      }
      if (alive != null && alive) {
        s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName());
        if (host.getStatus() == Status.Up) {
          compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false);
          return null;
        } else {
          s_logger.debug("Rescheduling because the host is not up but the vm is alive");
          return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
        }
      }

      boolean fenced = false;
      if (alive == null || !alive) {
        fenced = true;
        s_logger.debug("Fencing off VM that we don't know the state of");
        Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
        while (enfb.hasMoreElements()) {
          final FenceBuilder fb = enfb.nextElement();
          Boolean result = fb.fenceOff(vm, host);
          if (result != null && !result) {
            fenced = false;
          }
        }
      }

      if (alive == null && !fenced) {
        s_logger.debug("We were unable to fence off the VM " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
            "Insufficient capacity to restart VM, name: "
                + vm.getName()
                + ", id: "
                + vmId
                + " which was running on host "
                + hostDesc);
        return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
      }

      mgr.completeStopCommand(vm);

      work.setStep(Step.Scheduled);
      _haDao.update(work.getId(), work);
    }

    // send an alert for VMs that stop unexpectedly
    _alertMgr.sendAlert(
        alertType,
        vm.getDataCenterId(),
        vm.getPodId(),
        "VM (name: "
            + vm.getName()
            + ", id: "
            + vmId
            + ") stopped unexpectedly on host "
            + hostDesc,
        "Virtual Machine "
            + vm.getName()
            + " (id: "
            + vm.getId()
            + ") running on host ["
            + hostDesc
            + "] stopped unexpectedly.");

    vm = mgr.get(vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started = mgr.start(vm.getId(), 0);
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
      vm = mgr.get(vm.getId());
      work.setUpdateTime(vm.getUpdated());
      work.setPreviousState(vm.getState());
      return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (final StorageUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ExecutionException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    }
  }
  protected Map<String, Object> createSecStorageVmInstance(
      long dataCenterId, SecondaryStorageVm.Role role) {
    HostVO secHost = _hostDao.findSecondaryStorageHost(dataCenterId);
    if (secHost == null) {
      String msg =
          "No secondary storage available in zone "
              + dataCenterId
              + ", cannot create secondary storage vm";
      s_logger.warn(msg);
      throw new CloudRuntimeException(msg);
    }

    long id = _secStorageVmDao.getNextInSequence(Long.class, "id");
    String name = VirtualMachineName.getSystemVmName(id, _instance, "s").intern();
    Account systemAcct = _accountMgr.getSystemAccount();

    DataCenterDeployment plan = new DataCenterDeployment(dataCenterId);
    DataCenter dc = _dcDao.findById(plan.getDataCenterId());

    List<NetworkOfferingVO> defaultOffering =
        _networkMgr.getSystemAccountNetworkOfferings(NetworkOfferingVO.SystemPublicNetwork);

    if (dc.getNetworkType() == NetworkType.Basic || dc.isSecurityGroupEnabled()) {
      defaultOffering =
          _networkMgr.getSystemAccountNetworkOfferings(NetworkOfferingVO.SystemGuestNetwork);
    }

    List<NetworkOfferingVO> offerings =
        _networkMgr.getSystemAccountNetworkOfferings(
            NetworkOfferingVO.SystemControlNetwork, NetworkOfferingVO.SystemManagementNetwork);
    List<Pair<NetworkVO, NicProfile>> networks =
        new ArrayList<Pair<NetworkVO, NicProfile>>(offerings.size() + 1);
    NicProfile defaultNic = new NicProfile();
    defaultNic.setDefaultNic(true);
    defaultNic.setDeviceId(2);
    try {
      networks.add(
          new Pair<NetworkVO, NicProfile>(
              _networkMgr
                  .setupNetwork(systemAcct, defaultOffering.get(0), plan, null, null, false, false)
                  .get(0),
              defaultNic));
      for (NetworkOfferingVO offering : offerings) {
        networks.add(
            new Pair<NetworkVO, NicProfile>(
                _networkMgr
                    .setupNetwork(systemAcct, offering, plan, null, null, false, false)
                    .get(0),
                null));
      }
    } catch (ConcurrentOperationException e) {
      s_logger.info("Unable to setup due to concurrent operation. " + e);
      return new HashMap<String, Object>();
    }

    VMTemplateVO template = _templateDao.findSystemVMTemplate(dataCenterId);
    if (template == null) {
      s_logger.debug("Can't find a template to start");
      throw new CloudRuntimeException("Insufficient capacity exception");
    }

    SecondaryStorageVmVO secStorageVm =
        new SecondaryStorageVmVO(
            id,
            _serviceOffering.getId(),
            name,
            template.getId(),
            template.getHypervisorType(),
            template.getGuestOSId(),
            dataCenterId,
            systemAcct.getDomainId(),
            systemAcct.getId(),
            role,
            _serviceOffering.getOfferHA());
    try {
      secStorageVm =
          _itMgr.allocate(
              secStorageVm, template, _serviceOffering, networks, plan, null, systemAcct);
    } catch (InsufficientCapacityException e) {
      s_logger.warn("InsufficientCapacity", e);
      throw new CloudRuntimeException("Insufficient capacity exception", e);
    }

    Map<String, Object> context = new HashMap<String, Object>();
    context.put("secStorageVmId", secStorageVm.getId());
    return context;
  }
  /**
   * compareState does as its name suggests and compares the states between management server and
   * agent. It returns whether something should be cleaned up
   */
  protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync) {
    State agentState = info.state;
    final String agentName = info.name;
    final State serverState = vm.getState();
    final String serverName = vm.getName();

    Command command = null;

    if (s_logger.isDebugEnabled()) {
      s_logger.debug(
          "VM "
              + serverName
              + ": server state = "
              + serverState.toString()
              + " and agent state = "
              + agentState.toString());
    }

    if (agentState == State.Error) {
      agentState = State.Stopped;

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      }

      HostPodVO podVO = _podDao.findById(vm.getPodId());
      DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
      HostVO hostVO = _hostDao.findById(vm.getHostId());

      String hostDesc =
          "name: "
              + hostVO.getName()
              + " (id:"
              + hostVO.getId()
              + "), availability zone: "
              + dcVO.getName()
              + ", pod: "
              + podVO.getName();
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "VM (name: "
              + vm.getName()
              + ", id: "
              + vm.getId()
              + ") stopped on host "
              + hostDesc
              + " due to storage failure",
          "Virtual Machine "
              + vm.getName()
              + " (id: "
              + vm.getId()
              + ") running on host ["
              + vm.getHostId()
              + "] stopped due to storage failure.");
    }

    if (serverState == State.Migrating) {
      s_logger.debug("Skipping vm in migrating state: " + vm.toString());
      return null;
    }

    if (agentState == serverState) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Both states are " + agentState.toString() + " for " + serverName);
      }
      assert (agentState == State.Stopped || agentState == State.Running)
          : "If the states we send up is changed, this must be changed.";
      _itMgr.stateTransitTo(
          vm,
          agentState == State.Stopped
              ? VirtualMachine.Event.AgentReportStopped
              : VirtualMachine.Event.AgentReportRunning,
          vm.getHostId());
      if (agentState == State.Stopped) {
        s_logger.debug("State matches but the agent said stopped so let's send a cleanup anyways.");
        return info.mgr.cleanup(vm, agentName);
      }
      return null;
    }

    if (agentState == State.Stopped) {
      // This state means the VM on the agent was detected previously
      // and now is gone.  This is slightly different than if the VM
      // was never completed but we still send down a Stop Command
      // to ensure there's cleanup.
      if (serverState == State.Running) {
        // Our records showed that it should be running so let's restart it.
        vm = info.mgr.get(vm.getId());
        scheduleRestart(vm, false);
        command = info.mgr.cleanup(vm, agentName);
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM is in stopping state on full sync.  Updating the status to stopped");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("Ignoring VM in stopping mode: " + vm.getName());
        }
      } else if (serverState == State.Starting) {
        s_logger.debug("Ignoring VM in starting mode: " + vm.getName());
      } else {
        s_logger.debug("Sending cleanup to a stopped vm: " + agentName);
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null);
        command = info.mgr.cleanup(vm, agentName);
      }
    } else if (agentState == State.Running) {
      if (serverState == State.Starting) {
        if (fullSync) {
          s_logger.debug("VM state is starting on full sync so updating it to running");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStartCommand(vm);
        }
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM state is in stopping on fullsync so resend stop.");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("VM is in stopping state so no action.");
        }
      } else if (serverState == State.Destroyed
          || serverState == State.Stopped
          || serverState == State.Expunging) {
        s_logger.debug("VM state is in stopped so stopping it on the agent");
        vm = info.mgr.get(vm.getId());
        command = info.mgr.cleanup(vm, agentName);
      } else {
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId());
      }
    } /*else if (agentState == State.Unknown) {
          if (serverState == State.Running) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Starting) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Stopping) {
              if (fullSync) {
                  s_logger.debug("VM state is stopping in full sync.  Resending stop");
                  command = info.handler.cleanup(vm, agentName);
              }
          }
      }*/
    return command;
  }
  protected Long restart(HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return null;
    }

    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Waiting because there's HA work being executed on an item currently.  Work Ids =[");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }

    long vmId = work.getInstanceId();

    VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId());
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_SSVM;
    }

    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
      host = _hostDao.findByIdIncludingRemoved(work.getHostId());
      if (host != null) {
        s_logger.debug(
            "VM "
                + vm.toString()
                + " is now no longer on host "
                + work.getHostId()
                + " as the host is removed");
        isHostRemoved = true;
      }
    }

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (!isHostRemoved) {
        if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
          s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
          return null;
        }

        Enumeration<Investigator> en = _investigators.enumeration();
        Investigator investigator = null;
        while (en.hasMoreElements()) {
          investigator = en.nextElement();
          alive = investigator.isVmAlive(vm, host);
          s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive);
          if (alive != null) {
            break;
          }
        }
        boolean fenced = false;
        if (alive == null) {
          s_logger.debug("Fencing off VM that we don't know the state of");
          Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
          while (enfb.hasMoreElements()) {
            FenceBuilder fb = enfb.nextElement();
            Boolean result = fb.fenceOff(vm, host);
            s_logger.info("Fencer " + fb.getName() + " returned " + result);
            if (result != null && result) {
              fenced = true;
              break;
            }
          }
        } else if (!alive) {
          fenced = true;
        } else {
          s_logger.debug(
              "VM " + vm.getHostName() + " is found to be alive by " + investigator.getName());
          if (host.getStatus() == Status.Up) {
            s_logger.info(vm + " is alive and host is up. No need to restart it.");
            return null;
          } else {
            s_logger.debug("Rescheduling because the host is not up but the vm is alive");
            return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
          }
        }

        if (!fenced) {
          s_logger.debug("We were unable to fence off the VM " + vm);
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterIdToDeployIn(),
              vm.getPodIdToDeployIn(),
              "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
              "Insufficient capacity to restart VM, name: "
                  + vm.getHostName()
                  + ", id: "
                  + vmId
                  + " which was running on host "
                  + hostDesc);
          return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
        }

        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }

        work.setStep(Step.Scheduled);
        _haDao.update(work.getId(), work);
      } else {
        s_logger.debug(
            "How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
      }
    }

    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started =
          _itMgr.advanceStart(
              vm,
              new HashMap<VirtualMachineProfile.Param, Object>(),
              _accountMgr.getSystemUser(),
              _accountMgr.getSystemAccount());
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (final ResourceUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (OperationTimedoutException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    }
    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
  }
  public Long migrate(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null || vm.getRemoved() != null) {
      s_logger.debug("Unable to find the vm " + vmId);
      return null;
    }

    s_logger.info("Migrating vm: " + vm.toString());
    if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
      s_logger.info("VM is not longer running on the current hostId");
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
    }

    HostVO fromHost = _hostDao.findById(vm.getHostId());
    String fromHostName = ((fromHost == null) ? "unknown" : fromHost.getName());
    HostVO toHost = null;
    if (work.getStep() == Step.Scheduled) {
      if (vm.getState() != State.Running) {
        s_logger.info(
            "VM's state is not ready for migration. "
                + vm.toString()
                + " State is "
                + vm.getState().toString());
        return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
      }

      DataCenterVO dcVO = _dcDao.findById(fromHost.getDataCenterId());
      HostPodVO podVO = _podDao.findById(fromHost.getPodId());

      try {
        toHost = mgr.prepareForMigration(vm);
        if (toHost == null) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug("Unable to find a host for migrating vm " + vmId);
          }
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterId(),
              vm.getPodId(),
              "Unable to migrate vm "
                  + vm.getName()
                  + " from host "
                  + fromHostName
                  + " in zone "
                  + dcVO.getName()
                  + " and pod "
                  + podVO.getName(),
              "Unable to find a suitable host");
        }
      } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to mgirate due to insufficient capacity " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Insufficient capacity");
      } catch (final StorageUnavailableException e) {
        s_logger.warn("Storage is unavailable: " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Storage is gone.");
      }

      if (toHost == null) {
        _agentMgr.maintenanceFailed(vm.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Migrating from " + work.getHostId() + " to " + toHost.getId());
      }
      work.setStep(Step.Migrating);
      work.setHostId(toHost.getId());
      _haDao.update(work.getId(), work);
    }

    if (work.getStep() == Step.Migrating) {
      vm = mgr.get(vmId); // let's see if anything has changed.
      boolean migrated = false;
      if (vm == null
          || vm.getRemoved() != null
          || vm.getHostId() == null
          || !_itMgr.stateTransitTo(vm, Event.MigrationRequested, vm.getHostId())) {
        s_logger.info("Migration cancelled because state has changed: " + vm.toString());
      } else {
        try {
          boolean isWindows =
              _guestOSCategoryDao
                  .findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId())
                  .getName()
                  .equalsIgnoreCase("Windows");
          MigrateCommand cmd =
              new MigrateCommand(vm.getInstanceName(), toHost.getPrivateIpAddress(), isWindows);
          Answer answer = _agentMgr.send(fromHost.getId(), cmd);
          if (answer != null && answer.getResult()) {
            migrated = true;
            _storageMgr.unshare(vm, fromHost);
            work.setStep(Step.Investigating);
            _haDao.update(work.getId(), work);
          }
        } catch (final AgentUnavailableException e) {
          s_logger.debug("host became unavailable");
        } catch (final OperationTimedoutException e) {
          s_logger.debug("operation timed out");
          if (e.isActive()) {
            scheduleRestart(vm, true);
          }
        }
      }

      if (!migrated) {
        s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm.toString());

        DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
        HostPodVO podVO = _podDao.findById(vm.getPodId());
        _alertMgr.sendAlert(
            alertType,
            fromHost.getDataCenterId(),
            fromHost.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migrate Command failed.  Please check logs.");

        _itMgr.stateTransitTo(vm, Event.MigrationFailedOnSource, toHost.getId());
        _agentMgr.maintenanceFailed(vm.getHostId());

        Command cleanup = mgr.cleanup(vm, null);
        _agentMgr.easySend(toHost.getId(), cleanup);
        _storageMgr.unshare(vm, toHost);

        return null;
      }
    }

    if (toHost == null) {
      toHost = _hostDao.findById(work.getHostId());
    }
    DataCenterVO dcVO = _dcDao.findById(toHost.getDataCenterId());
    HostPodVO podVO = _podDao.findById(toHost.getPodId());

    try {
      if (!mgr.completeMigration(vm, toHost)) {
        _alertMgr.sendAlert(
            alertType,
            toHost.getDataCenterId(),
            toHost.getPodId(),
            "Unable to migrate "
                + vmId
                + " to host "
                + toHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migration not completed");
        s_logger.warn("Unable to complete migration: " + vm.toString());
      } else {
        s_logger.info("Migration is complete: " + vm.toString());
      }
      return null;
    } catch (final AgentUnavailableException e) {
      s_logger.warn("Agent is unavailable for " + vm.toString());
    } catch (final OperationTimedoutException e) {
      s_logger.warn("Operation timed outfor " + vm.toString());
    }
    _itMgr.stateTransitTo(vm, Event.MigrationFailedOnDest, toHost.getId());
    return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
  }