Ejemplo n.º 1
0
  @Override
  public void handleSingleWorkingRedundantRouter(
      final List<? extends VirtualRouter> connectedRouters,
      final List<? extends VirtualRouter> disconnectedRouters,
      final String reason)
      throws ResourceUnavailableException {
    if (connectedRouters.isEmpty() || disconnectedRouters.isEmpty()) {
      return;
    }

    for (final VirtualRouter virtualRouter : connectedRouters) {
      if (!virtualRouter.getIsRedundantRouter()) {
        throw new ResourceUnavailableException(
            "Who is calling this with non-redundant router or non-domain router?",
            DataCenter.class,
            virtualRouter.getDataCenterId());
      }
    }

    for (final VirtualRouter virtualRouter : disconnectedRouters) {
      if (!virtualRouter.getIsRedundantRouter()) {
        throw new ResourceUnavailableException(
            "Who is calling this with non-redundant router or non-domain router?",
            DataCenter.class,
            virtualRouter.getDataCenterId());
      }
    }

    final DomainRouterVO connectedRouter = (DomainRouterVO) connectedRouters.get(0);
    DomainRouterVO disconnectedRouter = (DomainRouterVO) disconnectedRouters.get(0);

    if (s_logger.isDebugEnabled()) {
      s_logger.debug(
          "About to stop the router "
              + disconnectedRouter.getInstanceName()
              + " due to: "
              + reason);
    }
    final String title =
        "Virtual router "
            + disconnectedRouter.getInstanceName()
            + " would be stopped after connecting back, due to "
            + reason;
    final String context =
        "Virtual router (name: "
            + disconnectedRouter.getInstanceName()
            + ", id: "
            + disconnectedRouter.getId()
            + ") would be stopped after connecting back, due to: "
            + reason;
    _alertMgr.sendAlert(
        AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER,
        disconnectedRouter.getDataCenterId(),
        disconnectedRouter.getPodIdToDeployIn(),
        title,
        context);
    disconnectedRouter.setStopPending(true);
    disconnectedRouter = _routerDao.persist(disconnectedRouter);
  }
Ejemplo n.º 2
0
  @Override
  public Boolean fenceOff(VirtualMachine vm, Host host) {
    if (host.getHypervisorType() != HypervisorType.KVM
        && host.getHypervisorType() != HypervisorType.LXC) {
      s_logger.warn("Don't know how to fence non kvm hosts " + host.getHypervisorType());
      return null;
    }

    List<HostVO> hosts = _resourceMgr.listAllHostsInCluster(host.getClusterId());
    FenceCommand fence = new FenceCommand(vm, host);

    int i = 0;
    for (HostVO h : hosts) {
      if (h.getHypervisorType() == HypervisorType.KVM
          || h.getHypervisorType() == HypervisorType.LXC) {
        if (h.getStatus() != Status.Up) {
          continue;
        }

        i++;

        if (h.getId() == host.getId()) {
          continue;
        }
        FenceAnswer answer;
        try {
          answer = (FenceAnswer) _agentMgr.send(h.getId(), fence);
        } catch (AgentUnavailableException e) {
          s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
          continue;
        } catch (OperationTimedoutException e) {
          s_logger.info("Moving on to the next host because " + h.toString() + " is unavailable");
          continue;
        }
        if (answer != null && answer.getResult()) {
          return true;
        }
      }
    }

    _alertMgr.sendAlert(
        AlertManager.AlertType.ALERT_TYPE_HOST,
        host.getDataCenterId(),
        host.getPodId(),
        "Unable to fence off host: " + host.getId(),
        "Fencing off host "
            + host.getId()
            + " did not succeed after asking "
            + i
            + " hosts. "
            + "Check Agent logs for more information.");

    s_logger.error("Unable to fence off " + vm.toString() + " on " + host.toString());

    return false;
  }
  @Override
  public void scheduleRestartForVmsOnHost(final HostVO host) {

    if (host.getType() != Host.Type.Routing) {
      return;
    }
    s_logger.warn("Scheduling restart for VMs on host " + host.getId());

    final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
    final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());

    // send an email alert that the host is down
    StringBuilder sb = null;
    if ((vms != null) && !vms.isEmpty()) {
      sb = new StringBuilder();
      sb.append("  Starting HA on the following VMs: ");
      // collect list of vm names for the alert email
      VMInstanceVO vm = vms.get(0);
      if (vm.isHaEnabled()) {
        sb.append(" " + vm.getName());
      }
      for (int i = 1; i < vms.size(); i++) {
        vm = vms.get(i);
        if (vm.isHaEnabled()) {
          sb.append(" " + vm.getName());
        }
      }
    }

    // send an email alert that the host is down, include VMs
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + " (id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    _alertMgr.sendAlert(
        AlertManager.ALERT_TYPE_HOST,
        host.getDataCenterId(),
        host.getPodId(),
        "Host is down, " + hostDesc,
        "Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : ""));

    for (final VMInstanceVO vm : vms) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Notifying HA Mgr of to investigate vm " + vm.getId() + "-" + vm.getName());
      }
      scheduleRestart(vm, true);
    }
  }
  // Returns server component used by server manager to operate the plugin.
  // Server component is a ServerResource. If a connected agent is used, the
  // ServerResource is
  // ignored in favour of another created in response to
  @Override
  public final Map<? extends ServerResource, Map<String, String>> find(
      final long dcId,
      final Long podId,
      final Long clusterId,
      final URI uri,
      final String username,
      final String password,
      final List<String> hostTags)
      throws DiscoveryException {

    if (s_logger.isInfoEnabled()) {
      s_logger.info(
          "Discover host. dc(zone): "
              + dcId
              + ", pod: "
              + podId
              + ", cluster: "
              + clusterId
              + ", uri host: "
              + uri.getHost());
    }

    // Assertions
    if (podId == null) {
      if (s_logger.isInfoEnabled()) {
        s_logger.info("No pod is assigned, skipping the discovery in" + " Hyperv discoverer");
      }
      return null;
    }
    ClusterVO cluster = _clusterDao.findById(clusterId); // ClusterVO exists
    // in the
    // database
    if (cluster == null) {
      if (s_logger.isInfoEnabled()) {
        s_logger.info("No cluster in database for cluster id " + clusterId);
      }
      return null;
    }
    if (cluster.getHypervisorType() != HypervisorType.Hyperv) {
      if (s_logger.isInfoEnabled()) {
        s_logger.info("Cluster " + clusterId + "is not for Hyperv hypervisors");
      }
      return null;
    }
    if (!uri.getScheme().equals("http")) {
      String msg =
          "urlString is not http so we're not taking care of" + " the discovery for this: " + uri;
      s_logger.debug(msg);
      return null;
    }

    try {
      String hostname = uri.getHost();
      InetAddress ia = InetAddress.getByName(hostname);
      String agentIp = ia.getHostAddress();
      String uuidSeed = agentIp;
      String guidWithTail = calcServerResourceGuid(uuidSeed) + "-HypervResource";

      if (_resourceMgr.findHostByGuid(guidWithTail) != null) {
        s_logger.debug(
            "Skipping " + agentIp + " because " + guidWithTail + " is already in the database.");
        return null;
      }

      s_logger.info(
          "Creating"
              + HypervDirectConnectResource.class.getName()
              + " HypervDummyResourceBase for zone/pod/cluster "
              + dcId
              + "/"
              + podId
              + "/"
              + clusterId);

      // Some Hypervisors organise themselves in pools.
      // The startup command tells us what pool they are using.
      // In the meantime, we have to place a GUID corresponding to the
      // pool in the database
      // This GUID may change.
      if (cluster.getGuid() == null) {
        cluster.setGuid(
            UUID.nameUUIDFromBytes(String.valueOf(clusterId).getBytes(Charset.forName("UTF-8")))
                .toString());
        _clusterDao.update(clusterId, cluster);
      }

      // Settings required by all server resources managing a hypervisor
      Map<String, Object> params = new HashMap<String, Object>();
      params.put("zone", Long.toString(dcId));
      params.put("pod", Long.toString(podId));
      params.put("cluster", Long.toString(clusterId));
      params.put("guid", guidWithTail);
      params.put("ipaddress", agentIp);

      // Hyper-V specific settings
      Map<String, String> details = new HashMap<String, String>();
      details.put("url", uri.getHost());
      details.put("username", username);
      details.put("password", password);
      details.put("cluster.guid", cluster.getGuid());

      params.putAll(details);

      HypervDirectConnectResource resource = new HypervDirectConnectResource();
      resource.configure(agentIp, params);

      // Assert
      // TODO: test by using bogus URL and bogus virtual path in URL
      ReadyCommand ping = new ReadyCommand();
      Answer pingAns = resource.executeRequest(ping);
      if (pingAns == null || !pingAns.getResult()) {
        String errMsg = "Agent not running, or no route to agent on at " + uri;
        s_logger.debug(errMsg);
        throw new DiscoveryException(errMsg);
      }

      Map<HypervDirectConnectResource, Map<String, String>> resources =
          new HashMap<HypervDirectConnectResource, Map<String, String>>();
      resources.put(resource, details);

      // TODO: does the resource have to create a connection?
      return resources;
    } catch (ConfigurationException e) {
      _alertMgr.sendAlert(
          AlertManager.AlertType.ALERT_TYPE_HOST,
          dcId,
          podId,
          "Unable to add " + uri.getHost(),
          "Error is " + e.getMessage());
      s_logger.warn("Unable to instantiate " + uri.getHost(), e);
    } catch (UnknownHostException e) {
      _alertMgr.sendAlert(
          AlertManager.AlertType.ALERT_TYPE_HOST,
          dcId,
          podId,
          "Unable to add " + uri.getHost(),
          "Error is " + e.getMessage());

      s_logger.warn("Unable to instantiate " + uri.getHost(), e);
    } catch (Exception e) {
      String msg = " can't setup agent, due to " + e.toString() + " - " + e.getMessage();
      s_logger.warn(msg);
    }
    return null;
  }
  protected Long restart(HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return null;
    }

    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Waiting because there's HA work being executed on an item currently.  Work Ids =[");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }

    long vmId = work.getInstanceId();

    VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId());
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_SSVM;
    }

    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
      host = _hostDao.findByIdIncludingRemoved(work.getHostId());
      if (host != null) {
        s_logger.debug(
            "VM "
                + vm.toString()
                + " is now no longer on host "
                + work.getHostId()
                + " as the host is removed");
        isHostRemoved = true;
      }
    }

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (!isHostRemoved) {
        if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
          s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
          return null;
        }

        Enumeration<Investigator> en = _investigators.enumeration();
        Investigator investigator = null;
        while (en.hasMoreElements()) {
          investigator = en.nextElement();
          alive = investigator.isVmAlive(vm, host);
          s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive);
          if (alive != null) {
            break;
          }
        }
        boolean fenced = false;
        if (alive == null) {
          s_logger.debug("Fencing off VM that we don't know the state of");
          Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
          while (enfb.hasMoreElements()) {
            FenceBuilder fb = enfb.nextElement();
            Boolean result = fb.fenceOff(vm, host);
            s_logger.info("Fencer " + fb.getName() + " returned " + result);
            if (result != null && result) {
              fenced = true;
              break;
            }
          }
        } else if (!alive) {
          fenced = true;
        } else {
          s_logger.debug(
              "VM " + vm.getHostName() + " is found to be alive by " + investigator.getName());
          if (host.getStatus() == Status.Up) {
            s_logger.info(vm + " is alive and host is up. No need to restart it.");
            return null;
          } else {
            s_logger.debug("Rescheduling because the host is not up but the vm is alive");
            return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
          }
        }

        if (!fenced) {
          s_logger.debug("We were unable to fence off the VM " + vm);
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterIdToDeployIn(),
              vm.getPodIdToDeployIn(),
              "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
              "Insufficient capacity to restart VM, name: "
                  + vm.getHostName()
                  + ", id: "
                  + vmId
                  + " which was running on host "
                  + hostDesc);
          return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
        }

        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }

        work.setStep(Step.Scheduled);
        _haDao.update(work.getId(), work);
      } else {
        s_logger.debug(
            "How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
      }
    }

    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started =
          _itMgr.advanceStart(
              vm,
              new HashMap<VirtualMachineProfile.Param, Object>(),
              _accountMgr.getSystemUser(),
              _accountMgr.getSystemAccount());
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (final ResourceUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (OperationTimedoutException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    }
    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
  }
  @Override
  public void scheduleRestart(VMInstanceVO vm, boolean investigate) {
    Long hostId = vm.getHostId();
    if (hostId == null) {
      try {
        s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm);
        _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
      } catch (ResourceUnavailableException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (OperationTimedoutException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (ConcurrentOperationException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      }
      return;
    }

    if (vm.getHypervisorType() == HypervisorType.VMware) {
      s_logger.info("Skip HA for VMware VM " + vm.getInstanceName());
      return;
    }

    if (!investigate) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
      }

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_SSVM;
      }

      if (!(_forceHA || vm.isHaEnabled())) {
        String hostDesc =
            "id:"
                + vm.getHostId()
                + ", availability zone id:"
                + vm.getDataCenterIdToDeployIn()
                + ", pod id:"
                + vm.getPodIdToDeployIn();
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterIdToDeployIn(),
            vm.getPodIdToDeployIn(),
            "VM (name: "
                + vm.getHostName()
                + ", id: "
                + vm.getId()
                + ") stopped unexpectedly on host "
                + hostDesc,
            "Virtual Machine "
                + vm.getHostName()
                + " (id: "
                + vm.getId()
                + ") running on host ["
                + vm.getHostId()
                + "] stopped unexpectedly.");

        if (s_logger.isDebugEnabled()) {
          s_logger.debug("VM is not HA enabled so we're done.");
        }
      }

      try {
        _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
      } catch (ResourceUnavailableException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (OperationTimedoutException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      } catch (ConcurrentOperationException e) {
        assert false : "How do we hit this when force is true?";
        throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
      }
    }

    List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
    int maxRetries = 0;
    for (HaWorkVO item : items) {
      if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
        maxRetries = item.getTimesTried();
        break;
      }
    }

    HaWorkVO work =
        new HaWorkVO(
            vm.getId(),
            vm.getType(),
            WorkType.HA,
            investigate ? Step.Investigating : Step.Scheduled,
            hostId,
            vm.getState(),
            maxRetries + 1,
            vm.getUpdated());
    _haDao.persist(work);

    if (s_logger.isInfoEnabled()) {
      s_logger.info("Schedule vm for HA:  " + vm);
    }

    wakeupWorkers();
  }
  @Override
  public Map<? extends ServerResource, Map<String, String>> find(
      long dcId,
      Long podId,
      Long clusterId,
      URI url,
      String username,
      String password,
      List<String> hostTags)
      throws DiscoveryException {

    if (s_logger.isInfoEnabled())
      s_logger.info(
          "Discover host. dc: "
              + dcId
              + ", pod: "
              + podId
              + ", cluster: "
              + clusterId
              + ", uri host: "
              + url.getHost());

    if (podId == null) {
      if (s_logger.isInfoEnabled())
        s_logger.info(
            "No pod is assigned, assuming that it is not for vmware and skip it to next discoverer");
      return null;
    }

    ClusterVO cluster = _clusterDao.findById(clusterId);
    if (cluster == null || cluster.getHypervisorType() != HypervisorType.VMware) {
      if (s_logger.isInfoEnabled())
        s_logger.info("invalid cluster id or cluster is not for VMware hypervisors");
      return null;
    }

    List<HostVO> hosts = _resourceMgr.listAllHostsInCluster(clusterId);
    if (hosts != null && hosts.size() > 0) {
      int maxHostsPerCluster =
          _hvCapabilitiesDao.getMaxHostsPerCluster(
              hosts.get(0).getHypervisorType(), hosts.get(0).getHypervisorVersion());
      if (hosts.size() > maxHostsPerCluster) {
        String msg =
            "VMware cluster "
                + cluster.getName()
                + " is too big to add new host now. (current configured cluster size: "
                + maxHostsPerCluster
                + ")";
        s_logger.error(msg);
        throw new DiscoveredWithErrorException(msg);
      }
    }

    String privateTrafficLabel = null;
    String publicTrafficLabel = null;
    String guestTrafficLabel = null;
    Map<String, String> vsmCredentials = null;

    VirtualSwitchType defaultVirtualSwitchType = VirtualSwitchType.StandardVirtualSwitch;

    String paramGuestVswitchType = null;
    String paramGuestVswitchName = null;
    String paramPublicVswitchType = null;
    String paramPublicVswitchName = null;

    VmwareTrafficLabel guestTrafficLabelObj = new VmwareTrafficLabel(TrafficType.Guest);
    VmwareTrafficLabel publicTrafficLabelObj = new VmwareTrafficLabel(TrafficType.Public);
    Map<String, String> clusterDetails = _clusterDetailsDao.findDetails(clusterId);
    DataCenterVO zone = _dcDao.findById(dcId);
    NetworkType zoneType = zone.getNetworkType();
    _readGlobalConfigParameters();

    // Set default physical network end points for public and guest traffic
    // Private traffic will be only on standard vSwitch for now.
    if (useDVS) {
      // Parse url parameters for type of vswitch and name of vswitch specified at cluster level
      paramGuestVswitchType = _urlParams.get(ApiConstants.VSWITCH_TYPE_GUEST_TRAFFIC);
      paramGuestVswitchName = _urlParams.get(ApiConstants.VSWITCH_NAME_GUEST_TRAFFIC);
      paramPublicVswitchType = _urlParams.get(ApiConstants.VSWITCH_TYPE_PUBLIC_TRAFFIC);
      paramPublicVswitchName = _urlParams.get(ApiConstants.VSWITCH_NAME_PUBLIC_TRAFFIC);
      defaultVirtualSwitchType = getDefaultVirtualSwitchType();
    }

    // Zone level vSwitch Type depends on zone level traffic labels
    //
    // User can override Zone wide vswitch type (for public and guest) by providing following
    // optional parameters in addClusterCmd
    // param "guestvswitchtype" with valid values vmwaredvs, vmwaresvs, nexusdvs
    // param "publicvswitchtype" with valid values vmwaredvs, vmwaresvs, nexusdvs
    //
    // Format of label is <VSWITCH>,<VLANID>,<VSWITCHTYPE>
    // If a field <VLANID> OR <VSWITCHTYPE> is not present leave it empty.
    // Ex: 1) vswitch0
    // 2) dvswitch0,200,vmwaredvs
    // 3) nexusepp0,300,nexusdvs
    // 4) vswitch1,400,vmwaresvs
    // 5) vswitch0
    // default vswitchtype is 'vmwaresvs'.
    // <VSWITCHTYPE> 'vmwaresvs' is for vmware standard vswitch
    // <VSWITCHTYPE> 'vmwaredvs' is for vmware distributed virtual switch
    // <VSWITCHTYPE> 'nexusdvs' is for cisco nexus distributed virtual switch
    // Get zone wide traffic labels for Guest traffic and Public traffic
    guestTrafficLabel = _netmgr.getDefaultGuestTrafficLabel(dcId, HypervisorType.VMware);

    // Process traffic label information provided at zone level and cluster level
    guestTrafficLabelObj =
        getTrafficInfo(
            TrafficType.Guest,
            guestTrafficLabel,
            defaultVirtualSwitchType,
            paramGuestVswitchType,
            paramGuestVswitchName,
            clusterId);

    if (zoneType == NetworkType.Advanced) {
      // Get zone wide traffic label for Public traffic
      publicTrafficLabel = _netmgr.getDefaultPublicTrafficLabel(dcId, HypervisorType.VMware);

      // Process traffic label information provided at zone level and cluster level
      publicTrafficLabelObj =
          getTrafficInfo(
              TrafficType.Public,
              publicTrafficLabel,
              defaultVirtualSwitchType,
              paramPublicVswitchType,
              paramPublicVswitchName,
              clusterId);

      // Configuration Check: A physical network cannot be shared by different types of virtual
      // switches.
      //
      // Check if different vswitch types are chosen for same physical network
      // 1. Get physical network for guest traffic - multiple networks
      // 2. Get physical network for public traffic - single network
      // See if 2 is in 1
      //  if no - pass
      //  if yes - compare publicTrafficLabelObj.getVirtualSwitchType() ==
      // guestTrafficLabelObj.getVirtualSwitchType()
      //      true  - pass
      //      false - throw exception - fail cluster add operation

      List<? extends PhysicalNetwork> pNetworkListGuestTraffic =
          _netmgr.getPhysicalNtwksSupportingTrafficType(dcId, TrafficType.Guest);
      List<? extends PhysicalNetwork> pNetworkListPublicTraffic =
          _netmgr.getPhysicalNtwksSupportingTrafficType(dcId, TrafficType.Public);
      // Public network would be on single physical network hence getting first object of the list
      // would suffice.
      PhysicalNetwork pNetworkPublic = pNetworkListPublicTraffic.get(0);
      if (pNetworkListGuestTraffic.contains(pNetworkPublic)) {
        if (publicTrafficLabelObj.getVirtualSwitchType()
            != guestTrafficLabelObj.getVirtualSwitchType()) {
          String msg =
              "Both public traffic and guest traffic is over same physical network "
                  + pNetworkPublic
                  + ". And virtual switch type chosen for each traffic is different"
                  + ". A physical network cannot be shared by different types of virtual switches.";
          s_logger.error(msg);
          throw new InvalidParameterValueException(msg);
        }
      }
    } else {
      // Distributed virtual switch is not supported in Basic zone for now.
      // Private / Management network traffic is not yet supported over distributed virtual switch.
      if (guestTrafficLabelObj.getVirtualSwitchType() != VirtualSwitchType.StandardVirtualSwitch) {
        String msg =
            "Detected that Guest traffic is over Distributed virtual switch in Basic zone. Only Standard vSwitch is supported in Basic zone.";
        s_logger.error(msg);
        throw new DiscoveredWithErrorException(msg);
      }
    }

    privateTrafficLabel = _netmgr.getDefaultManagementTrafficLabel(dcId, HypervisorType.VMware);
    if (privateTrafficLabel != null) {
      s_logger.info("Detected private network label : " + privateTrafficLabel);
    }

    if (nexusDVS) {
      if (zoneType != NetworkType.Basic) {
        publicTrafficLabel = _netmgr.getDefaultPublicTrafficLabel(dcId, HypervisorType.VMware);
        if (publicTrafficLabel != null) {
          s_logger.info("Detected public network label : " + publicTrafficLabel);
        }
      }
      // Get physical network label
      guestTrafficLabel = _netmgr.getDefaultGuestTrafficLabel(dcId, HypervisorType.VMware);
      if (guestTrafficLabel != null) {
        s_logger.info("Detected guest network label : " + guestTrafficLabel);
      }
      vsmCredentials = _vmwareMgr.getNexusVSMCredentialsByClusterId(clusterId);
    }

    VmwareContext context = null;
    try {
      context = VmwareContextFactory.create(url.getHost(), username, password);
      if (privateTrafficLabel != null)
        context.registerStockObject("privateTrafficLabel", privateTrafficLabel);

      if (nexusDVS) {
        if (vsmCredentials != null) {
          s_logger.info("Stocking credentials of Nexus VSM");
          context.registerStockObject("vsmcredentials", vsmCredentials);
        }
      }
      List<ManagedObjectReference> morHosts =
          _vmwareMgr.addHostToPodCluster(
              context, dcId, podId, clusterId, URLDecoder.decode(url.getPath()));
      if (morHosts == null) s_logger.info("Found 0 hosts.");
      if (privateTrafficLabel != null) context.uregisterStockObject("privateTrafficLabel");

      if (morHosts == null) {
        s_logger.error(
            "Unable to find host or cluster based on url: " + URLDecoder.decode(url.getPath()));
        return null;
      }

      ManagedObjectReference morCluster = null;
      clusterDetails = _clusterDetailsDao.findDetails(clusterId);
      if (clusterDetails.get("url") != null) {
        URI uriFromCluster = new URI(UriUtils.encodeURIComponent(clusterDetails.get("url")));
        morCluster = context.getHostMorByPath(URLDecoder.decode(uriFromCluster.getPath()));

        if (morCluster == null
            || !morCluster.getType().equalsIgnoreCase("ClusterComputeResource")) {
          s_logger.warn(
              "Cluster url does not point to a valid vSphere cluster, url: "
                  + clusterDetails.get("url"));
          return null;
        } else {
          ClusterMO clusterMo = new ClusterMO(context, morCluster);
          ClusterDasConfigInfo dasConfig = clusterMo.getDasConfig();
          if (dasConfig != null
              && dasConfig.isEnabled() != null
              && dasConfig.isEnabled().booleanValue()) {
            clusterDetails.put("NativeHA", "true");
            _clusterDetailsDao.persist(clusterId, clusterDetails);
          }
        }
      }

      if (!validateDiscoveredHosts(context, morCluster, morHosts)) {
        if (morCluster == null)
          s_logger.warn(
              "The discovered host is not standalone host, can not be added to a standalone cluster");
        else s_logger.warn("The discovered host does not belong to the cluster");
        return null;
      }

      Map<VmwareResource, Map<String, String>> resources =
          new HashMap<VmwareResource, Map<String, String>>();
      for (ManagedObjectReference morHost : morHosts) {
        Map<String, String> details = new HashMap<String, String>();
        Map<String, Object> params = new HashMap<String, Object>();

        HostMO hostMo = new HostMO(context, morHost);
        details.put("url", hostMo.getHostName());
        details.put("username", username);
        details.put("password", password);
        String guid = morHost.getType() + ":" + morHost.getValue() + "@" + url.getHost();
        details.put("guid", guid);

        params.put("url", hostMo.getHostName());
        params.put("username", username);
        params.put("password", password);
        params.put("zone", Long.toString(dcId));
        params.put("pod", Long.toString(podId));
        params.put("cluster", Long.toString(clusterId));
        params.put("guid", guid);
        if (privateTrafficLabel != null) {
          params.put("private.network.vswitch.name", privateTrafficLabel);
        }
        params.put("guestTrafficInfo", guestTrafficLabelObj);
        params.put("publicTrafficInfo", publicTrafficLabelObj);

        VmwareResource resource = new VmwareResource();
        try {
          resource.configure("VMware", params);
        } catch (ConfigurationException e) {
          _alertMgr.sendAlert(
              AlertManager.ALERT_TYPE_HOST,
              dcId,
              podId,
              "Unable to add " + url.getHost(),
              "Error is " + e.getMessage());
          s_logger.warn("Unable to instantiate " + url.getHost(), e);
        }
        resource.start();

        resources.put(resource, details);
      }

      // place a place holder guid derived from cluster ID
      cluster.setGuid(UUID.nameUUIDFromBytes(String.valueOf(clusterId).getBytes()).toString());
      _clusterDao.update(clusterId, cluster);

      return resources;
    } catch (DiscoveredWithErrorException e) {
      throw e;
    } catch (Exception e) {
      s_logger.warn(
          "Unable to connect to Vmware vSphere server. service address: " + url.getHost());
      return null;
    } finally {
      if (context != null) context.close();
    }
  }
  public Long migrate(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null || vm.getRemoved() != null) {
      s_logger.debug("Unable to find the vm " + vmId);
      return null;
    }

    s_logger.info("Migrating vm: " + vm.toString());
    if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
      s_logger.info("VM is not longer running on the current hostId");
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
    }

    HostVO fromHost = _hostDao.findById(vm.getHostId());
    String fromHostName = ((fromHost == null) ? "unknown" : fromHost.getName());
    HostVO toHost = null;
    if (work.getStep() == Step.Scheduled) {
      if (vm.getState() != State.Running) {
        s_logger.info(
            "VM's state is not ready for migration. "
                + vm.toString()
                + " State is "
                + vm.getState().toString());
        return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
      }

      DataCenterVO dcVO = _dcDao.findById(fromHost.getDataCenterId());
      HostPodVO podVO = _podDao.findById(fromHost.getPodId());

      try {
        toHost = mgr.prepareForMigration(vm);
        if (toHost == null) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug("Unable to find a host for migrating vm " + vmId);
          }
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterId(),
              vm.getPodId(),
              "Unable to migrate vm "
                  + vm.getName()
                  + " from host "
                  + fromHostName
                  + " in zone "
                  + dcVO.getName()
                  + " and pod "
                  + podVO.getName(),
              "Unable to find a suitable host");
        }
      } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to mgirate due to insufficient capacity " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Insufficient capacity");
      } catch (final StorageUnavailableException e) {
        s_logger.warn("Storage is unavailable: " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Storage is gone.");
      }

      if (toHost == null) {
        _agentMgr.maintenanceFailed(vm.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Migrating from " + work.getHostId() + " to " + toHost.getId());
      }
      work.setStep(Step.Migrating);
      work.setHostId(toHost.getId());
      _haDao.update(work.getId(), work);
    }

    if (work.getStep() == Step.Migrating) {
      vm = mgr.get(vmId); // let's see if anything has changed.
      boolean migrated = false;
      if (vm == null
          || vm.getRemoved() != null
          || vm.getHostId() == null
          || !_itMgr.stateTransitTo(vm, Event.MigrationRequested, vm.getHostId())) {
        s_logger.info("Migration cancelled because state has changed: " + vm.toString());
      } else {
        try {
          boolean isWindows =
              _guestOSCategoryDao
                  .findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId())
                  .getName()
                  .equalsIgnoreCase("Windows");
          MigrateCommand cmd =
              new MigrateCommand(vm.getInstanceName(), toHost.getPrivateIpAddress(), isWindows);
          Answer answer = _agentMgr.send(fromHost.getId(), cmd);
          if (answer != null && answer.getResult()) {
            migrated = true;
            _storageMgr.unshare(vm, fromHost);
            work.setStep(Step.Investigating);
            _haDao.update(work.getId(), work);
          }
        } catch (final AgentUnavailableException e) {
          s_logger.debug("host became unavailable");
        } catch (final OperationTimedoutException e) {
          s_logger.debug("operation timed out");
          if (e.isActive()) {
            scheduleRestart(vm, true);
          }
        }
      }

      if (!migrated) {
        s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm.toString());

        DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
        HostPodVO podVO = _podDao.findById(vm.getPodId());
        _alertMgr.sendAlert(
            alertType,
            fromHost.getDataCenterId(),
            fromHost.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migrate Command failed.  Please check logs.");

        _itMgr.stateTransitTo(vm, Event.MigrationFailedOnSource, toHost.getId());
        _agentMgr.maintenanceFailed(vm.getHostId());

        Command cleanup = mgr.cleanup(vm, null);
        _agentMgr.easySend(toHost.getId(), cleanup);
        _storageMgr.unshare(vm, toHost);

        return null;
      }
    }

    if (toHost == null) {
      toHost = _hostDao.findById(work.getHostId());
    }
    DataCenterVO dcVO = _dcDao.findById(toHost.getDataCenterId());
    HostPodVO podVO = _podDao.findById(toHost.getPodId());

    try {
      if (!mgr.completeMigration(vm, toHost)) {
        _alertMgr.sendAlert(
            alertType,
            toHost.getDataCenterId(),
            toHost.getPodId(),
            "Unable to migrate "
                + vmId
                + " to host "
                + toHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migration not completed");
        s_logger.warn("Unable to complete migration: " + vm.toString());
      } else {
        s_logger.info("Migration is complete: " + vm.toString());
      }
      return null;
    } catch (final AgentUnavailableException e) {
      s_logger.warn("Agent is unavailable for " + vm.toString());
    } catch (final OperationTimedoutException e) {
      s_logger.warn("Operation timed outfor " + vm.toString());
    }
    _itMgr.stateTransitTo(vm, Event.MigrationFailedOnDest, toHost.getId());
    return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
  }
  /**
   * compareState does as its name suggests and compares the states between management server and
   * agent. It returns whether something should be cleaned up
   */
  protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync) {
    State agentState = info.state;
    final String agentName = info.name;
    final State serverState = vm.getState();
    final String serverName = vm.getName();

    Command command = null;

    if (s_logger.isDebugEnabled()) {
      s_logger.debug(
          "VM "
              + serverName
              + ": server state = "
              + serverState.toString()
              + " and agent state = "
              + agentState.toString());
    }

    if (agentState == State.Error) {
      agentState = State.Stopped;

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      }

      HostPodVO podVO = _podDao.findById(vm.getPodId());
      DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
      HostVO hostVO = _hostDao.findById(vm.getHostId());

      String hostDesc =
          "name: "
              + hostVO.getName()
              + " (id:"
              + hostVO.getId()
              + "), availability zone: "
              + dcVO.getName()
              + ", pod: "
              + podVO.getName();
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "VM (name: "
              + vm.getName()
              + ", id: "
              + vm.getId()
              + ") stopped on host "
              + hostDesc
              + " due to storage failure",
          "Virtual Machine "
              + vm.getName()
              + " (id: "
              + vm.getId()
              + ") running on host ["
              + vm.getHostId()
              + "] stopped due to storage failure.");
    }

    if (serverState == State.Migrating) {
      s_logger.debug("Skipping vm in migrating state: " + vm.toString());
      return null;
    }

    if (agentState == serverState) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Both states are " + agentState.toString() + " for " + serverName);
      }
      assert (agentState == State.Stopped || agentState == State.Running)
          : "If the states we send up is changed, this must be changed.";
      _itMgr.stateTransitTo(
          vm,
          agentState == State.Stopped
              ? VirtualMachine.Event.AgentReportStopped
              : VirtualMachine.Event.AgentReportRunning,
          vm.getHostId());
      if (agentState == State.Stopped) {
        s_logger.debug("State matches but the agent said stopped so let's send a cleanup anyways.");
        return info.mgr.cleanup(vm, agentName);
      }
      return null;
    }

    if (agentState == State.Stopped) {
      // This state means the VM on the agent was detected previously
      // and now is gone.  This is slightly different than if the VM
      // was never completed but we still send down a Stop Command
      // to ensure there's cleanup.
      if (serverState == State.Running) {
        // Our records showed that it should be running so let's restart it.
        vm = info.mgr.get(vm.getId());
        scheduleRestart(vm, false);
        command = info.mgr.cleanup(vm, agentName);
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM is in stopping state on full sync.  Updating the status to stopped");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("Ignoring VM in stopping mode: " + vm.getName());
        }
      } else if (serverState == State.Starting) {
        s_logger.debug("Ignoring VM in starting mode: " + vm.getName());
      } else {
        s_logger.debug("Sending cleanup to a stopped vm: " + agentName);
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null);
        command = info.mgr.cleanup(vm, agentName);
      }
    } else if (agentState == State.Running) {
      if (serverState == State.Starting) {
        if (fullSync) {
          s_logger.debug("VM state is starting on full sync so updating it to running");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStartCommand(vm);
        }
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM state is in stopping on fullsync so resend stop.");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("VM is in stopping state so no action.");
        }
      } else if (serverState == State.Destroyed
          || serverState == State.Stopped
          || serverState == State.Expunging) {
        s_logger.debug("VM state is in stopped so stopping it on the agent");
        vm = info.mgr.get(vm.getId());
        command = info.mgr.cleanup(vm, agentName);
      } else {
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId());
      }
    } /*else if (agentState == State.Unknown) {
          if (serverState == State.Running) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Starting) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Stopping) {
              if (fullSync) {
                  s_logger.debug("VM state is stopping in full sync.  Resending stop");
                  command = info.handler.cleanup(vm, agentName);
              }
          }
      }*/
    return command;
  }
  protected Long restart(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
    if (mgr == null) {
      s_logger.warn(
          "Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId);
      return null;
    }

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm.toString());
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm.toString()
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    final HostVO host = _hostDao.findById(work.getHostId());

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    }

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
        s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
        if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) {
          _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null);
        }
        return null;
      }

      Enumeration<Investigator> en = _investigators.enumeration();
      Investigator investigator = null;
      while (en.hasMoreElements()) {
        investigator = en.nextElement();
        alive = investigator.isVmAlive(vm, host);
        if (alive != null) {
          s_logger.debug(
              investigator.getName() + " found VM " + vm.getName() + "to be alive? " + alive);
          break;
        }
      }
      if (alive != null && alive) {
        s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName());
        if (host.getStatus() == Status.Up) {
          compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false);
          return null;
        } else {
          s_logger.debug("Rescheduling because the host is not up but the vm is alive");
          return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
        }
      }

      boolean fenced = false;
      if (alive == null || !alive) {
        fenced = true;
        s_logger.debug("Fencing off VM that we don't know the state of");
        Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
        while (enfb.hasMoreElements()) {
          final FenceBuilder fb = enfb.nextElement();
          Boolean result = fb.fenceOff(vm, host);
          if (result != null && !result) {
            fenced = false;
          }
        }
      }

      if (alive == null && !fenced) {
        s_logger.debug("We were unable to fence off the VM " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
            "Insufficient capacity to restart VM, name: "
                + vm.getName()
                + ", id: "
                + vmId
                + " which was running on host "
                + hostDesc);
        return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
      }

      mgr.completeStopCommand(vm);

      work.setStep(Step.Scheduled);
      _haDao.update(work.getId(), work);
    }

    // send an alert for VMs that stop unexpectedly
    _alertMgr.sendAlert(
        alertType,
        vm.getDataCenterId(),
        vm.getPodId(),
        "VM (name: "
            + vm.getName()
            + ", id: "
            + vmId
            + ") stopped unexpectedly on host "
            + hostDesc,
        "Virtual Machine "
            + vm.getName()
            + " (id: "
            + vm.getId()
            + ") running on host ["
            + hostDesc
            + "] stopped unexpectedly.");

    vm = mgr.get(vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started = mgr.start(vm.getId(), 0);
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
      vm = mgr.get(vm.getId());
      work.setUpdateTime(vm.getUpdated());
      work.setPreviousState(vm.getState());
      return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (final StorageUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ExecutionException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    }
  }
  @Override
  public void scheduleRestart(VMInstanceVO vm, final boolean investigate) {
    Long hostId = vm.getHostId();
    VirtualMachineGuru<VMInstanceVO> mgr = findManager(vm.getType());
    vm = mgr.get(vm.getId());
    if (!investigate) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
      }

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      }

      if (!(_forceHA || vm.isHaEnabled())) {

        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "VM (name: "
                + vm.getName()
                + ", id: "
                + vm.getId()
                + ") stopped unexpectedly on host "
                + vm.getHostId(),
            "Virtual Machine "
                + vm.getName()
                + " (id: "
                + vm.getId()
                + ") running on host ["
                + vm.getHostId()
                + "] stopped unexpectedly.");

        if (s_logger.isDebugEnabled()) {
          s_logger.debug("VM is not HA enabled so we're done.");
        }
      }

      mgr.completeStopCommand(vm);
    }

    final List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
    int maxRetries = 0;
    for (final HaWorkVO item : items) {
      if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
        maxRetries = item.getTimesTried();
        break;
      }
    }

    final HaWorkVO work =
        new HaWorkVO(
            vm.getId(),
            vm.getType(),
            WorkType.HA,
            investigate ? Step.Investigating : Step.Scheduled,
            hostId,
            vm.getState(),
            maxRetries + 1,
            vm.getUpdated());
    _haDao.persist(work);

    if (s_logger.isInfoEnabled()) {
      s_logger.info("Schedule vm for HA:  " + vm.toString());
    }

    wakeupWorkers();
  }
  @Override
  public void handleTemplateSync(DataStore store) {
    if (store == null) {
      s_logger.warn("Huh? image store is null");
      return;
    }
    long storeId = store.getId();

    // add lock to make template sync for a data store only be done once
    String lockString = "templatesync.storeId:" + storeId;
    GlobalLock syncLock = GlobalLock.getInternLock(lockString);
    try {
      if (syncLock.lock(3)) {
        try {
          Long zoneId = store.getScope().getScopeId();

          Map<String, TemplateProp> templateInfos = listTemplate(store);
          if (templateInfos == null) {
            return;
          }

          Set<VMTemplateVO> toBeDownloaded = new HashSet<VMTemplateVO>();
          List<VMTemplateVO> allTemplates = null;
          if (zoneId == null) {
            // region wide store
            allTemplates = _templateDao.listAllActive();
          } else {
            // zone wide store
            allTemplates = _templateDao.listAllInZone(zoneId);
          }
          List<VMTemplateVO> rtngTmplts = _templateDao.listAllSystemVMTemplates();
          List<VMTemplateVO> defaultBuiltin = _templateDao.listDefaultBuiltinTemplates();

          if (rtngTmplts != null) {
            for (VMTemplateVO rtngTmplt : rtngTmplts) {
              if (!allTemplates.contains(rtngTmplt)) {
                allTemplates.add(rtngTmplt);
              }
            }
          }

          if (defaultBuiltin != null) {
            for (VMTemplateVO builtinTmplt : defaultBuiltin) {
              if (!allTemplates.contains(builtinTmplt)) {
                allTemplates.add(builtinTmplt);
              }
            }
          }

          toBeDownloaded.addAll(allTemplates);

          for (VMTemplateVO tmplt : allTemplates) {
            String uniqueName = tmplt.getUniqueName();
            TemplateDataStoreVO tmpltStore =
                _vmTemplateStoreDao.findByStoreTemplate(storeId, tmplt.getId());
            if (templateInfos.containsKey(uniqueName)) {
              TemplateProp tmpltInfo = templateInfos.remove(uniqueName);
              toBeDownloaded.remove(tmplt);
              if (tmpltStore != null) {
                s_logger.info("Template Sync found " + uniqueName + " already in the image store");
                if (tmpltStore.getDownloadState() != Status.DOWNLOADED) {
                  tmpltStore.setErrorString("");
                }
                if (tmpltInfo.isCorrupted()) {
                  tmpltStore.setDownloadState(Status.DOWNLOAD_ERROR);
                  String msg =
                      "Template "
                          + tmplt.getName()
                          + ":"
                          + tmplt.getId()
                          + " is corrupted on secondary storage "
                          + tmpltStore.getId();
                  tmpltStore.setErrorString(msg);
                  s_logger.info("msg");
                  if (tmplt.getUrl() == null) {
                    msg =
                        "Private Template ("
                            + tmplt
                            + ") with install path "
                            + tmpltInfo.getInstallPath()
                            + "is corrupted, please check in image store: "
                            + tmpltStore.getDataStoreId();
                    s_logger.warn(msg);
                  } else {
                    s_logger.info(
                        "Removing template_store_ref entry for corrupted template "
                            + tmplt.getName());
                    _vmTemplateStoreDao.remove(tmpltStore.getId());
                    toBeDownloaded.add(tmplt);
                  }

                } else {
                  tmpltStore.setDownloadPercent(100);
                  tmpltStore.setDownloadState(Status.DOWNLOADED);
                  tmpltStore.setState(ObjectInDataStoreStateMachine.State.Ready);
                  tmpltStore.setInstallPath(tmpltInfo.getInstallPath());
                  tmpltStore.setSize(tmpltInfo.getSize());
                  tmpltStore.setPhysicalSize(tmpltInfo.getPhysicalSize());
                  tmpltStore.setLastUpdated(new Date());
                  // update size in vm_template table
                  VMTemplateVO tmlpt = _templateDao.findById(tmplt.getId());
                  tmlpt.setSize(tmpltInfo.getSize());
                  _templateDao.update(tmplt.getId(), tmlpt);

                  // Skipping limit checks for SYSTEM Account and for the templates created from
                  // volumes or snapshots
                  // which already got checked and incremented during createTemplate API call.
                  if (tmpltInfo.getSize() > 0
                      && tmplt.getAccountId() != Account.ACCOUNT_ID_SYSTEM
                      && tmplt.getUrl() != null) {
                    long accountId = tmplt.getAccountId();
                    try {
                      _resourceLimitMgr.checkResourceLimit(
                          _accountMgr.getAccount(accountId),
                          com.cloud.configuration.Resource.ResourceType.secondary_storage,
                          tmpltInfo.getSize() - UriUtils.getRemoteSize(tmplt.getUrl()));
                    } catch (ResourceAllocationException e) {
                      s_logger.warn(e.getMessage());
                      _alertMgr.sendAlert(
                          AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED,
                          zoneId,
                          null,
                          e.getMessage(),
                          e.getMessage());
                    } finally {
                      _resourceLimitMgr.recalculateResourceCount(
                          accountId,
                          _accountMgr.getAccount(accountId).getDomainId(),
                          com.cloud.configuration.Resource.ResourceType.secondary_storage
                              .getOrdinal());
                    }
                  }
                }
                _vmTemplateStoreDao.update(tmpltStore.getId(), tmpltStore);
              } else {
                tmpltStore =
                    new TemplateDataStoreVO(
                        storeId,
                        tmplt.getId(),
                        new Date(),
                        100,
                        Status.DOWNLOADED,
                        null,
                        null,
                        null,
                        tmpltInfo.getInstallPath(),
                        tmplt.getUrl());
                tmpltStore.setSize(tmpltInfo.getSize());
                tmpltStore.setPhysicalSize(tmpltInfo.getPhysicalSize());
                tmpltStore.setDataStoreRole(store.getRole());
                _vmTemplateStoreDao.persist(tmpltStore);

                // update size in vm_template table
                VMTemplateVO tmlpt = _templateDao.findById(tmplt.getId());
                tmlpt.setSize(tmpltInfo.getSize());
                _templateDao.update(tmplt.getId(), tmlpt);
                associateTemplateToZone(tmplt.getId(), zoneId);
              }
            } else {
              s_logger.info(
                  "Template Sync did not find "
                      + uniqueName
                      + " on image store "
                      + storeId
                      + ", may request download based on available hypervisor types");
              if (tmpltStore != null) {
                if (isRegionStore(store)
                    && tmpltStore.getDownloadState()
                        == VMTemplateStorageResourceAssoc.Status.DOWNLOADED
                    && tmpltStore.getState() == State.Ready
                    && tmpltStore.getInstallPath() == null) {
                  s_logger.info(
                      "Keep fake entry in template store table for migration of previous NFS to object store");
                } else {
                  s_logger.info(
                      "Removing leftover template "
                          + uniqueName
                          + " entry from template store table");
                  // remove those leftover entries
                  _vmTemplateStoreDao.remove(tmpltStore.getId());
                }
              }
            }
          }

          if (toBeDownloaded.size() > 0) {
            /* Only download templates whose hypervirsor type is in the zone */
            List<HypervisorType> availHypers = _clusterDao.getAvailableHypervisorInZone(zoneId);
            if (availHypers.isEmpty()) {
              /*
               * This is for cloudzone, local secondary storage resource
               * started before cluster created
               */
              availHypers.add(HypervisorType.KVM);
            }
            /* Baremetal need not to download any template */
            availHypers.remove(HypervisorType.BareMetal);
            availHypers.add(HypervisorType.None); // bug 9809: resume ISO
            // download.
            for (VMTemplateVO tmplt : toBeDownloaded) {
              if (tmplt.getUrl() == null) { // If url is null we can't
                s_logger.info(
                    "Skip downloading template "
                        + tmplt.getUniqueName()
                        + " since no url is specified.");
                continue;
              }
              // if this is private template, skip sync to a new image store
              if (!tmplt.isPublicTemplate()
                  && !tmplt.isFeatured()
                  && tmplt.getTemplateType() != TemplateType.SYSTEM) {
                s_logger.info(
                    "Skip sync downloading private template "
                        + tmplt.getUniqueName()
                        + " to a new image store");
                continue;
              }

              // if this is a region store, and there is already an DOWNLOADED entry there without
              // install_path information, which
              // means that this is a duplicate entry from migration of previous NFS to staging.
              if (isRegionStore(store)) {
                TemplateDataStoreVO tmpltStore =
                    _vmTemplateStoreDao.findByStoreTemplate(storeId, tmplt.getId());
                if (tmpltStore != null
                    && tmpltStore.getDownloadState()
                        == VMTemplateStorageResourceAssoc.Status.DOWNLOADED
                    && tmpltStore.getState() == State.Ready
                    && tmpltStore.getInstallPath() == null) {
                  s_logger.info("Skip sync template for migration of previous NFS to object store");
                  continue;
                }
              }

              if (availHypers.contains(tmplt.getHypervisorType())) {
                s_logger.info(
                    "Downloading template "
                        + tmplt.getUniqueName()
                        + " to image store "
                        + store.getName());
                associateTemplateToZone(tmplt.getId(), zoneId);
                TemplateInfo tmpl =
                    _templateFactory.getTemplate(tmplt.getId(), DataStoreRole.Image);
                createTemplateAsync(tmpl, store, null);
              } else {
                s_logger.info(
                    "Skip downloading template "
                        + tmplt.getUniqueName()
                        + " since current data center does not have hypervisor "
                        + tmplt.getHypervisorType().toString());
              }
            }
          }

          for (String uniqueName : templateInfos.keySet()) {
            TemplateProp tInfo = templateInfos.get(uniqueName);
            if (_tmpltMgr.templateIsDeleteable(tInfo.getId())) {
              // we cannot directly call deleteTemplateSync here to
              // reuse delete logic since in this case, our db does not have
              // this template at all.
              TemplateObjectTO tmplTO = new TemplateObjectTO();
              tmplTO.setDataStore(store.getTO());
              tmplTO.setPath(tInfo.getInstallPath());
              tmplTO.setId(tInfo.getId());
              DeleteCommand dtCommand = new DeleteCommand(tmplTO);
              EndPoint ep = _epSelector.select(store);
              Answer answer = null;
              if (ep == null) {
                String errMsg =
                    "No remote endpoint to send command, check if host or ssvm is down?";
                s_logger.error(errMsg);
                answer = new Answer(dtCommand, false, errMsg);
              } else {
                answer = ep.sendMessage(dtCommand);
              }
              if (answer == null || !answer.getResult()) {
                s_logger.info("Failed to deleted template at store: " + store.getName());

              } else {
                String description =
                    "Deleted template "
                        + tInfo.getTemplateName()
                        + " on secondary storage "
                        + storeId;
                s_logger.info(description);
              }
            }
          }
        } finally {
          syncLock.unlock();
        }
      } else {
        s_logger.info(
            "Couldn't get global lock on "
                + lockString
                + ", another thread may be doing template sync on data store "
                + storeId
                + " now.");
      }
    } finally {
      syncLock.releaseRef();
    }
  }
Ejemplo n.º 13
0
  @Override
  public void handleTemplateSync(HostVO ssHost) {
    if (ssHost == null) {
      s_logger.warn("Huh? ssHost is null");
      return;
    }
    long sserverId = ssHost.getId();
    long zoneId = ssHost.getDataCenterId();
    if (!(ssHost.getType() == Host.Type.SecondaryStorage
        || ssHost.getType() == Host.Type.LocalSecondaryStorage)) {
      s_logger.warn("Huh? Agent id " + sserverId + " is not secondary storage host");
      return;
    }

    Map<String, TemplateInfo> templateInfos = listTemplate(ssHost);
    if (templateInfos == null) {
      return;
    }

    Set<VMTemplateVO> toBeDownloaded = new HashSet<VMTemplateVO>();
    List<VMTemplateVO> allTemplates = _templateDao.listAllInZone(zoneId);
    List<VMTemplateVO> rtngTmplts = _templateDao.listAllSystemVMTemplates();
    List<VMTemplateVO> defaultBuiltin = _templateDao.listDefaultBuiltinTemplates();

    if (rtngTmplts != null) {
      for (VMTemplateVO rtngTmplt : rtngTmplts) {
        if (!allTemplates.contains(rtngTmplt)) {
          allTemplates.add(rtngTmplt);
        }
      }
    }

    if (defaultBuiltin != null) {
      for (VMTemplateVO builtinTmplt : defaultBuiltin) {
        if (!allTemplates.contains(builtinTmplt)) {
          allTemplates.add(builtinTmplt);
        }
      }
    }

    toBeDownloaded.addAll(allTemplates);

    for (VMTemplateVO tmplt : allTemplates) {
      String uniqueName = tmplt.getUniqueName();
      VMTemplateHostVO tmpltHost = _vmTemplateHostDao.findByHostTemplate(sserverId, tmplt.getId());
      if (templateInfos.containsKey(uniqueName)) {
        TemplateInfo tmpltInfo = templateInfos.remove(uniqueName);
        toBeDownloaded.remove(tmplt);
        if (tmpltHost != null) {
          s_logger.info(
              "Template Sync found " + tmplt.getName() + " already in the template host table");
          if (tmpltHost.getDownloadState() != Status.DOWNLOADED) {
            tmpltHost.setErrorString("");
          }
          if (tmpltInfo.isCorrupted()) {
            tmpltHost.setDownloadState(Status.DOWNLOAD_ERROR);
            String msg =
                "Template "
                    + tmplt.getName()
                    + ":"
                    + tmplt.getId()
                    + " is corrupted on secondary storage "
                    + tmpltHost.getId();
            tmpltHost.setErrorString(msg);
            s_logger.info("msg");
            if (tmplt.getUrl() == null) {
              msg =
                  "Private Template ("
                      + tmplt
                      + ") with install path "
                      + tmpltInfo.getInstallPath()
                      + "is corrupted, please check in secondary storage: "
                      + tmpltHost.getHostId();
              s_logger.warn(msg);
            } else {
              toBeDownloaded.add(tmplt);
            }

          } else {
            tmpltHost.setDownloadPercent(100);
            tmpltHost.setDownloadState(Status.DOWNLOADED);
            tmpltHost.setInstallPath(tmpltInfo.getInstallPath());
            tmpltHost.setSize(tmpltInfo.getSize());
            tmpltHost.setPhysicalSize(tmpltInfo.getPhysicalSize());
            tmpltHost.setLastUpdated(new Date());

            // Skipping limit checks for SYSTEM Account and for the templates created from volumes
            // or snapshots
            // which already got checked and incremented during createTemplate API call.
            if (tmpltInfo.getSize() > 0
                && tmplt.getAccountId() != Account.ACCOUNT_ID_SYSTEM
                && tmplt.getUrl() != null) {
              long accountId = tmplt.getAccountId();
              try {
                _resourceLimitMgr.checkResourceLimit(
                    _accountMgr.getAccount(accountId),
                    com.cloud.configuration.Resource.ResourceType.secondary_storage,
                    tmpltInfo.getSize() - UriUtils.getRemoteSize(tmplt.getUrl()));
              } catch (ResourceAllocationException e) {
                s_logger.warn(e.getMessage());
                _alertMgr.sendAlert(
                    _alertMgr.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED,
                    ssHost.getDataCenterId(),
                    null,
                    e.getMessage(),
                    e.getMessage());
              } finally {
                _resourceLimitMgr.recalculateResourceCount(
                    accountId,
                    _accountMgr.getAccount(accountId).getDomainId(),
                    com.cloud.configuration.Resource.ResourceType.secondary_storage.getOrdinal());
              }
            }
          }
          _vmTemplateHostDao.update(tmpltHost.getId(), tmpltHost);
        } else {
          tmpltHost =
              new VMTemplateHostVO(
                  sserverId,
                  tmplt.getId(),
                  new Date(),
                  100,
                  Status.DOWNLOADED,
                  null,
                  null,
                  null,
                  tmpltInfo.getInstallPath(),
                  tmplt.getUrl());
          tmpltHost.setSize(tmpltInfo.getSize());
          tmpltHost.setPhysicalSize(tmpltInfo.getPhysicalSize());
          _vmTemplateHostDao.persist(tmpltHost);
          VMTemplateZoneVO tmpltZoneVO =
              _vmTemplateZoneDao.findByZoneTemplate(zoneId, tmplt.getId());
          if (tmpltZoneVO == null) {
            tmpltZoneVO = new VMTemplateZoneVO(zoneId, tmplt.getId(), new Date());
            _vmTemplateZoneDao.persist(tmpltZoneVO);
          } else {
            tmpltZoneVO.setLastUpdated(new Date());
            _vmTemplateZoneDao.update(tmpltZoneVO.getId(), tmpltZoneVO);
          }
        }

        continue;
      }
      if (tmpltHost != null && tmpltHost.getDownloadState() != Status.DOWNLOADED) {
        s_logger.info(
            "Template Sync did not find "
                + tmplt.getName()
                + " ready on server "
                + sserverId
                + ", will request download to start/resume shortly");

      } else if (tmpltHost == null) {
        s_logger.info(
            "Template Sync did not find "
                + tmplt.getName()
                + " on the server "
                + sserverId
                + ", will request download shortly");
        VMTemplateHostVO templtHost =
            new VMTemplateHostVO(
                sserverId,
                tmplt.getId(),
                new Date(),
                0,
                Status.NOT_DOWNLOADED,
                null,
                null,
                null,
                null,
                tmplt.getUrl());
        _vmTemplateHostDao.persist(templtHost);
        VMTemplateZoneVO tmpltZoneVO = _vmTemplateZoneDao.findByZoneTemplate(zoneId, tmplt.getId());
        if (tmpltZoneVO == null) {
          tmpltZoneVO = new VMTemplateZoneVO(zoneId, tmplt.getId(), new Date());
          _vmTemplateZoneDao.persist(tmpltZoneVO);
        } else {
          tmpltZoneVO.setLastUpdated(new Date());
          _vmTemplateZoneDao.update(tmpltZoneVO.getId(), tmpltZoneVO);
        }
      }
    }

    if (toBeDownloaded.size() > 0) {
      /* Only download templates whose hypervirsor type is in the zone */
      List<HypervisorType> availHypers = _clusterDao.getAvailableHypervisorInZone(zoneId);
      if (availHypers.isEmpty()) {
        /*
         * This is for cloudzone, local secondary storage resource
         * started before cluster created
         */
        availHypers.add(HypervisorType.KVM);
      }
      /* Baremetal need not to download any template */
      availHypers.remove(HypervisorType.BareMetal);
      availHypers.add(HypervisorType.None); // bug 9809: resume ISO
      // download.
      for (VMTemplateVO tmplt : toBeDownloaded) {
        if (tmplt.getUrl() == null) { // If url is null we can't
          // initiate the download
          continue;
        }
        // if this is private template, and there is no record for this
        // template in this sHost, skip
        if (!tmplt.isPublicTemplate() && !tmplt.isFeatured()) {
          VMTemplateHostVO tmpltHost =
              _vmTemplateHostDao.findByHostTemplate(sserverId, tmplt.getId());
          if (tmpltHost == null) {
            continue;
          }
        }
        if (availHypers.contains(tmplt.getHypervisorType())) {
          if (_swiftMgr.isSwiftEnabled()) {
            if (_swiftMgr.isTemplateInstalled(tmplt.getId())) {
              continue;
            }
          }
          s_logger.debug(
              "Template " + tmplt.getName() + " needs to be downloaded to " + ssHost.getName());
          downloadTemplateToStorage(tmplt, ssHost);
        } else {
          s_logger.info(
              "Skipping download of template "
                  + tmplt.getName()
                  + " since we don't have any "
                  + tmplt.getHypervisorType()
                  + " hypervisors");
        }
      }
    }

    for (String uniqueName : templateInfos.keySet()) {
      TemplateInfo tInfo = templateInfos.get(uniqueName);
      List<UserVmVO> userVmUsingIso = _userVmDao.listByIsoId(tInfo.getId());
      // check if there is any Vm using this ISO.
      if (userVmUsingIso == null || userVmUsingIso.isEmpty()) {
        DeleteTemplateCommand dtCommand =
            new DeleteTemplateCommand(ssHost.getStorageUrl(), tInfo.getInstallPath());
        try {
          _agentMgr.sendToSecStorage(ssHost, dtCommand, null);
        } catch (AgentUnavailableException e) {
          String err =
              "Failed to delete "
                  + tInfo.getTemplateName()
                  + " on secondary storage "
                  + sserverId
                  + " which isn't in the database";
          s_logger.error(err);
          return;
        }

        String description =
            "Deleted template "
                + tInfo.getTemplateName()
                + " on secondary storage "
                + sserverId
                + " since it isn't in the database";
        s_logger.info(description);
      }
    }
  }
Ejemplo n.º 14
0
  @Override
  public void handleVolumeSync(HostVO ssHost) {
    if (ssHost == null) {
      s_logger.warn("Huh? ssHost is null");
      return;
    }
    long sserverId = ssHost.getId();
    if (!(ssHost.getType() == Host.Type.SecondaryStorage
        || ssHost.getType() == Host.Type.LocalSecondaryStorage)) {
      s_logger.warn("Huh? Agent id " + sserverId + " is not secondary storage host");
      return;
    }

    Map<Long, TemplateInfo> volumeInfos = listVolume(ssHost);
    if (volumeInfos == null) {
      return;
    }

    List<VolumeHostVO> dbVolumes = _volumeHostDao.listBySecStorage(sserverId);
    List<VolumeHostVO> toBeDownloaded = new ArrayList<VolumeHostVO>(dbVolumes);
    for (VolumeHostVO volumeHost : dbVolumes) {
      VolumeVO volume = _volumeDao.findById(volumeHost.getVolumeId());
      // Exists then don't download
      if (volumeInfos.containsKey(volume.getId())) {
        TemplateInfo volInfo = volumeInfos.remove(volume.getId());
        toBeDownloaded.remove(volumeHost);
        s_logger.info(
            "Volume Sync found " + volume.getUuid() + " already in the volume host table");
        if (volumeHost.getDownloadState() != Status.DOWNLOADED) {
          volumeHost.setErrorString("");
        }
        if (volInfo.isCorrupted()) {
          volumeHost.setDownloadState(Status.DOWNLOAD_ERROR);
          String msg = "Volume " + volume.getUuid() + " is corrupted on secondary storage ";
          volumeHost.setErrorString(msg);
          s_logger.info("msg");
          if (volumeHost.getDownloadUrl() == null) {
            msg =
                "Volume ("
                    + volume.getUuid()
                    + ") with install path "
                    + volInfo.getInstallPath()
                    + "is corrupted, please check in secondary storage: "
                    + volumeHost.getHostId();
            s_logger.warn(msg);
          } else {
            toBeDownloaded.add(volumeHost);
          }

        } else { // Put them in right status
          volumeHost.setDownloadPercent(100);
          volumeHost.setDownloadState(Status.DOWNLOADED);
          volumeHost.setInstallPath(volInfo.getInstallPath());
          volumeHost.setSize(volInfo.getSize());
          volumeHost.setPhysicalSize(volInfo.getPhysicalSize());
          volumeHost.setLastUpdated(new Date());
          _volumeHostDao.update(volumeHost.getId(), volumeHost);

          if (volume.getSize() == 0) {
            // Set volume size in volumes table
            volume.setSize(volInfo.getSize());
            _volumeDao.update(volumeHost.getVolumeId(), volume);
          }

          if (volInfo.getSize() > 0) {
            try {
              String url = _volumeHostDao.findByVolumeId(volume.getId()).getDownloadUrl();
              _resourceLimitMgr.checkResourceLimit(
                  _accountMgr.getAccount(volume.getAccountId()),
                  com.cloud.configuration.Resource.ResourceType.secondary_storage,
                  volInfo.getSize() - UriUtils.getRemoteSize(url));
            } catch (ResourceAllocationException e) {
              s_logger.warn(e.getMessage());
              _alertMgr.sendAlert(
                  _alertMgr.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED,
                  volume.getDataCenterId(),
                  volume.getPodId(),
                  e.getMessage(),
                  e.getMessage());
            } finally {
              _resourceLimitMgr.recalculateResourceCount(
                  volume.getAccountId(),
                  volume.getDomainId(),
                  com.cloud.configuration.Resource.ResourceType.secondary_storage.getOrdinal());
            }
          }
        }
        continue;
      }
      // Volume is not on secondary but we should download.
      if (volumeHost.getDownloadState() != Status.DOWNLOADED) {
        s_logger.info(
            "Volume Sync did not find "
                + volume.getName()
                + " ready on server "
                + sserverId
                + ", will request download to start/resume shortly");
        toBeDownloaded.add(volumeHost);
      }
    }

    // Download volumes which haven't been downloaded yet.
    if (toBeDownloaded.size() > 0) {
      for (VolumeHostVO volumeHost : toBeDownloaded) {
        if (volumeHost.getDownloadUrl() == null) { // If url is null we can't initiate the download
          continue;
        }
        s_logger.debug(
            "Volume "
                + volumeHost.getVolumeId()
                + " needs to be downloaded to "
                + ssHost.getName());
        downloadVolumeToStorage(
            _volumeDao.findById(volumeHost.getVolumeId()),
            ssHost,
            volumeHost.getDownloadUrl(),
            volumeHost.getChecksum(),
            volumeHost.getFormat());
      }
    }

    // Delete volumes which are not present on DB.
    for (Long uniqueName : volumeInfos.keySet()) {
      TemplateInfo vInfo = volumeInfos.get(uniqueName);
      DeleteVolumeCommand dtCommand =
          new DeleteVolumeCommand(ssHost.getStorageUrl(), vInfo.getInstallPath());
      try {
        _agentMgr.sendToSecStorage(ssHost, dtCommand, null);
      } catch (AgentUnavailableException e) {
        String err =
            "Failed to delete "
                + vInfo.getTemplateName()
                + " on secondary storage "
                + sserverId
                + " which isn't in the database";
        s_logger.error(err);
        return;
      }

      String description =
          "Deleted volume "
              + vInfo.getTemplateName()
              + " on secondary storage "
              + sserverId
              + " since it isn't in the database";
      s_logger.info(description);
    }
  }