コード例 #1
0
  @Override
  public void scheduleRestartForVmsOnHost(final HostVO host) {

    if (host.getType() != Host.Type.Routing) {
      return;
    }
    s_logger.warn("Scheduling restart for VMs on host " + host.getId());

    final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
    final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());

    // send an email alert that the host is down
    StringBuilder sb = null;
    if ((vms != null) && !vms.isEmpty()) {
      sb = new StringBuilder();
      sb.append("  Starting HA on the following VMs: ");
      // collect list of vm names for the alert email
      VMInstanceVO vm = vms.get(0);
      if (vm.isHaEnabled()) {
        sb.append(" " + vm.getName());
      }
      for (int i = 1; i < vms.size(); i++) {
        vm = vms.get(i);
        if (vm.isHaEnabled()) {
          sb.append(" " + vm.getName());
        }
      }
    }

    // send an email alert that the host is down, include VMs
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + " (id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    _alertMgr.sendAlert(
        AlertManager.ALERT_TYPE_HOST,
        host.getDataCenterId(),
        host.getPodId(),
        "Host is down, " + hostDesc,
        "Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : ""));

    for (final VMInstanceVO vm : vms) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Notifying HA Mgr of to investigate vm " + vm.getId() + "-" + vm.getName());
      }
      scheduleRestart(vm, true);
    }
  }
コード例 #2
0
  protected Long restart(HaWorkVO work) {
    List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return null;
    }

    items = _haDao.listRunningHaWorkForVm(work.getInstanceId());
    if (items.size() > 0) {
      StringBuilder str =
          new StringBuilder(
              "Waiting because there's HA work being executed on an item currently.  Work Ids =[");
      for (HaWorkVO item : items) {
        str.append(item.getId()).append(", ");
      }
      str.delete(str.length() - 2, str.length()).append("]");
      s_logger.info(str.toString());
      return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
    }

    long vmId = work.getInstanceId();

    VMInstanceVO vm = _itMgr.findByIdAndType(work.getType(), work.getInstanceId());
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm);
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_SSVM;
    }

    HostVO host = _hostDao.findById(work.getHostId());
    boolean isHostRemoved = false;
    if (host == null) {
      host = _hostDao.findByIdIncludingRemoved(work.getHostId());
      if (host != null) {
        s_logger.debug(
            "VM "
                + vm.toString()
                + " is now no longer on host "
                + work.getHostId()
                + " as the host is removed");
        isHostRemoved = true;
      }
    }

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (!isHostRemoved) {
        if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
          s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
          return null;
        }

        Enumeration<Investigator> en = _investigators.enumeration();
        Investigator investigator = null;
        while (en.hasMoreElements()) {
          investigator = en.nextElement();
          alive = investigator.isVmAlive(vm, host);
          s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive);
          if (alive != null) {
            break;
          }
        }
        boolean fenced = false;
        if (alive == null) {
          s_logger.debug("Fencing off VM that we don't know the state of");
          Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
          while (enfb.hasMoreElements()) {
            FenceBuilder fb = enfb.nextElement();
            Boolean result = fb.fenceOff(vm, host);
            s_logger.info("Fencer " + fb.getName() + " returned " + result);
            if (result != null && result) {
              fenced = true;
              break;
            }
          }
        } else if (!alive) {
          fenced = true;
        } else {
          s_logger.debug(
              "VM " + vm.getHostName() + " is found to be alive by " + investigator.getName());
          if (host.getStatus() == Status.Up) {
            s_logger.info(vm + " is alive and host is up. No need to restart it.");
            return null;
          } else {
            s_logger.debug("Rescheduling because the host is not up but the vm is alive");
            return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
          }
        }

        if (!fenced) {
          s_logger.debug("We were unable to fence off the VM " + vm);
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterIdToDeployIn(),
              vm.getPodIdToDeployIn(),
              "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
              "Insufficient capacity to restart VM, name: "
                  + vm.getHostName()
                  + ", id: "
                  + vmId
                  + " which was running on host "
                  + hostDesc);
          return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
        }

        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }

        work.setStep(Step.Scheduled);
        _haDao.update(work.getId(), work);
      } else {
        s_logger.debug(
            "How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways");
        try {
          _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount());
        } catch (ResourceUnavailableException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (OperationTimedoutException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        } catch (ConcurrentOperationException e) {
          assert false : "How do we hit this when force is true?";
          throw new CloudRuntimeException("Caught exception even though it should be handled.", e);
        }
      }
    }

    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started =
          _itMgr.advanceStart(
              vm,
              new HashMap<VirtualMachineProfile.Param, Object>(),
              _accountMgr.getSystemUser(),
              _accountMgr.getSystemAccount());
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (final ResourceUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    } catch (OperationTimedoutException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterIdToDeployIn(),
          vm.getPodIdToDeployIn(),
          "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getHostName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
    }
    vm = _itMgr.findByIdAndType(vm.getType(), vm.getId());
    work.setUpdateTime(vm.getUpdated());
    work.setPreviousState(vm.getState());
    return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
  }
コード例 #3
0
  protected void createDb() {
    DataCenterVO dc =
        new DataCenterVO(
            UUID.randomUUID().toString(),
            "test",
            "8.8.8.8",
            null,
            "10.0.0.1",
            null,
            "10.0.0.1/24",
            null,
            null,
            NetworkType.Basic,
            null,
            null,
            true,
            true,
            null,
            null);
    dc = dcDao.persist(dc);
    dcId = dc.getId();

    HostPodVO pod =
        new HostPodVO(UUID.randomUUID().toString(), dc.getId(), "255.255.255.255", "", 8, "test");
    pod = podDao.persist(pod);
    podId = pod.getId();

    ClusterVO cluster = new ClusterVO(dc.getId(), pod.getId(), "devcloud cluster");
    cluster.setHypervisorType(HypervisorType.XenServer.toString());
    cluster.setClusterType(ClusterType.CloudManaged);
    cluster.setManagedState(ManagedState.Managed);
    cluster = clusterDao.persist(cluster);
    clusterId = cluster.getId();

    DataStoreProvider provider =
        providerMgr.getDataStoreProvider("ancient primary data store provider");
    storage = new StoragePoolVO();
    storage.setDataCenterId(dcId);
    storage.setPodId(podId);
    storage.setPoolType(StoragePoolType.NetworkFilesystem);
    storage.setClusterId(clusterId);
    storage.setStatus(StoragePoolStatus.Up);
    storage.setScope(ScopeType.CLUSTER);
    storage.setAvailableBytes(1000);
    storage.setCapacityBytes(20000);
    storage.setHostAddress(UUID.randomUUID().toString());
    storage.setPath(UUID.randomUUID().toString());
    storage.setStorageProviderName(provider.getName());
    storage = storagePoolDao.persist(storage);
    storagePoolId = storage.getId();

    storageMgr.createCapacityEntry(storage.getId());

    diskOffering = new DiskOfferingVO();
    diskOffering.setDiskSize(500);
    diskOffering.setName("test-disk");
    diskOffering.setSystemUse(false);
    diskOffering.setUseLocalStorage(false);
    diskOffering.setCustomized(false);
    diskOffering.setRecreatable(false);
    diskOffering = diskOfferingDao.persist(diskOffering);
    diskOfferingId = diskOffering.getId();

    volume =
        new VolumeVO(
            Volume.Type.ROOT,
            "volume",
            dcId,
            1,
            1,
            diskOffering.getId(),
            diskOffering.getDiskSize());
    volume = volumeDao.persist(volume);
    volumeId = volume.getId();
  }
コード例 #4
0
  @Override
  @DB
  public BaremetalPxeVO addPxeServer(AddBaremetalPxeCmd cmd) {
    AddBaremetalPxePingServerCmd pcmd = (AddBaremetalPxePingServerCmd) cmd;

    PhysicalNetworkVO pNetwork = null;
    long zoneId;

    if (cmd.getPhysicalNetworkId() == null
        || cmd.getUrl() == null
        || cmd.getUsername() == null
        || cmd.getPassword() == null) {
      throw new IllegalArgumentException(
          "At least one of the required parameters(physical network id, url, username, password) is null");
    }

    pNetwork = _physicalNetworkDao.findById(cmd.getPhysicalNetworkId());
    if (pNetwork == null) {
      throw new IllegalArgumentException(
          "Could not find phyical network with ID: " + cmd.getPhysicalNetworkId());
    }
    zoneId = pNetwork.getDataCenterId();

    PhysicalNetworkServiceProviderVO ntwkSvcProvider =
        _physicalNetworkServiceProviderDao.findByServiceProvider(
            pNetwork.getId(), BaremetalPxeManager.BAREMETAL_PXE_SERVICE_PROVIDER.getName());
    if (ntwkSvcProvider == null) {
      throw new CloudRuntimeException(
          "Network Service Provider: "
              + BaremetalPxeManager.BAREMETAL_PXE_SERVICE_PROVIDER.getName()
              + " is not enabled in the physical network: "
              + cmd.getPhysicalNetworkId()
              + "to add this device");
    } else if (ntwkSvcProvider.getState() == PhysicalNetworkServiceProvider.State.Shutdown) {
      throw new CloudRuntimeException(
          "Network Service Provider: "
              + ntwkSvcProvider.getProviderName()
              + " is in shutdown state in the physical network: "
              + cmd.getPhysicalNetworkId()
              + "to add this device");
    }

    HostPodVO pod = _podDao.findById(cmd.getPodId());
    if (pod == null) {
      throw new IllegalArgumentException("Could not find pod with ID: " + cmd.getPodId());
    }

    List<HostVO> pxes =
        _resourceMgr.listAllUpAndEnabledHosts(Host.Type.BaremetalPxe, null, cmd.getPodId(), zoneId);
    if (pxes.size() != 0) {
      throw new IllegalArgumentException(
          "Already had a PXE server in Pod: " + cmd.getPodId() + " zone: " + zoneId);
    }

    String storageServerIp = pcmd.getPingStorageServerIp();
    if (storageServerIp == null) {
      throw new IllegalArgumentException("No IP for storage server specified");
    }
    String pingDir = pcmd.getPingDir();
    if (pingDir == null) {
      throw new IllegalArgumentException("No direcotry for storage server specified");
    }
    String tftpDir = pcmd.getTftpDir();
    if (tftpDir == null) {
      throw new IllegalArgumentException("No TFTP directory specified");
    }

    String cifsUsername = pcmd.getPingStorageServerUserName();
    if (cifsUsername == null || cifsUsername.equalsIgnoreCase("")) {
      cifsUsername = "******";
    }
    String cifsPassword = pcmd.getPingStorageServerPassword();
    if (cifsPassword == null || cifsPassword.equalsIgnoreCase("")) {
      cifsPassword = "******";
    }

    URI uri;
    try {
      uri = new URI(cmd.getUrl());
    } catch (Exception e) {
      s_logger.debug(e);
      throw new IllegalArgumentException(e.getMessage());
    }
    String ipAddress = uri.getHost();

    String guid =
        getPxeServerGuid(
            Long.toString(zoneId) + "-" + pod.getId(), BaremetalPxeType.PING.toString(), ipAddress);

    ServerResource resource = null;
    Map params = new HashMap<String, String>();
    params.put(BaremetalPxeService.PXE_PARAM_ZONE, Long.toString(zoneId));
    params.put(BaremetalPxeService.PXE_PARAM_POD, String.valueOf(pod.getId()));
    params.put(BaremetalPxeService.PXE_PARAM_IP, ipAddress);
    params.put(BaremetalPxeService.PXE_PARAM_USERNAME, cmd.getUsername());
    params.put(BaremetalPxeService.PXE_PARAM_PASSWORD, cmd.getPassword());
    params.put(BaremetalPxeService.PXE_PARAM_PING_STORAGE_SERVER_IP, storageServerIp);
    params.put(BaremetalPxeService.PXE_PARAM_PING_ROOT_DIR, pingDir);
    params.put(BaremetalPxeService.PXE_PARAM_TFTP_DIR, tftpDir);
    params.put(BaremetalPxeService.PXE_PARAM_PING_STORAGE_SERVER_USERNAME, cifsUsername);
    params.put(BaremetalPxeService.PXE_PARAM_PING_STORAGE_SERVER_PASSWORD, cifsPassword);
    params.put(BaremetalPxeService.PXE_PARAM_GUID, guid);

    resource = new BaremetalPingPxeResource();
    try {
      resource.configure("PING PXE resource", params);
    } catch (Exception e) {
      s_logger.debug(e);
      throw new CloudRuntimeException(e.getMessage());
    }

    Host pxeServer = _resourceMgr.addHost(zoneId, resource, Host.Type.BaremetalPxe, params);
    if (pxeServer == null) {
      throw new CloudRuntimeException("Cannot add PXE server as a host");
    }

    BaremetalPxeVO vo = new BaremetalPxeVO();
    Transaction txn = Transaction.currentTxn();
    vo.setHostId(pxeServer.getId());
    vo.setNetworkServiceProviderId(ntwkSvcProvider.getId());
    vo.setPodId(pod.getId());
    vo.setPhysicalNetworkId(pcmd.getPhysicalNetworkId());
    vo.setDeviceType(BaremetalPxeType.PING.toString());
    txn.start();
    _pxeDao.persist(vo);
    txn.commit();
    return vo;
  }
コード例 #5
0
  public Long migrate(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null || vm.getRemoved() != null) {
      s_logger.debug("Unable to find the vm " + vmId);
      return null;
    }

    s_logger.info("Migrating vm: " + vm.toString());
    if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
      s_logger.info("VM is not longer running on the current hostId");
      return null;
    }

    short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
    }

    HostVO fromHost = _hostDao.findById(vm.getHostId());
    String fromHostName = ((fromHost == null) ? "unknown" : fromHost.getName());
    HostVO toHost = null;
    if (work.getStep() == Step.Scheduled) {
      if (vm.getState() != State.Running) {
        s_logger.info(
            "VM's state is not ready for migration. "
                + vm.toString()
                + " State is "
                + vm.getState().toString());
        return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
      }

      DataCenterVO dcVO = _dcDao.findById(fromHost.getDataCenterId());
      HostPodVO podVO = _podDao.findById(fromHost.getPodId());

      try {
        toHost = mgr.prepareForMigration(vm);
        if (toHost == null) {
          if (s_logger.isDebugEnabled()) {
            s_logger.debug("Unable to find a host for migrating vm " + vmId);
          }
          _alertMgr.sendAlert(
              alertType,
              vm.getDataCenterId(),
              vm.getPodId(),
              "Unable to migrate vm "
                  + vm.getName()
                  + " from host "
                  + fromHostName
                  + " in zone "
                  + dcVO.getName()
                  + " and pod "
                  + podVO.getName(),
              "Unable to find a suitable host");
        }
      } catch (final InsufficientCapacityException e) {
        s_logger.warn("Unable to mgirate due to insufficient capacity " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Insufficient capacity");
      } catch (final StorageUnavailableException e) {
        s_logger.warn("Storage is unavailable: " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHostName
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Storage is gone.");
      }

      if (toHost == null) {
        _agentMgr.maintenanceFailed(vm.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Migrating from " + work.getHostId() + " to " + toHost.getId());
      }
      work.setStep(Step.Migrating);
      work.setHostId(toHost.getId());
      _haDao.update(work.getId(), work);
    }

    if (work.getStep() == Step.Migrating) {
      vm = mgr.get(vmId); // let's see if anything has changed.
      boolean migrated = false;
      if (vm == null
          || vm.getRemoved() != null
          || vm.getHostId() == null
          || !_itMgr.stateTransitTo(vm, Event.MigrationRequested, vm.getHostId())) {
        s_logger.info("Migration cancelled because state has changed: " + vm.toString());
      } else {
        try {
          boolean isWindows =
              _guestOSCategoryDao
                  .findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId())
                  .getName()
                  .equalsIgnoreCase("Windows");
          MigrateCommand cmd =
              new MigrateCommand(vm.getInstanceName(), toHost.getPrivateIpAddress(), isWindows);
          Answer answer = _agentMgr.send(fromHost.getId(), cmd);
          if (answer != null && answer.getResult()) {
            migrated = true;
            _storageMgr.unshare(vm, fromHost);
            work.setStep(Step.Investigating);
            _haDao.update(work.getId(), work);
          }
        } catch (final AgentUnavailableException e) {
          s_logger.debug("host became unavailable");
        } catch (final OperationTimedoutException e) {
          s_logger.debug("operation timed out");
          if (e.isActive()) {
            scheduleRestart(vm, true);
          }
        }
      }

      if (!migrated) {
        s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm.toString());

        DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
        HostPodVO podVO = _podDao.findById(vm.getPodId());
        _alertMgr.sendAlert(
            alertType,
            fromHost.getDataCenterId(),
            fromHost.getPodId(),
            "Unable to migrate vm "
                + vm.getName()
                + " from host "
                + fromHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migrate Command failed.  Please check logs.");

        _itMgr.stateTransitTo(vm, Event.MigrationFailedOnSource, toHost.getId());
        _agentMgr.maintenanceFailed(vm.getHostId());

        Command cleanup = mgr.cleanup(vm, null);
        _agentMgr.easySend(toHost.getId(), cleanup);
        _storageMgr.unshare(vm, toHost);

        return null;
      }
    }

    if (toHost == null) {
      toHost = _hostDao.findById(work.getHostId());
    }
    DataCenterVO dcVO = _dcDao.findById(toHost.getDataCenterId());
    HostPodVO podVO = _podDao.findById(toHost.getPodId());

    try {
      if (!mgr.completeMigration(vm, toHost)) {
        _alertMgr.sendAlert(
            alertType,
            toHost.getDataCenterId(),
            toHost.getPodId(),
            "Unable to migrate "
                + vmId
                + " to host "
                + toHost.getName()
                + " in zone "
                + dcVO.getName()
                + " and pod "
                + podVO.getName(),
            "Migration not completed");
        s_logger.warn("Unable to complete migration: " + vm.toString());
      } else {
        s_logger.info("Migration is complete: " + vm.toString());
      }
      return null;
    } catch (final AgentUnavailableException e) {
      s_logger.warn("Agent is unavailable for " + vm.toString());
    } catch (final OperationTimedoutException e) {
      s_logger.warn("Operation timed outfor " + vm.toString());
    }
    _itMgr.stateTransitTo(vm, Event.MigrationFailedOnDest, toHost.getId());
    return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
  }
コード例 #6
0
  /**
   * compareState does as its name suggests and compares the states between management server and
   * agent. It returns whether something should be cleaned up
   */
  protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync) {
    State agentState = info.state;
    final String agentName = info.name;
    final State serverState = vm.getState();
    final String serverName = vm.getName();

    Command command = null;

    if (s_logger.isDebugEnabled()) {
      s_logger.debug(
          "VM "
              + serverName
              + ": server state = "
              + serverState.toString()
              + " and agent state = "
              + agentState.toString());
    }

    if (agentState == State.Error) {
      agentState = State.Stopped;

      short alertType = AlertManager.ALERT_TYPE_USERVM;
      if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
      } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
        alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
      }

      HostPodVO podVO = _podDao.findById(vm.getPodId());
      DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
      HostVO hostVO = _hostDao.findById(vm.getHostId());

      String hostDesc =
          "name: "
              + hostVO.getName()
              + " (id:"
              + hostVO.getId()
              + "), availability zone: "
              + dcVO.getName()
              + ", pod: "
              + podVO.getName();
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "VM (name: "
              + vm.getName()
              + ", id: "
              + vm.getId()
              + ") stopped on host "
              + hostDesc
              + " due to storage failure",
          "Virtual Machine "
              + vm.getName()
              + " (id: "
              + vm.getId()
              + ") running on host ["
              + vm.getHostId()
              + "] stopped due to storage failure.");
    }

    if (serverState == State.Migrating) {
      s_logger.debug("Skipping vm in migrating state: " + vm.toString());
      return null;
    }

    if (agentState == serverState) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Both states are " + agentState.toString() + " for " + serverName);
      }
      assert (agentState == State.Stopped || agentState == State.Running)
          : "If the states we send up is changed, this must be changed.";
      _itMgr.stateTransitTo(
          vm,
          agentState == State.Stopped
              ? VirtualMachine.Event.AgentReportStopped
              : VirtualMachine.Event.AgentReportRunning,
          vm.getHostId());
      if (agentState == State.Stopped) {
        s_logger.debug("State matches but the agent said stopped so let's send a cleanup anyways.");
        return info.mgr.cleanup(vm, agentName);
      }
      return null;
    }

    if (agentState == State.Stopped) {
      // This state means the VM on the agent was detected previously
      // and now is gone.  This is slightly different than if the VM
      // was never completed but we still send down a Stop Command
      // to ensure there's cleanup.
      if (serverState == State.Running) {
        // Our records showed that it should be running so let's restart it.
        vm = info.mgr.get(vm.getId());
        scheduleRestart(vm, false);
        command = info.mgr.cleanup(vm, agentName);
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM is in stopping state on full sync.  Updating the status to stopped");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("Ignoring VM in stopping mode: " + vm.getName());
        }
      } else if (serverState == State.Starting) {
        s_logger.debug("Ignoring VM in starting mode: " + vm.getName());
      } else {
        s_logger.debug("Sending cleanup to a stopped vm: " + agentName);
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null);
        command = info.mgr.cleanup(vm, agentName);
      }
    } else if (agentState == State.Running) {
      if (serverState == State.Starting) {
        if (fullSync) {
          s_logger.debug("VM state is starting on full sync so updating it to running");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStartCommand(vm);
        }
      } else if (serverState == State.Stopping) {
        if (fullSync) {
          s_logger.debug("VM state is in stopping on fullsync so resend stop.");
          vm = info.mgr.get(vm.getId());
          info.mgr.completeStopCommand(vm);
          command = info.mgr.cleanup(vm, agentName);
        } else {
          s_logger.debug("VM is in stopping state so no action.");
        }
      } else if (serverState == State.Destroyed
          || serverState == State.Stopped
          || serverState == State.Expunging) {
        s_logger.debug("VM state is in stopped so stopping it on the agent");
        vm = info.mgr.get(vm.getId());
        command = info.mgr.cleanup(vm, agentName);
      } else {
        _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId());
      }
    } /*else if (agentState == State.Unknown) {
          if (serverState == State.Running) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Starting) {
              if (fullSync) {
                  vm = info.handler.get(vm.getId());
              }
              scheduleRestart(vm, false);
          } else if (serverState == State.Stopping) {
              if (fullSync) {
                  s_logger.debug("VM state is stopping in full sync.  Resending stop");
                  command = info.handler.cleanup(vm, agentName);
              }
          }
      }*/
    return command;
  }
コード例 #7
0
  protected Long restart(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
    if (mgr == null) {
      s_logger.warn(
          "Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId);
      return null;
    }

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm.toString());
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm.toString()
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    final HostVO host = _hostDao.findById(work.getHostId());

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    }

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
        s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
        if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) {
          _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null);
        }
        return null;
      }

      Enumeration<Investigator> en = _investigators.enumeration();
      Investigator investigator = null;
      while (en.hasMoreElements()) {
        investigator = en.nextElement();
        alive = investigator.isVmAlive(vm, host);
        if (alive != null) {
          s_logger.debug(
              investigator.getName() + " found VM " + vm.getName() + "to be alive? " + alive);
          break;
        }
      }
      if (alive != null && alive) {
        s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName());
        if (host.getStatus() == Status.Up) {
          compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false);
          return null;
        } else {
          s_logger.debug("Rescheduling because the host is not up but the vm is alive");
          return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
        }
      }

      boolean fenced = false;
      if (alive == null || !alive) {
        fenced = true;
        s_logger.debug("Fencing off VM that we don't know the state of");
        Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
        while (enfb.hasMoreElements()) {
          final FenceBuilder fb = enfb.nextElement();
          Boolean result = fb.fenceOff(vm, host);
          if (result != null && !result) {
            fenced = false;
          }
        }
      }

      if (alive == null && !fenced) {
        s_logger.debug("We were unable to fence off the VM " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
            "Insufficient capacity to restart VM, name: "
                + vm.getName()
                + ", id: "
                + vmId
                + " which was running on host "
                + hostDesc);
        return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
      }

      mgr.completeStopCommand(vm);

      work.setStep(Step.Scheduled);
      _haDao.update(work.getId(), work);
    }

    // send an alert for VMs that stop unexpectedly
    _alertMgr.sendAlert(
        alertType,
        vm.getDataCenterId(),
        vm.getPodId(),
        "VM (name: "
            + vm.getName()
            + ", id: "
            + vmId
            + ") stopped unexpectedly on host "
            + hostDesc,
        "Virtual Machine "
            + vm.getName()
            + " (id: "
            + vm.getId()
            + ") running on host ["
            + hostDesc
            + "] stopped unexpectedly.");

    vm = mgr.get(vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started = mgr.start(vm.getId(), 0);
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
      vm = mgr.get(vm.getId());
      work.setUpdateTime(vm.getUpdated());
      work.setPreviousState(vm.getState());
      return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (final StorageUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ExecutionException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    }
  }
コード例 #8
0
  @Test(priority = -1)
  public void setUp() {
    ComponentContext.initComponentsLifeCycle();

    host = hostDao.findByGuid(this.getHostGuid());
    if (host != null) {
      dcId = host.getDataCenterId();
      clusterId = host.getClusterId();
      podId = host.getPodId();
      return;
    }
    // create data center
    DataCenterVO dc =
        new DataCenterVO(
            UUID.randomUUID().toString(),
            "test",
            "8.8.8.8",
            null,
            "10.0.0.1",
            null,
            "10.0.0.1/24",
            null,
            null,
            NetworkType.Basic,
            null,
            null,
            true,
            true,
            null,
            null);
    dc = dcDao.persist(dc);
    dcId = dc.getId();
    // create pod

    HostPodVO pod =
        new HostPodVO(
            UUID.randomUUID().toString(),
            dc.getId(),
            this.getHostGateway(),
            this.getHostCidr(),
            8,
            "test");
    pod = podDao.persist(pod);
    podId = pod.getId();
    // create xen cluster
    ClusterVO cluster = new ClusterVO(dc.getId(), pod.getId(), "devcloud cluster");
    cluster.setHypervisorType(HypervisorType.XenServer.toString());
    cluster.setClusterType(ClusterType.CloudManaged);
    cluster.setManagedState(ManagedState.Managed);
    cluster = clusterDao.persist(cluster);
    clusterId = cluster.getId();
    // create xen host

    host = new HostVO(this.getHostGuid());
    host.setName("devcloud xen host");
    host.setType(Host.Type.Routing);
    host.setPrivateIpAddress(this.getHostIp());
    host.setDataCenterId(dc.getId());
    host.setVersion("6.0.1");
    host.setAvailable(true);
    host.setSetup(true);
    host.setPodId(podId);
    host.setLastPinged(0);
    host.setResourceState(ResourceState.Enabled);
    host.setHypervisorType(HypervisorType.XenServer);
    host.setClusterId(cluster.getId());

    host = hostDao.persist(host);

    imageStore = new ImageStoreVO();
    imageStore.setName("test");
    imageStore.setDataCenterId(dcId);
    imageStore.setProviderName("CloudStack ImageStore Provider");
    imageStore.setRole(DataStoreRole.Image);
    imageStore.setUrl(this.getSecondaryStorage());
    imageStore.setUuid(UUID.randomUUID().toString());
    imageStore = imageStoreDao.persist(imageStore);
  }
コード例 #9
0
  @Test(priority = -1)
  public void setUp() {
    ComponentContext.initComponentsLifeCycle();

    host = hostDao.findByGuid(this.getHostGuid());
    if (host != null) {
      dcId = host.getDataCenterId();
      clusterId = host.getClusterId();
      podId = host.getPodId();
      imageStore = this.imageStoreDao.findByName(imageStoreName);
    } else {
      // create data center
      DataCenterVO dc =
          new DataCenterVO(
              UUID.randomUUID().toString(),
              "test",
              "8.8.8.8",
              null,
              "10.0.0.1",
              null,
              "10.0.0.1/24",
              null,
              null,
              NetworkType.Basic,
              null,
              null,
              true,
              true,
              null,
              null);
      dc = dcDao.persist(dc);
      dcId = dc.getId();
      // create pod

      HostPodVO pod =
          new HostPodVO(
              UUID.randomUUID().toString(),
              dc.getId(),
              this.getHostGateway(),
              this.getHostCidr(),
              8,
              "test");
      pod = podDao.persist(pod);
      podId = pod.getId();
      // create xen cluster
      ClusterVO cluster = new ClusterVO(dc.getId(), pod.getId(), "devcloud cluster");
      cluster.setHypervisorType(HypervisorType.VMware.toString());
      cluster.setClusterType(ClusterType.ExternalManaged);
      cluster.setManagedState(ManagedState.Managed);
      cluster = clusterDao.persist(cluster);
      clusterId = cluster.getId();

      // setup vcenter
      ClusterDetailsVO clusterDetailVO = new ClusterDetailsVO(cluster.getId(), "url", null);
      this.clusterDetailsDao.persist(clusterDetailVO);
      clusterDetailVO = new ClusterDetailsVO(cluster.getId(), "username", null);
      this.clusterDetailsDao.persist(clusterDetailVO);
      clusterDetailVO = new ClusterDetailsVO(cluster.getId(), "password", null);
      this.clusterDetailsDao.persist(clusterDetailVO);
      // create xen host

      host = new HostVO(this.getHostGuid());
      host.setName("devcloud vmware host");
      host.setType(Host.Type.Routing);
      host.setPrivateIpAddress(this.getHostIp());
      host.setDataCenterId(dc.getId());
      host.setVersion("6.0.1");
      host.setAvailable(true);
      host.setSetup(true);
      host.setPodId(podId);
      host.setLastPinged(0);
      host.setResourceState(ResourceState.Enabled);
      host.setHypervisorType(HypervisorType.VMware);
      host.setClusterId(cluster.getId());

      host = hostDao.persist(host);

      imageStore = new ImageStoreVO();
      imageStore.setName(imageStoreName);
      imageStore.setDataCenterId(dcId);
      imageStore.setProviderName("CloudStack ImageStore Provider");
      imageStore.setRole(DataStoreRole.Image);
      imageStore.setUrl(this.getSecondaryStorage());
      imageStore.setUuid(UUID.randomUUID().toString());
      imageStore.setProtocol("nfs");
      imageStore = imageStoreDao.persist(imageStore);
    }

    image = new VMTemplateVO();
    image.setTemplateType(TemplateType.USER);
    image.setUrl(this.getTemplateUrl());
    image.setUniqueName(UUID.randomUUID().toString());
    image.setName(UUID.randomUUID().toString());
    image.setPublicTemplate(true);
    image.setFeatured(true);
    image.setRequiresHvm(true);
    image.setBits(64);
    image.setFormat(Storage.ImageFormat.VHD);
    image.setEnablePassword(true);
    image.setEnableSshKey(true);
    image.setGuestOSId(1);
    image.setBootable(true);
    image.setPrepopulate(true);
    image.setCrossZones(true);
    image.setExtractable(true);

    image = imageDataDao.persist(image);

    /*
     * TemplateDataStoreVO templateStore = new TemplateDataStoreVO();
     *
     * templateStore.setDataStoreId(imageStore.getId());
     * templateStore.setDownloadPercent(100);
     * templateStore.setDownloadState(Status.DOWNLOADED);
     * templateStore.setDownloadUrl(imageStore.getUrl());
     * templateStore.setInstallPath(this.getImageInstallPath());
     * templateStore.setTemplateId(image.getId());
     * templateStoreDao.persist(templateStore);
     */

    DataStore store = this.dataStoreMgr.getDataStore(imageStore.getId(), DataStoreRole.Image);
    TemplateInfo template = templateFactory.getTemplate(image.getId(), DataStoreRole.Image);
    DataObject templateOnStore = store.create(template);
    TemplateObjectTO to = new TemplateObjectTO();
    to.setPath(this.getImageInstallPath());
    CopyCmdAnswer answer = new CopyCmdAnswer(to);
    templateOnStore.processEvent(Event.CreateOnlyRequested);
    templateOnStore.processEvent(Event.OperationSuccessed, answer);
  }
コード例 #10
0
  private void generateEmailAlert(
      DataCenterVO dc,
      HostPodVO pod,
      ClusterVO cluster,
      double totalCapacity,
      double usedCapacity,
      short capacityType) {

    String msgSubject = null;
    String msgContent = null;
    String totalStr;
    String usedStr;
    String pctStr = formatPercent(usedCapacity / totalCapacity);
    short alertType = -1;
    Long podId = pod == null ? null : pod.getId();
    Long clusterId = cluster == null ? null : cluster.getId();

    switch (capacityType) {

        // Cluster Level
      case CapacityVO.CAPACITY_TYPE_MEMORY:
        msgSubject =
            "System Alert: Low Available Memory in cluster "
                + cluster.getName()
                + " pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = formatBytesToMegabytes(totalCapacity);
        usedStr = formatBytesToMegabytes(usedCapacity);
        msgContent =
            "System memory is low, total: "
                + totalStr
                + " MB, used: "
                + usedStr
                + " MB ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_MEMORY;
        break;
      case CapacityVO.CAPACITY_TYPE_CPU:
        msgSubject =
            "System Alert: Low Unallocated CPU in cluster "
                + cluster.getName()
                + " pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = _dfWhole.format(totalCapacity);
        usedStr = _dfWhole.format(usedCapacity);
        msgContent =
            "Unallocated CPU is low, total: "
                + totalStr
                + " Mhz, used: "
                + usedStr
                + " Mhz ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_CPU;
        break;
      case CapacityVO.CAPACITY_TYPE_STORAGE:
        msgSubject =
            "System Alert: Low Available Storage in cluster "
                + cluster.getName()
                + " pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = formatBytesToMegabytes(totalCapacity);
        usedStr = formatBytesToMegabytes(usedCapacity);
        msgContent =
            "Available storage space is low, total: "
                + totalStr
                + " MB, used: "
                + usedStr
                + " MB ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_STORAGE;
        break;
      case CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED:
        msgSubject =
            "System Alert: Remaining unallocated Storage is low in cluster "
                + cluster.getName()
                + " pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = formatBytesToMegabytes(totalCapacity);
        usedStr = formatBytesToMegabytes(usedCapacity);
        msgContent =
            "Unallocated storage space is low, total: "
                + totalStr
                + " MB, allocated: "
                + usedStr
                + " MB ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_STORAGE_ALLOCATED;
        break;
      case CapacityVO.CAPACITY_TYPE_LOCAL_STORAGE:
        msgSubject =
            "System Alert: Remaining unallocated Local Storage is low in cluster "
                + cluster.getName()
                + " pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = formatBytesToMegabytes(totalCapacity);
        usedStr = formatBytesToMegabytes(usedCapacity);
        msgContent =
            "Unallocated storage space is low, total: "
                + totalStr
                + " MB, allocated: "
                + usedStr
                + " MB ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_LOCAL_STORAGE;
        break;

        // Pod Level
      case CapacityVO.CAPACITY_TYPE_PRIVATE_IP:
        msgSubject =
            "System Alert: Number of unallocated private IPs is low in pod "
                + pod.getName()
                + " of availablity zone "
                + dc.getName();
        totalStr = Double.toString(totalCapacity);
        usedStr = Double.toString(usedCapacity);
        msgContent =
            "Number of unallocated private IPs is low, total: "
                + totalStr
                + ", allocated: "
                + usedStr
                + " ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_PRIVATE_IP;
        break;

        // Zone Level
      case CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE:
        msgSubject =
            "System Alert: Low Available Secondary Storage in availablity zone " + dc.getName();
        totalStr = formatBytesToMegabytes(totalCapacity);
        usedStr = formatBytesToMegabytes(usedCapacity);
        msgContent =
            "Available secondary storage space is low, total: "
                + totalStr
                + " MB, used: "
                + usedStr
                + " MB ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_SECONDARY_STORAGE;
        break;
      case CapacityVO.CAPACITY_TYPE_VIRTUAL_NETWORK_PUBLIC_IP:
        msgSubject =
            "System Alert: Number of unallocated virtual network public IPs is low in availablity zone "
                + dc.getName();
        totalStr = Double.toString(totalCapacity);
        usedStr = Double.toString(usedCapacity);
        msgContent =
            "Number of unallocated public IPs is low, total: "
                + totalStr
                + ", allocated: "
                + usedStr
                + " ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_VIRTUAL_NETWORK_PUBLIC_IP;
        break;
      case CapacityVO.CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP:
        msgSubject =
            "System Alert: Number of unallocated direct attached public IPs is low in availablity zone "
                + dc.getName();
        totalStr = Double.toString(totalCapacity);
        usedStr = Double.toString(usedCapacity);
        msgContent =
            "Number of unallocated direct attached public IPs is low, total: "
                + totalStr
                + ", allocated: "
                + usedStr
                + " ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_DIRECT_ATTACHED_PUBLIC_IP;
        break;
      case CapacityVO.CAPACITY_TYPE_VLAN:
        msgSubject =
            "System Alert: Number of unallocated VLANs is low in availablity zone " + dc.getName();
        totalStr = Double.toString(totalCapacity);
        usedStr = Double.toString(usedCapacity);
        msgContent =
            "Number of unallocated VLANs is low, total: "
                + totalStr
                + ", allocated: "
                + usedStr
                + " ("
                + pctStr
                + "%)";
        alertType = ALERT_TYPE_VLAN;
        break;
    }

    try {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug(msgSubject);
        s_logger.debug(msgContent);
      }
      _emailAlert.sendAlert(alertType, dc.getId(), podId, clusterId, msgSubject, msgContent);
    } catch (Exception ex) {
      s_logger.error("Exception in CapacityChecker", ex);
    }
  }
コード例 #11
0
  public void checkForAlerts() {

    recalculateCapacity();

    // abort if we can't possibly send an alert...
    if (_emailAlert == null) {
      return;
    }

    // Get all datacenters, pods and clusters in the system.
    List<DataCenterVO> dataCenterList = _dcDao.listAll();
    List<ClusterVO> clusterList = _clusterDao.listAll();
    List<HostPodVO> podList = _podDao.listAll();
    // Get capacity types at different levels
    List<Short> dataCenterCapacityTypes = getCapacityTypesAtZoneLevel();
    List<Short> podCapacityTypes = getCapacityTypesAtPodLevel();
    List<Short> clusterCapacityTypes = getCapacityTypesAtClusterLevel();

    // Generate Alerts for Zone Level capacities
    for (DataCenterVO dc : dataCenterList) {
      for (Short capacityType : dataCenterCapacityTypes) {
        List<SummedCapacity> capacity = new ArrayList<SummedCapacity>();
        capacity = _capacityDao.findCapacityBy(capacityType.intValue(), dc.getId(), null, null);

        if (capacityType == Capacity.CAPACITY_TYPE_SECONDARY_STORAGE) {
          capacity.add(getUsedStats(capacityType, dc.getId(), null, null));
        }
        if (capacity == null || capacity.size() == 0) {
          continue;
        }
        double totalCapacity = capacity.get(0).getTotalCapacity();
        double usedCapacity = capacity.get(0).getUsedCapacity();
        if (totalCapacity != 0
            && usedCapacity / totalCapacity > _capacityTypeThresholdMap.get(capacityType)) {
          generateEmailAlert(dc, null, null, totalCapacity, usedCapacity, capacityType);
        }
      }
    }

    // Generate Alerts for Pod Level capacities
    for (HostPodVO pod : podList) {
      for (Short capacityType : podCapacityTypes) {
        List<SummedCapacity> capacity =
            _capacityDao.findCapacityBy(
                capacityType.intValue(), pod.getDataCenterId(), pod.getId(), null);
        if (capacity == null || capacity.size() == 0) {
          continue;
        }
        double totalCapacity = capacity.get(0).getTotalCapacity();
        double usedCapacity = capacity.get(0).getUsedCapacity();
        if (totalCapacity != 0
            && usedCapacity / totalCapacity > _capacityTypeThresholdMap.get(capacityType)) {
          generateEmailAlert(
              ApiDBUtils.findZoneById(pod.getDataCenterId()),
              pod,
              null,
              totalCapacity,
              usedCapacity,
              capacityType);
        }
      }
    }

    // Generate Alerts for Cluster Level capacities
    for (ClusterVO cluster : clusterList) {
      for (Short capacityType : clusterCapacityTypes) {
        List<SummedCapacity> capacity = new ArrayList<SummedCapacity>();
        float overProvFactor = 1f;
        capacity =
            _capacityDao.findCapacityBy(
                capacityType.intValue(), cluster.getDataCenterId(), null, cluster.getId());

        if (capacityType == Capacity.CAPACITY_TYPE_STORAGE) {
          capacity.add(
              getUsedStats(
                  capacityType, cluster.getDataCenterId(), cluster.getPodId(), cluster.getId()));
        }
        if (capacity == null || capacity.size() == 0) {
          continue;
        }
        if (capacityType == Capacity.CAPACITY_TYPE_CPU) {
          overProvFactor = ApiDBUtils.getCpuOverprovisioningFactor();
        }

        double totalCapacity = capacity.get(0).getTotalCapacity() * overProvFactor;
        double usedCapacity =
            capacity.get(0).getUsedCapacity() + capacity.get(0).getReservedCapacity();
        if (totalCapacity != 0
            && usedCapacity / totalCapacity > _capacityTypeThresholdMap.get(capacityType)) {
          generateEmailAlert(
              ApiDBUtils.findZoneById(cluster.getDataCenterId()),
              ApiDBUtils.findPodById(cluster.getPodId()),
              cluster,
              totalCapacity,
              usedCapacity,
              capacityType);
        }
      }
    }
  }
コード例 #12
0
  @Override
  @DB
  public void recalculateCapacity() {
    // FIXME: the right way to do this is to register a listener (see RouterStatsListener,
    // VMSyncListener)
    //        for the vm sync state.  The listener model has connects/disconnects to keep things in
    // sync much better
    //        than this model right now, so when a VM is started, we update the amount allocated,
    // and when a VM
    //        is stopped we updated the amount allocated, and when VM sync reports a changed state,
    // we update
    //        the amount allocated.  Hopefully it's limited to 3 entry points and will keep the
    // amount allocated
    //        per host accurate.

    try {

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("recalculating system capacity");
        s_logger.debug("Executing cpu/ram capacity update");
      }

      // Calculate CPU and RAM capacities
      // 	get all hosts...even if they are not in 'UP' state
      List<HostVO> hosts = _resourceMgr.listAllHostsInAllZonesByType(Host.Type.Routing);
      for (HostVO host : hosts) {
        _capacityMgr.updateCapacityForHost(host);
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Done executing cpu/ram capacity update");
        s_logger.debug("Executing storage capacity update");
      }
      // Calculate storage pool capacity
      List<StoragePoolVO> storagePools = _storagePoolDao.listAll();
      for (StoragePoolVO pool : storagePools) {
        long disk = 0l;
        Pair<Long, Long> sizes = _volumeDao.getCountAndTotalByPool(pool.getId());
        disk = sizes.second();
        if (pool.isShared()) {
          _storageMgr.createCapacityEntry(pool, Capacity.CAPACITY_TYPE_STORAGE_ALLOCATED, disk);
        } else {
          _storageMgr.createCapacityEntry(pool, Capacity.CAPACITY_TYPE_LOCAL_STORAGE, disk);
        }
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Done executing storage capacity update");
        s_logger.debug("Executing capacity updates public ip and Vlans");
      }

      List<DataCenterVO> datacenters = _dcDao.listAll();
      for (DataCenterVO datacenter : datacenters) {
        long dcId = datacenter.getId();

        // NOTE
        // What happens if we have multiple vlans? Dashboard currently shows stats
        // with no filter based on a vlan
        // ideal way would be to remove out the vlan param, and filter only on dcId
        // implementing the same

        // Calculate new Public IP capacity for Virtual Network
        if (datacenter.getNetworkType() == NetworkType.Advanced) {
          createOrUpdateIpCapacity(dcId, null, CapacityVO.CAPACITY_TYPE_VIRTUAL_NETWORK_PUBLIC_IP);
        }

        // Calculate new Public IP capacity for Direct Attached Network
        createOrUpdateIpCapacity(dcId, null, CapacityVO.CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP);

        if (datacenter.getNetworkType() == NetworkType.Advanced) {
          // Calculate VLAN's capacity
          createOrUpdateVlanCapacity(dcId);
        }
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Done capacity updates for public ip and Vlans");
        s_logger.debug("Executing capacity updates for private ip");
      }

      // Calculate new Private IP capacity
      List<HostPodVO> pods = _podDao.listAll();
      for (HostPodVO pod : pods) {
        long podId = pod.getId();
        long dcId = pod.getDataCenterId();

        createOrUpdateIpCapacity(dcId, podId, CapacityVO.CAPACITY_TYPE_PRIVATE_IP);
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug("Done executing capacity updates for private ip");
        s_logger.debug("Done recalculating system capacity");
      }

    } catch (Throwable t) {
      s_logger.error("Caught exception in recalculating capacity", t);
    }
  }