예제 #1
0
  @Override
  public void execute()
      throws ResourceUnavailableException, InsufficientCapacityException,
          ResourceAllocationException {
    try {
      UserContext.current().setEventDetails("Vm Id: " + getId());

      UserVm result;
      result = _userVmService.startVirtualMachine(this);

      if (result != null) {
        UserVmResponse response =
            _responseGenerator.createUserVmResponse("virtualmachine", result).get(0);
        response.setResponseName(getCommandName());
        this.setResponseObject(response);
      } else {
        throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to start a vm");
      }
    } catch (ConcurrentOperationException ex) {
      s_logger.warn("Exception: ", ex);
      throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, ex.getMessage());
    } catch (StorageUnavailableException ex) {
      s_logger.warn("Exception: ", ex);
      throw new ServerApiException(ApiErrorCode.RESOURCE_UNAVAILABLE_ERROR, ex.getMessage());
    } catch (ExecutionException ex) {
      s_logger.warn("Exception: ", ex);
      throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, ex.getMessage());
    }
  }
  protected Long restart(final HaWorkVO work) {
    final long vmId = work.getInstanceId();

    final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
    if (mgr == null) {
      s_logger.warn(
          "Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId);
      return null;
    }

    VMInstanceVO vm = mgr.get(vmId);
    if (vm == null) {
      s_logger.info("Unable to find vm: " + vmId);
      return null;
    }

    s_logger.info("HA on " + vm.toString());
    if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
      s_logger.info(
          "VM "
              + vm.toString()
              + " has been changed.  Current State = "
              + vm.getState()
              + " Previous State = "
              + work.getPreviousState()
              + " last updated = "
              + vm.getUpdated()
              + " previous updated = "
              + work.getUpdateTime());
      return null;
    }

    final HostVO host = _hostDao.findById(work.getHostId());

    DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
    HostPodVO podVO = _podDao.findById(host.getPodId());
    String hostDesc =
        "name: "
            + host.getName()
            + "(id:"
            + host.getId()
            + "), availability zone: "
            + dcVO.getName()
            + ", pod: "
            + podVO.getName();

    short alertType = AlertManager.ALERT_TYPE_USERVM;
    if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
    } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
      alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
    }

    Boolean alive = null;
    if (work.getStep() == Step.Investigating) {
      if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
        s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
        if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) {
          _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null);
        }
        return null;
      }

      Enumeration<Investigator> en = _investigators.enumeration();
      Investigator investigator = null;
      while (en.hasMoreElements()) {
        investigator = en.nextElement();
        alive = investigator.isVmAlive(vm, host);
        if (alive != null) {
          s_logger.debug(
              investigator.getName() + " found VM " + vm.getName() + "to be alive? " + alive);
          break;
        }
      }
      if (alive != null && alive) {
        s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName());
        if (host.getStatus() == Status.Up) {
          compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false);
          return null;
        } else {
          s_logger.debug("Rescheduling because the host is not up but the vm is alive");
          return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
        }
      }

      boolean fenced = false;
      if (alive == null || !alive) {
        fenced = true;
        s_logger.debug("Fencing off VM that we don't know the state of");
        Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
        while (enfb.hasMoreElements()) {
          final FenceBuilder fb = enfb.nextElement();
          Boolean result = fb.fenceOff(vm, host);
          if (result != null && !result) {
            fenced = false;
          }
        }
      }

      if (alive == null && !fenced) {
        s_logger.debug("We were unable to fence off the VM " + vm.toString());
        _alertMgr.sendAlert(
            alertType,
            vm.getDataCenterId(),
            vm.getPodId(),
            "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
            "Insufficient capacity to restart VM, name: "
                + vm.getName()
                + ", id: "
                + vmId
                + " which was running on host "
                + hostDesc);
        return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
      }

      mgr.completeStopCommand(vm);

      work.setStep(Step.Scheduled);
      _haDao.update(work.getId(), work);
    }

    // send an alert for VMs that stop unexpectedly
    _alertMgr.sendAlert(
        alertType,
        vm.getDataCenterId(),
        vm.getPodId(),
        "VM (name: "
            + vm.getName()
            + ", id: "
            + vmId
            + ") stopped unexpectedly on host "
            + hostDesc,
        "Virtual Machine "
            + vm.getName()
            + " (id: "
            + vm.getId()
            + ") running on host ["
            + hostDesc
            + "] stopped unexpectedly.");

    vm = mgr.get(vm.getId());

    if (!_forceHA && !vm.isHaEnabled()) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM is not HA enabled so we're done.");
      }
      return null; // VM doesn't require HA
    }

    if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
      if (s_logger.isDebugEnabled()) {
        s_logger.debug("VM can not restart on another server.");
      }
      return null;
    }

    if (work.getTimesTried() > _maxRetries) {
      s_logger.warn("Retried to max times so deleting: " + vmId);
      return null;
    }

    try {
      VMInstanceVO started = mgr.start(vm.getId(), 0);
      if (started != null) {
        s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
        return null;
      }

      if (s_logger.isDebugEnabled()) {
        s_logger.debug(
            "Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
      }
      vm = mgr.get(vm.getId());
      work.setUpdateTime(vm.getUpdated());
      work.setPreviousState(vm.getState());
      return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
    } catch (final InsufficientCapacityException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "Insufficient capacity to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (final StorageUnavailableException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ConcurrentOperationException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    } catch (ExecutionException e) {
      s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
      _alertMgr.sendAlert(
          alertType,
          vm.getDataCenterId(),
          vm.getPodId(),
          "Unable to restart " + vm.getName() + " which was running on host " + hostDesc,
          "The Storage is unavailable for trying to restart VM, name: "
              + vm.getName()
              + ", id: "
              + vmId
              + " which was running on host "
              + hostDesc);
      return null;
    }
  }