/**
 * Flags the given slave so that it gets deleted.
 *
 * <p>Sets the template status to {@code Constants.TEMPLATE_STATUS_DISBALED} with the supplied
 * message, takes the slave's computer temporarily offline (when the slave has one) and finally
 * sets the slave's delete flag.
 *
 * @param slave the slave to mark
 * @param message the status message stored together with the disabled template status
 */
private static void markSlaveForDeletion(AzureSlave slave, String message) {
    slave.setTemplateStatus(Constants.TEMPLATE_STATUS_DISBALED, message);

    // A slave may have no computer attached; only then is there nothing to take offline.
    if (slave.toComputer() != null) {
        OfflineCause failedToConnect = OfflineCause.create(Messages._Slave_Failed_To_Connect());
        slave.toComputer().setTemporarilyOffline(true, failedToConnect);
    }

    slave.setDeleteSlave(true);
}
public void execute(TaskListener arg0) throws IOException, InterruptedException { for (Computer computer : Hudson.getInstance().getComputers()) { if (computer instanceof AzureComputer) { AzureComputer azureComputer = (AzureComputer) computer; AzureSlave slaveNode = azureComputer.getNode(); try { if (azureComputer.isOffline()) { if (!slaveNode.isDeleteSlave()) { // Find out if node exists in azure , if not continue with delete else do not delete // node // although it is offline. May be JNLP or SSH launch is in progress if (AzureManagementServiceDelegate.isVirtualMachineExists(slaveNode)) { LOGGER.info( "AzureSlaveCleanUpTask: execute: VM " + slaveNode.getDisplayName() + " exists in cloud"); continue; } } int retryCount = 0; boolean successful = false; // Retrying for 30 times with 30 seconds wait time between each retry while (retryCount < 30 && !successful) { try { slaveNode.idleTimeout(); successful = true; } catch (Exception e) { retryCount++; LOGGER.info( "AzureSlaveCleanUpTask: execute: Exception occured while calling timeout on node , \n" + "Will retry again after 30 seconds. Current retry count " + retryCount + "\n" + "Error code " + e.getMessage()); // We won't get exception for RNF , so for other exception types we can retry try { Thread.sleep(30 * 1000); } catch (InterruptedException e1) { e1.printStackTrace(); } } } Hudson.getInstance().removeNode(slaveNode); } } catch (Exception e) { LOGGER.severe("AzureSlaveCleanUpTask: execute: failed to remove node " + e); } } } }
/** this methods wait for node to be available */ private void waitUntilOnline(final AzureSlave slave) { LOGGER.info("Azure Cloud: waitUntilOnline: for slave " + slave.getDisplayName()); ExecutorService executorService = Executors.newCachedThreadPool(); Callable<String> callableTask = new Callable<String>() { public String call() { try { slave.toComputer().waitUntilOnline(); } catch (InterruptedException e) { // just ignore } return "success"; } }; Future<String> future = executorService.submit(callableTask); try { // 30 minutes is decent time for the node to be alive String result = future.get(30, TimeUnit.MINUTES); LOGGER.info("Azure Cloud: waitUntilOnline: node is alive , result " + result); } catch (TimeoutException ex) { LOGGER.info("Azure Cloud: waitUntilOnline: Got TimeoutException " + ex); markSlaveForDeletion(slave, Constants.JNLP_POST_PROV_LAUNCH_FAIL); } catch (InterruptedException ex) { LOGGER.info("Azure Cloud: InterruptedException: Got TimeoutException " + ex); markSlaveForDeletion(slave, Constants.JNLP_POST_PROV_LAUNCH_FAIL); } catch (ExecutionException ex) { LOGGER.info("Azure Cloud: ExecutionException: Got TimeoutException " + ex); markSlaveForDeletion(slave, Constants.JNLP_POST_PROV_LAUNCH_FAIL); } finally { future.cancel(true); executorService.shutdown(); } }
/** Checks if node configuration matches with template definition. */ private static boolean isNodeEligibleForReuse( AzureSlave slaveNode, AzureSlaveTemplate slaveTemplate) { // Do not reuse slave if it is marked for deletion. if (slaveNode.isDeleteSlave()) { return false; } // Check for null label and mode. if (AzureUtil.isNull(slaveNode.getLabelString()) && (slaveNode.getMode() == Node.Mode.NORMAL)) { return true; } if (AzureUtil.isNotNull(slaveNode.getLabelString()) && slaveNode.getLabelString().equalsIgnoreCase(slaveTemplate.getLabels())) { return true; } return false; }