private void loadAllProperties() throws IOException { // First load all the properties for (FlowProps fprops : flow.getFlowProps()) { String source = fprops.getSource(); File propsPath = new File(execDir, source); Props props = new Props(null, propsPath); sharedProps.put(source, props); } // Resolve parents for (FlowProps fprops : flow.getFlowProps()) { if (fprops.getInheritedSource() != null) { String source = fprops.getSource(); String inherit = fprops.getInheritedSource(); Props props = sharedProps.get(source); Props inherits = sharedProps.get(inherit); props.setParent(inherits); } else { String source = fprops.getSource(); Props props = sharedProps.get(source); props.setParent(globalProps); } } }
/**
 * Answers an ajax update request by collecting, for each requested execution id, either an update
 * object or an error entry.
 *
 * <p>The request carries two parallel JSON lists: execution ids and the update time last seen for
 * each. A flow contributes an update object only when its update time is strictly newer than the
 * supplied one. An id with no matching flow contributes an error map containing the id.
 *
 * @param req request holding the two JSON list parameters
 * @param respMap response map that receives the list under {@code RESPONSE_UPDATED_FLOWS}
 * @throws ServletException declared by this handler
 * @throws IOException declared by this handler
 */
@SuppressWarnings("unchecked")
private void handleAjaxUpdateRequest(HttpServletRequest req, Map<String, Object> respMap)
    throws ServletException, IOException {
  ArrayList<Object> clientUpdateTimes =
      (ArrayList<Object>) JSONUtils.parseJSONFromString(getParam(req, UPDATE_TIME_LIST_PARAM));
  ArrayList<Object> requestedExecIds =
      (ArrayList<Object>) JSONUtils.parseJSONFromString(getParam(req, EXEC_ID_LIST_PARAM));

  ArrayList<Object> updates = new ArrayList<Object>();
  for (int idx = 0; idx < requestedExecIds.size(); idx++) {
    long clientTime = JSONUtils.getLongFromObject(clientUpdateTimes.get(idx));
    int execId = (Integer) requestedExecIds.get(idx);

    ExecutableFlow exFlow = flowRunnerManager.getExecutableFlow(execId);
    if (exFlow != null) {
      // Only report flows that changed after the time the client already knows about.
      if (exFlow.getUpdateTime() > clientTime) {
        updates.add(exFlow.toUpdateObject(clientTime));
      }
    } else {
      Map<String, Object> notFound = new HashMap<String, Object>();
      notFound.put(RESPONSE_ERROR, "Flow does not exist");
      notFound.put(UPDATE_MAP_EXEC_ID, execId);
      updates.add(notFound);
    }
  }

  respMap.put(RESPONSE_UPDATED_FLOWS, updates);
}
private Props loadJobProps(ExecutableNode node) throws IOException { Props props = null; String source = node.getJobSource(); if (source == null) { return null; } // load the override props if any try { props = projectLoader.fetchProjectProperty( flow.getProjectId(), flow.getVersion(), node.getId() + ".jor"); } catch (ProjectManagerException e) { e.printStackTrace(); logger.error("Error loading job override property for job " + node.getId()); } File path = new File(execDir, source); if (props == null) { // if no override prop, load the original one on disk try { props = new Props(null, path); } catch (IOException e) { e.printStackTrace(); logger.error("Error loading job file " + source + " for job " + node.getId()); } } // setting this fake source as this will be used to determine the location of log files. if (path.getPath() != null) { props.setSource(path.getPath()); } return props; }
private void setupFlowExecution() { int projectId = flow.getProjectId(); int version = flow.getVersion(); String flowId = flow.getFlowId(); // Add a bunch of common azkaban properties commonProps = PropsUtils.addCommonFlowProperties(flow); // Create execution dir createLogger(flowId); if (this.watcher != null) { this.watcher.setLogger(logger); } logger.info( "Running execid:" + execId + " flow:" + flowId + " project:" + projectId + " version:" + version); if (pipelineExecId != null) { logger.info( "Running simulateously with " + pipelineExecId + ". Pipelining level " + pipelineLevel); } // The current thread is used for interrupting blocks flowRunnerThread = Thread.currentThread(); flowRunnerThread.setName("FlowRunner-exec-" + flow.getExecutionId()); }
/**
 * Writes the status of one execution into the response map.
 *
 * @param respMap response map to fill; gets {@code RESPONSE_NOTFOUND} under {@code STATUS_PARAM}
 *     when the execution is unknown, otherwise the status string and the flow's update time
 * @param execid id of the execution to look up
 */
private void handleAjaxFlowStatus(Map<String, Object> respMap, int execid) {
  ExecutableFlow exFlow = flowRunnerManager.getExecutableFlow(execid);
  if (exFlow == null) {
    respMap.put(STATUS_PARAM, RESPONSE_NOTFOUND);
    return;
  }

  respMap.put(STATUS_PARAM, exFlow.getStatus().toString());
  respMap.put(RESPONSE_UPDATETIME, exFlow.getUpdateTime());
}
private Status getImpliedStatus(ExecutableNode node) { switch (node.getStatus()) { case FAILED: case KILLED: case SKIPPED: case SUCCEEDED: case QUEUED: case RUNNING: return null; default: break; } boolean shouldKill = false; for (String dependency : node.getInNodes()) { ExecutableNode dependencyNode = flow.getExecutableNode(dependency); Status depStatus = dependencyNode.getStatus(); switch (depStatus) { case FAILED: case KILLED: shouldKill = true; case SKIPPED: case SUCCEEDED: continue; case RUNNING: case QUEUED: case DISABLED: return null; default: // Return null means it's not ready to run. return null; } } ExecutionOptions options = flow.getExecutionOptions(); if (shouldKill || flowCancelled || (flowFailed && options.getFailureAction() != FailureAction.FINISH_ALL_POSSIBLE)) { return Status.KILLED; } // If it's disabled but ready to run, we want to make sure it continues being disabled. if (node.getStatus() == Status.DISABLED) { return Status.DISABLED; } // All good to go, ready to run. return Status.READY; }
/**
 * Tells whether the flow has nothing left to do.
 *
 * @return {@code true} only when no job runner is active and every end node of the flow has a
 *     status that {@code Status.isStatusFinished} accepts
 */
private boolean isFlowFinished() {
  if (activeJobRunners.isEmpty()) {
    for (String endNodeId : flow.getEndNodes()) {
      Status endStatus = flow.getExecutableNode(endNodeId).getStatus();
      if (!Status.isStatusFinished(endStatus)) {
        return false;
      }
    }
    return true;
  }
  return false;
}
/**
 * Stamps the flow with the given update time and persists it through the executor loader.
 * A persistence failure is logged and not rethrown.
 *
 * @param time the update time to record on the flow
 */
private synchronized void updateFlow(long time) {
  flow.setUpdateTime(time);
  try {
    executorLoader.updateExecutableFlow(flow);
  } catch (ExecutorManagerException persistFailure) {
    logger.error("Error updating flow.", persistFailure);
  }
}
/**
 * Kills the flow on behalf of a user: marks the flow KILLED, invokes the no-argument
 * {@code kill()}, persists the flow, and finally interrupts once the lock has been released.
 *
 * @param user name of the user requesting the kill, used for logging only
 */
public void kill(String user) {
  final String notice = "Flow killed by " + user;
  synchronized (mainSyncObj) {
    logger.info(notice);
    flow.setStatus(Status.KILLED);
    kill();
    updateFlow();
  }
  // Issued outside the synchronized block, as in the original ordering.
  interrupt();
}
public void retryJobs(List<String> jobIds, String user) { synchronized (mainSyncObj) { for (String jobId : jobIds) { ExecutableNode node = flow.getExecutableNode(jobId); if (node == null) { logger.error( "Job " + jobId + " doesn't exist in execution " + flow.getExecutionId() + ". Cannot retry."); continue; } if (Status.isStatusFinished(node.getStatus())) { // Resets the status and increments the attempt number node.resetForRetry(); reEnableDependents(node); logger.info("Re-enabling job " + node.getJobId() + " attempt " + node.getAttempt()); } else { logger.error("Cannot retry job " + jobId + " since it hasn't run yet. User " + user); continue; } } boolean isFailureFound = false; for (ExecutableNode node : flow.getExecutableNodes()) { Status nodeStatus = node.getStatus(); if (nodeStatus == Status.FAILED || nodeStatus == Status.KILLED) { isFailureFound = true; break; } } if (!isFailureFound) { flow.setStatus(Status.RUNNING); flow.setUpdateTime(System.currentTimeMillis()); flowFailed = false; } updateFlow(); interrupt(); } }
/**
 * Resumes a paused flow. The restored flow status depends on what happened while paused:
 * FAILED_FINISHING if the flow has failed, KILLED if it was cancelled, RUNNING otherwise.
 * Calling this on a flow that is not paused only logs a message.
 *
 * @param user user requesting the resume, used for logging only
 */
public void resume(String user) {
  synchronized (mainSyncObj) {
    if (!flowPaused) {
      logger.info("Cannot resume flow that isn't paused");
      return;
    }

    logger.info("Flow resumed by " + user);
    flowPaused = false;

    Status resumedStatus;
    if (flowFailed) {
      resumedStatus = Status.FAILED_FINISHING;
    } else if (flowCancelled) {
      resumedStatus = Status.KILLED;
    } else {
      resumedStatus = Status.RUNNING;
    }
    flow.setStatus(resumedStatus);

    updateFlow();
  }
}
/**
 * Creates a runner for the given executable flow.
 *
 * <p>The execution id, execution directory, proxy users and pipeline settings are all taken from
 * the flow and its execution options.
 *
 * @param flow the flow to run
 * @param executorLoader loader stored for later use by this runner
 * @param projectLoader loader stored for later use by this runner
 * @param jobtypeManager job type manager stored for later use by this runner
 * @throws ExecutorManagerException declared by this constructor
 */
public FlowRunner(
    ExecutableFlow flow,
    ExecutorLoader executorLoader,
    ProjectLoader projectLoader,
    JobTypeManager jobtypeManager)
    throws ExecutorManagerException {
  // Collaborators handed in by the caller.
  this.flow = flow;
  this.executorLoader = executorLoader;
  this.projectLoader = projectLoader;
  this.jobtypeManager = jobtypeManager;

  // Values derived from the flow itself.
  this.execId = flow.getExecutionId();
  this.execDir = new File(flow.getExecutionPath());
  this.proxyUsers = flow.getProxyUsers();

  // Pipelining configuration from the execution options.
  ExecutionOptions execOptions = flow.getExecutionOptions();
  this.pipelineLevel = execOptions.getPipelineLevel();
  this.pipelineExecId = execOptions.getPipelineExecutionId();
}
/**
 * Locates the attachment file written for one attempt of a job.
 *
 * <p>The file is looked up next to the job's source file inside {@code execDir}, under the name
 * produced by {@code JobRunner.createAttachmentFileName}.
 *
 * @param jobId id (path) of the job within this flow
 * @param attempt attempt number of the job
 * @return the attachment file, or {@code null} when the job is unknown, has no job source, or the
 *     file does not exist
 */
public File getJobAttachmentFile(String jobId, int attempt) {
  ExecutableNode node = flow.getExecutableNodePath(jobId);
  // An unknown job or a missing source used to surface as a NullPointerException; report
  // "no file" instead, the same way a missing file is reported.
  if (node == null || node.getJobSource() == null) {
    return null;
  }

  File jobFile = new File(execDir, node.getJobSource());
  String attachmentFileName = JobRunner.createAttachmentFileName(node, attempt);
  File attachmentFile = new File(jobFile.getParentFile(), attachmentFileName);
  return attachmentFile.exists() ? attachmentFile : null;
}
/**
 * Builds a {@code FlowRunner} for the named flow, backed by the fake executor and project loaders.
 *
 * <p>A new {@code ExecutableFlow} is created with the next id, pointed at {@code workingDir},
 * given the supplied flow parameters and uploaded to the fake executor loader. The runner is then
 * created from the flow fetched back from that loader.
 *
 * @param flowName key of the flow in {@code flowMap}
 * @param flowParams flow parameters to add to the execution options
 * @return the new runner
 * @throws Exception declared by this helper
 */
private FlowRunner createFlowRunner(String flowName, HashMap<String, String> flowParams)
    throws Exception {
  Flow definition = flowMap.get(flowName);
  int executionId = id++;

  ExecutableFlow executable = new ExecutableFlow(project, definition);
  executable.setExecutionPath(workingDir.getPath());
  executable.setExecutionId(executionId);
  executable.getExecutionOptions().addAllFlowParameters(flowParams);
  fakeExecutorLoader.uploadExecutableFlow(executable);

  return new FlowRunner(
      fakeExecutorLoader.fetchExecutableFlow(executionId),
      fakeExecutorLoader,
      fakeProjectLoader,
      jobtypeManager);
}
/**
 * Locates the metadata file written for one attempt of a job.
 *
 * <p>The file is looked up next to the job's props source inside {@code execDir}, under the name
 * produced by {@code JobRunner.createMetaDataFileName}.
 *
 * @param jobId id of the job within this flow
 * @param attempt attempt number of the job
 * @return the metadata file, or {@code null} when the job is unknown, has no props source, or the
 *     file does not exist
 */
public File getJobMetaDataFile(String jobId, int attempt) {
  ExecutableNode node = flow.getExecutableNode(jobId);
  // An unknown job or a missing source used to surface as a NullPointerException; report
  // "no file" instead, the same way a missing file is reported.
  if (node == null || node.getJobPropsSource() == null) {
    return null;
  }

  File jobFile = new File(execDir, node.getJobPropsSource());
  String metaDataFileName = JobRunner.createMetaDataFileName(execId, jobId, attempt);
  File metaDataFile = new File(jobFile.getParentFile(), metaDataFileName);
  return metaDataFile.exists() ? metaDataFile : null;
}
/** * Constructor. If executorService is null, then it will create it's own for thread pools. * * @param flow * @param executorLoader * @param projectLoader * @param jobtypeManager * @param executorService * @throws ExecutorManagerException */ public FlowRunner( ExecutableFlow flow, ExecutorLoader executorLoader, ProjectLoader projectLoader, JobTypeManager jobtypeManager, ExecutorService executorService) throws ExecutorManagerException { this.execId = flow.getExecutionId(); this.flow = flow; this.executorLoader = executorLoader; this.projectLoader = projectLoader; this.execDir = new File(flow.getExecutionPath()); this.jobtypeManager = jobtypeManager; ExecutionOptions options = flow.getExecutionOptions(); this.pipelineLevel = options.getPipelineLevel(); this.pipelineExecId = options.getPipelineExecutionId(); this.failureAction = options.getFailureAction(); this.proxyUsers = flow.getProxyUsers(); this.executorService = executorService; this.finishedNodes = new SwapQueue<ExecutableNode>(); }
@SuppressWarnings("unchecked") private void setupFlowExecution() { int projectId = flow.getProjectId(); int version = flow.getVersion(); String flowId = flow.getFlowId(); // Add a bunch of common azkaban properties Props commonFlowProps = PropsUtils.addCommonFlowProperties(null, flow); if (flow.getJobSource() != null) { String source = flow.getJobSource(); Props flowProps = sharedProps.get(source); flowProps.setParent(commonFlowProps); commonFlowProps = flowProps; } // If there are flow overrides, we apply them now. Map<String, String> flowParam = flow.getExecutionOptions().getFlowParameters(); if (flowParam != null && !flowParam.isEmpty()) { commonFlowProps = new Props(commonFlowProps, flowParam); } flow.setInputProps(commonFlowProps); // Create execution dir createLogger(flowId); if (this.watcher != null) { this.watcher.setLogger(logger); } logger.info( "Running execid:" + execId + " flow:" + flowId + " project:" + projectId + " version:" + version); if (pipelineExecId != null) { logger.info( "Running simulateously with " + pipelineExecId + ". Pipelining level " + pipelineLevel); } // The current thread is used for interrupting blocks flowRunnerThread = Thread.currentThread(); flowRunnerThread.setName("FlowRunner-exec-" + flow.getExecutionId()); }
/**
 * Cancels the flow: clears the paused flag, marks the flow as cancelled, stops the watcher if one
 * is attached, cancels every active job runner, and sets the flow status to KILLED unless it is
 * already FAILED or FAILED_FINISHING.
 */
private void cancel() {
  synchronized (mainSyncObj) {
    logger.info("Cancel has been called on flow " + execId);
    flowPaused = false;
    flowCancelled = true;

    if (watcher != null) {
      logger.info("Watcher is attached. Stopping watcher.");
      watcher.stopWatcher();
      logger.info("Watcher cancelled status is " + watcher.isWatchCancelled());
    }

    logger.info("Cancelling " + activeJobRunners.size() + " jobs.");
    for (JobRunner active : activeJobRunners.values()) {
      active.cancel();
    }

    // A flow that has already failed keeps its failure status.
    Status current = flow.getStatus();
    boolean alreadyFailed = current == Status.FAILED || current == Status.FAILED_FINISHING;
    if (!alreadyFailed) {
      logger.info("Setting flow status to " + Status.KILLED.toString());
      flow.setStatus(Status.KILLED);
    }
  }
}
/**
 * Runs the flow from start to finish.
 *
 * <p>Creates a fixed thread pool when no executor service was supplied, sets up the execution,
 * records the start time, loads properties, fires {@code FLOW_STARTED} and executes the jobs. Any
 * throwable raised along the way is logged (when a logger exists) and marks the flow FAILED.
 * Whatever the outcome, the watcher is stopped, the end time is recorded, the logger is closed,
 * the flow is persisted and {@code FLOW_FINISHED} is fired.
 */
public void run() {
  try {
    if (this.executorService == null) {
      this.executorService = Executors.newFixedThreadPool(numJobThreads);
    }
    setupFlowExecution();
    flow.setStartTime(System.currentTimeMillis());

    updateFlowReference();
    logger.info("Updating initial flow directory.");
    updateFlow();
    logger.info("Fetching job and shared properties.");
    loadAllProperties();

    this.fireEventListeners(Event.create(this, Type.FLOW_STARTED));
    runFlow();
  } catch (Throwable t) {
    if (logger != null) {
      logger.error("An error has occurred during the running of the flow. Quiting.", t);
    }
    flow.setStatus(Status.FAILED);
  } finally {
    // The logger may still be null if setup failed early (the catch block above allows for
    // that). Guard every use here so cleanup, persistence and the FLOW_FINISHED event still run.
    if (watcher != null) {
      if (logger != null) {
        logger.info("Watcher is attached. Stopping watcher.");
      }
      watcher.stopWatcher();
      if (logger != null) {
        logger.info("Watcher cancelled status is " + watcher.isWatchCancelled());
      }
    }

    // Read the clock once so the logged end time is the one actually stored on the flow.
    long endTime = System.currentTimeMillis();
    flow.setEndTime(endTime);
    if (logger != null) {
      logger.info("Setting end time for flow " + execId + " to " + endTime);
    }
    closeLogger();

    updateFlow();
    this.fireEventListeners(Event.create(this, Type.FLOW_FINISHED));
  }
}
/**
 * Pauses the flow unless it has already finished, then interrupts once the lock is released.
 * Pausing sets the paused flag, marks the flow PAUSED and persists it.
 *
 * @param user user requesting the pause, used for logging only
 */
public void pause(String user) {
  synchronized (mainSyncObj) {
    if (flowFinished) {
      logger.info("Cannot pause finished flow. Called by user " + user);
    } else {
      logger.info("Flow paused by " + user);
      flowPaused = true;
      flow.setStatus(Status.PAUSED);
      updateFlow();
    }
  }

  // Issued in both cases, outside the synchronized block.
  interrupt();
}
/**
 * Recursively restores the downstream nodes of {@code node} so that they can run again.
 *
 * <p>A KILLED dependent becomes READY and a SKIPPED dependent becomes DISABLED. Each restored
 * dependent gets a fresh update time and its own dependents are processed in turn. Dependents in
 * any other state are left alone and not descended into.
 *
 * @param node the node whose out-nodes should be re-enabled
 */
private void reEnableDependents(ExecutableNode node) {
  for (String childId : node.getOutNodes()) {
    ExecutableNode child = flow.getExecutableNode(childId);
    Status childStatus = child.getStatus();

    Status restored;
    if (childStatus == Status.KILLED) {
      restored = Status.READY;
    } else if (childStatus == Status.SKIPPED) {
      restored = Status.DISABLED;
    } else {
      continue;
    }

    child.setStatus(restored);
    child.setUpdateTime(System.currentTimeMillis());
    reEnableDependents(child);
  }
}
/**
 * Retries every failed job of the flow.
 *
 * <p>FAILED nodes are collected and handed to {@code retryJobs}. KILLED nodes are reset in place:
 * start and end time set to -1 and status set to READY.
 *
 * @param user user requesting the retry
 */
public void retryFailures(String user) {
  synchronized (mainSyncObj) {
    logger.info("Retrying failures invoked by " + user);

    ArrayList<String> failedJobIds = new ArrayList<String>();
    for (ExecutableNode candidate : flow.getExecutableNodes()) {
      if (candidate.getStatus() == Status.FAILED) {
        failedJobIds.add(candidate.getJobId());
        continue;
      }
      if (candidate.getStatus() == Status.KILLED) {
        candidate.setStartTime(-1);
        candidate.setEndTime(-1);
        candidate.setStatus(Status.READY);
      }
    }

    retryJobs(failedJobIds, user);
  }
}
private List<ExecutableNode> findReadyJobsToRun() { ArrayList<ExecutableNode> jobsToRun = new ArrayList<ExecutableNode>(); for (ExecutableNode node : flow.getExecutableNodes()) { if (Status.isStatusFinished(node.getStatus())) { continue; } else { // Check the dependencies to see if execution conditions are met, // and what the status should be set to. Status impliedStatus = getImpliedStatus(node); if (getImpliedStatus(node) != null) { node.setStatus(impliedStatus); jobsToRun.add(node); } } } return jobsToRun; }
/**
 * Main method that executes the jobs.
 *
 * <p>Marks the flow RUNNING, then loops until {@code flowFinished} is set. Each iteration, under
 * {@code mainSyncObj}, either waits (flow paused, or nothing ready), or processes the nodes
 * returned by {@code findReadyJobsToRun()}: READY nodes are queued on the executor service, KILLED
 * nodes are auto-completed, DISABLED nodes are marked SKIPPED. After the loop, a cancelled flow
 * has its active runners cancelled and its unfinished nodes marked SKIPPED or KILLED. Finally the
 * executor service is shut down and the flow's final status is settled.
 *
 * @throws Exception declared by this method
 */
private void runFlow() throws Exception {
  logger.info("Starting flows");
  flow.setStatus(Status.RUNNING);
  updateFlow();
  while (!flowFinished) {
    synchronized (mainSyncObj) {
      if (flowPaused) {
        // While paused, wait up to CHECK_WAIT_MS and then re-check the loop condition.
        try {
          mainSyncObj.wait(CHECK_WAIT_MS);
        } catch (InterruptedException e) {
          // Deliberately ignored: the interrupt only ends the wait early.
        }
        continue;
      } else {
        List<ExecutableNode> jobsReadyToRun = findReadyJobsToRun();
        if (!jobsReadyToRun.isEmpty() && !flowCancelled) {
          for (ExecutableNode node : jobsReadyToRun) {
            long currentTime = System.currentTimeMillis();
            // Queue a job only if it's ready to run.
            if (node.getStatus() == Status.READY) {
              // Collect output props from the job's dependencies.
              Props outputProps = collectOutputProps(node);
              node.setStatus(Status.QUEUED);
              JobRunner runner = createJobRunner(node, outputProps);
              logger.info("Submitting job " + node.getJobId() + " to run.");
              try {
                executorService.submit(runner);
                jobRunners.put(node.getJobId(), runner);
                activeJobRunners.put(node.getJobId(), runner);
              } catch (RejectedExecutionException e) {
                // NOTE(review): the node was already set to QUEUED above and no runner is
                // registered for it here; confirm whether a rejected submit can stall the flow.
                logger.error(e);
              }
              ; // stray empty statement, harmless
            } // If killed, then auto complete and KILL
            else if (node.getStatus() == Status.KILLED) {
              logger.info("Killing " + node.getJobId() + " due to prior errors.");
              node.setStartTime(currentTime);
              node.setEndTime(currentTime);
              fireEventListeners(Event.create(this, Type.JOB_FINISHED, node));
            } // If disabled, then we auto skip
            else if (node.getStatus() == Status.DISABLED) {
              logger.info("Skipping disabled job " + node.getJobId() + ".");
              node.setStartTime(currentTime);
              node.setEndTime(currentTime);
              node.setStatus(Status.SKIPPED);
              fireEventListeners(Event.create(this, Type.JOB_FINISHED, node));
            }
          }
          // Persist the status changes made in this pass.
          updateFlow();
        } else {
          // Nothing to schedule: either the flow is done/cancelled, or we wait for progress.
          if (isFlowFinished() || flowCancelled) {
            flowFinished = true;
            // Leaves the enclosing while loop (and the synchronized block).
            break;
          }
          try {
            mainSyncObj.wait(CHECK_WAIT_MS);
          } catch (InterruptedException e) {
            // Deliberately ignored: the interrupt only ends the wait early.
          }
        }
      }
    }
  }
  if (flowCancelled) {
    try {
      logger.info("Flow was force cancelled cleaning up.");
      for (JobRunner activeRunner : activeJobRunners.values()) {
        activeRunner.cancel();
      }
      // Every node that has not finished is closed out: DISABLED becomes SKIPPED, anything else
      // becomes KILLED, and a JOB_FINISHED event is fired for it.
      for (ExecutableNode node : flow.getExecutableNodes()) {
        if (Status.isStatusFinished(node.getStatus())) {
          continue;
        } else if (node.getStatus() == Status.DISABLED) {
          node.setStatus(Status.SKIPPED);
        } else {
          node.setStatus(Status.KILLED);
        }
        fireEventListeners(Event.create(this, Type.JOB_FINISHED, node));
      }
    } catch (Exception e) {
      // Cleanup is best-effort; the failure is logged and the flow is still persisted below.
      logger.error(e);
    }
    updateFlow();
  }
  logger.info("Finishing up flow. Awaiting Termination");
  executorService.shutdown();
  synchronized (mainSyncObj) {
    switch (flow.getStatus()) {
      case FAILED_FINISHING:
        logger.info("Setting flow status to Failed.");
        flow.setStatus(Status.FAILED);
        // Intentional fall-through: the new FAILED status is logged by the shared branch below.
      case FAILED:
      case KILLED:
        logger.info("Flow is set to " + flow.getStatus().toString());
        break;
      default:
        // Any other status at this point is treated as success.
        flow.setStatus(Status.SUCCEEDED);
        logger.info("Flow is set to " + flow.getStatus().toString());
    }
  }
}
private JobRunner createJobRunner(ExecutableNode node, Props previousOutput) { String source = node.getJobPropsSource(); String propsSource = node.getPropsSource(); // If no properties are set, we just set the global properties. Props parentProps = propsSource == null ? globalProps : sharedProps.get(propsSource); // Set up overrides ExecutionOptions options = flow.getExecutionOptions(); @SuppressWarnings("unchecked") Props flowProps = new Props(null, options.getFlowParameters()); flowProps.putAll(commonProps); flowProps.setParent(parentProps); parentProps = flowProps; // We add the previous job output and put into this props. if (previousOutput != null) { Props earliestParent = previousOutput.getEarliestAncestor(); earliestParent.setParent(parentProps); parentProps = previousOutput; } // Load job file. File path = new File(execDir, source); Props prop = null; // load the override props if any try { prop = projectLoader.fetchProjectProperty( flow.getProjectId(), flow.getVersion(), node.getJobId() + ".jor"); } catch (ProjectManagerException e) { e.printStackTrace(); logger.error("Error loading job override property for job " + node.getJobId()); } if (prop == null) { // if no override prop, load the original one on disk try { prop = new Props(null, path); } catch (IOException e) { e.printStackTrace(); logger.error("Error loading job file " + source + " for job " + node.getJobId()); } } // setting this fake source as this will be used to determine the location of log files. prop.setSource(path.getPath()); prop.setParent(parentProps); JobRunner jobRunner = new JobRunner(node, prop, path.getParentFile(), executorLoader, jobtypeManager); if (watcher != null) { jobRunner.setPipeline(watcher, pipelineLevel); } if (validateUserProxy) { jobRunner.setValidatedProxyUsers(proxyUsers); } jobRunner.setDelayStart(node.getDelayedExecution()); jobRunner.setLogSettings(logger, jobLogFileSize, jobLogNumFiles); jobRunner.addListener(listener); return jobRunner; }
/**
 * Rebuilds an {@code ExecutableFlow} from its generic object form.
 *
 * <p>The argument is cast to a {@code HashMap<String, Object>}. Identity, timing, status and
 * version fields are read unconditionally, as are "pipelineLevel", "nodes", "properties",
 * "successEmails" and "failureEmails". The keys "flowParameters", "notifyOnFirstFailure",
 * "notifyOnLastFailure", "failureAction" and "proxyUsers" are applied only when present.
 *
 * @param obj the map form of a flow
 * @return the reconstructed flow
 */
@SuppressWarnings("unchecked")
public static ExecutableFlow createExecutableFlowFromObject(Object obj) {
  ExecutableFlow restored = new ExecutableFlow();
  HashMap<String, Object> data = (HashMap<String, Object>) obj;

  // Identity, timing and status.
  restored.executionId = (Integer) data.get("executionId");
  restored.executionPath = (String) data.get("executionPath");
  restored.flowId = (String) data.get("flowId");
  restored.projectId = (Integer) data.get("projectId");
  restored.submitTime = JSONUtils.getLongFromObject(data.get("submitTime"));
  restored.startTime = JSONUtils.getLongFromObject(data.get("startTime"));
  restored.endTime = JSONUtils.getLongFromObject(data.get("endTime"));
  restored.flowStatus = Status.valueOf((String) data.get("status"));
  restored.submitUser = (String) data.get("submitUser");
  restored.version = (Integer) data.get("version");

  if (data.containsKey("flowParameters")) {
    Map<String, String> storedParams = (Map<String, String>) data.get("flowParameters");
    restored.flowParameters = new HashMap<String, String>(storedParams);
  }

  // Failure notification.
  if (data.containsKey("notifyOnFirstFailure")) {
    restored.notifyOnFirstFailure = (Boolean) data.get("notifyOnFirstFailure");
  }
  if (data.containsKey("notifyOnLastFailure")) {
    restored.notifyOnLastFailure = (Boolean) data.get("notifyOnLastFailure");
  }

  // Failure action.
  if (data.containsKey("failureAction")) {
    restored.failureAction = FailureAction.valueOf((String) data.get("failureAction"));
  }
  restored.pipelineLevel = (Integer) data.get("pipelineLevel");

  // Copy nodes.
  List<Object> storedNodes = (List<Object>) data.get("nodes");
  for (Object storedNode : storedNodes) {
    ExecutableNode node = ExecutableNode.createNodeFromObject(storedNode, restored);
    restored.executableNodes.put(node.getJobId(), node);
  }

  // Copy the property sources together with the source each one inherits from.
  List<Object> storedProps = (List<Object>) data.get("properties");
  for (Object storedProp : storedProps) {
    HashMap<String, Object> entry = (HashMap<String, Object>) storedProp;
    String source = (String) entry.get("source");
    String inheritedSource = (String) entry.get("inherited");
    restored.flowProps.put(source, new FlowProps(inheritedSource, source));
  }

  // Success emails.
  restored.setSuccessEmails((List<String>) data.get("successEmails"));
  // Failure emails.
  restored.setFailureEmails((List<String>) data.get("failureEmails"));

  if (data.containsKey("proxyUsers")) {
    ArrayList<String> storedProxyUsers = (ArrayList<String>) data.get("proxyUsers");
    restored.setProxyUsers(new HashSet<String>(storedProxyUsers));
  }

  return restored;
}
/**
 * Applies job property customizations before the properties are fed to the job.
 *
 * <p>Currently this stores the memory-check setting from the flow's execution options under
 * {@code ProcessJob.AZKABAN_MEMORY_CHECK}, as a string.
 *
 * @param props the job properties to customize in place
 */
private void customizeJobProperties(Props props) {
  props.put(
      ProcessJob.AZKABAN_MEMORY_CHECK,
      Boolean.toString(flow.getExecutionOptions().getMemoryCheck()));
}