/**
 * Trains the network on this mini batch and returns a list of futures, one per training job.
 *
 * @param trainingBatch the trees to iterate on
 */
public List<Future<Object>> fitAsync(final List<Tree> trainingBatch) {
  int count = 0;
  List<Future<Object>> futureBatch = new ArrayList<>();

  for (final Tree t : trainingBatch) {
    log.info("Working mini batch " + count++);
    futureBatch.add(
        Futures.future(
            new Callable<Object>() {
              @Override
              public Object call() throws Exception {
                forwardPropagateTree(t);
                try {
                  INDArray params = getParameters();
                  INDArray gradient = getValueGradient(trainingBatch);
                  if (params.length() != gradient.length())
                    throw new IllegalStateException("Parameter and gradient lengths differ!");
                  setParams(params.subi(gradient));
                } catch (NegativeArraySizeException e) {
                  log.warn("Couldn't compute parameters due to negative array size for tree " + t);
                }
                return null;
              }
            },
            rnTnActorSystem.dispatcher()));
  }
  return futureBatch;
}
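// A caller will usually want to wait until the whole mini-batch has been applied.
// A minimal usage sketch, assuming the surrounding class exposes rnTnActorSystem
// and that blocking is acceptable at the call site; the timeout is illustrative.
List<Future<Object>> jobs = fitAsync(trainingBatch);
Future<Iterable<Object>> allJobs = Futures.sequence(jobs, rnTnActorSystem.dispatcher());
Await.result(allJobs, Duration.create(5, TimeUnit.MINUTES));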
public static void main(String[] args) throws Exception {
  final ActorSystem system = system();
  final ExecutionContextExecutor dispatcher = system.dispatcher();

  Future<Long> fr = Futures.future(task, dispatcher);
  Future<Long> sc = Futures.future(task, dispatcher);
  Future<Long> th = Futures.future(task, dispatcher);
  Future<Long> fo = Futures.future(task, dispatcher);

  fr.onComplete(complete, dispatcher);
  sc.onComplete(complete, dispatcher);
  th.onComplete(complete, dispatcher);
  fo.onComplete(complete, dispatcher);

  Future<Iterable<Long>> sec = Futures.sequence(Arrays.asList(fr, sc, th, fo), dispatcher);

  Patterns.pipe(sec, dispatcher)
      .to(system.actorOf(Props.create(F.class)))
      .future()
      .ready(Duration.create(20, TimeUnit.SECONDS), null);

  Await.ready(system.terminate(), Duration.Inf());
}
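// main() references a system() factory, a task callable, a complete callback,
// and an actor class F that the snippet does not show. The stand-ins below are
// assumptions consistent with how those names are used, not the original code.
static ActorSystem system() {
  return ActorSystem.create("futures-demo");
}

static final Callable<Long> task =
    () -> {
      Thread.sleep(100); // simulate some work
      return System.nanoTime();
    };

static final OnComplete<Long> complete =
    new OnComplete<Long>() {
      @Override
      public void onComplete(Throwable failure, Long result) {
        if (failure != null) {
          System.err.println("task failed: " + failure);
        } else {
          System.out.println("task finished: " + result);
        }
      }
    };

// Receives the Iterable<Long> delivered by Patterns.pipe(...)
static class F extends AbstractActor {
  @Override
  public Receive createReceive() {
    return receiveBuilder()
        .match(Iterable.class, results -> System.out.println("all results: " + results))
        .build();
  }
}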
public Future<Instance> startInstanceAsync(AWSCredentials credentials) {
  Future<Instance> f =
      circuitBreaker.callWithCircuitBreaker(
          () -> Futures.future(() -> startInstance(credentials), executionContext));

  PartialFunction<Throwable, Future<Instance>> recovery =
      new PFBuilder<Throwable, Future<Instance>>()
          .match(
              AmazonClientException.class,
              ex -> ex.isRetryable(),
              ex -> startInstanceAsync(credentials))
          .build();

  return f.recoverWith(recovery, executionContext);
}
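// Both EC2 helpers assume shared circuitBreaker and executionContext fields.
// A minimal sketch of how they might be wired up, using the classic Akka
// CircuitBreaker constructor that takes Scala FiniteDurations; the failure
// threshold and timeouts are illustrative assumptions.
private final ActorSystem system = ActorSystem.create("ec2-client");
private final ExecutionContext executionContext = system.dispatcher();

// Open after 5 consecutive failures; fail calls slower than 10 seconds;
// probe the downstream service again after 1 minute.
private final CircuitBreaker circuitBreaker =
    new CircuitBreaker(
        system.dispatcher(),
        system.scheduler(),
        5,
        Duration.create(10, TimeUnit.SECONDS),
        Duration.create(1, TimeUnit.MINUTES));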
public Future<TerminateInstancesResult> terminateInstancesAsync(
    AmazonEC2Client client, Instance... instances) {
  List<String> ids =
      Arrays.stream(instances).map(Instance::getInstanceId).collect(Collectors.toList());
  TerminateInstancesRequest request = new TerminateInstancesRequest(ids);

  Future<TerminateInstancesResult> f =
      circuitBreaker.callWithCircuitBreaker(
          () -> Futures.future(() -> client.terminateInstances(request), executionContext));

  PartialFunction<Throwable, Future<TerminateInstancesResult>> recovery =
      new PFBuilder<Throwable, Future<TerminateInstancesResult>>()
          .match(
              AmazonClientException.class,
              ex -> ex.isRetryable(),
              ex -> terminateInstancesAsync(client, instances))
          .build();

  return f.recoverWith(recovery, executionContext);
}
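// Note the recovery function retries by plain recursion, with no retry cap or
// backoff: the circuit breaker is what ultimately stops a persistently failing
// call. A hedged usage sketch; the variables below are illustrative.
Future<TerminateInstancesResult> result = terminateInstancesAsync(client, instance);
result.onComplete(
    new OnComplete<TerminateInstancesResult>() {
      @Override
      public void onComplete(Throwable failure, TerminateInstancesResult r) {
        if (failure != null) {
          System.err.println("termination failed: " + failure);
        } else {
          System.out.println("terminating: " + r.getTerminatingInstances());
        }
      }
    },
    executionContext);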
void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
  final int numConsumers = allConsumers.size();

  if (numConsumers > 1) {
    fail(
        new IllegalStateException(
            "Currently, only a single consumer group per partition is supported."));
  } else if (numConsumers == 0) {
    return;
  }

  for (ExecutionEdge edge : allConsumers.get(0)) {
    final ExecutionVertex consumerVertex = edge.getTarget();
    final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
    final ExecutionState consumerState = consumer.getState();

    final IntermediateResultPartition partition = edge.getSource();

    // ----------------------------------------------------------------
    // Consumer is created => try to deploy and cache input channel
    // descriptors if there is a deployment race
    // ----------------------------------------------------------------
    if (consumerState == CREATED) {
      final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

      consumerVertex.cachePartitionInfo(
          PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

      // When deploying a consuming task, its task deployment descriptor will contain all
      // deployment information available at the respective time. It is possible that some
      // of the partitions to be consumed have not been created yet. These are updated at
      // runtime via the update messages.
      //
      // TODO The current approach may send many update messages even though the consuming
      // task has already been deployed with all necessary information. We have to check
      // whether this is a problem and fix it, if it is.
      future(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              try {
                consumerVertex.scheduleForExecution(
                    consumerVertex.getExecutionGraph().getScheduler(),
                    consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
              } catch (Throwable t) {
                fail(
                    new IllegalStateException(
                        "Could not schedule consumer vertex " + consumerVertex, t));
              }
              return true;
            }
          },
          executionContext);

      // double check to resolve race conditions
      if (consumerVertex.getExecutionState() == RUNNING) {
        consumerVertex.sendPartitionInfos();
      }
    }
    // ----------------------------------------------------------------
    // Consumer is running => send update message now
    // ----------------------------------------------------------------
    else if (consumerState == RUNNING) {
      final SimpleSlot consumerSlot = consumer.getAssignedResource();

      if (consumerSlot == null) {
        // The consumer has been reset concurrently
        continue;
      }

      final Instance consumerInstance = consumerSlot.getInstance();

      final ResultPartitionID partitionId =
          new ResultPartitionID(partition.getPartitionId(), attemptId);

      final Instance partitionInstance =
          partition.getProducer().getCurrentAssignedResource().getInstance();

      final ResultPartitionLocation partitionLocation;

      if (consumerInstance.equals(partitionInstance)) {
        // Consuming task is deployed to the same instance as the partition => local
        partitionLocation = ResultPartitionLocation.createLocal();
      } else {
        // Different instances => remote
        final ConnectionID connectionId =
            new ConnectionID(
                partitionInstance.getInstanceConnectionInfo(),
                partition.getIntermediateResult().getConnectionIndex());

        partitionLocation = ResultPartitionLocation.createRemote(connectionId);
      }

      final InputChannelDeploymentDescriptor descriptor =
          new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

      final UpdatePartitionInfo updateTaskMessage =
          new UpdateTaskSinglePartitionInfo(
              consumer.getAttemptId(), partition.getIntermediateResult().getId(), descriptor);

      sendUpdatePartitionInfoRpcCall(consumerSlot, updateTaskMessage);
    }
    // ----------------------------------------------------------------
    // Consumer is scheduled or deploying => cache input channel
    // deployment descriptors and send update message later
    // ----------------------------------------------------------------
    else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
      final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

      consumerVertex.cachePartitionInfo(
          PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

      // double check to resolve race conditions
      if (consumerVertex.getExecutionState() == RUNNING) {
        consumerVertex.sendPartitionInfos();
      }
    }
  }
}
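// The unqualified future(...) call above presumably relies on a static import
// of akka.dispatch.Futures.future; the returned Future<Boolean> is deliberately
// dropped, making the consumer scheduling fire-and-forget. A self-contained
// sketch of that pattern, with illustrative names:
ActorSystem system = ActorSystem.create("demo");
ExecutionContext executionContext = system.dispatcher();

// Fire-and-forget: submit the callable on the dispatcher and ignore the Future.
future(
    () -> {
      System.out.println("scheduling consumer vertex...");
      return true;
    },
    executionContext);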