/**
 * Verifies that a {@link TaskMetricGroup} created with a custom {@link TaskScopeFormat} reports
 * the task manager ID, job ID, vertex ID and execution attempt ID as its scope components, in
 * the order given by the format, and renders the matching scope string.
 */
@Test
public void testGenerateScopeCustom() {
  final MetricRegistry metricRegistry = new MetricRegistry(new Configuration());

  // Scope formats, chained from the task manager level down to the task level.
  final TaskManagerScopeFormat tmScope = new TaskManagerScopeFormat("abc");
  final TaskManagerJobScopeFormat jobScope = new TaskManagerJobScopeFormat("def", tmScope);
  final TaskScopeFormat taskScope =
      new TaskScopeFormat("<tm_id>.<job_id>.<task_id>.<task_attempt_id>", jobScope);

  final JobID jobId = new JobID();
  final AbstractID vertex = new AbstractID();
  final AbstractID attempt = new AbstractID();

  // Metric groups, each one nested in the previous one.
  final TaskManagerMetricGroup tmMetrics =
      new TaskManagerMetricGroup(metricRegistry, "theHostName", "test-tm-id");
  final JobMetricGroup jobMetrics =
      new JobMetricGroup(metricRegistry, tmMetrics, jobId, "myJobName");
  final TaskMetricGroup taskMetrics =
      new TaskMetricGroup(
          metricRegistry, jobMetrics, taskScope, vertex, attempt, "aTaskName", 13, 2);

  final String[] expectedComponents = {
    "test-tm-id", jobId.toString(), vertex.toString(), attempt.toString()
  };
  assertArrayEquals(expectedComponents, taskMetrics.getScopeComponents());

  final String expectedScope = String.format("test-tm-id.%s.%s.%s", jobId, vertex, attempt);
  assertEquals(expectedScope, taskMetrics.getScopeString());
}
/**
 * Verifies the savepoint command when the job manager answers the trigger request with a
 * {@code TriggerSavepointFailure}: the command must return a non-zero code, must have asked the
 * job manager exactly once, and the failure message must appear in {@code buffer}.
 */
@Test
public void testTriggerSavepointFailure() throws Exception {
  replaceStdOutAndStdErr();

  try {
    final JobID jobId = new JobID();
    final ActorGateway gateway = mock(ActorGateway.class);

    // Prepare an already completed reply that carries the failure.
    final Exception expectedFailure = new Exception("expectedTestException");
    final Promise<Object> reply = new scala.concurrent.impl.Promise.DefaultPromise<>();
    reply.success(new JobManagerMessages.TriggerSavepointFailure(jobId, expectedFailure));

    when(gateway.ask(
            Mockito.eq(new JobManagerMessages.TriggerSavepoint(jobId)),
            Mockito.any(FiniteDuration.class)))
        .thenReturn(reply.future());

    final CliFrontend cli = new MockCliFrontend(CliFrontendTestUtils.getConfigDir(), gateway);
    final int exitCode = cli.savepoint(new String[] {jobId.toString()});

    assertTrue(exitCode != 0);

    // The trigger request must have been sent exactly once.
    verify(gateway, times(1))
        .ask(
            Mockito.eq(new JobManagerMessages.TriggerSavepoint(jobId)),
            Mockito.any(FiniteDuration.class));

    assertTrue(buffer.toString().contains("expectedTestException"));
  } finally {
    restoreStdOutAndStdErr();
  }
}
/** * Handles an incoming DELETE request from a BLOB client. * * @param inputStream The input stream to read the request from. * @param outputStream The output stream to write the response to. * @throws java.io.IOException Thrown if an I/O error occurs while reading the request data from * the input stream. */ private void delete(InputStream inputStream, OutputStream outputStream, byte[] buf) throws IOException { try { int type = inputStream.read(); if (type < 0) { throw new EOFException("Premature end of DELETE request"); } if (type == CONTENT_ADDRESSABLE) { BlobKey key = BlobKey.readFromInputStream(inputStream); File blobFile = this.blobServer.getStorageLocation(key); if (blobFile.exists() && !blobFile.delete()) { throw new IOException("Cannot delete BLOB file " + blobFile.getAbsolutePath()); } } else if (type == NAME_ADDRESSABLE) { byte[] jidBytes = new byte[JobID.SIZE]; readFully(inputStream, jidBytes, 0, JobID.SIZE, "JobID"); JobID jobID = JobID.fromByteArray(jidBytes); String key = readKey(buf, inputStream); File blobFile = this.blobServer.getStorageLocation(jobID, key); if (blobFile.exists() && !blobFile.delete()) { throw new IOException("Cannot delete BLOB file " + blobFile.getAbsolutePath()); } } else if (type == JOB_ID_SCOPE) { byte[] jidBytes = new byte[JobID.SIZE]; readFully(inputStream, jidBytes, 0, JobID.SIZE, "JobID"); JobID jobID = JobID.fromByteArray(jidBytes); blobServer.deleteJobDirectory(jobID); } else { throw new IOException("Unrecognized addressing type: " + type); } outputStream.write(RETURN_OKAY); } catch (Throwable t) { LOG.error("DELETE operation failed", t); try { writeErrorToStream(outputStream, t); } catch (IOException e) { // since we are in an exception case, it means not much that we could not send the error // ignore this } clientSocket.close(); } }
/**
 * Returns the string representation of the {@code jobID} field.
 *
 * @return the result of {@code jobID.toString()}
 */
protected String internalJobId() {
  final String jobIdAsString = jobID.toString();
  return jobIdAsString;
}
/** * Receives an AcknowledgeCheckpoint message and returns whether the message was associated with a * pending checkpoint. * * @param message Checkpoint ack from the task manager * @return Flag indicating whether the ack'd checkpoint was associated with a pending checkpoint. * @throws Exception If the checkpoint cannot be added to the completed checkpoint store. */ public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message) throws Exception { if (shutdown || message == null) { return false; } if (!job.equals(message.getJob())) { LOG.error("Received AcknowledgeCheckpoint message for wrong job: {}", message); return false; } final long checkpointId = message.getCheckpointId(); CompletedCheckpoint completed = null; PendingCheckpoint checkpoint; // Flag indicating whether the ack message was for a known pending // checkpoint. boolean isPendingCheckpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return false; } checkpoint = pendingCheckpoints.get(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { isPendingCheckpoint = true; if (checkpoint.acknowledgeTask( message.getTaskExecutionId(), message.getState(), message.getStateSize(), null)) { // TODO: Give KV-state to the acknowledgeTask method if (checkpoint.isFullyAcknowledged()) { completed = checkpoint.toCompletedCheckpoint(); completedCheckpointStore.addCheckpoint(completed); LOG.info( "Completed checkpoint " + checkpointId + " (in " + completed.getDuration() + " ms)"); if (LOG.isDebugEnabled()) { StringBuilder builder = new StringBuilder(); for (Map.Entry<JobVertexID, TaskState> entry : completed.getTaskStates().entrySet()) { builder .append("JobVertexID: ") .append(entry.getKey()) .append(" {") .append(entry.getValue()) .append("}"); } LOG.debug(builder.toString()); } pendingCheckpoints.remove(checkpointId); rememberRecentCheckpointId(checkpointId); 
dropSubsumedCheckpoints(completed.getTimestamp()); onFullyAcknowledgedCheckpoint(completed); triggerQueuedRequests(); } } else { // checkpoint did not accept message LOG.error( "Received duplicate or invalid acknowledge message for checkpoint " + checkpointId + " , task " + message.getTaskExecutionId()); } } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) if (recentPendingCheckpoints.contains(checkpointId)) { isPendingCheckpoint = true; LOG.warn("Received late message for now expired checkpoint attempt " + checkpointId); } else { isPendingCheckpoint = false; } } } // send the confirmation messages to the necessary targets. we do this here // to be outside the lock scope if (completed != null) { final long timestamp = completed.getTimestamp(); for (ExecutionVertex ev : tasksToCommitTo) { Execution ee = ev.getCurrentExecutionAttempt(); if (ee != null) { ExecutionAttemptID attemptId = ee.getAttemptId(); NotifyCheckpointComplete notifyMessage = new NotifyCheckpointComplete(job, attemptId, checkpointId, timestamp); ev.sendMessageToCurrentExecution(notifyMessage, ee.getAttemptId()); } } statsTracker.onCompletedCheckpoint(completed); } return isPendingCheckpoint; }
/** * Receives a {@link DeclineCheckpoint} message and returns whether the message was associated * with a pending checkpoint. * * @param message Checkpoint decline from the task manager * @return Flag indicating whether the declined checkpoint was associated with a pending * checkpoint. */ public boolean receiveDeclineMessage(DeclineCheckpoint message) throws Exception { if (shutdown || message == null) { return false; } if (!job.equals(message.getJob())) { LOG.error("Received DeclineCheckpoint message for wrong job: {}", message); return false; } final long checkpointId = message.getCheckpointId(); PendingCheckpoint checkpoint; // Flag indicating whether the ack message was for a known pending // checkpoint. boolean isPendingCheckpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return false; } checkpoint = pendingCheckpoints.get(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { isPendingCheckpoint = true; LOG.info( "Discarding checkpoint " + checkpointId + " because of checkpoint decline from task " + message.getTaskExecutionId()); pendingCheckpoints.remove(checkpointId); checkpoint.discard(userClassLoader); rememberRecentCheckpointId(checkpointId); boolean haveMoreRecentPending = false; Iterator<Map.Entry<Long, PendingCheckpoint>> entries = pendingCheckpoints.entrySet().iterator(); while (entries.hasNext()) { PendingCheckpoint p = entries.next().getValue(); if (!p.isDiscarded() && p.getCheckpointTimestamp() >= checkpoint.getCheckpointTimestamp()) { haveMoreRecentPending = true; break; } } if (!haveMoreRecentPending && !triggerRequestQueued) { LOG.info("Triggering new checkpoint because of discarded checkpoint " + checkpointId); triggerCheckpoint(System.currentTimeMillis()); } else if (!haveMoreRecentPending) { LOG.info( "Promoting queued checkpoint request because of discarded checkpoint " + checkpointId); 
triggerQueuedRequests(); } } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) if (recentPendingCheckpoints.contains(checkpointId)) { isPendingCheckpoint = true; LOG.info( "Received another decline checkpoint message for now expired checkpoint attempt " + checkpointId); } else { isPendingCheckpoint = false; } } } return isPendingCheckpoint; }
/** * Handles an incoming PUT request from a BLOB client. * * @param inputStream The input stream to read incoming data from. * @param outputStream The output stream to send data back to the client. * @param buf An auxiliary buffer for data serialization/deserialization. */ private void put(InputStream inputStream, OutputStream outputStream, byte[] buf) throws IOException { JobID jobID = null; String key = null; MessageDigest md = null; File incomingFile = null; FileOutputStream fos = null; try { final int contentAddressable = inputStream.read(); if (contentAddressable < 0) { throw new EOFException("Premature end of PUT request"); } if (contentAddressable == NAME_ADDRESSABLE) { // Receive the job ID and key byte[] jidBytes = new byte[JobID.SIZE]; readFully(inputStream, jidBytes, 0, JobID.SIZE, "JobID"); jobID = JobID.fromByteArray(jidBytes); key = readKey(buf, inputStream); } else if (contentAddressable == CONTENT_ADDRESSABLE) { md = BlobUtils.createMessageDigest(); } else { throw new IOException("Unknown type of BLOB addressing."); } if (LOG.isDebugEnabled()) { if (contentAddressable == NAME_ADDRESSABLE) { LOG.debug(String.format("Received PUT request for BLOB under %s / \"%s\"", jobID, key)); } else { LOG.debug("Received PUT request for content addressable BLOB"); } } incomingFile = blobServer.createTemporaryFilename(); fos = new FileOutputStream(incomingFile); while (true) { final int bytesExpected = readLength(inputStream); if (bytesExpected == -1) { // done break; } if (bytesExpected > BUFFER_SIZE) { throw new IOException("Unexpected number of incoming bytes: " + bytesExpected); } readFully(inputStream, buf, 0, bytesExpected, "buffer"); fos.write(buf, 0, bytesExpected); if (md != null) { md.update(buf, 0, bytesExpected); } } fos.close(); if (contentAddressable == NAME_ADDRESSABLE) { File storageFile = this.blobServer.getStorageLocation(jobID, key); Files.move(incomingFile, storageFile); incomingFile = null; outputStream.write(RETURN_OKAY); } else { BlobKey 
blobKey = new BlobKey(md.digest()); File storageFile = blobServer.getStorageLocation(blobKey); Files.move(incomingFile, storageFile); incomingFile = null; // Return computed key to client for validation outputStream.write(RETURN_OKAY); blobKey.writeToOutputStream(outputStream); } } catch (SocketException e) { // happens when the other side disconnects LOG.debug("Socket connection closed", e); } catch (Throwable t) { LOG.error("PUT operation failed", t); try { writeErrorToStream(outputStream, t); } catch (IOException e) { // since we are in an exception case, it means not much that we could not send the error // ignore this } clientSocket.close(); } finally { if (fos != null) { try { fos.close(); } catch (Throwable t) { LOG.warn("Cannot close stream to BLOB staging file", t); } } if (incomingFile != null) { if (!incomingFile.delete()) { LOG.warn("Cannot delete BLOB server staging file " + incomingFile.getAbsolutePath()); } } } }
/** * Handles an incoming GET request from a BLOB client. * * @param inputStream the input stream to read incoming data from * @param outputStream the output stream to send data back to the client * @param buf an auxiliary buffer for data serialization/deserialization * @throws IOException thrown if an I/O error occurs while reading/writing data from/to the * respective streams */ private void get(InputStream inputStream, OutputStream outputStream, byte[] buf) throws IOException { File blobFile; try { final int contentAddressable = inputStream.read(); if (contentAddressable < 0) { throw new EOFException("Premature end of GET request"); } if (contentAddressable == NAME_ADDRESSABLE) { // Receive the job ID and key byte[] jidBytes = new byte[JobID.SIZE]; readFully(inputStream, jidBytes, 0, JobID.SIZE, "JobID"); JobID jobID = JobID.fromByteArray(jidBytes); String key = readKey(buf, inputStream); blobFile = this.blobServer.getStorageLocation(jobID, key); } else if (contentAddressable == CONTENT_ADDRESSABLE) { final BlobKey key = BlobKey.readFromInputStream(inputStream); blobFile = blobServer.getStorageLocation(key); } else { throw new IOException("Unknown type of BLOB addressing."); } // Check if BLOB exists if (!blobFile.exists()) { throw new IOException("Cannot find required BLOB at " + blobFile.getAbsolutePath()); } if (blobFile.length() > Integer.MAX_VALUE) { throw new IOException("BLOB size exceeds the maximum size (2 GB)."); } outputStream.write(RETURN_OKAY); // up to here, an error can give a good message } catch (Throwable t) { LOG.error("GET operation failed", t); try { writeErrorToStream(outputStream, t); } catch (IOException e) { // since we are in an exception case, it means not much that we could not send the error // ignore this } clientSocket.close(); return; } // from here on, we started sending data, so all we can do is close the connection when // something happens try { int blobLen = (int) blobFile.length(); writeLength(blobLen, outputStream); 
FileInputStream fis = new FileInputStream(blobFile); try { int bytesRemaining = blobLen; while (bytesRemaining > 0) { int read = fis.read(buf); if (read < 0) { throw new IOException( "Premature end of BLOB file stream for " + blobFile.getAbsolutePath()); } outputStream.write(buf, 0, read); bytesRemaining -= read; } } finally { fis.close(); } } catch (SocketException e) { // happens when the other side disconnects LOG.debug("Socket connection closed", e); } catch (Throwable t) { LOG.error("GET operation failed", t); clientSocket.close(); } }