private void execute(BulkWrites bulkWrites) throws Exception { assert bulkWrites != null : "bulk writes passed in are null"; if (LOG.isDebugEnabled()) SpliceLogUtils.debug(LOG, "Executing BulkWriteAction: id=%d, bulkWrites=%s", id, bulkWrites); boolean first = true; LinkedList<BulkWrites> writesToPerform = Lists.newLinkedList(); writesToPerform.add(bulkWrites); WriteAttemptContext ctx = new WriteAttemptContext(); do { if (!first) { retryCounter.increment(); } first = false; BulkWrites nextWrite = writesToPerform.removeFirst(); assert nextWrite != null : "next write is null"; ctx.reset(); ctx.attemptCount++; if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) { int numRows = nextWrite.numEntries(); SpliceLogUtils.warn( LOG, "BulkWriteAction[%d rows] is taking a long time with %d attempts: id=%d", numRows, ctx.attemptCount, id); LOG.warn("Attempting bulk write " + nextWrite); } executeSingle(nextWrite, ctx); /* * We need to do an exponential backoff to ensure that our cache has a chance to invalidate, or * simply because we were told to wait a bit by the write pipeline (i.e. we were rejected). */ if (ctx.shouldSleep()) { clock.sleep(PipelineUtils.getPauseTime(ctx.attemptCount, 10), TimeUnit.MILLISECONDS); } if (ctx.directRetry) writesToPerform.add(nextWrite); else if (ctx.nextWriteSet != null && ctx.nextWriteSet.size() > 0) { ctx.failed(); // rebuild a new buffer to retry from any records that need retrying addToRetryCallBuffer(ctx.nextWriteSet, bulkWrites.getTxn(), ctx.refreshCache); } if (retryPipingCallBuffer != null) { writesToPerform.addAll(retryPipingCallBuffer.getBulkWrites()); retryPipingCallBuffer = null; } } while (writesToPerform.size() > 0); }
private void executeSingle(BulkWrites nextWrite, WriteAttemptContext ctx) throws Exception { retryPipingCallBuffer = null; if (!shouldWrite(nextWrite)) return; if (LOG.isTraceEnabled()) SpliceLogUtils.trace( LOG, "Getting next BulkWrites in loop: id=%d, nextBulkWrites=%s", id, nextWrite); // used to indicate that the exception was thrown inside the try{} block explicitly, and should // just be re-thrown boolean thrown = false; try { BulkWriter writer = writerFactory.newWriter(tableName); writeTimer.startTiming(); BulkWritesResult bulkWritesResult = writer.write(nextWrite, ctx.refreshCache); writeTimer.stopTiming(); Iterator<BulkWrite> bws = nextWrite.getBulkWrites().iterator(); Collection<BulkWriteResult> results = bulkWritesResult.getBulkWriteResults(); for (BulkWriteResult bulkWriteResult : results) { WriteResponse globalResponse = writeConfiguration.processGlobalResult(bulkWriteResult); BulkWrite currentBulkWrite = bws.next(); switch (globalResponse) { case SUCCESS: writtenCounter.add(currentBulkWrite.getSize()); break; // success can be ignored case THROW_ERROR: thrown = true; thrownErrorsRows.add(currentBulkWrite.getSize()); throw parseIntoException(bulkWriteResult); case RETRY: retriedRows.add(currentBulkWrite.getSize()); /* * The entire BulkWrite needs to be retried--either because it was rejected outright, * or because the region moved/split/something else. */ ctx.rejected(); if (RETRY_LOG.isDebugEnabled()) SpliceLogUtils.debug( RETRY_LOG, "Retrying write after receiving global RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s", id, bulkWriteResult, currentBulkWrite); else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) { SpliceLogUtils.warn( LOG, "Retrying write after receiving global RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s", id, bulkWriteResult, currentBulkWrite); } ctx.addBulkWrites(currentBulkWrite.getMutations()); ctx.refreshCache = ctx.refreshCache || bulkWriteResult.getGlobalResult().refreshCache(); ctx.sleep = true; // always sleep due to rejection, even if we don't need to refresh the cache break; case PARTIAL: partialRows.add(currentBulkWrite.getSize()); partialFailureCounter.increment(); WriteResponse writeResponse = writeConfiguration.partialFailure(bulkWriteResult, currentBulkWrite); switch (writeResponse) { case THROW_ERROR: partialThrownErrorRows.add(currentBulkWrite.getSize()); thrown = true; throw parseIntoException(bulkWriteResult); case RETRY: if (RETRY_LOG.isTraceEnabled()) { SpliceLogUtils.trace( RETRY_LOG, "Retrying write after receiving partial %s response: id=%d, bulkWriteResult=%s, failureCountsByType=%s", writeResponse, id, bulkWriteResult, getFailedRowsMessage(bulkWriteResult.getFailedRows())); } else if (RETRY_LOG.isDebugEnabled()) { SpliceLogUtils.debug( RETRY_LOG, "Retrying write after receiving partial %s response: id=%d, bulkWriteResult=%s", writeResponse, id, bulkWriteResult); } else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) { SpliceLogUtils.warn( LOG, "Retrying write after receiving partial RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s", id, bulkWriteResult, currentBulkWrite); } Collection<KVPair> writes = PipelineUtils.doPartialRetry(currentBulkWrite, bulkWriteResult, id); if (writes.size() > 0) { ctx.addBulkWrites(writes); // only redo cache if you have a failure not a lock contention issue boolean isFailure = bulkWriteResult.getFailedRows() != null && bulkWriteResult.getFailedRows().size() > 0; ctx.refreshCache = ctx.refreshCache || isFailure; ctx.sleep = true; // always sleep } writtenCounter.add(currentBulkWrite.getSize() - writes.size()); partialRetriedRows.add(writes.size()); break; case IGNORE: partialIgnoredRows.add(currentBulkWrite.getSize()); SpliceLogUtils.warn( RETRY_LOG, "Ignoring write after receiving unknown partial %s response: id=%d, bulkWriteResult=%s", writeResponse, id, bulkWriteResult); break; default: throw new IllegalStateException( "Programmer error: Unknown partial response: " + writeResponse); } break; case IGNORE: ignoredRows.add(currentBulkWrite.getSize()); SpliceLogUtils.warn(LOG, "Global response indicates ignoring, so we ignore"); break; default: SpliceLogUtils.error(LOG, "Global response went down default path, assert"); throw new IllegalStateException( "Programmer error: Unknown global response: " + globalResponse); } } } catch (Throwable e) { if (LOG.isTraceEnabled()) SpliceLogUtils.trace(LOG, "Caught throwable: id=%d", id); globalErrorCounter.increment(); if (thrown) throw new ExecutionException(e); //noinspection ThrowableResultOfMethodCallIgnored if (pipelineExceptionFactory.processPipelineException(e) instanceof PipelineTooBusy) { ctx.rejected(); /* * the pipeline has indicated that it is too busy to accept any writes just now. So we need * to retry the entire BulkWrites that we were send. But since it's too busy (and not some other error), * there's no need to refresh caches on our account. However, we *do* need to do a backoff to ensure * that we let the Pipeline cool down a bit. */ if (RETRY_LOG.isDebugEnabled()) SpliceLogUtils.debug( RETRY_LOG, "Retrying write after receiving RegionTooBusyException: id=%d", id); regionTooBusy.increment(); ctx.sleep = true; ctx.directRetry(); return; } WriteResponse writeResponse = writeConfiguration.globalError(e); switch (writeResponse) { case THROW_ERROR: if (LOG.isTraceEnabled()) SpliceLogUtils.trace( LOG, "Throwing error after receiving global error: id=%d, class=%s, message=%s", id, e.getClass(), e.getMessage()); catchThrownRows.add(nextWrite.numEntries()); throw new ExecutionException(e); case RETRY: errors.add(e); ctx.rejected(); if (RETRY_LOG.isDebugEnabled()) { SpliceLogUtils.debug( RETRY_LOG, "Retrying write after receiving global error: id=%d, class=%s, message=%s", id, e.getClass(), e.getMessage()); if (RETRY_LOG.isTraceEnabled()) { RETRY_LOG.trace("Global error exception received: ", e); } } else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) { SpliceLogUtils.debug( LOG, "Retrying write after receiving global error: id=%d, class=%s, message=%s", id, e.getClass(), e.getMessage()); } ctx.sleep = true; ctx.refreshCache = true; for (BulkWrite bw : nextWrite.getBulkWrites()) { ctx.addBulkWrites(bw.getMutations()); catchRetriedRows.add(bw.getSize()); } break; default: LOG.warn( String.format( "Ignoring error after receiving unknown global error %s response: id=%d ", writeResponse, id), e); throw new IllegalStateException( "Programmer error: Unknown global response: " + writeResponse); } } }