private boolean shouldWrite(BulkWrites nextWrite) {
    if (LOG.isTraceEnabled())
        SpliceLogUtils.trace(LOG, "[%d] next bulkWrites %s", id, nextWrite);
    if (nextWrite == null || nextWrite.numEntries() == 0) {
        if (LOG.isDebugEnabled())
            SpliceLogUtils.debug(LOG, "No actual writes in BulkWrites %s", nextWrite);
        return false;
    }
    return true;
}
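
/**
 * Callable entry point for this flush. Marks the flush as executing on the status reporter,
 * records its size, times the whole write, and drives {@link #execute(BulkWrites)}. When the
 * metric factory is active, a populated {@link SimpleWriteStats} is returned; otherwise the
 * no-op stats instance is returned. Completion time and record count are always reported back
 * to the write configuration and status reporter in the finally block.
 */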
@Override
public WriteStats call() throws Exception {
    statusReporter.numExecutingFlushes.incrementAndGet();
    reportSize();
    long start = System.currentTimeMillis();
    try {
        Timer totalTimer = metricFactory.newTimer();
        totalTimer.startTiming();
        if (LOG.isDebugEnabled())
            SpliceLogUtils.debug(LOG,
                    "Calling BulkWriteAction: id=%d, initialBulkWritesSize=%d, initialKVPairSize=%d",
                    id, bulkWrites.numEntries(), bulkWrites.numEntries());
        execute(bulkWrites);
        totalTimer.stopTiming();
        if (metricFactory.isActive())
            return new SimpleWriteStats(
                    writtenCounter.getTotal(),
                    retryCounter.getTotal(),
                    thrownErrorsRows.getTotal(),
                    retriedRows.getTotal(),
                    partialRows.getTotal(),
                    partialThrownErrorRows.getTotal(),
                    partialRetriedRows.getTotal(),
                    partialIgnoredRows.getTotal(),
                    partialWrite.getTotal(),
                    ignoredRows.getTotal(),
                    catchThrownRows.getTotal(),
                    catchRetriedRows.getTotal(),
                    regionTooBusy.getTotal());
        else
            return WriteStats.NOOP_WRITE_STATS;
    } finally {
        long timeTakenMs = System.currentTimeMillis() - start;
        long numRecords = bulkWrites.numEntries();
        writeConfiguration.writeComplete(timeTakenMs, numRecords);
        statusReporter.complete(timeTakenMs);
        bulkWrites = null;
    }
}
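
/**
 * Publishes the size of this flush to the status reporter: total entry and region counts,
 * buffer heap size, and the running min/max of each. The min/max values are maintained with
 * compare-and-set loops so that concurrent flushes can update them without locking.
 */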
private void reportSize() {
    boolean success;
    long bufferEntries = bulkWrites.numEntries();
    int numRegions = bulkWrites.numRegions();
    statusReporter.totalFlushEntries.addAndGet(bufferEntries);
    statusReporter.totalFlushRegions.addAndGet(numRegions);

    do {
        long currentMax = statusReporter.maxFlushRegions.get();
        success = currentMax >= numRegions
                || statusReporter.maxFlushRegions.compareAndSet(currentMax, numRegions);
    } while (!success);
    do {
        long currentMin = statusReporter.minFlushRegions.get();
        success = currentMin <= numRegions
                || statusReporter.minFlushRegions.compareAndSet(currentMin, numRegions);
    } while (!success);
    do {
        long currentMax = statusReporter.maxFlushEntries.get();
        success = currentMax >= bufferEntries
                || statusReporter.maxFlushEntries.compareAndSet(currentMax, bufferEntries);
    } while (!success);
    do {
        long currentMin = statusReporter.minFlushEntries.get();
        success = currentMin <= bufferEntries
                || statusReporter.minFlushEntries.compareAndSet(currentMin, bufferEntries);
    } while (!success);

    long bufferSizeBytes = bulkWrites.getBufferHeapSize();
    statusReporter.totalFlushSizeBytes.addAndGet(bufferSizeBytes);
    do {
        long currentMax = statusReporter.maxFlushSizeBytes.get();
        success = currentMax >= bufferSizeBytes
                || statusReporter.maxFlushSizeBytes.compareAndSet(currentMax, bufferSizeBytes);
    } while (!success);
    do {
        long currentMin = statusReporter.minFlushSizeBytes.get();
        success = currentMin <= bufferSizeBytes
                || statusReporter.minFlushSizeBytes.compareAndSet(currentMin, bufferSizeBytes);
    } while (!success);
}
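
/*
 * Note on the compare-and-set loops above: each is a lock-free "running min/max" update on an
 * AtomicLong. On Java 8+ an equivalent, purely illustrative formulation (not what this class
 * uses) would be:
 *
 *     statusReporter.maxFlushEntries.accumulateAndGet(bufferEntries, Math::max);
 *     statusReporter.minFlushEntries.accumulateAndGet(bufferEntries, Math::min);
 */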
private void execute(BulkWrites bulkWrites) throws Exception {
    assert bulkWrites != null : "bulk writes passed in are null";
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "Executing BulkWriteAction: id=%d, bulkWrites=%s", id, bulkWrites);
    boolean first = true;
    LinkedList<BulkWrites> writesToPerform = Lists.newLinkedList();
    writesToPerform.add(bulkWrites);
    WriteAttemptContext ctx = new WriteAttemptContext();
    do {
        if (!first) {
            retryCounter.increment();
        }
        first = false;
        BulkWrites nextWrite = writesToPerform.removeFirst();
        assert nextWrite != null : "next write is null";
        ctx.reset();
        ctx.attemptCount++;
        if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) {
            int numRows = nextWrite.numEntries();
            SpliceLogUtils.warn(LOG,
                    "BulkWriteAction[%d rows] is taking a long time with %d attempts: id=%d",
                    numRows, ctx.attemptCount, id);
            LOG.warn("Attempting bulk write " + nextWrite);
        }
        executeSingle(nextWrite, ctx);

        /*
         * We need to do an exponential backoff to ensure that our cache has a chance to invalidate, or
         * simply because we were told to wait a bit by the write pipeline (i.e. we were rejected).
         */
        if (ctx.shouldSleep()) {
            clock.sleep(PipelineUtils.getPauseTime(ctx.attemptCount, 10), TimeUnit.MILLISECONDS);
        }
        if (ctx.directRetry)
            writesToPerform.add(nextWrite);
        else if (ctx.nextWriteSet != null && ctx.nextWriteSet.size() > 0) {
            ctx.failed();
            // rebuild a new buffer to retry from any records that need retrying
            addToRetryCallBuffer(ctx.nextWriteSet, bulkWrites.getTxn(), ctx.refreshCache);
        }

        if (retryPipingCallBuffer != null) {
            writesToPerform.addAll(retryPipingCallBuffer.getBulkWrites());
            retryPipingCallBuffer = null;
        }
    } while (writesToPerform.size() > 0);
}
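
/*
 * For reference: PipelineUtils.getPauseTime(ctx.attemptCount, 10) above is assumed to compute a
 * backoff pause (in milliseconds) that grows with the attempt count, i.e. a capped exponential
 * backoff. A hypothetical, illustrative shape of such a function (not the actual implementation):
 *
 *     static long getPauseTime(long attempt, long basePauseMs) {
 *         long shift = Math.min(attempt, 12);   // cap the exponent so the pause stays bounded
 *         return basePauseMs * (1L << shift);   // roughly basePauseMs * 2^attempt
 *     }
 */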
private void executeSingle(BulkWrites nextWrite, WriteAttemptContext ctx) throws Exception {
    retryPipingCallBuffer = null;
    if (!shouldWrite(nextWrite)) return;

    if (LOG.isTraceEnabled())
        SpliceLogUtils.trace(LOG, "Getting next BulkWrites in loop: id=%d, nextBulkWrites=%s", id, nextWrite);

    // used to indicate that the exception was thrown inside the try{} block explicitly, and should
    // just be re-thrown
    boolean thrown = false;
    try {
        BulkWriter writer = writerFactory.newWriter(tableName);
        writeTimer.startTiming();
        BulkWritesResult bulkWritesResult = writer.write(nextWrite, ctx.refreshCache);
        writeTimer.stopTiming();
        Iterator<BulkWrite> bws = nextWrite.getBulkWrites().iterator();
        Collection<BulkWriteResult> results = bulkWritesResult.getBulkWriteResults();
        for (BulkWriteResult bulkWriteResult : results) {
            WriteResponse globalResponse = writeConfiguration.processGlobalResult(bulkWriteResult);
            BulkWrite currentBulkWrite = bws.next();
            switch (globalResponse) {
                case SUCCESS:
                    writtenCounter.add(currentBulkWrite.getSize());
                    break; // success can be ignored
                case THROW_ERROR:
                    thrown = true;
                    thrownErrorsRows.add(currentBulkWrite.getSize());
                    throw parseIntoException(bulkWriteResult);
                case RETRY:
                    retriedRows.add(currentBulkWrite.getSize());
                    /*
                     * The entire BulkWrite needs to be retried--either because it was rejected outright,
                     * or because the region moved/split/something else.
                     */
                    ctx.rejected();
                    if (RETRY_LOG.isDebugEnabled())
                        SpliceLogUtils.debug(RETRY_LOG,
                                "Retrying write after receiving global RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s",
                                id, bulkWriteResult, currentBulkWrite);
                    else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) {
                        SpliceLogUtils.warn(LOG,
                                "Retrying write after receiving global RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s",
                                id, bulkWriteResult, currentBulkWrite);
                    }
                    ctx.addBulkWrites(currentBulkWrite.getMutations());
                    ctx.refreshCache = ctx.refreshCache || bulkWriteResult.getGlobalResult().refreshCache();
                    ctx.sleep = true; // always sleep due to rejection, even if we don't need to refresh the cache
                    break;
                case PARTIAL:
                    partialRows.add(currentBulkWrite.getSize());
                    partialFailureCounter.increment();
                    WriteResponse writeResponse = writeConfiguration.partialFailure(bulkWriteResult, currentBulkWrite);
                    switch (writeResponse) {
                        case THROW_ERROR:
                            partialThrownErrorRows.add(currentBulkWrite.getSize());
                            thrown = true;
                            throw parseIntoException(bulkWriteResult);
                        case RETRY:
                            if (RETRY_LOG.isTraceEnabled()) {
                                SpliceLogUtils.trace(RETRY_LOG,
                                        "Retrying write after receiving partial %s response: id=%d, bulkWriteResult=%s, failureCountsByType=%s",
                                        writeResponse, id, bulkWriteResult, getFailedRowsMessage(bulkWriteResult.getFailedRows()));
                            } else if (RETRY_LOG.isDebugEnabled()) {
                                SpliceLogUtils.debug(RETRY_LOG,
                                        "Retrying write after receiving partial %s response: id=%d, bulkWriteResult=%s",
                                        writeResponse, id, bulkWriteResult);
                            } else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) {
                                SpliceLogUtils.warn(LOG,
                                        "Retrying write after receiving partial RETRY response: id=%d, bulkWriteResult=%s, bulkWrite=%s",
                                        id, bulkWriteResult, currentBulkWrite);
                            }
                            Collection<KVPair> writes = PipelineUtils.doPartialRetry(currentBulkWrite, bulkWriteResult, id);
                            if (writes.size() > 0) {
                                ctx.addBulkWrites(writes);
                                // only redo cache if you have a failure not a lock contention issue
                                boolean isFailure = bulkWriteResult.getFailedRows() != null
                                        && bulkWriteResult.getFailedRows().size() > 0;
                                ctx.refreshCache = ctx.refreshCache || isFailure;
                                ctx.sleep = true; // always sleep
                            }
                            writtenCounter.add(currentBulkWrite.getSize() - writes.size());
                            partialRetriedRows.add(writes.size());
                            break;
                        case IGNORE:
                            partialIgnoredRows.add(currentBulkWrite.getSize());
                            SpliceLogUtils.warn(RETRY_LOG,
                                    "Ignoring write after receiving unknown partial %s response: id=%d, bulkWriteResult=%s",
                                    writeResponse, id, bulkWriteResult);
                            break;
                        default:
                            throw new IllegalStateException("Programmer error: Unknown partial response: " + writeResponse);
                    }
                    break;
                case IGNORE:
                    ignoredRows.add(currentBulkWrite.getSize());
                    SpliceLogUtils.warn(LOG, "Global response indicates ignoring, so we ignore");
                    break;
                default:
                    SpliceLogUtils.error(LOG, "Global response went down default path, assert");
                    throw new IllegalStateException("Programmer error: Unknown global response: " + globalResponse);
            }
        }
    } catch (Throwable e) {
        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Caught throwable: id=%d", id);
        globalErrorCounter.increment();
        if (thrown)
            throw new ExecutionException(e);
        //noinspection ThrowableResultOfMethodCallIgnored
        if (pipelineExceptionFactory.processPipelineException(e) instanceof PipelineTooBusy) {
            ctx.rejected();
            /*
             * The pipeline has indicated that it is too busy to accept any writes just now, so we need
             * to retry the entire BulkWrites that we were sent. But since it's too busy (and not some other error),
             * there's no need to refresh caches on our account. However, we *do* need to do a backoff to ensure
             * that we let the Pipeline cool down a bit.
             */
            if (RETRY_LOG.isDebugEnabled())
                SpliceLogUtils.debug(RETRY_LOG,
                        "Retrying write after receiving RegionTooBusyException: id=%d", id);
            regionTooBusy.increment();
            ctx.sleep = true;
            ctx.directRetry();
            return;
        }

        WriteResponse writeResponse = writeConfiguration.globalError(e);
        switch (writeResponse) {
            case THROW_ERROR:
                if (LOG.isTraceEnabled())
                    SpliceLogUtils.trace(LOG,
                            "Throwing error after receiving global error: id=%d, class=%s, message=%s",
                            id, e.getClass(), e.getMessage());
                catchThrownRows.add(nextWrite.numEntries());
                throw new ExecutionException(e);
            case RETRY:
                errors.add(e);
                ctx.rejected();
                if (RETRY_LOG.isDebugEnabled()) {
                    SpliceLogUtils.debug(RETRY_LOG,
                            "Retrying write after receiving global error: id=%d, class=%s, message=%s",
                            id, e.getClass(), e.getMessage());
                    if (RETRY_LOG.isTraceEnabled()) {
                        RETRY_LOG.trace("Global error exception received: ", e);
                    }
                } else if (ctx.attemptCount > 100 && ctx.attemptCount % 50 == 0) {
                    SpliceLogUtils.debug(LOG,
                            "Retrying write after receiving global error: id=%d, class=%s, message=%s",
                            id, e.getClass(), e.getMessage());
                }
                ctx.sleep = true;
                ctx.refreshCache = true;
                for (BulkWrite bw : nextWrite.getBulkWrites()) {
                    ctx.addBulkWrites(bw.getMutations());
                    catchRetriedRows.add(bw.getSize());
                }
                break;
            default:
                LOG.warn(String.format(
                        "Ignoring error after receiving unknown global error %s response: id=%d ",
                        writeResponse, id), e);
                throw new IllegalStateException("Programmer error: Unknown global response: " + writeResponse);
        }
    }
}
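
/*
 * Response handling in executeSingle(), summarized from the switches above:
 *   SUCCESS     - count the rows as written; nothing to retry.
 *   THROW_ERROR - convert the result into an exception and propagate it.
 *   RETRY       - re-queue the whole BulkWrite (rejected, or the region moved/split); sleep before
 *                 retrying and refresh the region cache if the result asks for it.
 *   PARTIAL     - re-queue only the failed rows via PipelineUtils.doPartialRetry; refresh the cache
 *                 only when there are real failures rather than lock contention.
 *   IGNORE      - count the rows as ignored and drop them.
 * A pipeline-too-busy exception triggers a direct retry of the same BulkWrites with a backoff but
 * no cache refresh; any other global error re-queues all mutations with both a sleep and a cache
 * refresh.
 */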