public boolean scheduleAndWait(final String id, ExtractionNamespace namespace, long waitForFirstRun)
{
  if (scheduleOrUpdate(id, namespace)) {
    log.debug("Scheduled new namespace [%s]: %s", id, namespace);
  } else {
    log.debug("Namespace [%s] already running: %s", id, namespace);
  }

  final NamespaceImplData namespaceImplData = implData.get(id);
  if (namespaceImplData == null) {
    log.warn("NamespaceLookupExtractorFactory[%s] - deleted during start", id);
    return false;
  }

  boolean success = false;
  try {
    success = namespaceImplData.firstRun.await(waitForFirstRun, TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    log.error(e, "NamespaceLookupExtractorFactory[%s] - interrupted during start", id);
    // Restore the interrupt flag; we still fall through to delete(id) below.
    Thread.currentThread().interrupt();
  }
  if (!success) {
    delete(id);
  }
  return success;
}
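
// A hypothetical usage sketch of scheduleAndWait() above. The enclosing class
// is not shown in this snippet, so the NamespaceExtractionCacheManager type,
// the `manager` parameter, and the 60-second timeout are assumptions: the
// caller blocks until the namespace's first cache population completes and
// fails start-up if it does not finish in time.
public static void startAndAwaitNamespace(
    final NamespaceExtractionCacheManager manager, // assumed owner of scheduleAndWait()
    final String id,
    final ExtractionNamespace namespace
)
{
  if (!manager.scheduleAndWait(id, namespace, 60_000L)) {
    // scheduleAndWait() already deleted the namespace on timeout or interrupt.
    throw new ISE("Namespace [%s] did not finish its first run in time", id);
  }
}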
@Override
protected void map(Object key, Object value, Context context) throws IOException, InterruptedException
{
  try {
    final InputRow inputRow;
    try {
      inputRow = parseInputRow(value, parser);
    } catch (Exception e) {
      if (config.isIgnoreInvalidRows()) {
        log.debug(e, "Ignoring invalid row [%s] due to parsing error", value.toString());
        context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1);
        return; // we're ignoring this invalid row
      } else {
        throw e;
      }
    }

    if (!granularitySpec.bucketIntervals().isPresent()
        || granularitySpec.bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())).isPresent()) {
      innerMap(inputRow, value, context);
    }
  } catch (RuntimeException e) {
    throw new RE(e, "Failure on row[%s]", value);
  }
}
// The return value indicates whether a task was actually (re)scheduled.
public boolean scheduleOrUpdate(final String id, ExtractionNamespace namespace)
{
  final NamespaceImplData implDatum = implData.get(id);
  if (implDatum == null) {
    // Probably new (a concurrent caller may race us); schedule it.
    schedule(id, namespace);
    return true;
  }
  if (!implDatum.enabled.get()) {
    // Race condition. Someone else disabled it first; go ahead and reschedule.
    schedule(id, namespace);
    return true;
  }

  // Live one. Check whether it needs to be updated.
  if (implDatum.namespace.equals(namespace)) {
    // Skip if there is no update.
    return false;
  }
  if (log.isDebugEnabled()) {
    log.debug("Namespace [%s] needs to be updated to [%s]", implDatum.namespace, namespace);
  }
  removeNamespaceLocalMetadata(implDatum);
  schedule(id, namespace);
  return true;
}
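
// An illustrative sketch of the return-value contract above. The `manager`
// parameter (an assumed instance of the enclosing class) and the namespace
// values are hypothetical; only the boolean semantics come from
// scheduleOrUpdate() itself.
public static void demonstrateScheduleOrUpdate(
    final NamespaceExtractionCacheManager manager, // assumed enclosing class
    final ExtractionNamespace countries
)
{
  // First submission: no implDatum exists yet, so a poll task is scheduled.
  final boolean first = manager.scheduleOrUpdate("countries", countries);  // true
  // Identical re-submission: implDatum.namespace.equals(namespace), so no-op.
  final boolean second = manager.scheduleOrUpdate("countries", countries); // false
  if (!first || second) {
    throw new ISE("Expected first=true, second=false for an identical namespace");
  }
}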
@Override
public void log(Request request, Response response)
{
  if (logger.isDebugEnabled()) {
    logger.debug(
        "%s %s %s",
        request.getMethod(),
        request.getUri().toString(),
        request.getProtocol().toString()
    );
  }
}
@LifecycleStop
public void stop()
{
  if (log4jShutdown != null) {
    log.debug("Shutting down log4j");
    log4jShutdown.stop();
  } else {
    log.warn("Log4j shutdown was registered in lifecycle but no shutdown object exists!");
  }
}
public static void addNextRow(
    final Supplier<Committer> committerSupplier,
    final Firehose firehose,
    final Plumber plumber,
    final boolean reportParseExceptions,
    final FireDepartmentMetrics metrics
)
{
  try {
    final InputRow inputRow = firehose.nextRow();
    if (inputRow == null) {
      if (reportParseExceptions) {
        throw new ParseException("null input row");
      } else {
        log.debug("Discarded null input row, considering unparseable.");
        metrics.incrementUnparseable();
        return;
      }
    }

    // Included in the ParseException try/catch, as additional parsing can be done during indexing.
    int numRows = plumber.add(inputRow, committerSupplier);
    if (numRows == -1) {
      metrics.incrementThrownAway();
      log.debug("Discarded row[%s], considering thrownAway.", inputRow);
      return;
    }
    metrics.incrementProcessed();
  } catch (ParseException e) {
    if (reportParseExceptions) {
      throw e;
    } else {
      log.debug(e, "Discarded row due to exception, considering unparseable.");
      metrics.incrementUnparseable();
    }
  } catch (IndexSizeExceededException e) {
    // Shouldn't happen if this is only being called by a single thread.
    // plumber.add should be swapping out indexes before they fill up.
    throw new ISE(e, "WTF?! Index size exceeded, this shouldn't happen. Bad Plumber!");
  }
}
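
// A minimal driver sketch showing how addNextRow() is typically looped,
// assuming the standard Firehose contract (hasMore()/nextRow()) and that this
// helper lives alongside addNextRow(); it is an illustration, not the actual
// caller from the source.
public static void consumeFirehose(
    final Supplier<Committer> committerSupplier,
    final Firehose firehose,
    final Plumber plumber,
    final boolean reportParseExceptions,
    final FireDepartmentMetrics metrics
)
{
  while (firehose.hasMore()) {
    // Each call pulls one row and routes it to processed/thrownAway/unparseable.
    addNextRow(committerSupplier, firehose, plumber, reportParseExceptions, metrics);
  }
}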
/**
 * Clears out resources used by the namespace, such as threads. Implementations may override this
 * and call super.delete(...) if they have resources of their own which need to be cleared.
 * <p>
 * This particular method is NOT thread safe, and any implementation which is intended to be
 * thread safe should guard calls to this method.
 *
 * @param ns The namespace to be deleted
 *
 * @return True if a deletion occurred, false if no deletion occurred.
 *
 * @throws ISE if there is an error cancelling the namespace's future task
 */
public boolean delete(final String ns)
{
  final NamespaceImplData implDatum = implData.get(ns);
  final boolean deleted = removeNamespaceLocalMetadata(implDatum);
  // At this point we have won the leader election for cancelling this implDatum.
  if (deleted) {
    log.info("Deleting namespace [%s]", ns);
    lastVersion.remove(implDatum.name);
    return true;
  } else {
    log.debug("Did not delete namespace [%s]", ns);
    return false;
  }
}
@Override
public String apply(final String key)
{
  if (key == null) {
    return null;
  }
  final String presentVal;
  try {
    presentVal = loadingCache.get(key, new applyCallable(key));
    return Strings.emptyToNull(presentVal);
  } catch (ExecutionException e) {
    LOGGER.debug("value not found for key [%s]", key);
    return null;
  }
}
@Override
public List<String> unapply(final String value)
{
  // A null value maps to an empty list of keys.
  if (value == null) {
    return Collections.emptyList();
  }
  final List<String> retList;
  try {
    retList = reverseLoadingCache.get(value, new unapplyCallable(value));
    return retList;
  } catch (ExecutionException e) {
    LOGGER.debug("list of keys not found for value [%s]", value);
    return Collections.emptyList();
  }
}
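
// A hedged consistency sketch for the apply()/unapply() pair above: for a key
// that maps to a non-empty value, unapply(apply(key)) should contain that key.
// The LookupExtractor parameter type is an assumption about the enclosing
// class; the check itself follows directly from the two methods' contracts.
public static void checkRoundTrip(final LookupExtractor extractor, final String key)
{
  final String value = extractor.apply(key);
  if (value != null && !extractor.unapply(value).contains(key)) {
    throw new ISE("Reverse cache is missing key [%s] for value [%s]", key, value);
  }
}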
@Override
public boolean delete(final String namespaceKey)
{
  final Lock lock = nsLocks.get(namespaceKey);
  lock.lock();
  try {
    super.delete(namespaceKey);
    final String mmapDBkey = currentNamespaceCache.get(namespaceKey);
    if (mmapDBkey != null) {
      final long pre = tmpFile.length();
      mmapDB.delete(mmapDBkey);
      dataSize.set(tmpFile.length());
      log.debug("MapDB file size: pre %d post %d", pre, dataSize.get());
      return true;
    } else {
      return false;
    }
  } finally {
    lock.unlock();
  }
}
@LifecycleStart
public void start()
{
  log.debug("Log4j shutter downer is waiting");
}
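
// A minimal sketch of how the @LifecycleStart/@LifecycleStop pair above is
// driven: Druid's Lifecycle scans managed instances for these annotations and
// invokes the annotated methods on start and stop. The Log4jShutterDowner
// class name here is an assumption for illustration.
public static void registerWithLifecycle(final Lifecycle lifecycle, final Log4jShutterDowner shutterDowner)
    throws Exception
{
  lifecycle.addManagedInstance(shutterDowner); // start() runs when the lifecycle starts...
  lifecycle.start();                           // ...and stop() when it shuts down
}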
@Override
protected void map(String key, String value, final Context context) throws IOException, InterruptedException
{
  final InputSplit split = context.getInputSplit();
  if (!(split instanceof DatasourceInputSplit)) {
    throw new IAE(
        "Unexpected split type. Expected [%s] was [%s]",
        DatasourceInputSplit.class.getCanonicalName(),
        split.getClass().getCanonicalName()
    );
  }

  final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
  final File tmpDir = Paths.get(tmpDirLoc).toFile();

  final DataSegment segment = Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();

  final HadoopDruidConverterConfig config = converterConfigFromConfiguration(context.getConfiguration());

  context.setStatus("DOWNLOADING");
  context.progress();
  final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
  final File inDir = new File(tmpDir, "in");

  if (inDir.exists() && !inDir.delete()) {
    log.warn("Could not delete [%s]", inDir);
  }

  // Warn only if the directory could not be created AND does not already exist
  // as a directory (the original condition was inverted).
  if (!inDir.mkdir() && !inDir.isDirectory()) {
    log.warn("Unable to make directory [%s]", inDir);
  }

  final long inSize = JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
  log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
  context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

  context.setStatus("CONVERTING");
  context.progress();
  final File outDir = new File(tmpDir, "out");
  if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
    throw new IOException(String.format("Could not create output directory [%s]", outDir));
  }
  HadoopDruidConverterConfig.INDEX_MERGER.convert(
      inDir,
      outDir,
      config.getIndexSpec(),
      JobHelper.progressIndicatorForContext(context)
  );
  if (config.isValidate()) {
    context.setStatus("Validating");
    HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
  }
  context.progress();
  context.setStatus("Starting PUSH");
  final Path baseOutputPath = new Path(config.getSegmentOutputPath());
  final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
  final DataSegment finalSegmentTemplate = segment.withVersion(segment.getVersion() + "_converted");
  final DataSegment finalSegment = JobHelper.serializeOutIndex(
      finalSegmentTemplate,
      context.getConfiguration(),
      context,
      context.getTaskAttemptID(),
      outDir,
      JobHelper.makeSegmentOutputPath(baseOutputPath, outputFS, finalSegmentTemplate)
  );
  context.progress();
  context.setStatus("Finished PUSH");
  final String finalSegmentString = HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
  context.getConfiguration().set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
  context.write(new Text("dataSegment"), new Text(finalSegmentString));

  context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
  context.progress();
  context.setStatus("Ready To Commit");
}
// For testing purposes this is protected
protected <T extends ExtractionNamespace> ListenableFuture<?> schedule(
    final String id,
    final T namespace,
    final ExtractionNamespaceCacheFactory<T> factory,
    final Runnable postRunnable,
    final String cacheId
)
{
  log.debug("Trying to update namespace [%s]", id);
  final NamespaceImplData implDatum = implData.get(id);
  if (implDatum != null) {
    synchronized (implDatum.enabled) {
      if (implDatum.enabled.get()) {
        // We also check at the end of the function, but fail fast here.
        throw new IAE("Namespace [%s] already exists! Leaving prior running", namespace.toString());
      }
    }
  }
  final long updateMs = namespace.getPollMs();
  final CountDownLatch startLatch = new CountDownLatch(1);

  final Runnable command = new Runnable()
  {
    @Override
    public void run()
    {
      try {
        startLatch.await(); // wait for "election" to leadership or cancellation
        if (!Thread.currentThread().isInterrupted()) {
          final Map<String, String> cache = getCacheMap(cacheId);
          final String preVersion = lastVersion.get(id);
          final Callable<String> populator = factory.getCachePopulator(id, namespace, preVersion, cache);

          tasksStarted.incrementAndGet();

          final String newVersion = populator.call();
          if (preVersion != null && preVersion.equals(newVersion)) {
            throw new CancellationException(String.format("Version `%s` already exists", preVersion));
          }
          if (newVersion != null) {
            lastVersion.put(id, newVersion);
          }
          postRunnable.run();
          log.debug("Namespace [%s] successfully updated", id);
        }
      } catch (Throwable t) {
        delete(cacheId);
        if (t instanceof CancellationException) {
          log.debug(t, "Namespace [%s] cancelled", id);
        } else {
          log.error(t, "Failed to update namespace [%s]", namespace);
        }
        if (Thread.currentThread().isInterrupted()) {
          throw Throwables.propagate(t);
        }
      }
    }
  };

  ListenableFuture<?> future;
  try {
    if (updateMs > 0) {
      future = listeningScheduledExecutorService.scheduleAtFixedRate(command, 0, updateMs, TimeUnit.MILLISECONDS);
    } else {
      future = listeningScheduledExecutorService.schedule(command, 0, TimeUnit.MILLISECONDS);
    }

    final NamespaceImplData me = new NamespaceImplData(future, namespace, id);
    final NamespaceImplData other = implData.putIfAbsent(id, me);
    if (other != null) {
      if (!future.isDone() && !future.cancel(true)) {
        log.warn("Unable to cancel future for namespace[%s] on race loss", id);
      }
      throw new IAE("Namespace [%s] already exists! Leaving prior running", namespace);
    } else {
      if (!me.enabled.compareAndSet(false, true)) {
        log.wtf("How did someone enable this before ME?");
      }
      log.debug("I own namespace [%s]", id);
      return future;
    }
  } finally {
    startLatch.countDown();
  }
}
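
// A hedged sketch of the populator contract that schedule() relies on: the
// factory returns a Callable whose return value is the new version string, and
// returning the prior version makes schedule() treat the run as "already
// exists" and cancel it. StaticMapExtractionNamespace and its getVersion()/
// getMap() accessors are assumptions for illustration only.
public Callable<String> getCachePopulator(
    final String id,
    final StaticMapExtractionNamespace namespace, // assumed namespace type
    final String lastVersion,
    final Map<String, String> cache
)
{
  return new Callable<String>()
  {
    @Override
    public String call()
    {
      // If nothing changed, hand back the old version; schedule() will raise
      // a CancellationException for the matching version and skip the update.
      if (namespace.getVersion().equals(lastVersion)) {
        return lastVersion;
      }
      cache.putAll(namespace.getMap());
      return namespace.getVersion();
    }
  };
}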