public boolean scheduleAndWait(
      final String id, ExtractionNamespace namespace, long waitForFirstRun) {
    if (scheduleOrUpdate(id, namespace)) {
      log.debug("Scheduled new namespace [%s]: %s", id, namespace);
    } else {
      log.debug("Namespace [%s] already running: %s", id, namespace);
    }

    final NamespaceImplData namespaceImplData = implData.get(id);
    if (namespaceImplData == null) {
      log.warn("NamespaceLookupExtractorFactory[%s] - deleted during start", id);
      return false;
    }

    boolean success = false;
    try {
      success = namespaceImplData.firstRun.await(waitForFirstRun, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
      log.error(e, "NamespaceLookupExtractorFactory[%s] - interrupted during start", id);
    }
    if (!success) {
      delete(id);
    }
    return success;
  }
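The bounded wait above is a plain CountDownLatch pattern: the first completed poll opens the gate, and the caller waits with a timeout. Below is a minimal, self-contained sketch of that pattern; the class and names (FirstRunGate, the one-second poll) are illustrative, not part of the code above.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class FirstRunGate {
  public static void main(String[] args) throws InterruptedException {
    final CountDownLatch firstRun = new CountDownLatch(1);
    final ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();

    // Periodic task, analogous to a namespace poll; its first completion opens the gate.
    exec.scheduleAtFixedRate(() -> {
      // ... cache population would happen here ...
      firstRun.countDown(); // a no-op after the first call
    }, 0, 1, TimeUnit.SECONDS);

    // Bounded wait, mirroring waitForFirstRun above; false means "not ready in time".
    final boolean ready = firstRun.await(5, TimeUnit.SECONDS);
    System.out.println("first run completed in time: " + ready);
    exec.shutdownNow();
  }
}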
Example 2
  @Override
  protected void map(Object key, Object value, Context context)
      throws IOException, InterruptedException {
    try {
      final InputRow inputRow;
      try {
        inputRow = parseInputRow(value, parser);
      } catch (Exception e) {
        if (config.isIgnoreInvalidRows()) {
          log.debug(e, "Ignoring invalid row [%s] due to parsing error", value.toString());
          context
              .getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER)
              .increment(1);
          return; // we're ignoring this invalid row
        } else {
          throw e;
        }
      }

      if (!granularitySpec.bucketIntervals().isPresent()
          || granularitySpec
              .bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
              .isPresent()) {
        innerMap(inputRow, value, context);
      }
    } catch (RuntimeException e) {
      throw new RE(e, "Failure on row[%s]", value);
    }
  }
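The mapper above skips unparseable rows and bumps a Hadoop counter when isIgnoreInvalidRows is set. Here is a self-contained sketch of that skip-and-count pattern with stand-in types; parse(), the AtomicLong counter, and the sample rows are illustrative, not Druid or Hadoop APIs.

import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

public class InvalidRowSkip {
  // Stand-in for parseInputRow(...); throws on malformed input.
  static long parse(String raw) {
    return Long.parseLong(raw);
  }

  public static void main(String[] args) {
    final AtomicLong invalidRows = new AtomicLong(); // plays the role of INVALID_ROW_COUNTER
    final boolean ignoreInvalidRows = true;          // mirrors config.isIgnoreInvalidRows()

    for (String raw : List.of("1", "oops", "3")) {
      final long timestamp;
      try {
        timestamp = parse(raw);
      } catch (RuntimeException e) {
        if (ignoreInvalidRows) {
          invalidRows.incrementAndGet();
          continue; // skip the bad row, as the mapper does
        }
        throw e;
      }
      System.out.println("processed row with timestamp " + timestamp);
    }
    System.out.println("invalid rows skipped: " + invalidRows.get());
  }
}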
  // The return value indicates whether the namespace was actually (re)scheduled.
  public boolean scheduleOrUpdate(final String id, ExtractionNamespace namespace) {
    final NamespaceImplData implDatum = implData.get(id);
    if (implDatum == null) {
      // New, probably
      schedule(id, namespace);
      return true;
    }
    if (!implDatum.enabled.get()) {
      // Race condition. Someone else disabled it first, go ahead and reschedule
      schedule(id, namespace);
      return true;
    }

    // Live one. Check if it needs to be updated
    if (implDatum.namespace.equals(namespace)) {
      // skip if no update
      return false;
    }
    if (log.isDebugEnabled()) {
      log.debug("Namespace [%s] needs updated to [%s]", implDatum.namespace, namespace);
    }
    removeNamespaceLocalMetadata(implDatum);
    schedule(id, namespace);
    return true;
  }
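A condensed, self-contained sketch of the decision above: reschedule when the entry is new or its configuration changed, and report a no-op otherwise. The ConcurrentHashMap of strings stands in for implData, and all names are illustrative.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class ScheduleOrUpdateSketch {
  private final ConcurrentMap<String, String> running = new ConcurrentHashMap<>();

  // Returns true if (re)scheduled, false if the live entry already matches.
  public boolean scheduleOrUpdate(String id, String namespace) {
    final String existing = running.get(id);
    if (namespace.equals(existing)) {
      return false; // identical config: skip, as the real method does
    }
    running.put(id, namespace); // stands in for removeNamespaceLocalMetadata + schedule
    return true;
  }

  public static void main(String[] args) {
    final ScheduleOrUpdateSketch s = new ScheduleOrUpdateSketch();
    System.out.println(s.scheduleOrUpdate("ns", "cfg-v1")); // true: new namespace
    System.out.println(s.scheduleOrUpdate("ns", "cfg-v1")); // false: unchanged
    System.out.println(s.scheduleOrUpdate("ns", "cfg-v2")); // true: config changed
  }
}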
Example 4
 @Override
 public void log(Request request, Response response) {
   if (logger.isDebugEnabled()) {
     logger.debug(
         "%s %s %s",
         request.getMethod(), request.getUri().toString(), request.getProtocol().toString());
   }
 }
 @LifecycleStop
 public void stop() {
   if (log4jShutdown != null) {
     log.debug("Shutting down log4j");
     log4jShutdown.stop();
   } else {
     log.warn("Log4j shutdown was registered in lifecycle but no shutdown object exists!");
   }
 }
Example 6
  public static void addNextRow(
      final Supplier<Committer> committerSupplier,
      final Firehose firehose,
      final Plumber plumber,
      final boolean reportParseExceptions,
      final FireDepartmentMetrics metrics) {
    try {
      final InputRow inputRow = firehose.nextRow();

      if (inputRow == null) {
        if (reportParseExceptions) {
          throw new ParseException("null input row");
        } else {
          log.debug("Discarded null input row, considering unparseable.");
          metrics.incrementUnparseable();
          return;
        }
      }

      // Included in ParseException try/catch, as additional parsing can be done during indexing.
      int numRows = plumber.add(inputRow, committerSupplier);

      if (numRows == -1) {
        metrics.incrementThrownAway();
        log.debug("Discarded row[%s], considering thrownAway.", inputRow);
        return;
      }

      metrics.incrementProcessed();
    } catch (ParseException e) {
      if (reportParseExceptions) {
        throw e;
      } else {
        log.debug(e, "Discarded row due to exception, considering unparseable.");
        metrics.incrementUnparseable();
      }
    } catch (IndexSizeExceededException e) {
      // Shouldn't happen if this is only being called by a single thread.
      // plumber.add should be swapping out indexes before they fill up.
      throw new ISE(e, "WTF?! Index size exceeded, this shouldn't happen. Bad Plumber!");
    }
  }
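addNextRow is typically driven by a loop over a row source. The sketch below shows that driver shape with a stand-in RowSource interface; it is not the Druid Firehose API, and the null-row handling mirrors the unparseable accounting above.

import java.util.Arrays;
import java.util.Iterator;

public class DriverLoopSketch {
  interface RowSource {   // stand-in for Firehose
    boolean hasMore();
    String nextRow();     // may return null for unparseable input
  }

  public static void main(String[] args) {
    final Iterator<String> rows = Arrays.asList("a", null, "b").iterator();
    final RowSource source = new RowSource() {
      public boolean hasMore() { return rows.hasNext(); }
      public String nextRow() { return rows.next(); }
    };

    int processed = 0;
    int unparseable = 0;
    while (source.hasMore()) { // the real loop would call addNextRow(...) here
      final String row = source.nextRow();
      if (row == null) {
        unparseable++;         // null rows count as unparseable, as above
        continue;
      }
      processed++;
    }
    System.out.printf("processed=%d unparseable=%d%n", processed, unparseable);
  }
}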
 /**
  * Clears out resources used by the namespace such as threads. Implementations may override this
  * and call super.delete(...) if they have resources of their own which need to be cleared.
  *
  * <p>This particular method is NOT thread-safe, and any implementation intended to be
  * thread-safe should safeguard calls to this method.
  *
  * @param ns The namespace to be deleted
  * @return True if a deletion occurred, false if no deletion occurred.
  * @throws ISE if there is an error cancelling the namespace's future task
  */
 public boolean delete(final String ns) {
   final NamespaceImplData implDatum = implData.get(ns);
   final boolean deleted = removeNamespaceLocalMetadata(implDatum);
   // At this point we have won leader election on canceling this implDatum
   if (deleted) {
     log.info("Deleting namespace [%s]", ns);
     lastVersion.remove(implDatum.name);
     return true;
   } else {
     log.debug("Did not delete namespace [%s]", ns);
     return false;
   }
 }
Esempio n. 8
0
 @Override
 public String apply(final String key) {
   if (key == null) {
     return null;
   }
   final String presentVal;
   try {
     presentVal = loadingCache.get(key, new applyCallable(key));
     return Strings.emptyToNull(presentVal);
   } catch (ExecutionException e) {
     LOGGER.debug("value not found for key [%s]", key);
     return null;
   }
 }
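apply() relies on Guava's Cache.get(key, valueLoader), which invokes the loader only on a cache miss and wraps loader failures in ExecutionException. A self-contained sketch of that pattern, assuming Guava on the classpath; lookupRemote and the cache sizing are illustrative.

import com.google.common.base.Strings;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.util.concurrent.ExecutionException;

public class CacheGetSketch {
  public static void main(String[] args) {
    final Cache<String, String> cache = CacheBuilder.newBuilder().maximumSize(1000).build();
    final String key = "foo";
    try {
      // The loader runs only on a miss, like applyCallable above.
      final String val = cache.get(key, () -> lookupRemote(key));
      // Empty strings map to null, matching Strings.emptyToNull in apply().
      System.out.println(Strings.emptyToNull(val));
    } catch (ExecutionException e) {
      System.out.println("no value for key " + key);
    }
  }

  private static String lookupRemote(String key) {
    return "value-for-" + key; // stand-in for the real lookup
  }
}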
Example 9
 @Override
 public List<String> unapply(final String value) {
   // null value maps to empty list
   if (value == null) {
      return Collections.emptyList();
   }
   final List<String> retList;
   try {
     retList = reverseLoadingCache.get(value, new unapplyCallable(value));
     return retList;
   } catch (ExecutionException e) {
     LOGGER.debug("list of keys not found for value [%s]", value);
      return Collections.emptyList();
   }
 }
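The delete override below illustrates the safeguard that the javadoc for delete(...) earlier in this section calls for: because the base method is not thread-safe, the override takes a per-namespace lock before delegating to super.delete(...) and cleaning up the backing MapDB store.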
  @Override
  public boolean delete(final String namespaceKey) {

    final Lock lock = nsLocks.get(namespaceKey);
    lock.lock();
    try {
      super.delete(namespaceKey);
      final String mmapDBkey = currentNamespaceCache.get(namespaceKey);
      if (mmapDBkey != null) {
        final long pre = tmpFile.length();
        mmapDB.delete(mmapDBkey);
        dataSize.set(tmpFile.length());
        log.debug("MapDB file size: pre %d  post %d", pre, dataSize.get());
        return true;
      } else {
        return false;
      }
    } finally {
      lock.unlock();
    }
  }
 @LifecycleStart
 public void start() {
   log.debug("Log4j shutter downer is waiting");
 }
    @Override
    protected void map(String key, String value, final Context context)
        throws IOException, InterruptedException {
      final InputSplit split = context.getInputSplit();
      if (!(split instanceof DatasourceInputSplit)) {
        throw new IAE(
            "Unexpected split type. Expected [%s] was [%s]",
            DatasourceInputSplit.class.getCanonicalName(), split.getClass().getCanonicalName());
      }

      final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
      final File tmpDir = Paths.get(tmpDirLoc).toFile();

      final DataSegment segment =
          Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();

      final HadoopDruidConverterConfig config =
          converterConfigFromConfiguration(context.getConfiguration());

      context.setStatus("DOWNLOADING");
      context.progress();
      final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
      final File inDir = new File(tmpDir, "in");

      // Note: File.delete() does not remove non-empty directories, so a stale
      // non-empty input directory is reused rather than recreated.
      if (inDir.exists() && !inDir.delete()) {
        log.warn("Could not delete [%s]", inDir);
      }

      if (!inDir.mkdir() && (!inDir.exists() || !inDir.isDirectory())) {
        log.warn("Unable to make directory [%s]", inDir);
      }

      final long inSize =
          JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
      log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
      context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

      context.setStatus("CONVERTING");
      context.progress();
      final File outDir = new File(tmpDir, "out");
      if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
        throw new IOException(String.format("Could not create output directory [%s]", outDir));
      }
      HadoopDruidConverterConfig.INDEX_MERGER.convert(
          inDir, outDir, config.getIndexSpec(), JobHelper.progressIndicatorForContext(context));
      if (config.isValidate()) {
        context.setStatus("Validating");
        HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
      }
      context.progress();
      context.setStatus("Starting PUSH");
      final Path baseOutputPath = new Path(config.getSegmentOutputPath());
      final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
      final DataSegment finalSegmentTemplate =
          segment.withVersion(segment.getVersion() + "_converted");
      final DataSegment finalSegment =
          JobHelper.serializeOutIndex(
              finalSegmentTemplate,
              context.getConfiguration(),
              context,
              context.getTaskAttemptID(),
              outDir,
              JobHelper.makeSegmentOutputPath(baseOutputPath, outputFS, finalSegmentTemplate));
      context.progress();
      context.setStatus("Finished PUSH");
      final String finalSegmentString =
          HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
      context
          .getConfiguration()
          .set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
      context.write(new Text("dataSegment"), new Text(finalSegmentString));

      context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
      context.progress();
      context.setStatus("Ready To Commit");
    }
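For orientation, the mapper above is a linear per-segment pipeline: download the segment into a scratch directory, convert it with the configured IndexSpec, optionally validate the input and output segments against each other, then push the converted segment under a "_converted" version suffix and publish its metadata both through the task configuration and as mapper output, updating counters and status along the way.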
  // This is protected (rather than private) for testing purposes.
  protected <T extends ExtractionNamespace> ListenableFuture<?> schedule(
      final String id,
      final T namespace,
      final ExtractionNamespaceCacheFactory<T> factory,
      final Runnable postRunnable,
      final String cacheId) {
    log.debug("Trying to update namespace [%s]", id);
    final NamespaceImplData implDatum = implData.get(id);
    if (implDatum != null) {
      synchronized (implDatum.enabled) {
        if (implDatum.enabled.get()) {
          // We also check at the end of the function, but fail fast here
          throw new IAE(
              "Namespace [%s] already exists! Leaving prior running", namespace.toString());
        }
      }
    }
    final long updateMs = namespace.getPollMs();
    final CountDownLatch startLatch = new CountDownLatch(1);

    final Runnable command =
        new Runnable() {
          @Override
          public void run() {
            try {
              startLatch.await(); // wait for "election" to leadership or cancellation
              if (!Thread.currentThread().isInterrupted()) {
                final Map<String, String> cache = getCacheMap(cacheId);
                final String preVersion = lastVersion.get(id);
                final Callable<String> runnable =
                    factory.getCachePopulator(id, namespace, preVersion, cache);

                tasksStarted.incrementAndGet();
                final String newVersion = runnable.call();
                if (preVersion != null && preVersion.equals(newVersion)) {
                  throw new CancellationException(
                      String.format("Version `%s` already exists", preVersion));
                }
                if (newVersion != null) {
                  lastVersion.put(id, newVersion);
                }
                postRunnable.run();
                log.debug("Namespace [%s] successfully updated", id);
              }
            } catch (Throwable t) {
              delete(cacheId);
              if (t instanceof CancellationException) {
                log.debug(t, "Namespace [%s] cancelled", id);
              } else {
                log.error(t, "Failed to update namespace [%s]", namespace);
              }
              if (Thread.currentThread().isInterrupted()) {
                throw Throwables.propagate(t);
              }
            }
          }
        };

    ListenableFuture<?> future;
    try {
      if (updateMs > 0) {
        future =
            listeningScheduledExecutorService.scheduleAtFixedRate(
                command, 0, updateMs, TimeUnit.MILLISECONDS);
      } else {
        future = listeningScheduledExecutorService.schedule(command, 0, TimeUnit.MILLISECONDS);
      }

      final NamespaceImplData me = new NamespaceImplData(future, namespace, id);
      final NamespaceImplData other = implData.putIfAbsent(id, me);
      if (other != null) {
        if (!future.isDone() && !future.cancel(true)) {
          log.warn("Unable to cancel future for namespace[%s] on race loss", id);
        }
        throw new IAE("Namespace [%s] already exists! Leaving prior running", namespace);
      } else {
        if (!me.enabled.compareAndSet(false, true)) {
          log.wtf("How did someone enable this before ME?");
        }
        log.debug("I own namespace [%s]", id);
        return future;
      }
    } finally {
      startLatch.countDown();
    }
  }
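The startLatch above implements a latch-gated scheduling pattern: the task is submitted before registration completes, but parks on the latch so a race loser can be cancelled before doing any work, and the finally block guarantees the latch opens either way. A minimal, self-contained sketch of the pattern, with illustrative names and timings:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

public class LatchGatedSchedule {
  public static void main(String[] args) throws InterruptedException {
    final ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();
    final CountDownLatch startLatch = new CountDownLatch(1);

    final ScheduledFuture<?> future = exec.scheduleAtFixedRate(() -> {
      try {
        startLatch.await(); // park until registration ("election") has finished
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
      if (!Thread.currentThread().isInterrupted()) {
        System.out.println("poll pass"); // cache population would go here
      }
    }, 0, 500, TimeUnit.MILLISECONDS);

    // Registration happens here; a race loser would call future.cancel(true) instead.
    startLatch.countDown(); // always open the gate, win or lose (cf. the finally block above)

    Thread.sleep(1200);     // let a few polls run for demonstration
    future.cancel(true);
    exec.shutdownNow();
  }
}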