Example #1
 private void setUpStatsExport() {
   // Export the value.
   Stats.export(name, value);
   // Export the rate of this value.
   Stats.export(Rate.of(name + "_per_sec", value).build());
 }
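 // Formats a stat name from the given template and exports it as an AtomicLong counter.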
 private static AtomicLong exportLongStat(String template, Object... args) {
   return Stats.exportLong(String.format(template, args));
 }
  @Override
  protected void configure() {
    // Don't worry about clean shutdown, these can be daemon and cleanup-free.
    final ScheduledThreadPoolExecutor executor =
        new ScheduledThreadPoolExecutor(
            ASYNC_WORKER_THREADS.get(),
            new ThreadFactoryBuilder().setNameFormat("AsyncProcessor-%d").setDaemon(true).build());
    Stats.exportSize("timeout_queue_size", executor.getQueue());
    Stats.export(
        new StatImpl<Long>("async_tasks_completed") {
          @Override
          public Long read() {
            return executor.getCompletedTaskCount();
          }
        });

    // AsyncModule itself is not a subclass of PrivateModule because TaskEventModule internally uses
    // a MultiBinder, which cannot span multiple injectors.
    binder()
        .install(
            new PrivateModule() {
              @Override
              protected void configure() {
                bind(new TypeLiteral<Amount<Long, Time>>() {})
                    .toInstance(TRANSIENT_TASK_STATE_TIMEOUT.get());
                bind(ScheduledExecutorService.class).toInstance(executor);

                bind(TaskTimeout.class).in(Singleton.class);
                requireBinding(StatsProvider.class);
                expose(TaskTimeout.class);
              }
            });
    PubsubEventModule.bindSubscriber(binder(), TaskTimeout.class);

    binder()
        .install(
            new PrivateModule() {
              @Override
              protected void configure() {
                bind(TaskGroupsSettings.class)
                    .toInstance(
                        new TaskGroupsSettings(
                            new TruncatedBinaryBackoff(
                                INITIAL_SCHEDULE_DELAY.get(), MAX_SCHEDULE_DELAY.get()),
                            RateLimiter.create(MAX_SCHEDULE_ATTEMPTS_PER_SEC.get())));

                bind(RescheduleCalculatorImpl.RescheduleCalculatorSettings.class)
                    .toInstance(
                        new RescheduleCalculatorImpl.RescheduleCalculatorSettings(
                            new TruncatedBinaryBackoff(
                                INITIAL_FLAPPING_DELAY.get(), MAX_FLAPPING_DELAY.get()),
                            FLAPPING_THRESHOLD.get(),
                            MAX_RESCHEDULING_DELAY.get()));

                bind(RescheduleCalculator.class)
                    .to(RescheduleCalculatorImpl.class)
                    .in(Singleton.class);
                if (ENABLE_PREEMPTOR.get()) {
                  bind(PREEMPTOR_KEY).to(PreemptorImpl.class);
                  bind(PreemptorImpl.class).in(Singleton.class);
                  LOG.info("Preemptor Enabled.");
                } else {
                  bind(PREEMPTOR_KEY).toInstance(NULL_PREEMPTOR);
                  LOG.warning("Preemptor Disabled.");
                }
                expose(PREEMPTOR_KEY);
                bind(new TypeLiteral<Amount<Long, Time>>() {})
                    .annotatedWith(PreemptionDelay.class)
                    .toInstance(PREEMPTION_DELAY.get());
                bind(TaskGroups.class).in(Singleton.class);
                expose(TaskGroups.class);
              }
            });
    bindTaskScheduler(binder(), PREEMPTOR_KEY, RESERVATION_DURATION.get());
    PubsubEventModule.bindSubscriber(binder(), TaskGroups.class);

    binder()
        .install(
            new PrivateModule() {
              @Override
              protected void configure() {
                bind(OfferReturnDelay.class).to(RandomJitterReturnDelay.class);
                bind(ScheduledExecutorService.class).toInstance(executor);
                bind(OfferQueue.class).to(OfferQueueImpl.class);
                bind(OfferQueueImpl.class).in(Singleton.class);
                expose(OfferQueue.class);
              }
            });
    PubsubEventModule.bindSubscriber(binder(), OfferQueue.class);

    binder()
        .install(
            new PrivateModule() {
              @Override
              protected void configure() {
                // TODO(ksweeney): Create a configuration validator module so this can be injected.
                // TODO(William Farner): Revert this once large task counts are cheap, a la a
                // hierarchical store.
                bind(Integer.class).annotatedWith(PruneThreshold.class).toInstance(100);
                bind(new TypeLiteral<Amount<Long, Time>>() {})
                    .annotatedWith(PruneThreshold.class)
                    .toInstance(HISTORY_PRUNE_THRESHOLD.get());
                bind(ScheduledExecutorService.class).toInstance(executor);

                bind(HistoryPruner.class).in(Singleton.class);
                expose(HistoryPruner.class);
              }
            });
    PubsubEventModule.bindSubscriber(binder(), HistoryPruner.class);
  }
  @VisibleForTesting
  static class LogStream implements org.apache.aurora.scheduler.log.Log.Stream {
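    // Per-operation stats: a sliding timing stat plus timeout and failure counters for a named
    // native log operation (read, append, truncate).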
    @VisibleForTesting
    static final class OpStats {
      private final String opName;
      private final SlidingStats timing;
      private final AtomicLong timeouts;
      private final AtomicLong failures;

      OpStats(String opName) {
        this.opName = MorePreconditions.checkNotBlank(opName);
        timing = new SlidingStats("scheduler_log_native_" + opName, "nanos");
        timeouts = exportLongStat("scheduler_log_native_%s_timeouts", opName);
        failures = exportLongStat("scheduler_log_native_%s_failures", opName);
      }

      private static AtomicLong exportLongStat(String template, Object... args) {
        return Stats.exportLong(String.format(template, args));
      }
    }

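    // Adapts a native Mesos Log.Entry into the scheduler's Log.Entry wrapper.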
    private static final Function<Log.Entry, LogEntry> MESOS_ENTRY_TO_ENTRY =
        new Function<Log.Entry, LogEntry>() {
          @Override
          public LogEntry apply(Log.Entry entry) {
            return new LogEntry(entry);
          }
        };

    private final OpStats read = new OpStats("read");
    private final OpStats append = new OpStats("append");
    private final OpStats truncate = new OpStats("truncate");
    private final AtomicLong entriesSkipped =
        Stats.exportLong("scheduler_log_native_native_entries_skipped");

    private final LogInterface log;

    private final ReaderInterface reader;
    private final long readTimeout;
    private final TimeUnit readTimeUnit;

    private final Provider<WriterInterface> writerFactory;
    private final long writeTimeout;
    private final TimeUnit writeTimeUnit;

    private final byte[] noopEntry;

    private WriterInterface writer;

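    // Captures the native log handles and timeouts; the writer is created lazily on first mutation.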
    LogStream(
        LogInterface log,
        ReaderInterface reader,
        Amount<Long, Time> readTimeout,
        Provider<WriterInterface> writerFactory,
        Amount<Long, Time> writeTimeout,
        byte[] noopEntry) {

      this.log = log;

      this.reader = reader;
      this.readTimeout = readTimeout.getValue();
      this.readTimeUnit = readTimeout.getUnit().getTimeUnit();

      this.writerFactory = writerFactory;
      this.writeTimeout = writeTimeout.getValue();
      this.writeTimeUnit = writeTimeout.getUnit().getTimeUnit();

      this.noopEntry = noopEntry;
    }

    @Override
    public Iterator<Entry> readAll() throws StreamAccessException {
      // TODO(John Sirois): Currently we must be the coordinator to ensure we get the 'full read'
      // of log entries expected by the users of the org.apache.aurora.scheduler.log.Log interface.
      // Switch to another method of ensuring this when it becomes available in mesos' log
      // interface.
      try {
        append(noopEntry);
      } catch (StreamAccessException e) {
        throw new StreamAccessException("Error writing noop prior to a read", e);
      }

      final Log.Position from = reader.beginning();
      final Log.Position to = end().unwrap();

      // Reading all the entries at once may cause large garbage collections. Instead, we
      // lazily read the entries one by one as they are requested.
      // TODO(Benjamin Hindman): Eventually replace this functionality with functionality
      // from the Mesos Log.
      return new UnmodifiableIterator<Entry>() {
        private long position = Longs.fromByteArray(from.identity());
        private final long endPosition = Longs.fromByteArray(to.identity());
        private Entry entry = null;

        @Override
        public boolean hasNext() {
          if (entry != null) {
            return true;
          }

          while (position <= endPosition) {
            long start = System.nanoTime();
            try {
              Log.Position p = log.position(Longs.toByteArray(position));
              if (LOG.isLoggable(Level.FINE)) {
                LOG.fine("Reading position " + position + " from the log");
              }
              List<Log.Entry> entries = reader.read(p, p, readTimeout, readTimeUnit);

              // N.B. HACK! There is currently no way to "increment" a position. Until the Mesos
              // Log actually provides a way to "stream" the log, we approximate as much by
              // using longs via Log.Position.identity and Log.position.
              position++;

              // Reading positions in this way means it's possible that we get an "invalid" entry
              // (e.g., in the underlying log terminology this would be anything but an append)
              // which will be removed from the returned entries resulting in an empty list.
              // We skip these.
              if (entries.isEmpty()) {
                entriesSkipped.getAndIncrement();
              } else {
                entry = MESOS_ENTRY_TO_ENTRY.apply(Iterables.getOnlyElement(entries));
                return true;
              }
            } catch (TimeoutException e) {
              read.timeouts.getAndIncrement();
              throw new StreamAccessException("Timeout reading from log.", e);
            } catch (Log.OperationFailedException e) {
              read.failures.getAndIncrement();
              throw new StreamAccessException("Problem reading from log", e);
            } finally {
              read.timing.accumulate(System.nanoTime() - start);
            }
          }
          return false;
        }

        @Override
        public Entry next() {
          if (entry == null && !hasNext()) {
            throw new NoSuchElementException();
          }

          Entry result = Preconditions.checkNotNull(entry);
          entry = null;
          return result;
        }
      };
    }

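    // Appends raw contents to the log as a single mutation, recording timing and errors in the
    // 'append' stats.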
    @Override
    public LogPosition append(final byte[] contents) throws StreamAccessException {
      Preconditions.checkNotNull(contents);

      Log.Position position =
          mutate(
              append,
              new Mutation<Log.Position>() {
                @Override
                public Log.Position apply(WriterInterface logWriter)
                    throws TimeoutException, Log.WriterFailedException {
                  return logWriter.append(contents, writeTimeout, writeTimeUnit);
                }
              });
      return LogPosition.wrap(position);
    }

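    // Discards all log entries preceding the given position, recording timing and errors in the
    // 'truncate' stats.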
    @Timed("scheduler_log_native_truncate_before")
    @Override
    public void truncateBefore(org.apache.aurora.scheduler.log.Log.Position position)
        throws StreamAccessException {

      Preconditions.checkArgument(position instanceof LogPosition);

      final Log.Position before = ((LogPosition) position).unwrap();
      mutate(
          truncate,
          new Mutation<Void>() {
            @Override
            public Void apply(WriterInterface logWriter)
                throws TimeoutException, Log.WriterFailedException {
              logWriter.truncate(before, writeTimeout, writeTimeUnit);
              return null;
            }
          });
    }

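    // A single write operation against the native log writer, executed through mutate().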
    @VisibleForTesting
    interface Mutation<T> {
      T apply(WriterInterface writer) throws TimeoutException, Log.WriterFailedException;
    }

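    // Lazily creates the writer, applies the mutation, and translates timeouts and write failures
    // into StreamAccessExceptions; the writer is discarded on write failure so a subsequent
    // mutation obtains a fresh one (and with it a new coordinator election).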
    @VisibleForTesting
    synchronized <T> T mutate(OpStats stats, Mutation<T> mutation) {
      long start = System.nanoTime();
      if (writer == null) {
        writer = writerFactory.get();
      }
      try {
        return mutation.apply(writer);
      } catch (TimeoutException e) {
        stats.timeouts.getAndIncrement();
        throw new StreamAccessException("Timeout performing log " + stats.opName, e);
      } catch (Log.WriterFailedException e) {
        stats.failures.getAndIncrement();

        // We must throw away a writer on any write failure - this could be because of a coordinator
        // election in which case we must trigger a new election.
        writer = null;

        throw new StreamAccessException("Problem performing log" + stats.opName, e);
      } finally {
        stats.timing.accumulate(System.nanoTime() - start);
      }
    }

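    // Snapshot of the reader's current ending position, wrapped as a scheduler log position.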
    private LogPosition end() {
      return LogPosition.wrap(reader.ending());
    }

    @Override
    public void close() {
      // noop
    }

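    // Wraps a native Log.Position so it can be passed through the scheduler's log interface.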
    private static class LogPosition implements org.apache.aurora.scheduler.log.Log.Position {
      private final Log.Position underlying;

      LogPosition(Log.Position underlying) {
        this.underlying = underlying;
      }

      static LogPosition wrap(Log.Position position) {
        return new LogPosition(position);
      }

      Log.Position unwrap() {
        return underlying;
      }

      @Override
      public int compareTo(Position o) {
        Preconditions.checkArgument(o instanceof LogPosition);
        return underlying.compareTo(((LogPosition) o).underlying);
      }
    }

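    // Wraps a native Log.Entry, exposing its raw data through the scheduler's log interface.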
    private static class LogEntry implements org.apache.aurora.scheduler.log.Log.Entry {
      private final Log.Entry underlying;

      public LogEntry(Log.Entry entry) {
        this.underlying = entry;
      }

      @Override
      public byte[] contents() {
        return underlying.data;
      }
    }
  }