@Override
  protected void map(Object key, Object value, Context context)
      throws IOException, InterruptedException {
    try {
      final InputRow inputRow;
      try {
        inputRow = parseInputRow(value, parser);
      } catch (Exception e) {
        if (config.isIgnoreInvalidRows()) {
          log.debug(e, "Ignoring invalid row [%s] due to parsing error", value.toString());
          context
              .getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER)
              .increment(1);
          return; // we're ignoring this invalid row
        } else {
          throw e;
        }
      }

      if (!granularitySpec.bucketIntervals().isPresent()
          || granularitySpec
              .bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
              .isPresent()) {
        innerMap(inputRow, value, context);
      }
    } catch (RuntimeException e) {
      throw new RE(e, "Failure on row[%s]", value);
    }
  }
 @Override
 protected void innerMap(InputRow inputRow, Object value, Context context)
     throws IOException, InterruptedException {
   final Map<String, Iterable<String>> dims = Maps.newHashMap();
   for (final String dim : inputRow.getDimensions()) {
     dims.put(dim, inputRow.getDimension(dim));
   }
   helper.emitDimValueCounts(context, new DateTime(inputRow.getTimestampFromEpoch()), dims);
 }
 @Test
 public void simpleFirehoseReadingTest() throws IOException {
   Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), segmentSet.size());
   Integer rowcount = 0;
   try (final IngestSegmentFirehose firehose =
       (IngestSegmentFirehose) factory.connect(rowParser)) {
     while (firehose.hasMore()) {
       InputRow row = firehose.nextRow();
       Assert.assertArrayEquals(new String[] {DIM_NAME}, row.getDimensions().toArray());
       Assert.assertArrayEquals(new String[] {DIM_VALUE}, row.getDimension(DIM_NAME).toArray());
       Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getLongMetric(METRIC_LONG_NAME));
       Assert.assertEquals(
           METRIC_FLOAT_VALUE, row.getFloatMetric(METRIC_FLOAT_NAME), METRIC_FLOAT_VALUE * 0.0001);
       ++rowcount;
     }
   }
   Assert.assertEquals((int) MAX_SHARD_NUMBER * MAX_ROWS, (int) rowcount);
 }
 @Override
 protected void innerMap(InputRow inputRow, Object value, Context context)
     throws IOException, InterruptedException {
   final List<Object> groupKey =
       Rows.toGroupKey(rollupGranularity.truncate(inputRow.getTimestampFromEpoch()), inputRow);
   context.write(
       new BytesWritable(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)),
       NullWritable.get());
 }
  @Test(timeout = 60000)
  public void testPersistFails() throws Exception {
    final MutableBoolean committed = new MutableBoolean(false);
    plumber
        .getSinks()
        .put(
            0L,
            new Sink(
                new Interval(0, TimeUnit.HOURS.toMillis(1)),
                schema,
                tuningConfig,
                new DateTime("2014-12-01T12:34:56.789").toString()));
    plumber.startJob();
    final InputRow row = EasyMock.createNiceMock(InputRow.class);
    EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L);
    EasyMock.expect(row.getDimensions()).andReturn(new ArrayList<String>());
    EasyMock.replay(row);
    plumber.add(row, Committers.supplierOf(Committers.nil()));
    plumber.persist(
        Committers.supplierFromRunnable(
                new Runnable() {
                  @Override
                  public void run() {
                    committed.setValue(true);
                    throw new RuntimeException();
                  }
                })
            .get());
    while (!committed.booleanValue()) {
      Thread.sleep(100);
    }

    // Exception may need time to propagate
    while (metrics.failedPersists() < 1) {
      Thread.sleep(100);
    }

    Assert.assertEquals(1, metrics.failedPersists());
  }
  private void testPersist(final Object commitMetadata) throws Exception {
    final MutableBoolean committed = new MutableBoolean(false);
    plumber
        .getSinks()
        .put(
            0L,
            new Sink(
                new Interval(0, TimeUnit.HOURS.toMillis(1)),
                schema,
                tuningConfig,
                new DateTime("2014-12-01T12:34:56.789").toString()));
    Assert.assertNull(plumber.startJob());

    final InputRow row = EasyMock.createNiceMock(InputRow.class);
    EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L);
    EasyMock.expect(row.getDimensions()).andReturn(new ArrayList<String>());
    EasyMock.replay(row);
    final Committer committer =
        new Committer() {
          @Override
          public Object getMetadata() {
            return commitMetadata;
          }

          @Override
          public void run() {
            committed.setValue(true);
          }
        };
    plumber.add(row, Suppliers.ofInstance(committer));
    plumber.persist(committer);

    while (!committed.booleanValue()) {
      Thread.sleep(100);
    }
    plumber.getSinks().clear();
    plumber.finishJob();
  }
Exemple #7
0
    @Override
    public void run() {
      plumber = initPlumber();
      final Period intermediatePersistPeriod = config.getIntermediatePersistPeriod();

      try {
        plumber.startJob();

        // Delay firehose connection to avoid claiming input resources while the plumber is starting
        // up.
        firehose = initFirehose();

        long nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
        while (firehose.hasMore()) {
          InputRow inputRow = null;
          try {
            try {
              inputRow = firehose.nextRow();
            } catch (Exception e) {
              log.debug(e, "thrown away line due to exception, considering unparseable");
              metrics.incrementUnparseable();
              continue;
            }

            boolean lateEvent = false;
            boolean indexLimitExceeded = false;
            try {
              lateEvent = plumber.add(inputRow) == -1;
            } catch (IndexSizeExceededException e) {
              log.info("Index limit exceeded: %s", e.getMessage());
              indexLimitExceeded = true;
            }
            if (indexLimitExceeded || lateEvent) {
              metrics.incrementThrownAway();
              log.debug("Throwing away event[%s]", inputRow);

              if (indexLimitExceeded || System.currentTimeMillis() > nextFlush) {
                plumber.persist(firehose.commit());
                nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
              }

              continue;
            }
            final Sink sink = plumber.getSink(inputRow.getTimestampFromEpoch());
            if ((sink != null && !sink.canAppendRow()) || System.currentTimeMillis() > nextFlush) {
              plumber.persist(firehose.commit());
              nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
            }
            metrics.incrementProcessed();
          } catch (ParseException e) {
            if (inputRow != null) {
              log.error(e, "unparseable line: %s", inputRow);
            }
            metrics.incrementUnparseable();
          }
        }
      } catch (RuntimeException e) {
        log.makeAlert(
                e,
                "RuntimeException aborted realtime processing[%s]",
                fireDepartment.getDataSchema().getDataSource())
            .emit();
        normalExit = false;
        throw e;
      } catch (Error e) {
        log.makeAlert(
                e,
                "Exception aborted realtime processing[%s]",
                fireDepartment.getDataSchema().getDataSource())
            .emit();
        normalExit = false;
        throw e;
      } finally {
        CloseQuietly.close(firehose);
        if (normalExit) {
          plumber.finishJob();
          plumber = null;
          firehose = null;
        }
      }
    }
  @Override
  public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    if (this.plumber != null) {
      throw new IllegalStateException("WTF?!? run with non-null plumber??!");
    }

    // Shed any locks we might have (e.g. if we were uncleanly killed and restarted) since we'll
    // reacquire
    // them if we actually need them
    for (final TaskLock taskLock : getTaskLocks(toolbox)) {
      toolbox.getTaskActionClient().submit(new LockReleaseAction(taskLock.getInterval()));
    }

    boolean normalExit = true;

    // Set up firehose
    final Period intermediatePersistPeriod = fireDepartmentConfig.getIntermediatePersistPeriod();
    final Firehose firehose = firehoseFactory.connect();

    // It would be nice to get the PlumberSchool in the constructor.  Although that will need
    // jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been
    // introduced.
    final RealtimePlumberSchool realtimePlumberSchool =
        new RealtimePlumberSchool(
            windowPeriod, new File(toolbox.getTaskWorkDir(), "persist"), segmentGranularity);
    realtimePlumberSchool.setDefaultMaxPendingPersists(maxPendingPersists);

    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);

    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust
    // to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the
    // coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.

    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final DataSegmentAnnouncer lockingSegmentAnnouncer =
        new DataSegmentAnnouncer() {
          @Override
          public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            toolbox.getSegmentAnnouncer().announceSegment(segment);
          }

          @Override
          public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
              toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
              toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
          }

          @Override
          public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
              toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
          }

          @Override
          public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
              toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
              for (DataSegment segment : segments) {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
              }
            }
          }
        };

    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)

    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with
    // the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more
    // threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy =
        new VersioningPolicy() {
          @Override
          public String getVersion(final Interval interval) {
            try {
              // Side effect: Calling getVersion causes a lock to be acquired
              final TaskLock myLock =
                  toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));

              return myLock.getVersion();
            } catch (IOException e) {
              throw Throwables.propagate(e);
            }
          }
        };

    // NOTE: This pusher selects path based purely on global configuration and the DataSegment,
    // which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause
    // index.zip and
    // NOTE: descriptor.json to mismatch, or it can cause historical nodes to load different
    // instances of the
    // NOTE: "same" segment.
    realtimePlumberSchool.setDataSegmentPusher(toolbox.getSegmentPusher());
    realtimePlumberSchool.setConglomerate(toolbox.getQueryRunnerFactoryConglomerate());
    realtimePlumberSchool.setQueryExecutorService(toolbox.getQueryExecutorService());
    realtimePlumberSchool.setVersioningPolicy(versioningPolicy);
    realtimePlumberSchool.setSegmentAnnouncer(lockingSegmentAnnouncer);
    realtimePlumberSchool.setSegmentPublisher(segmentPublisher);
    realtimePlumberSchool.setServerView(toolbox.getNewSegmentServerView());
    realtimePlumberSchool.setEmitter(toolbox.getEmitter());

    if (this.rejectionPolicyFactory != null) {
      realtimePlumberSchool.setRejectionPolicyFactory(rejectionPolicyFactory);
    }

    final FireDepartment fireDepartment =
        new FireDepartment(schema, fireDepartmentConfig, null, null);
    final RealtimeMetricsMonitor metricsMonitor =
        new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment));
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    this.plumber = realtimePlumberSchool.findPlumber(schema, fireDepartment.getMetrics());

    try {
      plumber.startJob();

      // Set up metrics emission
      toolbox.getMonitorScheduler().addMonitor(metricsMonitor);

      // Time to read data!
      long nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
      while (firehose.hasMore()) {
        final InputRow inputRow;
        try {
          inputRow = firehose.nextRow();
          if (inputRow == null) {
            continue;
          }

          final Sink sink = plumber.getSink(inputRow.getTimestampFromEpoch());
          if (sink == null) {
            fireDepartment.getMetrics().incrementThrownAway();
            log.debug("Throwing away event[%s]", inputRow);

            if (System.currentTimeMillis() > nextFlush) {
              plumber.persist(firehose.commit());
              nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
            }

            continue;
          }

          if (sink.isEmpty()) {
            log.info("Task %s: New sink: %s", getId(), sink);
          }

          int currCount = sink.add(inputRow);
          fireDepartment.getMetrics().incrementProcessed();
          if (currCount >= fireDepartmentConfig.getMaxRowsInMemory()
              || System.currentTimeMillis() > nextFlush) {
            plumber.persist(firehose.commit());
            nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
          }
        } catch (FormattedException e) {
          log.warn(e, "unparseable line");
          fireDepartment.getMetrics().incrementUnparseable();
        }
      }
    } catch (Throwable e) {
      normalExit = false;
      log.makeAlert(e, "Exception aborted realtime processing[%s]", schema.getDataSource()).emit();
      throw e;
    } finally {
      if (normalExit) {
        try {
          plumber.persist(firehose.commit());
          plumber.finishJob();
        } catch (Exception e) {
          log.makeAlert(e, "Failed to finish realtime task").emit();
        } finally {
          Closeables.closeQuietly(firehose);
          toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
        }
      }
    }

    return TaskStatus.success(getId());
  }