Пример #1
0
  /**
   * Start processing events. This uses the Twitter Streaming API to sample Twitter, and process
   * tweets.
   */
  @Override
  public void start() {
    // The channel is the piece of Flume that sits between the Source and Sink,
    // and is used to process events.
    final ChannelProcessor channel = getChannelProcessor();

    final Map<String, String> headers = new HashMap<String, String>();

    // The StatusListener is a twitter4j API, which can be added to a Twitter
    // stream, and will execute methods every time a message comes in through
    // the stream.
    StatusListener listener =
        new StatusListener() {
          // The onStatus method is executed every time a new tweet comes in.
          public void onStatus(Status status) {
            // The EventBuilder is used to build an event using the headers and
            // the raw JSON of a tweet
            logger.debug(status.getUser().getScreenName() + ": " + status.getText());

            headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime()));
            Event event =
                EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers);

            channel.processEvent(event);
          }

          // This listener will ignore everything except for new tweets
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {}

          public void onStallWarning(StallWarning warning) {}
        };

    logger.debug(
        "Setting up Twitter sample stream using consumer key {} and" + " access token {}",
        new String[] {consumerKey, accessToken});
    // Set up the stream's listener (defined above), and set any necessary
    // security information.
    twitterStream.addListener(listener);
    twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
    AccessToken token = new AccessToken(accessToken, accessTokenSecret);
    twitterStream.setOAuthAccessToken(token);

    // Set up a filter to pull out industry-relevant tweets
    if (keywords.length == 0) {
      logger.debug("Starting up Twitter sampling...");
      twitterStream.sample();
    } else {
      logger.debug("Starting up Twitter filtering...");
      FilterQuery query = new FilterQuery().track(keywords);
      twitterStream.filter(query);
    }
    super.start();
  }
Пример #2
0
  @Override
  public void start() {
    logger.info("Starting thrift source");
    ExecutorService sourceService;
    ThreadFactory threadFactory =
        new ThreadFactoryBuilder().setNameFormat("Flume Thrift IPC Thread %d").build();
    if (maxThreads == 0) {
      sourceService = Executors.newCachedThreadPool(threadFactory);
    } else {
      sourceService = Executors.newFixedThreadPool(maxThreads, threadFactory);
    }
    try {
      serverTransport = new TNonblockingServerSocket(new InetSocketAddress(bindAddress, port));
    } catch (TTransportException e) {
      throw new FlumeException("Failed to start Thrift Source.", e);
    }

    THsHaServer.Args thhsArgs = new THsHaServer.Args(serverTransport);
    thhsArgs.processor(new ThriftSourceProtocol.Processor(new ThriftSourceHandler()));
    //	thhsArgs.transportFactory(new TFramedTransport.Factory());
    thhsArgs.protocolFactory(new TBinaryProtocol.Factory());
    thhsArgs.executorService(sourceService);
    server = new THsHaServer(thhsArgs); // 半同步半异步的服务模型

    servingExecutor =
        Executors.newSingleThreadExecutor(
            new ThreadFactoryBuilder().setNameFormat("Flume Thrift Source I/O Boss").build());
    /** Start serving. */
    servingExecutor.submit(
        new Runnable() {
          @Override
          public void run() {
            server.serve();
          }
        });

    long timeAfterStart = System.currentTimeMillis();
    while (!server.isServing()) {
      try {
        if (System.currentTimeMillis() - timeAfterStart >= 10000) {
          throw new FlumeException("Thrift server failed to start!");
        }
        TimeUnit.MILLISECONDS.sleep(1000);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new FlumeException("Interrupted while waiting for Thrift server" + " to start.", e);
      }
    }
    sourceCounter.start();
    logger.info("Started Thrift source.");
    super.start();
  }
  /** Starts listening for twitter events */
  @Override
  public void start() {
    // The channel connects from source to sink
    final ChannelProcessor channel = getChannelProcessor();

    final Map<String, String> headers = new HashMap<String, String>();

    // The method onStatus() will be called everytime a tweet comes.
    StatusListener listener =
        new StatusListener() {
          // capture new tweet notification
          public void onStatus(Status status) {
            // add creation time in the header
            headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime()));
            Event event =
                EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers);
            // send to sink
            channel.processEvent(event);
          }

          // ignore all other notifications
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {}

          @Override
          public void onStallWarning(StallWarning arg0) {
            // do nothing
          }
        };

    // add the listener we created + tell the stream all about the security info required
    twitterStream.addListener(listener);
    twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
    AccessToken token = new AccessToken(accessToken, accessTokenSecret);
    twitterStream.setOAuthAccessToken(token);

    // Set up a filter to pull out industry-relevant tweets
    if (searchFor.length == 0) {
      System.out.println("Please setup filter keyword in Flume conf");
    } else {
      // create a filter query for filtering out only the required info
      FilterQuery query = new FilterQuery().track(searchFor);
      twitterStream.filter(query);
    }
    super.start();
  }
Пример #4
0
  @Override
  public synchronized void stop() {
    executor.shutdown();
    try {
      executor.awaitTermination(10L, TimeUnit.SECONDS);
    } catch (InterruptedException ex) {
      logger.info("Interrupted while awaiting termination", ex);
    }
    executor.shutdownNow();

    super.stop();
    sourceCounter.stop();
    logger.info("SpoolDir source {} stopped. Metrics: {}", getName(), sourceCounter);
  }
Пример #5
0
 public void stop() {
   if (server != null && server.isServing()) {
     server.stop();
   }
   servingExecutor.shutdown();
   try {
     if (!servingExecutor.awaitTermination(5, TimeUnit.SECONDS)) {
       servingExecutor.shutdownNow();
     }
   } catch (InterruptedException e) {
     throw new FlumeException("Interrupted while waiting for server to be " + "shutdown.");
   }
   sourceCounter.stop();
   // Thrift will shutdown the executor passed to it.
   super.stop();
 }
Пример #6
0
  @Override
  public synchronized void start() {
    logger.info("SpoolDirectorySource source starting with directory: {}", spoolDirectory);

    executor = Executors.newSingleThreadScheduledExecutor();

    File directory = new File(spoolDirectory);
    try {
      reader =
          new ReliableSpoolingZipFileEventReader.Builder()
              .spoolDirectory(directory)
              .completedSuffix(completedSuffix)
              .ignorePattern(ignorePattern)
              .trackerDirPath(trackerDirPath)
              .annotateFileName(fileHeader)
              .fileNameHeader(fileHeaderKey)
              .annotateBaseName(basenameHeader)
              .baseNameHeader(basenameHeaderKey)
              .deserializerType(deserializerType)
              .deserializerContext(deserializerContext)
              .deletePolicy(deletePolicy)
              .inputCharset(inputCharset)
              .decodeErrorPolicy(decodeErrorPolicy)
              .consumeOrder(consumeOrder)
              .build();
    } catch (IOException ioe) {
      throw new FlumeException("Error instantiating spooling event parser", ioe);
    }

    Runnable runner = new SpoolDirectoryRunnable(reader, sourceCounter);
    executor.scheduleWithFixedDelay(runner, 0, POLL_DELAY_MS, TimeUnit.MILLISECONDS);

    super.start();
    logger.debug("SpoolDirectoryZipSource source started");
    sourceCounter.start();
  }
Пример #7
0
 /** Stops the Source's event processing and shuts down the Twitter stream. */
 @Override
 public void stop() {
   logger.debug("Shutting down Twitter sample stream...");
   twitterStream.shutdown();
   super.stop();
 }
 /** What happens when the stream is shutdown */
 @Override
 public void stop() {
   System.out.println("Shutting down");
   twitterStream.shutdown();
   super.stop();
 }
 @Override
 public synchronized void stop() {
   RabbitMQUtil.close(_Connection, _Channel);
   super.stop();
 }
  /**
   * Start processing events. This uses the Twitter Streaming API to sample Twitter, and process
   * tweets.
   */
  @Override
  public void start() {
    // The channel is the piece of Flume that sits between the Source and Sink,
    // and is used to process events.
    final ChannelProcessor channel = getChannelProcessor();

    final Map<String, String> headers = new HashMap<String, String>();

    // The StatusListener is a twitter4j API, which can be added to a Twitter
    // stream, and will execute methods every time a message comes in through
    // the stream.
    StatusListener listener =
        new StatusListener() {
          // The onStatus method is executed every time a new tweet comes in.
          public void onStatus(Status status) {
            // The EventBuilder is used to build an event using the headers and
            // the raw JSON of a tweet
            logger.debug(status.getUser().getScreenName() + ": " + status.getText());

            headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime()));
            Event event =
                EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers);
            channel.processEvent(event);
          }

          // This listener will ignore everything except for new tweets
          public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

          public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

          public void onScrubGeo(long userId, long upToStatusId) {}

          public void onException(Exception ex) {
            logger.error("Stream Error ", ex);
          }

          public void onStallWarning(StallWarning warning) {
            int percentFull = warning.getPercentFull();
            logger.warn("Stall Warning Received ", warning);
            if (percentFull > 95) {
              logger.warn("Stallwarning Stream full more han 95 %. Going to wait for 2 minutes");
              try {
                Thread.sleep(2 * 60 * 000);
              } catch (InterruptedException e) {
                e.printStackTrace();
              }
            }
          }
        };

    logger.debug(
        "Setting up Twitter sample stream using consumer key {} and" + " access token {}",
        new String[] {consumerKey, accessToken});
    // Set up the stream's listener (defined above),
    twitterStream.addListener(listener);

    // Set up a filter to pull out industry-relevant tweets
    if (keywords.length == 0) {
      logger.debug("Starting up Twitter sampling...");
      twitterStream.sample();
    } else {
      logger.debug("Starting up Twitter filtering...");

      FilterQuery query = new FilterQuery().track(keywords);
      twitterStream.filter(query);
    }
    super.start();
  }