/**
   * Does the passed status match the filter pattern? This method checks the status text and the
   * expanded url entities
   *
   * @param status Status to check
   * @return True if the filter expression matches, false otherwise
   */
  private boolean matchesFilter(Status status) {
    boolean shouldFilter = false;
    if (filterPattern != null) {
      Matcher m = filterPattern.matcher(status.getText());
      if (m.matches()) shouldFilter = true;

      if (status.getURLEntities() != null) {
        for (URLEntity ue : status.getURLEntities()) {
          URL expUrl = ue.getExpandedURL();
          if (expUrl != null && expUrl.toString() != null) {
            m = filterPattern.matcher(expUrl.toString());
            if (m.matches()) shouldFilter = true;
          }
        }
      }
    }
    return shouldFilter;
  }
Exemplo n.º 2
0
  private static Record buildEntities(Schema schemaEntities, Status status) {
    GenericRecordBuilder builderEntities = new GenericRecordBuilder(schemaEntities);

    if (status.getHashtagEntities().length > 0) {
      Schema schemaHashtagObject = schemaEntities.getField("hashtags").schema().getElementType();
      List<GenericRecord> listHashtagObjects = new ArrayList<>();
      for (HashtagEntity hashtagEntity : status.getHashtagEntities()) {
        GenericRecordBuilder builderHashtagObject = new GenericRecordBuilder(schemaHashtagObject);
        builderHashtagObject.set("text", hashtagEntity.getText());
        builderHashtagObject.set("start", hashtagEntity.getStart());
        builderHashtagObject.set("end", hashtagEntity.getEnd());
        listHashtagObjects.add(builderHashtagObject.build());
      }
      builderEntities.set("hashtags", listHashtagObjects);
    } else builderEntities.set("hashtags", Collections.emptyList());

    if (status.getSymbolEntities().length > 0) {
      Schema schemaSymbolObject = schemaEntities.getField("symbols").schema().getElementType();
      List<GenericRecord> listSymbolObject = new ArrayList<>();
      for (SymbolEntity symbolEntity : status.getSymbolEntities()) {
        GenericRecordBuilder builderSymbolObject = new GenericRecordBuilder(schemaSymbolObject);
        builderSymbolObject.set("text", symbolEntity.getText());
        builderSymbolObject.set("start", symbolEntity.getStart());
        builderSymbolObject.set("end", symbolEntity.getEnd());
        listSymbolObject.add(builderSymbolObject.build());
      }
      builderEntities.set("symbols", listSymbolObject);
    } else builderEntities.set("symbols", Collections.emptyList());

    if (status.getMediaEntities().length > 0) {
      Schema schemaMediaObject = schemaEntities.getField("media").schema().getElementType();
      List<GenericRecord> listMediaObject = new ArrayList<>();
      for (MediaEntity mediaEntity : status.getMediaEntities()) {
        GenericRecordBuilder builderMediaObject = new GenericRecordBuilder(schemaMediaObject);
        builderMediaObject.set("url", mediaEntity.getURL());
        builderMediaObject.set("display_url", mediaEntity.getDisplayURL());
        builderMediaObject.set("expanded_url", mediaEntity.getExpandedURL());
        builderMediaObject.set("id", mediaEntity.getId());
        builderMediaObject.set("media_url", mediaEntity.getMediaURL());
        builderMediaObject.set("media_url_https", mediaEntity.getMediaURLHttps());
        builderMediaObject.set("type", mediaEntity.getType());
        builderMediaObject.set("text", mediaEntity.getText());
        builderMediaObject.set("start", mediaEntity.getStart());
        builderMediaObject.set("end", mediaEntity.getEnd());

        Schema schemaSize = schemaMediaObject.getField("sizes").schema().getValueType();
        GenericRecordBuilder builderSize = new GenericRecordBuilder(schemaSize);
        Map<String, GenericRecord> mapSizes = new HashMap<>(4);
        for (int key : mediaEntity.getSizes().keySet()) {
          Size size = mediaEntity.getSizes().get(key);
          builderSize.set("h", size.getHeight());
          builderSize.set("w", size.getWidth());
          builderSize.set("resize", size.getResize());
          mapSizes.put(Integer.toString(key), builderSize.build());
        }
        builderMediaObject.set("sizes", mapSizes);
        listMediaObject.add(builderMediaObject.build());
      }
      builderEntities.set("media", listMediaObject);
    } else builderEntities.set("media", Collections.emptyList());

    if (status.getURLEntities().length > 0) {
      Schema schemaURLObject = schemaEntities.getField("urls").schema().getElementType();
      List<GenericRecord> listURLObject1 = new ArrayList<>();
      for (URLEntity urlEntity : status.getURLEntities())
        listURLObject1.add(buildURLEntity(schemaURLObject, urlEntity));
      builderEntities.set("urls", listURLObject1);
    } else builderEntities.set("urls", Collections.emptyList());

    if (status.getUserMentionEntities().length > 0) {
      Schema schemaUserMentionObject =
          schemaEntities.getField("user_mentions").schema().getElementType();
      List<GenericRecord> listUserMentionObject = new ArrayList<>();
      for (UserMentionEntity userMentionEntity : status.getUserMentionEntities()) {
        GenericRecordBuilder builderUserMentionObject =
            new GenericRecordBuilder(schemaUserMentionObject);
        builderUserMentionObject.set("name", userMentionEntity.getName());
        builderUserMentionObject.set("screen_name", userMentionEntity.getScreenName());
        builderUserMentionObject.set("text", userMentionEntity.getText());
        builderUserMentionObject.set("id", userMentionEntity.getId());
        builderUserMentionObject.set("start", userMentionEntity.getStart());
        builderUserMentionObject.set("end", userMentionEntity.getEnd());
        listUserMentionObject.add(builderUserMentionObject.build());
      }
      builderEntities.set("user_mentions", listUserMentionObject);
    } else builderEntities.set("user_mentions", Collections.emptyList());

    if (status.getExtendedMediaEntities().length > 0) {
      Schema schemaExtendedMediaObject =
          schemaEntities.getField("extended_entities").schema().getElementType();
      List<GenericRecord> listExtendedMediaObject = new ArrayList<>();
      for (ExtendedMediaEntity extendedMediaEntity : status.getExtendedMediaEntities()) {
        GenericRecordBuilder builderExtendedMediaObject =
            new GenericRecordBuilder(schemaExtendedMediaObject);
        builderExtendedMediaObject.set("url", extendedMediaEntity.getURL());
        builderExtendedMediaObject.set("display_url", extendedMediaEntity.getDisplayURL());
        builderExtendedMediaObject.set("expanded_url", extendedMediaEntity.getExpandedURL());
        builderExtendedMediaObject.set("id", extendedMediaEntity.getId());
        builderExtendedMediaObject.set("media_url", extendedMediaEntity.getMediaURL());
        builderExtendedMediaObject.set("media_url_https", extendedMediaEntity.getMediaURLHttps());
        builderExtendedMediaObject.set("type", extendedMediaEntity.getType());
        builderExtendedMediaObject.set("text", extendedMediaEntity.getText());
        builderExtendedMediaObject.set("start", extendedMediaEntity.getStart());
        builderExtendedMediaObject.set("end", extendedMediaEntity.getEnd());

        Schema schemaSize = schemaExtendedMediaObject.getField("sizes").schema().getValueType();
        GenericRecordBuilder builderSize = new GenericRecordBuilder(schemaSize);
        Map<String, GenericRecord> mapSizes = new HashMap<>(4);
        for (int key : extendedMediaEntity.getSizes().keySet()) {
          Size size = extendedMediaEntity.getSizes().get(key);
          builderSize.set("h", size.getHeight());
          builderSize.set("w", size.getWidth());
          builderSize.set("resize", size.getResize());
          mapSizes.put(Integer.toString(key), builderSize.build());
        }
        builderExtendedMediaObject.set("sizes", mapSizes);

        Schema schemaVideoInfo = schemaExtendedMediaObject.getField("video_info").schema();
        GenericRecordBuilder builderVideoInfo = new GenericRecordBuilder(schemaVideoInfo);
        builderVideoInfo.set("h", extendedMediaEntity.getVideoAspectRatioHeight());
        builderVideoInfo.set("w", extendedMediaEntity.getVideoAspectRatioWidth());
        builderVideoInfo.set("duration_millis", extendedMediaEntity.getVideoDurationMillis());

        Schema schemaVideoVariants = schemaVideoInfo.getField("variants").schema().getElementType();
        List<GenericRecord> listVideoVariants = new ArrayList<>();
        for (Variant extendedVideoVariant : extendedMediaEntity.getVideoVariants()) {
          GenericRecordBuilder builderVideoVariant = new GenericRecordBuilder(schemaVideoVariants);
          builderVideoVariant.set("bitrate", extendedVideoVariant.getBitrate());
          builderVideoVariant.set("content_type", extendedVideoVariant.getContentType());
          builderVideoVariant.set("url", extendedVideoVariant.getUrl());
          listVideoVariants.add(builderVideoVariant.build());
        }
        builderVideoInfo.set("variants", listVideoVariants);
        builderExtendedMediaObject.set("video_info", builderVideoInfo.build());

        listExtendedMediaObject.add(builderExtendedMediaObject.build());
      }
      builderEntities.set("extended_entities", listExtendedMediaObject);
    } else builderEntities.set("extended_entities", Collections.emptyList());
    return builderEntities.build();
  }