Пример #1
0
 /**
  * Converts a twitter4j {@code Status} into a {@code JSONObject} by copying each status property
  * under a fixed key.
  *
  * <p>Every value is stored exactly as the corresponding getter returns it; no conversion of
  * nested objects or arrays is performed here.
  *
  * @param pStatus the status to convert; may be {@code null}
  * @return the populated {@code JSONObject}, or {@code null} when {@code pStatus} is {@code null}
  */
 @SuppressWarnings("unchecked")
 public static JSONObject getJson(Status pStatus) {
   if (pStatus == null) {
     return null;
   }

   final JSONObject json = new JSONObject();

   // Core tweet data.
   json.put("createdAt", pStatus.getCreatedAt());
   json.put("id", pStatus.getId());
   json.put("text", pStatus.getText());
   json.put("source", pStatus.getSource());
   json.put("isTruncated", pStatus.isTruncated());

   // Reply / favorite / retweet state.
   json.put("inReplyToStatusId", pStatus.getInReplyToStatusId());
   json.put("inReplyToUserId", pStatus.getInReplyToUserId());
   json.put("isFavorited", pStatus.isFavorited());
   json.put("isRetweeted", pStatus.isRetweeted());
   json.put("favoriteCount", pStatus.getFavoriteCount());
   json.put("inReplyToScreenName", pStatus.getInReplyToScreenName());

   // Location, counters and language.
   json.put("geoLocation", pStatus.getGeoLocation());
   json.put("place", pStatus.getPlace());
   json.put("retweetCount", pStatus.getRetweetCount());
   json.put("isPossiblySensitive", pStatus.isPossiblySensitive());
   json.put("isoLanguageCode", pStatus.getIsoLanguageCode());

   // Related objects and entities, stored as returned by the getters.
   json.put("contributorsIDs", pStatus.getContributors());
   json.put("retweetedStatus", pStatus.getRetweetedStatus());
   json.put("userMentionEntities", pStatus.getUserMentionEntities());
   json.put("urlEntities", pStatus.getURLEntities());
   json.put("hashtagEntities", pStatus.getHashtagEntities());
   json.put("mediaEntities", pStatus.getMediaEntities());
   json.put("currentUserRetweetId", pStatus.getCurrentUserRetweetId());
   json.put("user", pStatus.getUser());

   return json;
 }
  /**
   * Maps every tweet to a {@code StatusTweet} (including its user mentions, URLs, media and
   * hashtags) and passes each one to {@code persistirTweet}.
   *
   * <p>An entity list on the DTO is left {@code null} when the tweet carries no entities of that
   * kind.
   *
   * @param tweets tweets to persist; must not be {@code null}
   * @param query value stored on every DTO via {@code setQuery}
   * @param polaridadIndicada value stored on every DTO via {@code setCodOpinionInicial}
   * @param codFinalidadData value stored on every DTO via {@code setCodFinalidadData}
   */
  public void persistirTweets(
      List<Status> tweets, String query, String polaridadIndicada, String codFinalidadData) {
    logger.debug("<====  Inicio Method persistirTweet ====>");
    for (Status tweet : tweets) {
      // The reflective dump walks the whole tweet object; only build it when it will be logged.
      if (logger.isDebugEnabled()) {
        logger.debug(
            "tweet completo: {}",
            ToStringBuilder.reflectionToString(tweet, ToStringStyle.MULTI_LINE_STYLE));
      }

      StatusTweet dto = new StatusTweet();
      dto.setId(tweet.getId());
      dto.setQuery(query);
      dto.setMensaje(tweet.getText());
      if (tweet.getUser() != null) {
        dto.setScreenNameUser(tweet.getUser().getScreenName());
      }
      dto.setContadorRetweets((int) tweet.getRetweetCount());
      dto.setCodOpinionInicial(polaridadIndicada);
      dto.setFechaHoraRegistro(new Date());
      dto.setFlagEnProceso(FLAG_POR_PROCESAR);
      dto.setCodIdioma(COD_LENGUAJE_CONSULTA_ESPANOL);
      dto.setCodFinalidadData(codFinalidadData);

      // User mentions (list stays null when the tweet has none).
      List<UserMention> ums = null;
      UserMentionEntity[] ume = tweet.getUserMentionEntities();
      if (!ArrayUtils.isEmpty(ume)) {
        ums = new ArrayList<UserMention>();
        for (UserMentionEntity mention : ume) {
          UserMention um = new UserMention();
          um.setScreenName(mention.getScreenName());
          um.setIdTweet(tweet.getId());
          ums.add(um);
        }
      }

      // URLs; each one is flagged as pending processing.
      List<Url> urls = null;
      URLEntity[] urle = tweet.getURLEntities();
      if (!ArrayUtils.isEmpty(urle)) {
        urls = new ArrayList<Url>();
        for (URLEntity urlEntity : urle) {
          Url url = new Url();
          url.setIdTweet(tweet.getId());
          url.setUrlCorta(urlEntity.getURL());
          url.setUrlLarga(urlEntity.getExpandedURL());
          url.setUrlMostrada(urlEntity.getDisplayURL());
          url.setFlagEnProceso(FLAG_POR_PROCESAR);
          urls.add(url);
        }
      }

      // Media attachments.
      List<Media> mds = null;
      MediaEntity[] mes = tweet.getMediaEntities();
      if (!ArrayUtils.isEmpty(mes)) {
        mds = new ArrayList<Media>();
        for (MediaEntity mediaEntity : mes) {
          Media md = new Media();
          md.setIdTweet(tweet.getId());
          md.setUrl(mediaEntity.getURL());
          md.setMediaUrl(mediaEntity.getMediaURL());
          md.setMediaUrlHttps(mediaEntity.getMediaURLHttps());
          md.setExpandedUrl(mediaEntity.getExpandedURL());
          mds.add(md);
        }
      }

      // Hashtags.
      List<HashTag> hts = null;
      HashtagEntity[] hte = tweet.getHashtagEntities();
      if (!ArrayUtils.isEmpty(hte)) {
        hts = new ArrayList<HashTag>();
        for (HashtagEntity hashtagEntity : hte) {
          HashTag ht = new HashTag();
          ht.setTextoTag(hashtagEntity.getText());
          ht.setIdTweet(tweet.getId());
          hts.add(ht);
        }
      }

      dto.setUrls(urls);
      dto.setUsuariosMencionados(ums);
      dto.setHashTags(hts);
      dto.setMedias(mds);

      persistirTweet(dto);
    }
  }
Пример #3
0
  /**
   * Builds the entities record of a status from {@code schemaEntities}.
   *
   * <p>Six fields are always set: {@code hashtags}, {@code symbols}, {@code media}, {@code urls},
   * {@code user_mentions} and {@code extended_entities}. Each one receives a list with one record
   * per entity, or an empty list when the status has no entities of that kind. URL records are
   * produced by {@code buildURLEntity}.
   *
   * @param schemaEntities schema of the entities record; the element/value schemas of its fields
   *     are looked up by name
   * @param status status whose entities are converted
   * @return the built entities record
   */
  private static Record buildEntities(Schema schemaEntities, Status status) {
    final GenericRecordBuilder entities = new GenericRecordBuilder(schemaEntities);

    // ---- hashtags ----
    final HashtagEntity[] hashtags = status.getHashtagEntities();
    if (hashtags.length == 0) {
      entities.set("hashtags", Collections.emptyList());
    } else {
      final Schema hashtagSchema = schemaEntities.getField("hashtags").schema().getElementType();
      final List<GenericRecord> hashtagRecords = new ArrayList<>();
      for (HashtagEntity hashtag : hashtags) {
        final GenericRecordBuilder hashtagBuilder = new GenericRecordBuilder(hashtagSchema);
        hashtagBuilder.set("text", hashtag.getText());
        hashtagBuilder.set("start", hashtag.getStart());
        hashtagBuilder.set("end", hashtag.getEnd());
        hashtagRecords.add(hashtagBuilder.build());
      }
      entities.set("hashtags", hashtagRecords);
    }

    // ---- symbols ----
    final SymbolEntity[] symbols = status.getSymbolEntities();
    if (symbols.length == 0) {
      entities.set("symbols", Collections.emptyList());
    } else {
      final Schema symbolSchema = schemaEntities.getField("symbols").schema().getElementType();
      final List<GenericRecord> symbolRecords = new ArrayList<>();
      for (SymbolEntity symbol : symbols) {
        final GenericRecordBuilder symbolBuilder = new GenericRecordBuilder(symbolSchema);
        symbolBuilder.set("text", symbol.getText());
        symbolBuilder.set("start", symbol.getStart());
        symbolBuilder.set("end", symbol.getEnd());
        symbolRecords.add(symbolBuilder.build());
      }
      entities.set("symbols", symbolRecords);
    }

    // ---- media ----
    final MediaEntity[] medias = status.getMediaEntities();
    if (medias.length == 0) {
      entities.set("media", Collections.emptyList());
    } else {
      final Schema mediaSchema = schemaEntities.getField("media").schema().getElementType();
      final List<GenericRecord> mediaRecords = new ArrayList<>();
      for (MediaEntity media : medias) {
        final GenericRecordBuilder mediaBuilder = new GenericRecordBuilder(mediaSchema);
        mediaBuilder.set("url", media.getURL());
        mediaBuilder.set("display_url", media.getDisplayURL());
        mediaBuilder.set("expanded_url", media.getExpandedURL());
        mediaBuilder.set("id", media.getId());
        mediaBuilder.set("media_url", media.getMediaURL());
        mediaBuilder.set("media_url_https", media.getMediaURLHttps());
        mediaBuilder.set("type", media.getType());
        mediaBuilder.set("text", media.getText());
        mediaBuilder.set("start", media.getStart());
        mediaBuilder.set("end", media.getEnd());

        // One size record per entry of getSizes(), keyed by the decimal form of the map key.
        // A single builder is reused; all three fields are overwritten before each build().
        final Schema sizeSchema = mediaSchema.getField("sizes").schema().getValueType();
        final GenericRecordBuilder sizeBuilder = new GenericRecordBuilder(sizeSchema);
        final Map<String, GenericRecord> sizesByKey = new HashMap<>(4);
        for (int sizeKey : media.getSizes().keySet()) {
          final Size dimensions = media.getSizes().get(sizeKey);
          sizeBuilder.set("h", dimensions.getHeight());
          sizeBuilder.set("w", dimensions.getWidth());
          sizeBuilder.set("resize", dimensions.getResize());
          sizesByKey.put(Integer.toString(sizeKey), sizeBuilder.build());
        }
        mediaBuilder.set("sizes", sizesByKey);

        mediaRecords.add(mediaBuilder.build());
      }
      entities.set("media", mediaRecords);
    }

    // ---- urls ----
    final URLEntity[] urlEntities = status.getURLEntities();
    if (urlEntities.length == 0) {
      entities.set("urls", Collections.emptyList());
    } else {
      final Schema urlSchema = schemaEntities.getField("urls").schema().getElementType();
      final List<GenericRecord> urlRecords = new ArrayList<>();
      for (URLEntity urlEntity : urlEntities) {
        urlRecords.add(buildURLEntity(urlSchema, urlEntity));
      }
      entities.set("urls", urlRecords);
    }

    // ---- user mentions ----
    final UserMentionEntity[] mentions = status.getUserMentionEntities();
    if (mentions.length == 0) {
      entities.set("user_mentions", Collections.emptyList());
    } else {
      final Schema mentionSchema =
          schemaEntities.getField("user_mentions").schema().getElementType();
      final List<GenericRecord> mentionRecords = new ArrayList<>();
      for (UserMentionEntity mention : mentions) {
        final GenericRecordBuilder mentionBuilder = new GenericRecordBuilder(mentionSchema);
        mentionBuilder.set("name", mention.getName());
        mentionBuilder.set("screen_name", mention.getScreenName());
        mentionBuilder.set("text", mention.getText());
        mentionBuilder.set("id", mention.getId());
        mentionBuilder.set("start", mention.getStart());
        mentionBuilder.set("end", mention.getEnd());
        mentionRecords.add(mentionBuilder.build());
      }
      entities.set("user_mentions", mentionRecords);
    }

    // ---- extended media (same fields as media, plus video_info) ----
    final ExtendedMediaEntity[] extendedMedias = status.getExtendedMediaEntities();
    if (extendedMedias.length == 0) {
      entities.set("extended_entities", Collections.emptyList());
    } else {
      final Schema extendedSchema =
          schemaEntities.getField("extended_entities").schema().getElementType();
      final List<GenericRecord> extendedRecords = new ArrayList<>();
      for (ExtendedMediaEntity extended : extendedMedias) {
        final GenericRecordBuilder extendedBuilder = new GenericRecordBuilder(extendedSchema);
        extendedBuilder.set("url", extended.getURL());
        extendedBuilder.set("display_url", extended.getDisplayURL());
        extendedBuilder.set("expanded_url", extended.getExpandedURL());
        extendedBuilder.set("id", extended.getId());
        extendedBuilder.set("media_url", extended.getMediaURL());
        extendedBuilder.set("media_url_https", extended.getMediaURLHttps());
        extendedBuilder.set("type", extended.getType());
        extendedBuilder.set("text", extended.getText());
        extendedBuilder.set("start", extended.getStart());
        extendedBuilder.set("end", extended.getEnd());

        // Sizes map, built the same way as for plain media entities.
        final Schema sizeSchema = extendedSchema.getField("sizes").schema().getValueType();
        final GenericRecordBuilder sizeBuilder = new GenericRecordBuilder(sizeSchema);
        final Map<String, GenericRecord> sizesByKey = new HashMap<>(4);
        for (int sizeKey : extended.getSizes().keySet()) {
          final Size dimensions = extended.getSizes().get(sizeKey);
          sizeBuilder.set("h", dimensions.getHeight());
          sizeBuilder.set("w", dimensions.getWidth());
          sizeBuilder.set("resize", dimensions.getResize());
          sizesByKey.put(Integer.toString(sizeKey), sizeBuilder.build());
        }
        extendedBuilder.set("sizes", sizesByKey);

        // Video info: aspect ratio, duration and the list of variants.
        final Schema videoInfoSchema = extendedSchema.getField("video_info").schema();
        final GenericRecordBuilder videoInfoBuilder = new GenericRecordBuilder(videoInfoSchema);
        videoInfoBuilder.set("h", extended.getVideoAspectRatioHeight());
        videoInfoBuilder.set("w", extended.getVideoAspectRatioWidth());
        videoInfoBuilder.set("duration_millis", extended.getVideoDurationMillis());

        final Schema variantSchema = videoInfoSchema.getField("variants").schema().getElementType();
        final List<GenericRecord> variantRecords = new ArrayList<>();
        for (Variant variant : extended.getVideoVariants()) {
          final GenericRecordBuilder variantBuilder = new GenericRecordBuilder(variantSchema);
          variantBuilder.set("bitrate", variant.getBitrate());
          variantBuilder.set("content_type", variant.getContentType());
          variantBuilder.set("url", variant.getUrl());
          variantRecords.add(variantBuilder.build());
        }
        videoInfoBuilder.set("variants", variantRecords);
        extendedBuilder.set("video_info", videoInfoBuilder.build());

        extendedRecords.add(extendedBuilder.build());
      }
      entities.set("extended_entities", extendedRecords);
    }

    return entities.build();
  }
Пример #4
0
 /**
  * Checks whether {@code matchPattern} is found in the status text or, failing that, in its URL
  * entities, media entities, hashtag entities or user-mention entities, in that order.
  *
  * <p>The first group that contains a match is logged at debug level and the method returns
  * {@code true}. When nothing matches, every inspected entity value is logged and {@code false}
  * is returned.
  *
  * <p>A {@code null} text or entity string is treated as "no match" instead of being handed to
  * the matcher, which would throw a {@code NullPointerException}.
  *
  * @param status the status to inspect
  * @return {@code true} if the pattern is found in the text or in any entity group
  */
 protected boolean checkMatch(twitter4j.Status status) {
   if (containsTerms(status.getText())) {
     Logger.debug("Terms found in text");
     Logger.debug("    \"" + status.getText() + "\"");
     return true;
   }

   boolean result = false;

   URLEntity[] urlEntities = status.getURLEntities();
   for (URLEntity ue : urlEntities) {
     if (containsTerms(ue.getDisplayURL())) result = true;
     if (containsTerms(ue.getExpandedURL())) result = true;
   }
   if (result) {
     Logger.debug("Terms found in URL entities");
     for (URLEntity ue : urlEntities) {
       Logger.debug("    " + ue.getDisplayURL());
       Logger.debug("    " + ue.getExpandedURL());
     }
     return result;
   }

   // Media entities are inspected through their URLEntity view, as in the URL group above.
   URLEntity[] mediaEntities = status.getMediaEntities();
   for (URLEntity ue : mediaEntities) {
     if (containsTerms(ue.getDisplayURL())) result = true;
     if (containsTerms(ue.getExpandedURL())) result = true;
   }
   if (result) {
     Logger.debug("Terms found in Media entities");
     for (URLEntity ue : mediaEntities) {
       Logger.debug("    " + ue.getDisplayURL());
       Logger.debug("    " + ue.getExpandedURL());
     }
     return result;
   }

   HashtagEntity[] hashtagEntities = status.getHashtagEntities();
   for (HashtagEntity he : hashtagEntities) {
     if (containsTerms(he.getText())) result = true;
   }
   if (result) {
     Logger.debug("Terms found in Hashtag entities");
     for (HashtagEntity he : hashtagEntities) {
       Logger.debug("    " + he.getText());
     }
     return result;
   }

   UserMentionEntity[] mentionEntities = status.getUserMentionEntities();
   for (UserMentionEntity me : mentionEntities) {
     if (containsTerms(me.getScreenName())) result = true;
   }
   if (result) {
     Logger.debug("Terms found in User mention entities");
     for (UserMentionEntity me : mentionEntities) {
       Logger.debug("    " + me.getScreenName());
     }
     return result;
   }

   // Nothing matched: dump everything that was inspected to help diagnose the miss.
   Logger.debug("Terms NOT FOUND");
   Logger.debug("    Terms not found in URL entities");
   for (URLEntity ue : urlEntities) {
     Logger.debug("    " + ue.getDisplayURL());
     Logger.debug("    " + ue.getExpandedURL());
   }
   Logger.debug("    Terms not found in Media entities");
   for (URLEntity ue : mediaEntities) {
     Logger.debug("    " + ue.getDisplayURL());
     Logger.debug("    " + ue.getExpandedURL());
   }
   Logger.debug("    Terms not found in Hashtag entities");
   for (HashtagEntity he : hashtagEntities) {
     Logger.debug("    " + he.getText());
   }
   Logger.debug("    Terms not found in User mention entities");
   for (UserMentionEntity me : mentionEntities) {
     Logger.debug("    " + me.getScreenName());
   }
   return result;
 }

 /** Returns whether {@code matchPattern} finds a match in {@code text}; {@code null} never matches. */
 private boolean containsTerms(String text) {
   return text != null && matchPattern.matcher(text).find();
 }
Пример #5
0
 /**
  * Produces the text for a status by delegating to {@code parseStatusText} with the status's raw
  * text, its URL entities and its media entities.
  *
  * @param status the status whose text is parsed
  * @return the value returned by {@code parseStatusText} for those inputs
  */
 private String getStatusText(Status status) {
   final String rawText = status.getText();
   return parseStatusText(rawText, status.getURLEntities(), status.getMediaEntities());
 }