/**
 * Copies the properties of a twitter4j {@code Status} into a new {@code JSONObject}.
 *
 * <p>Each value is stored exactly as the corresponding getter returns it; no conversion is
 * applied to nested objects or arrays (geo location, place, entities, user, ...).
 *
 * @param pStatus the status to copy; may be {@code null}
 * @return a {@code JSONObject} holding the status properties, or {@code null} when
 *     {@code pStatus} is {@code null}
 */
@SuppressWarnings("unchecked")
public static JSONObject getJson(Status pStatus) {
    if (pStatus == null) {
        return null;
    }

    final JSONObject json = new JSONObject();

    // Core tweet data.
    json.put("createdAt", pStatus.getCreatedAt());
    json.put("id", pStatus.getId());
    json.put("text", pStatus.getText());
    json.put("source", pStatus.getSource());
    json.put("isTruncated", pStatus.isTruncated());

    // Reply / favourite / retweet state.
    json.put("inReplyToStatusId", pStatus.getInReplyToStatusId());
    json.put("inReplyToUserId", pStatus.getInReplyToUserId());
    json.put("isFavorited", pStatus.isFavorited());
    json.put("isRetweeted", pStatus.isRetweeted());
    json.put("favoriteCount", pStatus.getFavoriteCount());
    json.put("inReplyToScreenName", pStatus.getInReplyToScreenName());

    // Location, counters and language.
    json.put("geoLocation", pStatus.getGeoLocation());
    json.put("place", pStatus.getPlace());
    json.put("retweetCount", pStatus.getRetweetCount());
    json.put("isPossiblySensitive", pStatus.isPossiblySensitive());
    json.put("isoLanguageCode", pStatus.getIsoLanguageCode());
    json.put("contributorsIDs", pStatus.getContributors());
    json.put("retweetedStatus", pStatus.getRetweetedStatus());

    // Entities and author.
    json.put("userMentionEntities", pStatus.getUserMentionEntities());
    json.put("urlEntities", pStatus.getURLEntities());
    json.put("hashtagEntities", pStatus.getHashtagEntities());
    json.put("mediaEntities", pStatus.getMediaEntities());
    json.put("currentUserRetweetId", pStatus.getCurrentUserRetweetId());
    json.put("user", pStatus.getUser());

    return json;
}
/**
 * Maps every tweet to a {@code StatusTweet} and passes it to {@code persistirTweet}.
 *
 * <p>For each tweet the id, text, author screen name (when the user is present), retweet count
 * and the user-mention, URL, media and hashtag entities are copied. An entity list on the DTO is
 * left {@code null} when the tweet has no entities of that kind.
 *
 * @param tweets the tweets to map and persist
 * @param query the query stored on every DTO
 * @param polaridadIndicada value stored as the initial opinion code of every DTO
 * @param codFinalidadData value stored as the data-purpose code of every DTO
 */
public void persistirTweets(
        List<Status> tweets, String query, String polaridadIndicada, String codFinalidadData) {
    logger.debug("<==== Inicio Method persistirTweet ====>");

    for (Status tweet : tweets) {
        // The reflective dump walks the whole object graph; only pay for it when the
        // message is actually going to be written.
        if (logger.isDebugEnabled()) {
            logger.debug(
                    "tweet completo: {}",
                    ToStringBuilder.reflectionToString(tweet, ToStringStyle.MULTI_LINE_STYLE));
        }

        StatusTweet dto = new StatusTweet();
        dto.setId(tweet.getId());
        dto.setQuery(query);
        dto.setMensaje(tweet.getText());
        if (tweet.getUser() != null) {
            dto.setScreenNameUser(tweet.getUser().getScreenName());
        }
        dto.setContadorRetweets((int) tweet.getRetweetCount());
        dto.setCodOpinionInicial(polaridadIndicada);
        dto.setFechaHoraRegistro(new Date());
        dto.setFlagEnProceso(FLAG_POR_PROCESAR);
        dto.setCodIdioma(COD_LENGUAJE_CONSULTA_ESPANOL);
        dto.setCodFinalidadData(codFinalidadData);

        dto.setUrls(toUrls(tweet));
        dto.setUsuariosMencionados(toUserMentions(tweet));
        dto.setHashTags(toHashTags(tweet));
        dto.setMedias(toMedias(tweet));

        persistirTweet(dto);
    }
}

/** Maps the tweet's user mentions; returns {@code null} (not an empty list) when there are none. */
private List<UserMention> toUserMentions(Status tweet) {
    UserMentionEntity[] entities = tweet.getUserMentionEntities();
    if (ArrayUtils.isEmpty(entities)) {
        return null;
    }
    List<UserMention> mentions = new ArrayList<UserMention>(entities.length);
    for (UserMentionEntity entity : entities) {
        UserMention mention = new UserMention();
        mention.setScreenName(entity.getScreenName());
        mention.setIdTweet(tweet.getId());
        mentions.add(mention);
    }
    return mentions;
}

/** Maps the tweet's URL entities; returns {@code null} (not an empty list) when there are none. */
private List<Url> toUrls(Status tweet) {
    URLEntity[] entities = tweet.getURLEntities();
    if (ArrayUtils.isEmpty(entities)) {
        return null;
    }
    List<Url> urls = new ArrayList<Url>(entities.length);
    for (URLEntity entity : entities) {
        Url url = new Url();
        url.setIdTweet(tweet.getId());
        url.setUrlCorta(entity.getURL());
        url.setUrlLarga(entity.getExpandedURL());
        url.setUrlMostrada(entity.getDisplayURL());
        url.setFlagEnProceso(FLAG_POR_PROCESAR);
        urls.add(url);
    }
    return urls;
}

/** Maps the tweet's media entities; returns {@code null} (not an empty list) when there are none. */
private List<Media> toMedias(Status tweet) {
    MediaEntity[] entities = tweet.getMediaEntities();
    if (ArrayUtils.isEmpty(entities)) {
        return null;
    }
    List<Media> medias = new ArrayList<Media>(entities.length);
    for (MediaEntity entity : entities) {
        Media media = new Media();
        media.setIdTweet(tweet.getId());
        media.setUrl(entity.getURL());
        media.setMediaUrl(entity.getMediaURL());
        media.setMediaUrlHttps(entity.getMediaURLHttps());
        media.setExpandedUrl(entity.getExpandedURL());
        medias.add(media);
    }
    return medias;
}

/** Maps the tweet's hashtags; returns {@code null} (not an empty list) when there are none. */
private List<HashTag> toHashTags(Status tweet) {
    HashtagEntity[] entities = tweet.getHashtagEntities();
    if (ArrayUtils.isEmpty(entities)) {
        return null;
    }
    List<HashTag> hashTags = new ArrayList<HashTag>(entities.length);
    for (HashtagEntity entity : entities) {
        HashTag hashTag = new HashTag();
        hashTag.setTextoTag(entity.getText());
        hashTag.setIdTweet(tweet.getId());
        hashTags.add(hashTag);
    }
    return hashTags;
}
/**
 * Builds the "entities" record for a status.
 *
 * <p>Populates the fields {@code hashtags}, {@code symbols}, {@code media}, {@code urls},
 * {@code user_mentions} and {@code extended_entities}, in that order. Each field receives a list
 * of records built from the matching entity array of the status, or
 * {@code Collections.emptyList()} when that array is empty.
 *
 * @param schemaEntities schema of the entities record; element schemas are looked up from its
 *     fields
 * @param status the status whose entities are converted
 * @return the built entities record
 */
private static Record buildEntities(Schema schemaEntities, Status status) {
    GenericRecordBuilder entities = new GenericRecordBuilder(schemaEntities);
    entities.set("hashtags", hashtagRecords(schemaEntities, status));
    entities.set("symbols", symbolRecords(schemaEntities, status));
    entities.set("media", mediaRecords(schemaEntities, status));
    entities.set("urls", urlRecords(schemaEntities, status));
    entities.set("user_mentions", userMentionRecords(schemaEntities, status));
    entities.set("extended_entities", extendedMediaRecords(schemaEntities, status));
    return entities.build();
}

/** Converts the hashtag entities of the status into records (text, start, end). */
private static List<GenericRecord> hashtagRecords(Schema schemaEntities, Status status) {
    if (status.getHashtagEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema = schemaEntities.getField("hashtags").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (HashtagEntity hashtag : status.getHashtagEntities()) {
        records.add(
                new GenericRecordBuilder(elementSchema)
                        .set("text", hashtag.getText())
                        .set("start", hashtag.getStart())
                        .set("end", hashtag.getEnd())
                        .build());
    }
    return records;
}

/** Converts the symbol entities of the status into records (text, start, end). */
private static List<GenericRecord> symbolRecords(Schema schemaEntities, Status status) {
    if (status.getSymbolEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema = schemaEntities.getField("symbols").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (SymbolEntity symbol : status.getSymbolEntities()) {
        records.add(
                new GenericRecordBuilder(elementSchema)
                        .set("text", symbol.getText())
                        .set("start", symbol.getStart())
                        .set("end", symbol.getEnd())
                        .build());
    }
    return records;
}

/** Converts the media entities of the status into records, including their size map. */
private static List<GenericRecord> mediaRecords(Schema schemaEntities, Status status) {
    if (status.getMediaEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema = schemaEntities.getField("media").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (MediaEntity media : status.getMediaEntities()) {
        GenericRecordBuilder record = new GenericRecordBuilder(elementSchema);
        record.set("url", media.getURL());
        record.set("display_url", media.getDisplayURL());
        record.set("expanded_url", media.getExpandedURL());
        record.set("id", media.getId());
        record.set("media_url", media.getMediaURL());
        record.set("media_url_https", media.getMediaURLHttps());
        record.set("type", media.getType());
        record.set("text", media.getText());
        record.set("start", media.getStart());
        record.set("end", media.getEnd());

        // One size builder is shared by all sizes of this entity; every field is overwritten
        // before each build.
        GenericRecordBuilder sizeBuilder =
                new GenericRecordBuilder(elementSchema.getField("sizes").schema().getValueType());
        Map<String, GenericRecord> sizesByKey = new HashMap<>(4);
        for (int key : media.getSizes().keySet()) {
            sizesByKey.put(
                    Integer.toString(key), sizeRecord(sizeBuilder, media.getSizes().get(key)));
        }
        record.set("sizes", sizesByKey);

        records.add(record.build());
    }
    return records;
}

/** Converts the URL entities of the status into records via {@code buildURLEntity}. */
private static List<GenericRecord> urlRecords(Schema schemaEntities, Status status) {
    if (status.getURLEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema = schemaEntities.getField("urls").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (URLEntity url : status.getURLEntities()) {
        records.add(buildURLEntity(elementSchema, url));
    }
    return records;
}

/** Converts the user-mention entities of the status into records. */
private static List<GenericRecord> userMentionRecords(Schema schemaEntities, Status status) {
    if (status.getUserMentionEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema = schemaEntities.getField("user_mentions").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (UserMentionEntity mention : status.getUserMentionEntities()) {
        records.add(
                new GenericRecordBuilder(elementSchema)
                        .set("name", mention.getName())
                        .set("screen_name", mention.getScreenName())
                        .set("text", mention.getText())
                        .set("id", mention.getId())
                        .set("start", mention.getStart())
                        .set("end", mention.getEnd())
                        .build());
    }
    return records;
}

/** Converts the extended media entities into records, including sizes and video info. */
private static List<GenericRecord> extendedMediaRecords(Schema schemaEntities, Status status) {
    if (status.getExtendedMediaEntities().length == 0) {
        return Collections.emptyList();
    }
    Schema elementSchema =
            schemaEntities.getField("extended_entities").schema().getElementType();
    List<GenericRecord> records = new ArrayList<>();
    for (ExtendedMediaEntity media : status.getExtendedMediaEntities()) {
        GenericRecordBuilder record = new GenericRecordBuilder(elementSchema);
        record.set("url", media.getURL());
        record.set("display_url", media.getDisplayURL());
        record.set("expanded_url", media.getExpandedURL());
        record.set("id", media.getId());
        record.set("media_url", media.getMediaURL());
        record.set("media_url_https", media.getMediaURLHttps());
        record.set("type", media.getType());
        record.set("text", media.getText());
        record.set("start", media.getStart());
        record.set("end", media.getEnd());

        // One size builder is shared by all sizes of this entity; every field is overwritten
        // before each build.
        GenericRecordBuilder sizeBuilder =
                new GenericRecordBuilder(elementSchema.getField("sizes").schema().getValueType());
        Map<String, GenericRecord> sizesByKey = new HashMap<>(4);
        for (int key : media.getSizes().keySet()) {
            sizesByKey.put(
                    Integer.toString(key), sizeRecord(sizeBuilder, media.getSizes().get(key)));
        }
        record.set("sizes", sizesByKey);

        Schema videoInfoSchema = elementSchema.getField("video_info").schema();
        GenericRecordBuilder videoInfo = new GenericRecordBuilder(videoInfoSchema);
        videoInfo.set("h", media.getVideoAspectRatioHeight());
        videoInfo.set("w", media.getVideoAspectRatioWidth());
        videoInfo.set("duration_millis", media.getVideoDurationMillis());

        Schema variantSchema = videoInfoSchema.getField("variants").schema().getElementType();
        List<GenericRecord> variants = new ArrayList<>();
        for (Variant variant : media.getVideoVariants()) {
            variants.add(
                    new GenericRecordBuilder(variantSchema)
                            .set("bitrate", variant.getBitrate())
                            .set("content_type", variant.getContentType())
                            .set("url", variant.getUrl())
                            .build());
        }
        videoInfo.set("variants", variants);
        record.set("video_info", videoInfo.build());

        records.add(record.build());
    }
    return records;
}

/** Fills the given size builder with height, width and resize mode and builds a record. */
private static GenericRecord sizeRecord(GenericRecordBuilder sizeBuilder, Size size) {
    return sizeBuilder
            .set("h", size.getHeight())
            .set("w", size.getWidth())
            .set("resize", size.getResize())
            .build();
}
/**
 * Tells whether {@code matchPattern} is found anywhere in the given status.
 *
 * <p>The parts of the status are examined in this order, and the method returns {@code true} as
 * soon as one category contains a match: the text, the URL entities (display and expanded URL),
 * the media entities (display and expanded URL), the hashtag texts, and the user-mention screen
 * names. The matching category and its values are written to the debug log. When nothing
 * matches, the values of all entity categories are logged and {@code false} is returned.
 *
 * @param status the status to examine
 * @return {@code true} if any examined part matches, {@code false} otherwise
 */
protected boolean checkMatch(twitter4j.Status status) {
    if (matchPattern.matcher(status.getText()).find()) {
        Logger.debug("Terms found in text");
        Logger.debug(" \"" + status.getText() + "\"");
        return true;
    }

    if (matchesAnyUrl(status.getURLEntities())) {
        Logger.debug("Terms found in URL entities");
        logUrlEntities(status.getURLEntities());
        return true;
    }

    if (matchesAnyUrl(status.getMediaEntities())) {
        Logger.debug("Terms found in Media entities");
        logUrlEntities(status.getMediaEntities());
        return true;
    }

    boolean hashtagHit = false;
    for (HashtagEntity hashtag : status.getHashtagEntities()) {
        hashtagHit |= matchPattern.matcher(hashtag.getText()).find();
    }
    if (hashtagHit) {
        Logger.debug("Terms found in Hashtag entities");
        logHashtags(status.getHashtagEntities());
        return true;
    }

    boolean mentionHit = false;
    for (UserMentionEntity mention : status.getUserMentionEntities()) {
        mentionHit |= matchPattern.matcher(mention.getScreenName()).find();
    }
    if (mentionHit) {
        Logger.debug("Terms found in User mention entities");
        logUserMentions(status.getUserMentionEntities());
        return true;
    }

    Logger.debug("Terms NOT FOUND");
    Logger.debug(" Terms not found in URL entities");
    logUrlEntities(status.getURLEntities());
    Logger.debug(" Terms not found in Media entities");
    logUrlEntities(status.getMediaEntities());
    Logger.debug(" Terms not found in Hashtag entities");
    logHashtags(status.getHashtagEntities());
    Logger.debug(" Terms not found in User mention entities");
    logUserMentions(status.getUserMentionEntities());
    return false;
}

/** Tests the display and expanded URL of every entity; the scan never stops early. */
private boolean matchesAnyUrl(URLEntity[] entities) {
    boolean hit = false;
    for (URLEntity entity : entities) {
        hit |= matchPattern.matcher(entity.getDisplayURL()).find();
        hit |= matchPattern.matcher(entity.getExpandedURL()).find();
    }
    return hit;
}

/** Writes the display and expanded URL of every entity to the debug log. */
private void logUrlEntities(URLEntity[] entities) {
    for (URLEntity entity : entities) {
        Logger.debug(" " + entity.getDisplayURL());
        Logger.debug(" " + entity.getExpandedURL());
    }
}

/** Writes the text of every hashtag to the debug log. */
private void logHashtags(HashtagEntity[] hashtags) {
    for (HashtagEntity hashtag : hashtags) {
        Logger.debug(" " + hashtag.getText());
    }
}

/** Writes the screen name of every user mention to the debug log. */
private void logUserMentions(UserMentionEntity[] mentions) {
    for (UserMentionEntity mention : mentions) {
        Logger.debug(" " + mention.getScreenName());
    }
}
/**
 * Produces the text for a status by delegating to {@code parseStatusText}.
 *
 * @param status the status whose text, URL entities and media entities are passed on
 * @return whatever {@code parseStatusText} returns for those three values
 */
private String getStatusText(Status status) {
    final String rawText = status.getText();
    return parseStatusText(rawText, status.getURLEntities(), status.getMediaEntities());
}