/**
 * Builds the anchor for a URL entity and hands it to {@code linkToText}.
 *
 * <p>The visible link text is chosen in this order:
 * <ol>
 *   <li>{@code urlEntity.getDisplayURL()}, when {@code urlEntity} and its display URL are non-null;</li>
 *   <li>{@code entity.displayURL}, when non-null;</li>
 *   <li>the raw URL from {@code entity.getValue()}.</li>
 * </ol>
 * Whichever candidate is chosen is passed through {@code escapeHTML}. Previously only the
 * raw-URL fallback was escaped, so a display URL containing markup characters reached the
 * link text unescaped.
 * NOTE(review): this assumes {@code linkToText} emits the link text verbatim (the existing
 * escaping of the fallback suggests so) - confirm against its implementation.
 *
 * @param entity    entity supplying the URL (used as the {@code href}) and an optional display URL
 * @param text      not read by this method; kept so existing callers keep compiling
 * @param builder   output buffer, forwarded to {@code linkToText}
 * @param urlEntity optional entity whose display URL takes precedence; may be {@code null}
 */
public void linkToURL(Entity entity, String text, StringBuilder builder, URLEntity urlEntity) {
    CharSequence url = entity.getValue();

    // Resolve the preferred display text once; null means "no display URL available".
    CharSequence displayUrl = null;
    if (urlEntity != null) {
        displayUrl = urlEntity.getDisplayURL();
    }
    if (displayUrl == null) {
        displayUrl = entity.displayURL;
    }

    // Escape every candidate, not just the raw-URL fallback.
    CharSequence linkText = escapeHTML(displayUrl != null ? displayUrl : url);

    Map<String, String> attrs = new LinkedHashMap<String, String>();
    attrs.put("href", url.toString());
    linkToText(entity, linkText, attrs, builder);
}
/**
 * Copies the fields of a {@code URLEntity} into a new record built against the given schema.
 *
 * <p>The record fields set are {@code url}, {@code text}, {@code expanded_url},
 * {@code display_url}, {@code start} and {@code end}, each taken from the matching getter.
 *
 * @param schemaURLObject schema handed to the record builder
 * @param urlEntity       entity whose values are copied
 * @return the record produced by the builder
 */
private static Record buildURLEntity(Schema schemaURLObject, URLEntity urlEntity) {
    return new GenericRecordBuilder(schemaURLObject)
            .set("url", urlEntity.getURL())
            .set("text", urlEntity.getText())
            .set("expanded_url", urlEntity.getExpandedURL())
            .set("display_url", urlEntity.getDisplayURL())
            .set("start", urlEntity.getStart())
            .set("end", urlEntity.getEnd())
            .build();
}
/**
 * Reports whether {@code matchPattern} is found anywhere in the given status.
 *
 * <p>The parts of the status are tried in this order, stopping at the first hit: the status
 * text, URL entities (display and expanded URL), media entities (display and expanded URL),
 * hashtag texts, and user-mention screen names. The part that matched is written to the
 * debug log; when nothing matches, every entity that was examined is logged instead.
 *
 * <p>Null strings and null entity arrays are treated as "no match" rather than being passed
 * to the matcher, which would throw {@code NullPointerException}.
 *
 * @param status the status to inspect
 * @return {@code true} if the pattern is found in any inspected part, {@code false} otherwise
 */
protected boolean checkMatch(twitter4j.Status status) {
    if (matchesTerms(status.getText())) {
        Logger.debug("Terms found in text");
        Logger.debug(" \"" + status.getText() + "\"");
        return true;
    }

    if (anyUrlMatchesTerms(status.getURLEntities())) {
        Logger.debug("Terms found in URL entities");
        debugLogUrls(status.getURLEntities());
        return true;
    }

    if (anyUrlMatchesTerms(status.getMediaEntities())) {
        Logger.debug("Terms found in Media entities");
        debugLogUrls(status.getMediaEntities());
        return true;
    }

    if (anyHashtagMatchesTerms(status.getHashtagEntities())) {
        Logger.debug("Terms found in Hashtag entities");
        debugLogHashtags(status.getHashtagEntities());
        return true;
    }

    if (anyMentionMatchesTerms(status.getUserMentionEntities())) {
        Logger.debug("Terms found in User mention entities");
        debugLogMentions(status.getUserMentionEntities());
        return true;
    }

    // Nothing matched: dump everything that was examined to help diagnose the miss.
    Logger.debug("Terms NOT FOUND");
    Logger.debug(" Terms not found in URL entities");
    debugLogUrls(status.getURLEntities());
    Logger.debug(" Terms not found in Media entities");
    debugLogUrls(status.getMediaEntities());
    Logger.debug(" Terms not found in Hashtag entities");
    debugLogHashtags(status.getHashtagEntities());
    Logger.debug(" Terms not found in User mention entities");
    debugLogMentions(status.getUserMentionEntities());
    return false;
}

/** Returns true when {@code candidate} is non-null and {@code matchPattern} is found in it. */
private boolean matchesTerms(String candidate) {
    return candidate != null && matchPattern.matcher(candidate).find();
}

/** Returns true when any entity's display URL or expanded URL matches the pattern. */
private boolean anyUrlMatchesTerms(URLEntity[] entities) {
    if (entities == null) {
        return false;
    }
    for (URLEntity ue : entities) {
        if (matchesTerms(ue.getDisplayURL()) || matchesTerms(ue.getExpandedURL())) {
            return true;
        }
    }
    return false;
}

/** Returns true when any hashtag's text matches the pattern. */
private boolean anyHashtagMatchesTerms(HashtagEntity[] entities) {
    if (entities == null) {
        return false;
    }
    for (HashtagEntity he : entities) {
        if (matchesTerms(he.getText())) {
            return true;
        }
    }
    return false;
}

/** Returns true when any mentioned user's screen name matches the pattern. */
private boolean anyMentionMatchesTerms(UserMentionEntity[] entities) {
    if (entities == null) {
        return false;
    }
    for (UserMentionEntity me : entities) {
        if (matchesTerms(me.getScreenName())) {
            return true;
        }
    }
    return false;
}

/** Writes the display URL and expanded URL of each entity to the debug log. */
private void debugLogUrls(URLEntity[] entities) {
    if (entities == null) {
        return;
    }
    for (URLEntity ue : entities) {
        Logger.debug(" " + ue.getDisplayURL());
        Logger.debug(" " + ue.getExpandedURL());
    }
}

/** Writes the text of each hashtag to the debug log. */
private void debugLogHashtags(HashtagEntity[] entities) {
    if (entities == null) {
        return;
    }
    for (HashtagEntity he : entities) {
        Logger.debug(" " + he.getText());
    }
}

/** Writes the screen name of each mentioned user to the debug log. */
private void debugLogMentions(UserMentionEntity[] entities) {
    if (entities == null) {
        return;
    }
    for (UserMentionEntity me : entities) {
        Logger.debug(" " + me.getScreenName());
    }
}