public void onStatus(twitter4j.Status status) { Logger.info(status.getUser().getName() + " : " + status.getText()); Tweet tweet = new Tweet(status); tweet.conformsToTerms = checkMatch(status); tweet.save(); if (tweet.conformsToTerms && esClient != null) { String json = DataObjectFactory.getRawJSON(status); json = json.replaceAll( "(\"geo\":\\{\"type\":\"Point\",\"coordinates\":)\\[([-0-9.,]*)\\]", "$1\"$2\""); // Logger.debug("geo mangled json"); // Logger.debug(json); IndexResponse response = esClient.prepareIndex("twitter", "tweet").setSource(json).execute().actionGet(); } }
/**
 * Checks whether the status matches {@code matchPattern}.
 *
 * <p>Candidates are tested in this order, stopping at the first category that matches:
 * the status text, URL entities (display and expanded URL), media entities (display and
 * expanded URL), hashtag texts, and user-mention screen names. The outcome and the
 * inspected values are written to the debug log.
 *
 * @param status the status to inspect
 * @return {@code true} if any candidate contains a match for {@code matchPattern},
 *         {@code false} otherwise
 */
protected boolean checkMatch(twitter4j.Status status) {
    String text = status.getText();
    if (containsTerms(text)) {
        Logger.debug("Terms found in text");
        Logger.debug(" \"" + text + "\"");
        return true;
    }

    URLEntity[] urls = status.getURLEntities();
    if (anyUrlContainsTerms(urls)) {
        Logger.debug("Terms found in URL entities");
        logUrlEntities(urls);
        return true;
    }

    // Media entities are handled through their URLEntity view, as the original loops did.
    URLEntity[] media = status.getMediaEntities();
    if (anyUrlContainsTerms(media)) {
        Logger.debug("Terms found in Media entities");
        logUrlEntities(media);
        return true;
    }

    HashtagEntity[] hashtags = status.getHashtagEntities();
    boolean hashtagHit = false;
    for (HashtagEntity hashtag : hashtags) {
        if (containsTerms(hashtag.getText())) {
            hashtagHit = true;
            break;
        }
    }
    if (hashtagHit) {
        Logger.debug("Terms found in Hashtag entities");
        logHashtagEntities(hashtags);
        return true;
    }

    UserMentionEntity[] mentions = status.getUserMentionEntities();
    boolean mentionHit = false;
    for (UserMentionEntity mention : mentions) {
        if (containsTerms(mention.getScreenName())) {
            mentionHit = true;
            break;
        }
    }
    if (mentionHit) {
        Logger.debug("Terms found in User mention entities");
        logUserMentionEntities(mentions);
        return true;
    }

    // Nothing matched: dump every inspected entity to help diagnose the miss.
    Logger.debug("Terms NOT FOUND");
    Logger.debug(" Terms not found in URL entities");
    logUrlEntities(urls);
    Logger.debug(" Terms not found in Media entities");
    logUrlEntities(media);
    Logger.debug(" Terms not found in Hashtag entities");
    logHashtagEntities(hashtags);
    Logger.debug(" Terms not found in User mention entities");
    logUserMentionEntities(mentions);
    return false;
}

/** Returns whether {@code candidate} is non-null and contains a match for {@code matchPattern}. */
private boolean containsTerms(String candidate) {
    // Entity URL getters may return null; Pattern.matcher(null) would throw.
    return candidate != null && matchPattern.matcher(candidate).find();
}

/** Returns whether any entity's display URL or expanded URL contains the terms. */
private boolean anyUrlContainsTerms(URLEntity[] entities) {
    for (URLEntity entity : entities) {
        if (containsTerms(entity.getDisplayURL()) || containsTerms(entity.getExpandedURL())) {
            return true;
        }
    }
    return false;
}

/** Debug-logs the display URL and expanded URL of each entity. */
private void logUrlEntities(URLEntity[] entities) {
    for (URLEntity entity : entities) {
        Logger.debug(" " + entity.getDisplayURL());
        Logger.debug(" " + entity.getExpandedURL());
    }
}

/** Debug-logs the text of each hashtag entity. */
private void logHashtagEntities(HashtagEntity[] entities) {
    for (HashtagEntity entity : entities) {
        Logger.debug(" " + entity.getText());
    }
}

/** Debug-logs the screen name of each user-mention entity. */
private void logUserMentionEntities(UserMentionEntity[] entities) {
    for (UserMentionEntity entity : entities) {
        Logger.debug(" " + entity.getScreenName());
    }
}