/**
 * Tests a status against the configured filter pattern.
 *
 * <p>The status text is checked first, followed by the expanded URL of every URL entity
 * attached to the status. Each candidate string is tested with {@code Matcher.matches()}.
 * When no filter pattern is configured nothing is inspected and the result is false.
 *
 * @param status Status to check
 * @return True if the status text or at least one expanded URL matches the filter pattern,
 *     false otherwise (including when no filter pattern is set)
 */
private boolean matchesFilter(Status status) {
    // Without a pattern there is nothing to match against.
    if (filterPattern == null) {
        return false;
    }

    boolean matched = filterPattern.matcher(status.getText()).matches();

    if (status.getURLEntities() == null) {
        return matched;
    }

    for (URLEntity entity : status.getURLEntities()) {
        URL expanded = entity.getExpandedURL();
        // Skip entities that carry no usable expanded URL.
        if (expanded == null || expanded.toString() == null) {
            continue;
        }
        if (filterPattern.matcher(expanded.toString()).matches()) {
            matched = true;
        }
    }
    return matched;
}
/**
 * Builds the Avro record that holds every entity list of a status.
 *
 * <p>The fields {@code hashtags}, {@code symbols}, {@code media}, {@code urls},
 * {@code user_mentions} and {@code extended_entities} are populated, in that order. A field
 * whose source array on the status is empty is set to {@link Collections#emptyList()}.
 *
 * @param schemaEntities schema of the entities record; each of the six fields above is read
 *     as an array schema via {@code getElementType()}
 * @param status status whose entities are converted
 * @return the built entities record
 */
private static Record buildEntities(Schema schemaEntities, Status status) {
    GenericRecordBuilder builderEntities = new GenericRecordBuilder(schemaEntities);
    builderEntities.set("hashtags", toHashtagRecords(schemaEntities, status.getHashtagEntities()));
    builderEntities.set("symbols", toSymbolRecords(schemaEntities, status.getSymbolEntities()));
    builderEntities.set("media", toMediaRecords(schemaEntities, status.getMediaEntities()));
    builderEntities.set("urls", toUrlRecords(schemaEntities, status.getURLEntities()));
    builderEntities.set(
            "user_mentions", toUserMentionRecords(schemaEntities, status.getUserMentionEntities()));
    builderEntities.set(
            "extended_entities",
            toExtendedMediaRecords(schemaEntities, status.getExtendedMediaEntities()));
    return builderEntities.build();
}

/** Returns the element schema of the array-typed field {@code fieldName} of {@code schemaEntities}. */
private static Schema elementSchema(Schema schemaEntities, String fieldName) {
    return schemaEntities.getField(fieldName).schema().getElementType();
}

/** Converts hashtag entities into records with {@code text}, {@code start} and {@code end}. */
private static List<GenericRecord> toHashtagRecords(Schema schemaEntities, HashtagEntity[] hashtags) {
    if (hashtags.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaHashtagObject = elementSchema(schemaEntities, "hashtags");
    List<GenericRecord> records = new ArrayList<>();
    for (HashtagEntity hashtagEntity : hashtags) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaHashtagObject);
        builder.set("text", hashtagEntity.getText());
        builder.set("start", hashtagEntity.getStart());
        builder.set("end", hashtagEntity.getEnd());
        records.add(builder.build());
    }
    return records;
}

/** Converts symbol entities into records with {@code text}, {@code start} and {@code end}. */
private static List<GenericRecord> toSymbolRecords(Schema schemaEntities, SymbolEntity[] symbols) {
    if (symbols.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaSymbolObject = elementSchema(schemaEntities, "symbols");
    List<GenericRecord> records = new ArrayList<>();
    for (SymbolEntity symbolEntity : symbols) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaSymbolObject);
        builder.set("text", symbolEntity.getText());
        builder.set("start", symbolEntity.getStart());
        builder.set("end", symbolEntity.getEnd());
        records.add(builder.build());
    }
    return records;
}

/** Converts media entities into records, including their {@code sizes} map. */
private static List<GenericRecord> toMediaRecords(Schema schemaEntities, MediaEntity[] mediaEntities) {
    if (mediaEntities.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaMediaObject = elementSchema(schemaEntities, "media");
    List<GenericRecord> records = new ArrayList<>();
    for (MediaEntity mediaEntity : mediaEntities) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaMediaObject);
        builder.set("url", mediaEntity.getURL());
        builder.set("display_url", mediaEntity.getDisplayURL());
        builder.set("expanded_url", mediaEntity.getExpandedURL());
        builder.set("id", mediaEntity.getId());
        builder.set("media_url", mediaEntity.getMediaURL());
        builder.set("media_url_https", mediaEntity.getMediaURLHttps());
        builder.set("type", mediaEntity.getType());
        builder.set("text", mediaEntity.getText());
        builder.set("start", mediaEntity.getStart());
        builder.set("end", mediaEntity.getEnd());
        builder.set("sizes", toSizeRecords(schemaMediaObject, mediaEntity.getSizes()));
        records.add(builder.build());
    }
    return records;
}

/** Converts URL entities into records by delegating each one to {@code buildURLEntity}. */
private static List<GenericRecord> toUrlRecords(Schema schemaEntities, URLEntity[] urlEntities) {
    if (urlEntities.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaURLObject = elementSchema(schemaEntities, "urls");
    List<GenericRecord> records = new ArrayList<>();
    for (URLEntity urlEntity : urlEntities) {
        records.add(buildURLEntity(schemaURLObject, urlEntity));
    }
    return records;
}

/** Converts user mention entities into records. */
private static List<GenericRecord> toUserMentionRecords(
        Schema schemaEntities, UserMentionEntity[] userMentions) {
    if (userMentions.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaUserMentionObject = elementSchema(schemaEntities, "user_mentions");
    List<GenericRecord> records = new ArrayList<>();
    for (UserMentionEntity userMentionEntity : userMentions) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaUserMentionObject);
        builder.set("name", userMentionEntity.getName());
        builder.set("screen_name", userMentionEntity.getScreenName());
        builder.set("text", userMentionEntity.getText());
        builder.set("id", userMentionEntity.getId());
        builder.set("start", userMentionEntity.getStart());
        builder.set("end", userMentionEntity.getEnd());
        records.add(builder.build());
    }
    return records;
}

/** Converts extended media entities into records, including {@code sizes} and {@code video_info}. */
private static List<GenericRecord> toExtendedMediaRecords(
        Schema schemaEntities, ExtendedMediaEntity[] extendedMediaEntities) {
    if (extendedMediaEntities.length == 0) {
        return Collections.emptyList();
    }
    Schema schemaExtendedMediaObject = elementSchema(schemaEntities, "extended_entities");
    List<GenericRecord> records = new ArrayList<>();
    for (ExtendedMediaEntity extendedMediaEntity : extendedMediaEntities) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schemaExtendedMediaObject);
        builder.set("url", extendedMediaEntity.getURL());
        builder.set("display_url", extendedMediaEntity.getDisplayURL());
        builder.set("expanded_url", extendedMediaEntity.getExpandedURL());
        builder.set("id", extendedMediaEntity.getId());
        builder.set("media_url", extendedMediaEntity.getMediaURL());
        builder.set("media_url_https", extendedMediaEntity.getMediaURLHttps());
        builder.set("type", extendedMediaEntity.getType());
        builder.set("text", extendedMediaEntity.getText());
        builder.set("start", extendedMediaEntity.getStart());
        builder.set("end", extendedMediaEntity.getEnd());
        builder.set("sizes", toSizeRecords(schemaExtendedMediaObject, extendedMediaEntity.getSizes()));
        builder.set(
                "video_info",
                toVideoInfoRecord(
                        schemaExtendedMediaObject.getField("video_info").schema(), extendedMediaEntity));
        records.add(builder.build());
    }
    return records;
}

/**
 * Builds the {@code sizes} map of a media record: one {@code h}/{@code w}/{@code resize} record
 * per entry, keyed by the decimal string form of the original integer key.
 *
 * @param schemaMediaObject schema of the owning media record; its {@code sizes} field is read
 *     as a map schema via {@code getValueType()}
 * @param sizes sizes reported by the media entity
 */
private static Map<String, GenericRecord> toSizeRecords(
        Schema schemaMediaObject, Map<Integer, Size> sizes) {
    Schema schemaSize = schemaMediaObject.getField("sizes").schema().getValueType();
    // One builder is reused for all entries; every field is overwritten before each build().
    GenericRecordBuilder builderSize = new GenericRecordBuilder(schemaSize);
    Map<String, GenericRecord> mapSizes = new HashMap<>(4);
    for (Map.Entry<Integer, Size> entry : sizes.entrySet()) {
        Size size = entry.getValue();
        builderSize.set("h", size.getHeight());
        builderSize.set("w", size.getWidth());
        builderSize.set("resize", size.getResize());
        mapSizes.put(Integer.toString(entry.getKey()), builderSize.build());
    }
    return mapSizes;
}

/** Builds the {@code video_info} record (aspect ratio, duration and variants) of an extended media entity. */
private static GenericRecord toVideoInfoRecord(
        Schema schemaVideoInfo, ExtendedMediaEntity extendedMediaEntity) {
    GenericRecordBuilder builderVideoInfo = new GenericRecordBuilder(schemaVideoInfo);
    builderVideoInfo.set("h", extendedMediaEntity.getVideoAspectRatioHeight());
    builderVideoInfo.set("w", extendedMediaEntity.getVideoAspectRatioWidth());
    builderVideoInfo.set("duration_millis", extendedMediaEntity.getVideoDurationMillis());

    Schema schemaVideoVariants = schemaVideoInfo.getField("variants").schema().getElementType();
    List<GenericRecord> listVideoVariants = new ArrayList<>();
    for (Variant extendedVideoVariant : extendedMediaEntity.getVideoVariants()) {
        GenericRecordBuilder builderVideoVariant = new GenericRecordBuilder(schemaVideoVariants);
        builderVideoVariant.set("bitrate", extendedVideoVariant.getBitrate());
        builderVideoVariant.set("content_type", extendedVideoVariant.getContentType());
        builderVideoVariant.set("url", extendedVideoVariant.getUrl());
        listVideoVariants.add(builderVideoVariant.build());
    }
    builderVideoInfo.set("variants", listVideoVariants);
    return builderVideoInfo.build();
}