public void setEpisodeAttributes(Episode episode, Podcast podcast, SyndEntry entry) { // set DESCRIPTION for episode - used in search if (null != entry.getDescription()) { String episodeDesc = entry.getDescription().getValue(); // tags are removed from description String descWithoutTabs = episodeDesc.replaceAll("\\<[^>]*>", ""); // carriage returns are removed from description - for player String descWithoutEndOfLine = descWithoutTabs.replaceAll("\\n", ""); if (descWithoutEndOfLine.length() > MAX_LENGTH_DESCRIPTION) { episode.setDescription(descWithoutEndOfLine.substring(0, MAX_LENGTH_DESCRIPTION)); } else { episode.setDescription(descWithoutEndOfLine); } } // set author episode.setAuthor(entry.getAuthor()); // set title for episode - used in search String episodeTitle = entry.getTitle(); if (episodeTitle != null) { // removes quotes to display properly in player episodeTitle = episodeTitle.replaceAll("\"", ""); if (episodeTitle.length() > MAX_PERMITTED_TITLE_LENGTH) { episodeTitle = episodeTitle.substring(0, MAX_PERMITTED_TITLE_LENGTH); } episode.setTitle(episodeTitle); String titleInUrl = episodeTitle.trim().replaceAll("[^a-zA-Z0-9\\-\\s\\.]", ""); titleInUrl = titleInUrl.replaceAll("[\\-| |\\.]+", "-"); if (titleInUrl.length() > TITLE_IN_URL_MAX_LENGTH) { episode.setTitleInUrl(titleInUrl.substring(0, TITLE_IN_URL_MAX_LENGTH)); } else { episode.setTitleInUrl(titleInUrl); } } episode.setLink(entry.getLink()); // in the beginning inherit the media type from the podcast episode.setMediaType(podcast.getMediaType()); // get the list of enclosures @SuppressWarnings("unchecked") List<SyndEnclosure> enclosures = (List<SyndEnclosure>) entry.getEnclosures(); List<String> audioMimeTypesList = Arrays.asList(audioMimeTypesArray); List<String> videoMimeTypesList = Arrays.asList(videoMimeTypesArray); // set media url for the episode - this will be played in the player if (null != enclosures) { // if in the enclosure list is a media type (either audio or video), // this will set as the link of the episode for (SyndEnclosure enclosure : enclosures) { if (null != enclosure) { episode.setMediaUrl(enclosure.getUrl()); if (enclosure.getLength() >= 0) episode.setLength(enclosure.getLength()); // when adding a new podcast media type is selected for the // podcast based on an initial view, but it can be that is a // mixed podcast so both audio // and video should be considered and in that case PRIORITY // has the type of the episode if any... if (null != enclosure.getType()) { episode.setEnclosureType(enclosure.getType().trim()); if (audioMimeTypesList.contains(enclosure.getType().trim())) { episode.setMediaType(MediaType.Audio); break; } if (videoMimeTypesList.contains(enclosure.getType().trim())) { episode.setMediaType(MediaType.Video); break; } } } } } else { episode.setMediaUrl("noMediaUrl"); } if (episode.getMediaUrl() == null) { episode.setMediaUrl("noMediaUrl"); } if (episode.getMediaUrl() == null || episode.getMediaUrl().equals("noMediaUrl")) { LOG.warn( "PodcastId[" + podcast.getPodcastId() + "] - " + "COULD NOT SET MEDIA URL - " + "epTitle[" + entry.getTitle() + "]" + "feed[" + podcast.getUrl() + "]"); } // set link attribute episode.setLink(entry.getLink()); episode.setPublicationDate(entry.getPublishedDate()); updatePodcastPublicationDateAndLastMediaUrl(episode, podcast); if (episode.getPublicationDate() == null) { LOG.warn( "PodcastId[" + podcast.getPodcastId() + "] - " + "COULD NOT SET publication date " + "epTitle[" + entry.getTitle() + "]" + "feed[" + podcast.getUrl() + "]"); } }
private List<Outlink> parseFeed(String url, byte[] content, Metadata parentMetadata) throws MalformedURLException { List<Outlink> links = new ArrayList<>(); SyndFeed feed = null; try (ByteArrayInputStream is = new ByteArrayInputStream(content)) { SyndFeedInput input = new SyndFeedInput(); feed = input.build(new InputSource(is)); } catch (Exception e) { LOG.error("Exception parsing feed from DOM {}", url); return links; } URL sURL = new URL(url); List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { String targetURL = entry.getLink(); // build an absolute URL try { targetURL = URLUtil.resolveURL(sURL, targetURL).toExternalForm(); } catch (MalformedURLException e) { LOG.debug("MalformedURLException on {}", targetURL); continue; } targetURL = urlFilters.filter(sURL, parentMetadata, targetURL); if (StringUtils.isBlank(targetURL)) continue; Outlink newLink = new Outlink(targetURL); Metadata targetMD = metadataTransfer.getMetaForOutlink(targetURL, url, parentMetadata); newLink.setMetadata(targetMD); String title = entry.getTitle(); if (StringUtils.isNotBlank(title)) { targetMD.setValue("feed.title", title.trim()); } Date publishedDate = entry.getPublishedDate(); if (publishedDate != null) { // filter based on the published date if (filterHoursSincePub != -1) { Calendar rightNow = Calendar.getInstance(); rightNow.add(Calendar.HOUR, -filterHoursSincePub); if (publishedDate.before(rightNow.getTime())) { LOG.info( "{} has a published date {} which is more than {} hours old", targetURL, publishedDate.toString(), filterHoursSincePub); continue; } } targetMD.setValue("feed.publishedDate", publishedDate.toString()); } SyndContent description = entry.getDescription(); if (description != null && StringUtils.isNotBlank(description.getValue())) { targetMD.setValue("feed.description", description.getValue()); } links.add(newLink); } return links; }