/**
 * Retrieves the document at the given URL and parses it as a syndication feed.
 *
 * <p>The raw page source is obtained through {@code pageFetcher} and handed to a
 * {@link SyndFeedInput} via an in-memory reader; the parsed feed is wrapped in a
 * {@link FeedBuilder}.
 *
 * @param url location of the feed document to download
 * @return a builder wrapping the parsed feed
 * @throws FeedException if the downloaded content cannot be parsed as a feed
 */
public FeedBuilder fetch(String url) throws FeedException {
    final String rawFeed = pageFetcher.fetch(url);
    final StringReader feedReader = new StringReader(rawFeed);
    final SyndFeed parsedFeed = new SyndFeedInput().build(feedReader);
    return new FeedBuilder(parsedFeed);
}
/**
 * Parses the custom-tags example feed and checks that every known custom tag on the first
 * entry was converted to the expected typed value.
 *
 * <p>Tags whose name is not listed below are logged and otherwise ignored.
 *
 * @throws Exception if the test file cannot be read or parsed
 */
public void testParse() throws Exception {
    final SyndFeedInput input = new SyndFeedInput();
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/custom-tags-example.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final CustomTags customTags = (CustomTags) entry.getModule(CustomTags.URI);

    for (final CustomTag tag : customTags.getValues()) {
        LOG.debug("{}", tag);
        final String name = tag.getName();
        final Object value = tag.getValue();

        // Tag names are mutually exclusive, so a single else-if chain is sufficient.
        if ("language_skills".equals(name)) {
            Assert.assertEquals("Fluent in English and German", value);
        } else if ("prior_experience_years".equals(name)) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            Assert.assertEquals(Integer.valueOf(5), value);
        } else if ("start_date".equals(name)) {
            final Calendar cal = Calendar.getInstance();
            cal.setTimeInMillis(0);
            // Calendar months are zero-based: NOVEMBER == 10.
            cal.set(2005, Calendar.NOVEMBER, 15, 0, 0, 0);
            Assert.assertEquals(cal.getTime(), value);
        } else if ("test_url".equals(name)) {
            Assert.assertEquals(new URL("http://www.screaming-penguin.com"), value);
        } else if ("test_boolean".equals(name)) {
            // Boolean.TRUE replaces the deprecated Boolean(boolean) constructor.
            Assert.assertEquals(Boolean.TRUE, value);
        } else if ("test_intUnit".equals(name)) {
            Assert.assertEquals(new IntUnit(25, "horses"), value);
        } else if ("test_floatUnit".equals(name)) {
            Assert.assertEquals(new FloatUnit(2.5f, "cows"), value);
        } else if ("test_location".equals(name)) {
            Assert.assertEquals(new CustomTagImpl.Location("125 Main St, Sometown, GA"), value);
        } else if ("test_dateRange".equals(name)) {
            final Calendar cal = Calendar.getInstance();
            cal.setTimeInMillis(0);
            // Named month constant and plain decimals instead of octal-looking 06 / 04
            // (JULY == 6, same value as before).
            cal.set(2005, Calendar.JULY, 4, 20, 0, 0);
            final Date start = cal.getTime();
            cal.set(2005, Calendar.JULY, 4, 23, 0, 0);
            final DateTimeRange dtr = new DateTimeRange(start, cal.getTime());
            Assert.assertEquals(dtr, value);
        }
    }
}
private List<Outlink> parseFeed(String url, byte[] content, Metadata parentMetadata) throws MalformedURLException { List<Outlink> links = new ArrayList<>(); SyndFeed feed = null; try (ByteArrayInputStream is = new ByteArrayInputStream(content)) { SyndFeedInput input = new SyndFeedInput(); feed = input.build(new InputSource(is)); } catch (Exception e) { LOG.error("Exception parsing feed from DOM {}", url); return links; } URL sURL = new URL(url); List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { String targetURL = entry.getLink(); // build an absolute URL try { targetURL = URLUtil.resolveURL(sURL, targetURL).toExternalForm(); } catch (MalformedURLException e) { LOG.debug("MalformedURLException on {}", targetURL); continue; } targetURL = urlFilters.filter(sURL, parentMetadata, targetURL); if (StringUtils.isBlank(targetURL)) continue; Outlink newLink = new Outlink(targetURL); Metadata targetMD = metadataTransfer.getMetaForOutlink(targetURL, url, parentMetadata); newLink.setMetadata(targetMD); String title = entry.getTitle(); if (StringUtils.isNotBlank(title)) { targetMD.setValue("feed.title", title.trim()); } Date publishedDate = entry.getPublishedDate(); if (publishedDate != null) { // filter based on the published date if (filterHoursSincePub != -1) { Calendar rightNow = Calendar.getInstance(); rightNow.add(Calendar.HOUR, -filterHoursSincePub); if (publishedDate.before(rightNow.getTime())) { LOG.info( "{} has a published date {} which is more than {} hours old", targetURL, publishedDate.toString(), filterHoursSincePub); continue; } } targetMD.setValue("feed.publishedDate", publishedDate.toString()); } SyndContent description = entry.getDescription(); if (description != null && StringUtils.isNotBlank(description.getValue())) { targetMD.setValue("feed.description", description.getValue()); } links.add(newLink); } return links; }