public static void main(String[] args) { boolean ok = false; if (args.length == 1) { try { URL feedUrl = new URL(args[0]); FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache); FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); fetcher.addFetcherEventListener(listener); System.err.println("Retrieving feed " + feedUrl); // Retrieve the feed. // We will get a Feed Polled Event and then a // Feed Retrieved event (assuming the feed is valid) SyndFeed feed = fetcher.retrieveFeed(feedUrl); System.err.println(feedUrl + " retrieved"); System.err.println( feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size() + " entries."); // We will now retrieve the feed again. If the feed is unmodified // and the server supports conditional gets, we will get a "Feed // Unchanged" event after the Feed Polled event System.err.println("Polling " + feedUrl + " again to test conditional get support."); SyndFeed feed2 = fetcher.retrieveFeed(feedUrl); System.err.println( "If a \"Feed Unchanged\" event fired then the server supports conditional gets."); ok = true; } catch (Exception ex) { System.out.println("ERROR: " + ex.getMessage()); ex.printStackTrace(); } } if (!ok) { System.out.println(); System.out.println("FeedReader reads and prints any RSS/Atom feed type."); System.out.println("The first parameter must be the URL of the feed to read."); System.out.println(); } }
// get a feed fetcher private FeedFetcher getRomeFetcher() { FeedFetcherCache feedCache = getRomeFetcherCache(); FeedFetcher feedFetcher = null; if (feedCache != null) { feedFetcher = new HttpURLFeedFetcher(feedCache); } else { feedFetcher = new HttpURLFeedFetcher(); } // set options feedFetcher.setUsingDeltaEncoding(false); feedFetcher.setUserAgent("RollerPlanetAggregator"); return feedFetcher; }
// Get the syndicated feed using rome private SyndFeed getFeed(SourcePojo source, String url) { synchronized ( FeedHarvester .class) { // (workaround for ROME concurrency issues: // http://www.jdom.org/pipermail/jdom-interest/2008-December/016252.html) if (null == url) { url = source.getUrl(); } for (int i = 0; i < 2; ++i) { // Will have 2 goes in case of failure // Check to see if the feed requires authentication if (source.getAuthentication() != null) // requires auth { try { FeedFetcher feedFetcher = new HttpClientFeedFetcher(null, authenticateFeed(source.getAuthentication())); if ((null != source.getRssConfig()) && (null != source.getRssConfig().getUserAgent())) { feedFetcher.setUserAgent(source.getRssConfig().getUserAgent()); } return feedFetcher.retrieveFeed(new URL(url)); } catch (Exception e) { if (1 == i) { // else just try again if (null == url) { // can only error on primary RSS, makes life simpler handleRssError(e, source); } } } } else // does not require auth { try { FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache); if ((null != source.getRssConfig()) && (null != source.getRssConfig().getUserAgent())) { feedFetcher.setUserAgent(source.getRssConfig().getUserAgent()); } return feedFetcher.retrieveFeed(new URL(this.cleanUrlStart(url))); } catch (Exception e) { if (1 == i) { // else just try again if (null == url) { // can only error on primary RSS, makes life simpler handleRssError(e, source); } } } } // If still here, must have errored so sleep before trying again try { Thread.sleep(10000); } catch (InterruptedException e) { } } // (end get 2 goes) } return null; }
/** @inheritDoc */ public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException { if (feedURL == null) { throw new IllegalArgumentException("feed url cannot be null"); } // setup Rome feed fetcher FeedFetcher feedFetcher = getRomeFetcher(); // fetch the feed log.debug("Fetching feed: " + feedURL); SyndFeed feed; try { feed = feedFetcher.retrieveFeed(new URL(feedURL)); } catch (Exception ex) { throw new FetcherException("Error fetching subscription - " + feedURL, ex); } log.debug("Feed pulled, extracting data into Subscription"); // build planet subscription from fetched feed Subscription newSub = new Subscription(); newSub.setFeedURL(feedURL); newSub.setSiteURL(feed.getLink()); newSub.setTitle(feed.getTitle()); newSub.setAuthor(feed.getAuthor()); newSub.setLastUpdated(feed.getPublishedDate()); // normalize any data that couldn't be properly extracted if (newSub.getSiteURL() == null) { // set the site url to the feed url then newSub.setSiteURL(newSub.getFeedURL()); } if (newSub.getAuthor() == null) { // set the author to the title newSub.setAuthor(newSub.getTitle()); } if (newSub.getLastUpdated() == null) { // no update time specified in feed, so try consulting feed info cache FeedFetcherCache feedCache = getRomeFetcherCache(); try { SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL())); if (feedInfo.getLastModified() != null) { long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue(); if (lastUpdatedLong != 0) { newSub.setLastUpdated(new Date(lastUpdatedLong)); } } } catch (MalformedURLException ex) { // should never happen since we check this above } } // check if feed is unchanged and bail now if so if (lastModified != null && newSub.getLastUpdated() != null && !newSub.getLastUpdated().after(lastModified)) { return null; } if (log.isDebugEnabled()) { log.debug("Subscription is: " + newSub.toString()); } // some kludge to deal with feeds w/ no entry dates // we assign arbitrary dates chronologically by entry starting either // from the current time or the last update time of the subscription Calendar cal = Calendar.getInstance(); if (newSub.getLastUpdated() != null) { cal.setTime(newSub.getLastUpdated()); } else { cal.setTime(new Date()); cal.add(Calendar.DATE, -1); } // add entries List<SyndEntry> feedEntries = feed.getEntries(); for (SyndEntry feedEntry : feedEntries) { SubscriptionEntry newEntry = buildEntry(feedEntry); // some kludge to handle feeds with no entry dates if (newEntry.getPubTime() == null) { log.debug("No published date, assigning fake date for " + feedURL); newEntry.setPubTime(new Timestamp(cal.getTimeInMillis())); cal.add(Calendar.DATE, -1); } if (newEntry != null) { newSub.addEntry(newEntry); } } log.debug(feedEntries.size() + " entries included"); return newSub; }