/** @inheritDoc */ public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException { if (feedURL == null) { throw new IllegalArgumentException("feed url cannot be null"); } // setup Rome feed fetcher FeedFetcher feedFetcher = getRomeFetcher(); // fetch the feed log.debug("Fetching feed: " + feedURL); SyndFeed feed; try { feed = feedFetcher.retrieveFeed(new URL(feedURL)); } catch (Exception ex) { throw new FetcherException("Error fetching subscription - " + feedURL, ex); } log.debug("Feed pulled, extracting data into Subscription"); // build planet subscription from fetched feed Subscription newSub = new Subscription(); newSub.setFeedURL(feedURL); newSub.setSiteURL(feed.getLink()); newSub.setTitle(feed.getTitle()); newSub.setAuthor(feed.getAuthor()); newSub.setLastUpdated(feed.getPublishedDate()); // normalize any data that couldn't be properly extracted if (newSub.getSiteURL() == null) { // set the site url to the feed url then newSub.setSiteURL(newSub.getFeedURL()); } if (newSub.getAuthor() == null) { // set the author to the title newSub.setAuthor(newSub.getTitle()); } if (newSub.getLastUpdated() == null) { // no update time specified in feed, so try consulting feed info cache FeedFetcherCache feedCache = getRomeFetcherCache(); try { SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL())); if (feedInfo.getLastModified() != null) { long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue(); if (lastUpdatedLong != 0) { newSub.setLastUpdated(new Date(lastUpdatedLong)); } } } catch (MalformedURLException ex) { // should never happen since we check this above } } // check if feed is unchanged and bail now if so if (lastModified != null && newSub.getLastUpdated() != null && !newSub.getLastUpdated().after(lastModified)) { return null; } if (log.isDebugEnabled()) { log.debug("Subscription is: " + newSub.toString()); } // some kludge to deal with feeds w/ no entry dates // we assign arbitrary dates chronologically by entry starting either // from the current time or the last update time of the subscription Calendar cal = Calendar.getInstance(); if (newSub.getLastUpdated() != null) { cal.setTime(newSub.getLastUpdated()); } else { cal.setTime(new Date()); cal.add(Calendar.DATE, -1); } // add entries List<SyndEntry> feedEntries = feed.getEntries(); for (SyndEntry feedEntry : feedEntries) { SubscriptionEntry newEntry = buildEntry(feedEntry); // some kludge to handle feeds with no entry dates if (newEntry.getPubTime() == null) { log.debug("No published date, assigning fake date for " + feedURL); newEntry.setPubTime(new Timestamp(cal.getTimeInMillis())); cal.add(Calendar.DATE, -1); } if (newEntry != null) { newSub.addEntry(newEntry); } } log.debug(feedEntries.size() + " entries included"); return newSub; }
public void testEntryCRUD() throws Exception { PlanetManager mgr = WebloggerFactory.getWeblogger().getPlanetManager(); Subscription sub = mgr.getSubscriptionById(testSub.getId()); SubscriptionEntry testEntry = new SubscriptionEntry(); testEntry.setPermalink("entryBasics"); testEntry.setTitle("entryBasics"); testEntry.setPubTime(new java.sql.Timestamp(System.currentTimeMillis())); testEntry.setSubscription(sub); // add mgr.saveEntry(testEntry); TestUtils.endSession(true); // verify SubscriptionEntry entry = null; entry = mgr.getEntryById(testEntry.getId()); assertNotNull(entry); assertEquals("entryBasics", entry.getPermalink()); // modify entry.setTitle("foo"); mgr.saveEntry(entry); TestUtils.endSession(true); // verify entry = null; entry = mgr.getEntryById(testEntry.getId()); assertNotNull(entry); assertEquals("foo", entry.getTitle()); // remove mgr.deleteEntry(entry); TestUtils.endSession(true); // verify entry = null; entry = mgr.getEntryById(testEntry.getId()); assertNull(entry); }
// build a SubscriptionEntry from Rome SyndEntry and SyndFeed private SubscriptionEntry buildEntry(SyndEntry romeEntry) { // if we don't have a permalink then we can't continue if (romeEntry.getLink() == null) { return null; } SubscriptionEntry newEntry = new SubscriptionEntry(); newEntry.setTitle(romeEntry.getTitle()); newEntry.setPermalink(romeEntry.getLink()); // Play some games to get the author DCModule entrydc = (DCModule) romeEntry.getModule(DCModule.URI); if (romeEntry.getAuthor() != null) { newEntry.setAuthor(romeEntry.getAuthor()); } else { newEntry.setAuthor(entrydc.getCreator()); // use <dc:creator> } // Play some games to get the updated date if (romeEntry.getUpdatedDate() != null) { newEntry.setUpdateTime(new Timestamp(romeEntry.getUpdatedDate().getTime())); } // TODO: should we set a default update time here? // And more games getting publish date if (romeEntry.getPublishedDate() != null) { newEntry.setPubTime(new Timestamp(romeEntry.getPublishedDate().getTime())); // use <pubDate> } else if (entrydc != null && entrydc.getDate() != null) { newEntry.setPubTime(new Timestamp(entrydc.getDate().getTime())); // use <dc:date> } else { newEntry.setPubTime(newEntry.getUpdateTime()); } // get content and unescape if it is 'text/plain' if (romeEntry.getContents().size() > 0) { SyndContent content = (SyndContent) romeEntry.getContents().get(0); if (content != null && content.getType().equals("text/plain")) { newEntry.setText(StringEscapeUtils.unescapeHtml(content.getValue())); } else if (content != null) { newEntry.setText(content.getValue()); } } // no content, try summary if (newEntry.getText() == null || newEntry.getText().trim().length() == 0) { if (romeEntry.getDescription() != null) { newEntry.setText(romeEntry.getDescription().getValue()); } } // copy categories if (romeEntry.getCategories().size() > 0) { List list = new ArrayList(); Iterator cats = romeEntry.getCategories().iterator(); while (cats.hasNext()) { SyndCategory cat = (SyndCategory) cats.next(); list.add(cat.getName()); } newEntry.setCategoriesString(list); } return newEntry; }