/**
 * Fetches the Reuters business-news feed and prints the feed's title followed by
 * the title of each entry, one per line.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the feed cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
    URL url = new URL("http://feeds.reuters.com/reuters/businessNews");
    XmlReader reader = null;
    try {
        reader = new XmlReader(url);
        SyndFeed feed = new SyndFeedInput().build(reader);

        // The label announces the title, so print the title (not the author).
        System.out.println("Feed Title: " + feed.getTitle());

        for (@SuppressWarnings("unchecked")
                Iterator<SyndEntry> i = feed.getEntries().iterator(); i.hasNext(); ) {
            SyndEntry entry = i.next();
            System.out.println(entry.getTitle());
        }
    } finally {
        // Always release the underlying stream, even when parsing fails.
        if (reader != null) {
            reader.close();
        }
    }
}
/** * Generate an ebook from an RSS DOM Document. * * @param url The URL from where the Document was fetched (used only to set the author metadata) * @param doc The DOM Document of the feed. * @return An ebook. * @throws IllegalArgumentException * @throws FeedException * @throws IOException */ private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords) throws IllegalArgumentException, FeedException, IOException { Book book = new Book(); // start parsing our feed and have the above onItem methods called SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(doc); System.out.println(feed); // Set the title book.getMetadata().addTitle(feed.getTitle()); // Add an Author String author = feed.getAuthor(); if (author == null || "".equals(author.trim())) { author = url.getHost(); } book.getMetadata().addAuthor(new Author(author)); if (feed.getPublishedDate() != null) { book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate())); } if (feed.getDescription() != null) { book.getMetadata().addDescription(feed.getDescription()); } if (feed.getCopyright() != null) { book.getMetadata().getRights().add(feed.getCopyright()); } // Set cover image - This has never worked. // if (feed.getImage() != null) { // System.out.println("There is an image for the feed"); // Promise<HttpResponse> futureImgResponse = // WS.url(feed.getImage().getUrl()).getAsync(); // HttpResponse imgResponse = await(futureImgResponse); // System.out.println("Content-type: " + imgResponse.getContentType()); // if (imgResponse.getContentType().startsWith("image/")) { // String extension = // imgResponse.getContentType().substring("image/".length()); // InputStream imageStream = imgResponse.getStream(); // book.getMetadata().setCoverImage(new Resource(imageStream, "cover." 
+ // extension)); // System.out.println("Using default cover"); // imageStream = // VirtualFile.fromRelativePath("assets/cover.png").inputstream(); // if (imageStream != null) { // System.out.println("Using default cover"); // book.getMetadata().setCoverImage(new Resource(imageStream, // "cover.png")); // } else { // System.out.println("Could not load default cover"); // } // } // } int entryNumber = 0; List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { if (matchesKeyword(entry, keywords)) { StringBuilder title = new StringBuilder(100); if (entry.getTitle() != null) { title.append(entry.getTitle()); } if (entry.getAuthor() != null) { title.append(" - ").append(entry.getAuthor()); } StringBuilder content = new StringBuilder(); // Add title inside text content.append("<h2>").append(title).append("</h2>"); if (entry.getDescription() != null) { SyndContent syndContent = (SyndContent) entry.getDescription(); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } content.append("<hr/>"); } if (entry.getContents().size() > 0) { SyndContent syndContent = (SyndContent) entry.getContents().get(0); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } } String strContent = clean(content.toString()); // Add Chapter try { entryNumber++; book.addSection( title.toString(), new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml")); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return book; }
/** @inheritDoc */ public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException { if (feedURL == null) { throw new IllegalArgumentException("feed url cannot be null"); } // setup Rome feed fetcher FeedFetcher feedFetcher = getRomeFetcher(); // fetch the feed log.debug("Fetching feed: " + feedURL); SyndFeed feed; try { feed = feedFetcher.retrieveFeed(new URL(feedURL)); } catch (Exception ex) { throw new FetcherException("Error fetching subscription - " + feedURL, ex); } log.debug("Feed pulled, extracting data into Subscription"); // build planet subscription from fetched feed Subscription newSub = new Subscription(); newSub.setFeedURL(feedURL); newSub.setSiteURL(feed.getLink()); newSub.setTitle(feed.getTitle()); newSub.setAuthor(feed.getAuthor()); newSub.setLastUpdated(feed.getPublishedDate()); // normalize any data that couldn't be properly extracted if (newSub.getSiteURL() == null) { // set the site url to the feed url then newSub.setSiteURL(newSub.getFeedURL()); } if (newSub.getAuthor() == null) { // set the author to the title newSub.setAuthor(newSub.getTitle()); } if (newSub.getLastUpdated() == null) { // no update time specified in feed, so try consulting feed info cache FeedFetcherCache feedCache = getRomeFetcherCache(); try { SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL())); if (feedInfo.getLastModified() != null) { long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue(); if (lastUpdatedLong != 0) { newSub.setLastUpdated(new Date(lastUpdatedLong)); } } } catch (MalformedURLException ex) { // should never happen since we check this above } } // check if feed is unchanged and bail now if so if (lastModified != null && newSub.getLastUpdated() != null && !newSub.getLastUpdated().after(lastModified)) { return null; } if (log.isDebugEnabled()) { log.debug("Subscription is: " + newSub.toString()); } // some kludge to deal with feeds w/ no entry dates // we assign arbitrary dates 
chronologically by entry starting either // from the current time or the last update time of the subscription Calendar cal = Calendar.getInstance(); if (newSub.getLastUpdated() != null) { cal.setTime(newSub.getLastUpdated()); } else { cal.setTime(new Date()); cal.add(Calendar.DATE, -1); } // add entries List<SyndEntry> feedEntries = feed.getEntries(); for (SyndEntry feedEntry : feedEntries) { SubscriptionEntry newEntry = buildEntry(feedEntry); // some kludge to handle feeds with no entry dates if (newEntry.getPubTime() == null) { log.debug("No published date, assigning fake date for " + feedURL); newEntry.setPubTime(new Timestamp(cal.getTimeInMillis())); cal.add(Calendar.DATE, -1); } if (newEntry != null) { newSub.addEntry(newEntry); } } log.debug(feedEntries.size() + " entries included"); return newSub; }