public static void main(String[] args) throws Exception {

    URL url = new URL("http://feeds.reuters.com/reuters/businessNews");
    XmlReader reader = null;

    try {

      reader = new XmlReader(url);
      SyndFeed feed = new SyndFeedInput().build(reader);
      System.out.println("Feed Title: " + feed.getAuthor());

      for (@SuppressWarnings("unchecked") Iterator<SyndEntry> i = feed.getEntries().iterator();
          i.hasNext(); ) {
        SyndEntry entry = i.next();
        System.out.println(entry.getTitle());
      }
    } finally {
      if (reader != null) reader.close();
    }
  }
Beispiel #2
0
  /**
   * Generate an ebook from an RSS DOM Document.
   *
   * @param url The URL from where the Document was fetched (used only to set the author metadata)
   * @param doc The DOM Document of the feed.
   * @return An ebook.
   * @throws IllegalArgumentException
   * @throws FeedException
   * @throws IOException
   */
  private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords)
      throws IllegalArgumentException, FeedException, IOException {
    Book book = new Book();
    // start parsing our feed and have the above onItem methods called
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed = input.build(doc);

    System.out.println(feed);

    // Set the title
    book.getMetadata().addTitle(feed.getTitle());

    // Add an Author
    String author = feed.getAuthor();
    if (author == null || "".equals(author.trim())) {
      author = url.getHost();
    }
    book.getMetadata().addAuthor(new Author(author));

    if (feed.getPublishedDate() != null) {
      book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate()));
    }

    if (feed.getDescription() != null) {
      book.getMetadata().addDescription(feed.getDescription());
    }

    if (feed.getCopyright() != null) {
      book.getMetadata().getRights().add(feed.getCopyright());
    }

    // Set cover image - This has never worked.
    // if (feed.getImage() != null) {
    // System.out.println("There is an image for the feed");

    // Promise<HttpResponse> futureImgResponse =
    // WS.url(feed.getImage().getUrl()).getAsync();
    // HttpResponse imgResponse = await(futureImgResponse);
    // System.out.println("Content-type: " + imgResponse.getContentType());
    // if (imgResponse.getContentType().startsWith("image/")) {
    // String extension =
    // imgResponse.getContentType().substring("image/".length());
    // InputStream imageStream = imgResponse.getStream();
    // book.getMetadata().setCoverImage(new Resource(imageStream, "cover." +
    // extension));

    // System.out.println("Using default cover");
    // imageStream =
    // VirtualFile.fromRelativePath("assets/cover.png").inputstream();
    // if (imageStream != null) {
    // System.out.println("Using default cover");
    // book.getMetadata().setCoverImage(new Resource(imageStream,
    // "cover.png"));
    // } else {
    // System.out.println("Could not load default cover");
    // }

    // }
    // }

    int entryNumber = 0;
    List<SyndEntry> entries = feed.getEntries();

    for (SyndEntry entry : entries) {
      if (matchesKeyword(entry, keywords)) {

        StringBuilder title = new StringBuilder(100);
        if (entry.getTitle() != null) {
          title.append(entry.getTitle());
        }
        if (entry.getAuthor() != null) {
          title.append(" - ").append(entry.getAuthor());
        }
        StringBuilder content = new StringBuilder();

        // Add title inside text
        content.append("<h2>").append(title).append("</h2>");

        if (entry.getDescription() != null) {
          SyndContent syndContent = (SyndContent) entry.getDescription();
          if (!syndContent.getType().contains("html")) {
            content.append("<pre>\n");
          }
          content.append(syndContent.getValue());
          if (!syndContent.getType().contains("html")) {
            content.append("\n</pre>");
          }
          content.append("<hr/>");
        }

        if (entry.getContents().size() > 0) {
          SyndContent syndContent = (SyndContent) entry.getContents().get(0);
          if (!syndContent.getType().contains("html")) {
            content.append("<pre>\n");
          }
          content.append(syndContent.getValue());
          if (!syndContent.getType().contains("html")) {
            content.append("\n</pre>");
          }
        }
        String strContent = clean(content.toString());
        // Add Chapter
        try {
          entryNumber++;
          book.addSection(
              title.toString(),
              new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml"));
        } catch (IOException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }
      }
    }

    return book;
  }
Beispiel #3
0
  /** @inheritDoc */
  public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException {

    if (feedURL == null) {
      throw new IllegalArgumentException("feed url cannot be null");
    }

    // setup Rome feed fetcher
    FeedFetcher feedFetcher = getRomeFetcher();

    // fetch the feed
    log.debug("Fetching feed: " + feedURL);
    SyndFeed feed;
    try {
      feed = feedFetcher.retrieveFeed(new URL(feedURL));
    } catch (Exception ex) {
      throw new FetcherException("Error fetching subscription - " + feedURL, ex);
    }

    log.debug("Feed pulled, extracting data into Subscription");

    // build planet subscription from fetched feed
    Subscription newSub = new Subscription();
    newSub.setFeedURL(feedURL);
    newSub.setSiteURL(feed.getLink());
    newSub.setTitle(feed.getTitle());
    newSub.setAuthor(feed.getAuthor());
    newSub.setLastUpdated(feed.getPublishedDate());

    // normalize any data that couldn't be properly extracted
    if (newSub.getSiteURL() == null) {
      // set the site url to the feed url then
      newSub.setSiteURL(newSub.getFeedURL());
    }
    if (newSub.getAuthor() == null) {
      // set the author to the title
      newSub.setAuthor(newSub.getTitle());
    }
    if (newSub.getLastUpdated() == null) {
      // no update time specified in feed, so try consulting feed info cache
      FeedFetcherCache feedCache = getRomeFetcherCache();
      try {
        SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL()));
        if (feedInfo.getLastModified() != null) {
          long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue();
          if (lastUpdatedLong != 0) {
            newSub.setLastUpdated(new Date(lastUpdatedLong));
          }
        }
      } catch (MalformedURLException ex) {
        // should never happen since we check this above
      }
    }

    // check if feed is unchanged and bail now if so
    if (lastModified != null
        && newSub.getLastUpdated() != null
        && !newSub.getLastUpdated().after(lastModified)) {
      return null;
    }

    if (log.isDebugEnabled()) {
      log.debug("Subscription is: " + newSub.toString());
    }

    // some kludge to deal with feeds w/ no entry dates
    // we assign arbitrary dates chronologically by entry starting either
    // from the current time or the last update time of the subscription
    Calendar cal = Calendar.getInstance();
    if (newSub.getLastUpdated() != null) {
      cal.setTime(newSub.getLastUpdated());
    } else {
      cal.setTime(new Date());
      cal.add(Calendar.DATE, -1);
    }

    // add entries
    List<SyndEntry> feedEntries = feed.getEntries();
    for (SyndEntry feedEntry : feedEntries) {
      SubscriptionEntry newEntry = buildEntry(feedEntry);

      // some kludge to handle feeds with no entry dates
      if (newEntry.getPubTime() == null) {
        log.debug("No published date, assigning fake date for " + feedURL);
        newEntry.setPubTime(new Timestamp(cal.getTimeInMillis()));
        cal.add(Calendar.DATE, -1);
      }

      if (newEntry != null) {
        newSub.addEntry(newEntry);
      }
    }

    log.debug(feedEntries.size() + " entries included");

    return newSub;
  }