Ejemplo n.º 1
0
  /**
   * Command-line entry point: retrieves the feed at the given URL twice through an
   * {@code HttpURLFeedFetcher} backed by the shared {@code HashMapFeedInfoCache}, with a
   * {@code FetcherEventListenerImpl} registered on the fetcher.
   *
   * <p>Progress messages go to {@code System.err}. If the argument count is wrong, or any exception
   * is thrown while fetching, the error (if any) and a short usage text are written to
   * {@code System.out}.
   *
   * @param args must contain exactly one element: the URL of the feed to read
   */
  public static void main(String[] args) {
    boolean ok = false;
    if (args.length == 1) {
      try {
        URL feedUrl = new URL(args[0]);
        FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
        FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache);

        FetcherEventListenerImpl listener = new FetcherEventListenerImpl();

        fetcher.addFetcherEventListener(listener);

        System.err.println("Retrieving feed " + feedUrl);
        // Retrieve the feed.
        // We will get a Feed Polled Event and then a
        // Feed Retrieved event (assuming the feed is valid)
        SyndFeed feed = fetcher.retrieveFeed(feedUrl);

        System.err.println(feedUrl + " retrieved");
        System.err.println(
            feedUrl
                + " has a title: "
                + feed.getTitle()
                + " and contains "
                + feed.getEntries().size()
                + " entries.");
        // We will now retrieve the feed again. If the feed is unmodified
        // and the server supports conditional gets, we will get a "Feed
        // Unchanged" event after the Feed Polled event.
        System.err.println("Polling " + feedUrl + " again to test conditional get support.");
        // The returned feed is intentionally discarded: only the events fired on the
        // listener matter for this second poll.
        fetcher.retrieveFeed(feedUrl);
        System.err.println(
            "If a \"Feed Unchanged\" event fired then the server supports conditional gets.");

        ok = true;
      } catch (Exception ex) {
        System.out.println("ERROR: " + ex.getMessage());
        ex.printStackTrace();
      }
    }

    // Reached with ok == false both for a wrong argument count and for a failed fetch.
    if (!ok) {
      System.out.println();
      System.out.println("FeedReader reads and prints any RSS/Atom feed type.");
      System.out.println("The first parameter must be the URL of the feed to read.");
      System.out.println();
    }
  }
Ejemplo n.º 2
0
  // Build a ROME feed fetcher: cache-backed when getRomeFetcherCache() supplies a cache,
  // otherwise a plain uncached fetcher.
  private FeedFetcher getRomeFetcher() {
    final FeedFetcherCache cache = getRomeFetcherCache();
    final FeedFetcher fetcher =
        (cache == null) ? new HttpURLFeedFetcher() : new HttpURLFeedFetcher(cache);

    // fixed options applied to every fetcher handed out
    fetcher.setUsingDeltaEncoding(false);
    fetcher.setUserAgent("RollerPlanetAggregator");
    return fetcher;
  }
Ejemplo n.º 3
0
  /**
   * Gets the syndicated feed using ROME, making up to two attempts.
   *
   * <p>Sources that carry authentication are fetched with an {@code HttpClientFeedFetcher};
   * all others use an {@code HttpURLFeedFetcher} backed by the shared
   * {@code HashMapFeedInfoCache}, with the URL passed through {@code cleanUrlStart} first. If the
   * source's RSS config specifies a user agent, it is applied to the fetcher.
   *
   * <p>Each attempt runs under the {@code FeedHarvester.class} lock (workaround for ROME
   * concurrency issues). The 10 second pause between attempts happens outside that lock so other
   * threads are not blocked while this one is merely waiting.
   *
   * @param source the source being harvested; supplies the default URL, authentication and RSS
   *     config
   * @param url the feed URL to fetch, or {@code null} to fetch the source's own (primary) URL
   * @return the retrieved feed, or {@code null} if both attempts failed
   */
  private SyndFeed getFeed(SourcePojo source, String url) {
    // Record whether the primary feed was requested BEFORE substituting the default URL.
    // The original code tested the already-reassigned variable, so the error handler below
    // was effectively never invoked.
    final boolean isPrimaryFeed = (null == url);
    final String feedUrl = isPrimaryFeed ? source.getUrl() : url;

    final int maxAttempts = 2; // Will have 2 goes in case of failure
    final long retryDelayMs = 10000;

    for (int attempt = 1; attempt <= maxAttempts; ++attempt) {
      final boolean lastAttempt = (attempt == maxAttempts);

      synchronized (
          FeedHarvester
              .class) { // (workaround for ROME concurrency issues:
                        // http://www.jdom.org/pipermail/jdom-interest/2008-December/016252.html)
        // Check to see if the feed requires authentication
        final boolean requiresAuth = (source.getAuthentication() != null);
        try {
          FeedFetcher feedFetcher;
          if (requiresAuth) {
            feedFetcher =
                new HttpClientFeedFetcher(null, authenticateFeed(source.getAuthentication()));
          } else {
            FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
            feedFetcher = new HttpURLFeedFetcher(feedInfoCache);
          }
          if ((null != source.getRssConfig()) && (null != source.getRssConfig().getUserAgent())) {
            feedFetcher.setUserAgent(source.getRssConfig().getUserAgent());
          }
          // Only the unauthenticated path cleans the URL, as in the original code.
          return feedFetcher.retrieveFeed(
              new URL(requiresAuth ? feedUrl : this.cleanUrlStart(feedUrl)));
        } catch (Exception e) {
          // On the first failure just try again; on the last one report the error, but
          // only for the primary RSS (makes life simpler).
          if (lastAttempt && isPrimaryFeed) {
            handleRssError(e, source);
          }
        }
      }

      // If still here, must have errored so sleep before trying again. No point sleeping
      // after the final attempt: nothing follows it.
      if (!lastAttempt) {
        try {
          Thread.sleep(retryDelayMs);
        } catch (InterruptedException ignored) {
          // Preserve the interrupt for callers; the final attempt still runs immediately,
          // as it did before.
          Thread.currentThread().interrupt();
        }
      }
    } // (end get 2 goes)
    return null;
  }
Ejemplo n.º 4
0
  /**
   * {@inheritDoc}
   *
   * <p>Retrieves the feed with the ROME fetcher from {@code getRomeFetcher()} and copies its
   * link, title, author and published date into a new {@code Subscription}. Missing values are
   * normalized: the site URL falls back to the feed URL, the author falls back to the title, and
   * the last-updated time falls back to the last-modified value held in the fetcher cache (when a
   * cache and a usable value are available). Entries without a publication time are given
   * synthetic times, one day apart going backwards.
   *
   * @param feedURL URL of the feed to fetch; must not be {@code null}
   * @param lastModified if non-null, the time of the caller's last known update; used to detect
   *     an unchanged feed
   * @return the populated subscription, or {@code null} if the feed's last-updated time is known
   *     and is not after {@code lastModified}
   * @throws FetcherException if the feed cannot be retrieved (the cause is attached)
   * @throws IllegalArgumentException if {@code feedURL} is {@code null}
   */
  public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException {

    if (feedURL == null) {
      throw new IllegalArgumentException("feed url cannot be null");
    }

    // setup Rome feed fetcher
    FeedFetcher feedFetcher = getRomeFetcher();

    // fetch the feed
    log.debug("Fetching feed: " + feedURL);
    SyndFeed feed;
    try {
      feed = feedFetcher.retrieveFeed(new URL(feedURL));
    } catch (Exception ex) {
      throw new FetcherException("Error fetching subscription - " + feedURL, ex);
    }

    log.debug("Feed pulled, extracting data into Subscription");

    // build planet subscription from fetched feed
    Subscription newSub = new Subscription();
    newSub.setFeedURL(feedURL);
    newSub.setSiteURL(feed.getLink());
    newSub.setTitle(feed.getTitle());
    newSub.setAuthor(feed.getAuthor());
    newSub.setLastUpdated(feed.getPublishedDate());

    // normalize any data that couldn't be properly extracted
    if (newSub.getSiteURL() == null) {
      // set the site url to the feed url then
      newSub.setSiteURL(newSub.getFeedURL());
    }
    if (newSub.getAuthor() == null) {
      // set the author to the title
      newSub.setAuthor(newSub.getTitle());
    }
    if (newSub.getLastUpdated() == null) {
      // no update time specified in feed, so try consulting feed info cache
      FeedFetcherCache feedCache = getRomeFetcherCache();
      // The cache is optional (getRomeFetcher() copes with a null cache), and a cache may
      // simply have no record for this URL, so guard both before dereferencing.
      if (feedCache != null) {
        try {
          SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL()));
          Object cachedLastModified = (feedInfo != null) ? feedInfo.getLastModified() : null;
          // Only a Long timestamp is usable here; anything else is ignored rather than
          // failing with a ClassCastException.
          if (cachedLastModified instanceof Long) {
            long lastUpdatedLong = ((Long) cachedLastModified).longValue();
            if (lastUpdatedLong != 0) {
              newSub.setLastUpdated(new Date(lastUpdatedLong));
            }
          }
        } catch (MalformedURLException ignored) {
          // should never happen since the same URL was already parsed for the fetch above
        }
      }
    }

    // check if feed is unchanged and bail now if so
    if (lastModified != null
        && newSub.getLastUpdated() != null
        && !newSub.getLastUpdated().after(lastModified)) {
      return null;
    }

    if (log.isDebugEnabled()) {
      log.debug("Subscription is: " + newSub.toString());
    }

    // some kludge to deal with feeds w/ no entry dates
    // we assign arbitrary dates chronologically by entry starting either
    // from the last update time of the subscription or, failing that,
    // from one day before the current time
    Calendar cal = Calendar.getInstance();
    if (newSub.getLastUpdated() != null) {
      cal.setTime(newSub.getLastUpdated());
    } else {
      cal.setTime(new Date());
      cal.add(Calendar.DATE, -1);
    }

    // add entries
    List<SyndEntry> feedEntries = feed.getEntries();
    int includedCount = 0;
    for (SyndEntry feedEntry : feedEntries) {
      SubscriptionEntry newEntry = buildEntry(feedEntry);
      // Skip entries that could not be built BEFORE touching them; the original checked
      // for null only after calling getPubTime(), which would already have thrown.
      if (newEntry == null) {
        continue;
      }

      // some kludge to handle feeds with no entry dates
      if (newEntry.getPubTime() == null) {
        log.debug("No published date, assigning fake date for " + feedURL);
        newEntry.setPubTime(new Timestamp(cal.getTimeInMillis()));
        cal.add(Calendar.DATE, -1);
      }

      newSub.addEntry(newEntry);
      includedCount++;
    }

    log.debug(includedCount + " entries included");

    return newSub;
  }