/**
 * Parses the bundled Atom 1.0 sample document and verifies the feed title,
 * the feed link and the number of entries that were read.
 *
 * @throws FeedException if the sample document cannot be parsed
 */
public void testReadAtom1() throws FeedException {
    InputSource atomSource =
            new InputSource(getClass().getResourceAsStream("/be/hikage/xml/rome/atom_1.0.xml"));
    SyndFeedInput feedReader = new SyndFeedInput();

    SyndFeed parsedFeed = feedReader.build(atomSource);

    assertEquals("RSS Veille Techno", parsedFeed.getTitle());
    assertEquals("http://svn.cyg.be/", parsedFeed.getLink());
    assertEquals(1, parsedFeed.getEntries().size());
}
/**
 * Asserts that the feed link and the link of every entry in the feed start
 * with the base Nexus URL, and that no entry link is null.
 *
 * @param feed the feed whose links are checked
 */
@SuppressWarnings("unchecked")
private void validateLinksInFeeds(SyndFeed feed) {
    String feedLink = feed.getLink();
    Assert.assertTrue("Feed link is wrong", feedLink.startsWith(this.getBaseNexusUrl()));

    List<SyndEntry> items = feed.getEntries();
    for (SyndEntry item : items) {
        String itemLink = item.getLink();

        Assert.assertNotNull("Feed item link is empty.", itemLink);

        String wrongLinkMessage = "Feed item link is wrong, is: " + itemLink;
        Assert.assertTrue(wrongLinkMessage, itemLink.startsWith(this.getBaseNexusUrl()));
    }
}
/**
 * Builds a {@link Channel} of the given type from a {@link SyndFeed}.
 *
 * <p>Modules are cloned; encoding, title, link and description are copied
 * as-is. The image and the entries are converted only when the source feed
 * provides them.
 *
 * @param type the feed type handed to the {@link Channel} constructor
 * @param syndFeed the feed to copy data from
 * @return the populated channel
 */
protected WireFeed createRealFeed(String type, SyndFeed syndFeed) {
    final Channel rssChannel = new Channel(type);

    // Plain attributes are copied one to one.
    rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));
    rssChannel.setEncoding(syndFeed.getEncoding());
    rssChannel.setTitle(syndFeed.getTitle());
    rssChannel.setLink(syndFeed.getLink());
    rssChannel.setDescription(syndFeed.getDescription());

    // Optional parts are converted only when present.
    final SyndImage feedImage = syndFeed.getImage();
    if (feedImage != null) {
        rssChannel.setImage(createRSSImage(feedImage));
    }

    final List feedEntries = syndFeed.getEntries();
    if (feedEntries != null) {
        rssChannel.setItems(createRSSItems(feedEntries));
    }

    return rssChannel;
}
// @Transactional // @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void importEntry( final SyndFeed feed, final SyndEntry entry, final Set<KiWiUriResource> types, final Set<ContentItem> tags, User user, final Collection<ContentItem> output) { if (user == null && entry.getAuthor() != null && !"".equals(entry.getAuthor())) { if (userService.userExists(entry.getAuthor())) { user = userService.getUserByLogin(entry.getAuthor()); } else { // user = userService.createUser(entry.getAuthor()); /* In my opinion, it is not ok to create a user entity * without asking the person if he/she wants to be * created and persisted in the KiWi dataset. * Thus I'm changing the user to 'anonymous', * if he/she is'nt registered with the same nick that * is given in the rss entry. */ user = userService.getUserByLogin("anonymous"); kiwiEntityManager.persist(user); } } log.debug("feed entry: #0 (#1)", entry.getTitle(), entry.getUri()); // create a new content item and copy all data from the feed entry ContentItem item; if (entry.getLink() != null) { item = contentItemService.createExternContentItem(entry.getLink()); } else if (entry.getUri() != null) { try { // try parsing URI; if it is not valid, URI uri = new URI(entry.getUri()); item = contentItemService.createExternContentItem(entry.getUri()); } catch (URISyntaxException e) { item = contentItemService.createExternContentItem(feed.getLink() + "#" + entry.getUri()); } } else { item = contentItemService.createContentItem(); } contentItemService.updateTitle(item, entry.getTitle()); if (feed.getLanguage() != null) item.setLanguage(new Locale(feed.getLanguage())); if (entry.getPublishedDate() != null) { item.setCreated(entry.getPublishedDate()); item.setModified(entry.getPublishedDate()); } if (entry.getUpdatedDate() != null) { if (entry.getPublishedDate() == null) { item.setCreated(entry.getUpdatedDate()); } item.setModified(entry.getUpdatedDate()); } item.setAuthor(user); // read feed content and set it as item's text 
content List<SyndContent> contents = entry.getContents(); if (contents.size() == 1) { log.debug("using RSS content section provided by item"); contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>"); } else if (contents.size() > 1) { log.warn("feed entry contained more than one content section"); contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>"); } else if (contents.size() == 0) { if (entry.getDescription() != null && entry.getDescription().getValue() != null) { log.debug("using RSS description as no content section was available"); contentItemService.updateTextContentItem( item, "<p>" + entry.getDescription().getValue() + "</p>"); } } // save before tagging contentItemService.saveContentItem(item); // read feed categories and use them as tags for (SyndCategory cat : (List<SyndCategory>) entry.getCategories()) { ContentItem _cat; if (!taggingService.hasTag(item, cat.getName())) { if (cat.getTaxonomyUri() != null) { _cat = contentItemService.getContentItemByUri(cat.getTaxonomyUri()); if (_cat == null) { _cat = contentItemService.createExternContentItem(cat.getTaxonomyUri()); contentItemService.updateTitle(_cat, cat.getName()); _cat.setAuthor(user); contentItemService.saveContentItem(_cat); } taggingService.createTagging(cat.getName(), item, _cat, user); } else { _cat = contentItemService.getContentItemByTitle(cat.getName()); if (_cat == null) { _cat = contentItemService.createContentItem(); contentItemService.updateTitle(_cat, cat.getName()); _cat.setAuthor(user); contentItemService.saveContentItem(_cat); } taggingService.createTagging(cat.getName(), item, _cat, user); } } } // scan for Twitter-style hash tags in title (e.g. 
#kiwiknows, see KIWI-622) Matcher m_hashtag = p_hashtag.matcher(entry.getTitle()); while (m_hashtag.find()) { String tag_label = m_hashtag.group(1); if (!taggingService.hasTag(item, tag_label)) { ContentItem tag = contentItemService.getContentItemByTitle(tag_label); if (tag == null) { tag = contentItemService.createContentItem(); contentItemService.updateTitle(tag, tag_label); tag.setAuthor(user); contentItemService.saveContentItem(tag); } taggingService.createTagging(tag_label, item, tag, user); } } // check for geo information GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry); if (geoRSSModule != null && geoRSSModule.getPosition() != null) { POI poi = kiwiEntityManager.createFacade(item, POI.class); poi.setLatitude(geoRSSModule.getPosition().getLatitude()); poi.setLongitude(geoRSSModule.getPosition().getLongitude()); kiwiEntityManager.persist(poi); } // check for media information MediaEntryModule mediaModule = (MediaEntryModule) entry.getModule(MediaModule.URI); if (mediaModule != null) { MediaContent[] media = mediaModule.getMediaContents(); if (media.length > 0) { MediaContent m = media[0]; if (m.getReference() instanceof UrlReference) { URL url = ((UrlReference) m.getReference()).getUrl(); String type = m.getType(); String name = url.getFile(); if (name.lastIndexOf("/") > 0) { name = name.substring(name.lastIndexOf("/") + 1); } log.debug("importing media data from URL #0", url.toString()); try { InputStream is = url.openStream(); ByteArrayOutputStream bout = new ByteArrayOutputStream(); int c; while ((c = is.read()) != -1) { bout.write(c); } byte[] data = bout.toByteArray(); contentItemService.updateMediaContentItem(item, data, type, name); is.close(); bout.close(); } catch (IOException ex) { log.error("error importing media content from RSS stream"); } } else { log.info("RSS importer can only import media with URL references"); } } else { log.warn("media module found without content"); } Category[] cats = mediaModule.getMetadata().getCategories(); for 
(Category cat : cats) { ContentItem _cat; String label = cat.getLabel() != null ? cat.getLabel() : cat.getValue(); if (!taggingService.hasTag(item, label)) { if (cat.getScheme() != null) { _cat = contentItemService.getContentItemByUri(cat.getScheme() + cat.getValue()); if (_cat == null) { _cat = contentItemService.createExternContentItem(cat.getScheme() + cat.getValue()); contentItemService.updateTitle(_cat, label); _cat.setAuthor(user); contentItemService.saveContentItem(_cat); } taggingService.createTagging(label, item, _cat, user); } else { _cat = contentItemService.getContentItemByTitle(label); if (_cat == null) { _cat = contentItemService.createContentItem(); contentItemService.updateTitle(_cat, label); _cat.setAuthor(user); contentItemService.saveContentItem(_cat); } taggingService.createTagging(label, item, _cat, user); } } } } // add parameter categories as tags for (ContentItem tag : tags) { if (!taggingService.hasTag(item, tag.getTitle())) { taggingService.createTagging(tag.getTitle(), item, tag, user); } } // add parameter types as types for (KiWiUriResource type : types) { item.addType(type); } // add kiwi:FeedPost type item.addType(tripleStore.createUriResource(Constants.NS_KIWI_CORE + "FeedPost")); /* the flush is necessary, because CIs or tags will * otherwise be created multiple times when they * appear more than once in one RSS feed */ entityManager.flush(); log.debug("imported content item '#0' with URI '#1'", item.getTitle(), item.getResource()); }
/**
 * Returns the link of the feed that the feed factory creates for {@code rss}.
 *
 * @param rss the value handed to the feed factory
 * @return the link of the created feed
 */
private String getBlogHomepageFromRss(String rss) {
    return syndFeedFactory.createFor(rss).getLink();
}
/** @inheritDoc */ public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException { if (feedURL == null) { throw new IllegalArgumentException("feed url cannot be null"); } // setup Rome feed fetcher FeedFetcher feedFetcher = getRomeFetcher(); // fetch the feed log.debug("Fetching feed: " + feedURL); SyndFeed feed; try { feed = feedFetcher.retrieveFeed(new URL(feedURL)); } catch (Exception ex) { throw new FetcherException("Error fetching subscription - " + feedURL, ex); } log.debug("Feed pulled, extracting data into Subscription"); // build planet subscription from fetched feed Subscription newSub = new Subscription(); newSub.setFeedURL(feedURL); newSub.setSiteURL(feed.getLink()); newSub.setTitle(feed.getTitle()); newSub.setAuthor(feed.getAuthor()); newSub.setLastUpdated(feed.getPublishedDate()); // normalize any data that couldn't be properly extracted if (newSub.getSiteURL() == null) { // set the site url to the feed url then newSub.setSiteURL(newSub.getFeedURL()); } if (newSub.getAuthor() == null) { // set the author to the title newSub.setAuthor(newSub.getTitle()); } if (newSub.getLastUpdated() == null) { // no update time specified in feed, so try consulting feed info cache FeedFetcherCache feedCache = getRomeFetcherCache(); try { SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL())); if (feedInfo.getLastModified() != null) { long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue(); if (lastUpdatedLong != 0) { newSub.setLastUpdated(new Date(lastUpdatedLong)); } } } catch (MalformedURLException ex) { // should never happen since we check this above } } // check if feed is unchanged and bail now if so if (lastModified != null && newSub.getLastUpdated() != null && !newSub.getLastUpdated().after(lastModified)) { return null; } if (log.isDebugEnabled()) { log.debug("Subscription is: " + newSub.toString()); } // some kludge to deal with feeds w/ no entry dates // we assign arbitrary dates 
chronologically by entry starting either // from the current time or the last update time of the subscription Calendar cal = Calendar.getInstance(); if (newSub.getLastUpdated() != null) { cal.setTime(newSub.getLastUpdated()); } else { cal.setTime(new Date()); cal.add(Calendar.DATE, -1); } // add entries List<SyndEntry> feedEntries = feed.getEntries(); for (SyndEntry feedEntry : feedEntries) { SubscriptionEntry newEntry = buildEntry(feedEntry); // some kludge to handle feeds with no entry dates if (newEntry.getPubTime() == null) { log.debug("No published date, assigning fake date for " + feedURL); newEntry.setPubTime(new Timestamp(cal.getTimeInMillis())); cal.add(Calendar.DATE, -1); } if (newEntry != null) { newSub.addEntry(newEntry); } } log.debug(feedEntries.size() + " entries included"); return newSub; }