// @Transactional public int importData( final SyndFeed feed, Set<KiWiUriResource> types, Set<ContentItem> tags, final User user, final Collection<ContentItem> output) { log.info( "importing entries from #0 feed '#1' found at '#2'", feed.getFeedType(), feed.getTitle(), feed.getUri()); if (types == null) { types = new HashSet<KiWiUriResource>(); } if (tags == null) { tags = new HashSet<ContentItem>(); } final Set<ContentItem> my_tags = tags; final Set<KiWiUriResource> my_types = types; // a hack for importing facebook activity streams: if the type is kiwi:FacebookPost, // turn facebook activity stream mode on; in this mode, we will skip all entries where // the remote author name and local user name are not identical boolean facebookImport = false; String t_facebookPost = Constants.NS_KIWI_CORE + "FacebookPost"; for (KiWiUriResource r : types) { if (r.getUri().equals(t_facebookPost)) { facebookImport = true; break; } } for (final SyndEntry entry : (List<SyndEntry>) feed.getEntries()) { // facebook hack ... (see above) if (facebookImport && !entry.getAuthor().equalsIgnoreCase(user.getFirstName() + " " + user.getLastName())) { log.info("Facebook import: skipping friend post with title", entry.getTitle()); continue; } new RunAsOperation() { @Override public void execute() { importEntry(feed, entry, my_types, my_tags, user, output); } }.addRole("admin").run(); } // entityManager.flush(); log.info("#0 content items have been imported from RSS/Atom feed", feed.getEntries().size()); return feed.getEntries().size(); }
/**
 * Builds a feed from the classpath resource {@code /be/hikage/xml/rome/atom_1.0.xml} and checks
 * its title, link and number of entries.
 *
 * @throws FeedException if the feed cannot be built from the resource
 */
public void testReadAtom1() throws FeedException {
  SyndFeedInput feedInput = new SyndFeedInput();
  InputSource atomSource =
      new InputSource(getClass().getResourceAsStream("/be/hikage/xml/rome/atom_1.0.xml"));

  SyndFeed parsedFeed = feedInput.build(atomSource);

  assertEquals("RSS Veille Techno", parsedFeed.getTitle());
  assertEquals("http://svn.cyg.be/", parsedFeed.getLink());
  assertEquals(1, parsedFeed.getEntries().size());
}
public static void main(String[] args) { boolean ok = false; if (args.length == 1) { try { URL feedUrl = new URL(args[0]); FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache); FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); fetcher.addFetcherEventListener(listener); System.err.println("Retrieving feed " + feedUrl); // Retrieve the feed. // We will get a Feed Polled Event and then a // Feed Retrieved event (assuming the feed is valid) SyndFeed feed = fetcher.retrieveFeed(feedUrl); System.err.println(feedUrl + " retrieved"); System.err.println( feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size() + " entries."); // We will now retrieve the feed again. If the feed is unmodified // and the server supports conditional gets, we will get a "Feed // Unchanged" event after the Feed Polled event System.err.println("Polling " + feedUrl + " again to test conditional get support."); SyndFeed feed2 = fetcher.retrieveFeed(feedUrl); System.err.println( "If a \"Feed Unchanged\" event fired then the server supports conditional gets."); ok = true; } catch (Exception ex) { System.out.println("ERROR: " + ex.getMessage()); ex.printStackTrace(); } } if (!ok) { System.out.println(); System.out.println("FeedReader reads and prints any RSS/Atom feed type."); System.out.println("The first parameter must be the URL of the feed to read."); System.out.println(); } }
/**
 * Creates a {@code Channel} of the given type and fills it from the given {@code SyndFeed}:
 * cloned modules, encoding, title, link and description, plus the image and the items when the
 * feed provides them.
 *
 * @param type the feed type passed to the {@code Channel} constructor
 * @param syndFeed the feed to copy values from
 * @return the populated channel
 */
protected WireFeed createRealFeed(String type, SyndFeed syndFeed) {
  Channel rssChannel = new Channel(type);

  // feed-level values
  rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));
  rssChannel.setEncoding(syndFeed.getEncoding());
  rssChannel.setTitle(syndFeed.getTitle());
  rssChannel.setLink(syndFeed.getLink());
  rssChannel.setDescription(syndFeed.getDescription());

  // optional image
  SyndImage feedImage = syndFeed.getImage();
  if (feedImage != null) {
    rssChannel.setImage(createRSSImage(feedImage));
  }

  // optional entries
  List feedEntries = syndFeed.getEntries();
  if (feedEntries != null) {
    rssChannel.setItems(createRSSItems(feedEntries));
  }

  return rssChannel;
}
/** * Generate an ebook from an RSS DOM Document. * * @param url The URL from where the Document was fetched (used only to set the author metadata) * @param doc The DOM Document of the feed. * @return An ebook. * @throws IllegalArgumentException * @throws FeedException * @throws IOException */ private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords) throws IllegalArgumentException, FeedException, IOException { Book book = new Book(); // start parsing our feed and have the above onItem methods called SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(doc); System.out.println(feed); // Set the title book.getMetadata().addTitle(feed.getTitle()); // Add an Author String author = feed.getAuthor(); if (author == null || "".equals(author.trim())) { author = url.getHost(); } book.getMetadata().addAuthor(new Author(author)); if (feed.getPublishedDate() != null) { book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate())); } if (feed.getDescription() != null) { book.getMetadata().addDescription(feed.getDescription()); } if (feed.getCopyright() != null) { book.getMetadata().getRights().add(feed.getCopyright()); } // Set cover image - This has never worked. // if (feed.getImage() != null) { // System.out.println("There is an image for the feed"); // Promise<HttpResponse> futureImgResponse = // WS.url(feed.getImage().getUrl()).getAsync(); // HttpResponse imgResponse = await(futureImgResponse); // System.out.println("Content-type: " + imgResponse.getContentType()); // if (imgResponse.getContentType().startsWith("image/")) { // String extension = // imgResponse.getContentType().substring("image/".length()); // InputStream imageStream = imgResponse.getStream(); // book.getMetadata().setCoverImage(new Resource(imageStream, "cover." 
+ // extension)); // System.out.println("Using default cover"); // imageStream = // VirtualFile.fromRelativePath("assets/cover.png").inputstream(); // if (imageStream != null) { // System.out.println("Using default cover"); // book.getMetadata().setCoverImage(new Resource(imageStream, // "cover.png")); // } else { // System.out.println("Could not load default cover"); // } // } // } int entryNumber = 0; List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { if (matchesKeyword(entry, keywords)) { StringBuilder title = new StringBuilder(100); if (entry.getTitle() != null) { title.append(entry.getTitle()); } if (entry.getAuthor() != null) { title.append(" - ").append(entry.getAuthor()); } StringBuilder content = new StringBuilder(); // Add title inside text content.append("<h2>").append(title).append("</h2>"); if (entry.getDescription() != null) { SyndContent syndContent = (SyndContent) entry.getDescription(); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } content.append("<hr/>"); } if (entry.getContents().size() > 0) { SyndContent syndContent = (SyndContent) entry.getContents().get(0); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } } String strContent = clean(content.toString()); // Add Chapter try { entryNumber++; book.addSection( title.toString(), new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml")); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return book; }
/** * Fills in the feed and entry-level metadata from DSpace objects. * * @param request request * @param context context * @param dso DSpaceObject * @param items array of objects * @param labels label map */ public void populate( HttpServletRequest request, Context context, DSpaceObject dso, List<? extends DSpaceObject> items, Map<String, String> labels) { String logoURL = null; String objectURL = null; String defaultTitle = null; boolean podcastFeed = false; this.request = request; // dso is null for the whole site, or a search without scope if (dso == null) { defaultTitle = ConfigurationManager.getProperty("dspace.name"); feed.setDescription(localize(labels, MSG_FEED_DESCRIPTION)); objectURL = resolveURL(request, null); logoURL = ConfigurationManager.getProperty("webui.feed.logo.url"); } else { Bitstream logo = null; if (dso.getType() == Constants.COLLECTION) { Collection col = (Collection) dso; defaultTitle = col.getName(); feed.setDescription(collectionService.getMetadata(col, "short_description")); logo = col.getLogo(); String cols = ConfigurationManager.getProperty("webui.feed.podcast.collections"); if (cols != null && cols.length() > 1 && cols.contains(col.getHandle())) { podcastFeed = true; } } else if (dso.getType() == Constants.COMMUNITY) { Community comm = (Community) dso; defaultTitle = comm.getName(); feed.setDescription(communityService.getMetadata(comm, "short_description")); logo = comm.getLogo(); String comms = ConfigurationManager.getProperty("webui.feed.podcast.communities"); if (comms != null && comms.length() > 1 && comms.contains(comm.getHandle())) { podcastFeed = true; } } objectURL = resolveURL(request, dso); if (logo != null) { logoURL = urlOfBitstream(request, logo); } } feed.setTitle( labels.containsKey(MSG_FEED_TITLE) ? 
localize(labels, MSG_FEED_TITLE) : defaultTitle); feed.setLink(objectURL); feed.setPublishedDate(new Date()); feed.setUri(objectURL); // add logo if we found one: if (logoURL != null) { // we use the path to the logo for this, the logo itself cannot // be contained in the rdf. Not all RSS-viewers show this logo. SyndImage image = new SyndImageImpl(); image.setLink(objectURL); if (StringUtils.isNotBlank(feed.getTitle())) { image.setTitle(feed.getTitle()); } else { image.setTitle(localize(labels, MSG_LOGO_TITLE)); } image.setUrl(logoURL); feed.setImage(image); } // add entries for items if (items != null) { List<SyndEntry> entries = new ArrayList<SyndEntry>(); for (DSpaceObject itemDSO : items) { if (itemDSO.getType() != Constants.ITEM) { continue; } Item item = (Item) itemDSO; boolean hasDate = false; SyndEntry entry = new SyndEntryImpl(); entries.add(entry); String entryURL = resolveURL(request, item); entry.setLink(entryURL); entry.setUri(entryURL); String title = getOneDC(item, titleField); entry.setTitle(title == null ? localize(labels, MSG_UNTITLED) : title); // "published" date -- should be dc.date.issued String pubDate = getOneDC(item, dateField); if (pubDate != null) { entry.setPublishedDate((new DCDate(pubDate)).toDate()); hasDate = true; } // date of last change to Item entry.setUpdatedDate(item.getLastModified()); StringBuffer db = new StringBuffer(); for (String df : descriptionFields) { // Special Case: "(date)" in field name means render as date boolean isDate = df.indexOf("(date)") > 0; if (isDate) { df = df.replaceAll("\\(date\\)", ""); } List<MetadataValue> dcv = itemService.getMetadataByMetadataString(item, df); if (dcv.size() > 0) { String fieldLabel = labels.get(MSG_METADATA + df); if (fieldLabel != null && fieldLabel.length() > 0) { db.append(fieldLabel).append(": "); } boolean first = true; for (MetadataValue v : dcv) { if (first) { first = false; } else { db.append("; "); } db.append(isDate ? 
new DCDate(v.getValue()).toString() : v.getValue()); } db.append("\n"); } } if (db.length() > 0) { SyndContent desc = new SyndContentImpl(); desc.setType("text/plain"); desc.setValue(db.toString()); entry.setDescription(desc); } // This gets the authors into an ATOM feed List<MetadataValue> authors = itemService.getMetadataByMetadataString(item, authorField); if (authors.size() > 0) { List<SyndPerson> creators = new ArrayList<SyndPerson>(); for (MetadataValue author : authors) { SyndPerson sp = new SyndPersonImpl(); sp.setName(author.getValue()); creators.add(sp); } entry.setAuthors(creators); } // only add DC module if any DC fields are configured if (dcCreatorField != null || dcDateField != null || dcDescriptionField != null) { DCModule dc = new DCModuleImpl(); if (dcCreatorField != null) { List<MetadataValue> dcAuthors = itemService.getMetadataByMetadataString(item, dcCreatorField); if (dcAuthors.size() > 0) { List<String> creators = new ArrayList<String>(); for (MetadataValue author : dcAuthors) { creators.add(author.getValue()); } dc.setCreators(creators); } } if (dcDateField != null && !hasDate) { List<MetadataValue> v = itemService.getMetadataByMetadataString(item, dcDateField); if (v.size() > 0) { dc.setDate((new DCDate(v.get(0).getValue())).toDate()); } } if (dcDescriptionField != null) { List<MetadataValue> v = itemService.getMetadataByMetadataString(item, dcDescriptionField); if (v.size() > 0) { StringBuffer descs = new StringBuffer(); for (MetadataValue d : v) { if (descs.length() > 0) { descs.append("\n\n"); } descs.append(d.getValue()); } dc.setDescription(descs.toString()); } } entry.getModules().add(dc); } // iTunes Podcast Support - START if (podcastFeed) { // Add enclosure(s) List<SyndEnclosure> enclosures = new ArrayList(); try { List<Bundle> bunds = itemService.getBundles(item, "ORIGINAL"); if (bunds.get(0) != null) { List<Bitstream> bits = bunds.get(0).getBitstreams(); for (Bitstream bit : bits) { String mime = 
bit.getFormat(context).getMIMEType(); if (ArrayUtils.contains(podcastableMIMETypes, mime)) { SyndEnclosure enc = new SyndEnclosureImpl(); enc.setType(bit.getFormat(context).getMIMEType()); enc.setLength(bit.getSize()); enc.setUrl(urlOfBitstream(request, bit)); enclosures.add(enc); } else { continue; } } } // Also try to add an external value from dc.identifier.other // We are assuming that if this is set, then it is a media file List<MetadataValue> externalMedia = itemService.getMetadataByMetadataString(item, externalSourceField); if (externalMedia.size() > 0) { for (MetadataValue anExternalMedia : externalMedia) { SyndEnclosure enc = new SyndEnclosureImpl(); enc.setType( "audio/x-mpeg"); // We can't determine MIME of external file, so just picking // one. enc.setLength(1); enc.setUrl(anExternalMedia.getValue()); enclosures.add(enc); } } } catch (Exception e) { System.out.println(e.getMessage()); } entry.setEnclosures(enclosures); // Get iTunes specific fields: author, subtitle, summary, duration, keywords EntryInformation itunes = new EntryInformationImpl(); String author = getOneDC(item, authorField); if (author != null && author.length() > 0) { itunes.setAuthor(author); // <itunes:author> } itunes.setSubtitle( title == null ? localize(labels, MSG_UNTITLED) : title); // <itunes:subtitle> if (db.length() > 0) { itunes.setSummary(db.toString()); // <itunes:summary> } String extent = getOneDC( item, "dc.format.extent"); // assumed that user will enter this field with length of // song in seconds if (extent != null && extent.length() > 0) { extent = extent.split(" ")[0]; Integer duration = Integer.parseInt(extent); itunes.setDuration(new Duration(duration)); // <itunes:duration> } String subject = getOneDC(item, "dc.subject"); if (subject != null && subject.length() > 0) { String[] subjects = new String[1]; subjects[0] = subject; itunes.setKeywords(subjects); // <itunes:keywords> } entry.getModules().add(itunes); } } feed.setEntries(entries); } }
/** @inheritDoc */ public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException { if (feedURL == null) { throw new IllegalArgumentException("feed url cannot be null"); } // setup Rome feed fetcher FeedFetcher feedFetcher = getRomeFetcher(); // fetch the feed log.debug("Fetching feed: " + feedURL); SyndFeed feed; try { feed = feedFetcher.retrieveFeed(new URL(feedURL)); } catch (Exception ex) { throw new FetcherException("Error fetching subscription - " + feedURL, ex); } log.debug("Feed pulled, extracting data into Subscription"); // build planet subscription from fetched feed Subscription newSub = new Subscription(); newSub.setFeedURL(feedURL); newSub.setSiteURL(feed.getLink()); newSub.setTitle(feed.getTitle()); newSub.setAuthor(feed.getAuthor()); newSub.setLastUpdated(feed.getPublishedDate()); // normalize any data that couldn't be properly extracted if (newSub.getSiteURL() == null) { // set the site url to the feed url then newSub.setSiteURL(newSub.getFeedURL()); } if (newSub.getAuthor() == null) { // set the author to the title newSub.setAuthor(newSub.getTitle()); } if (newSub.getLastUpdated() == null) { // no update time specified in feed, so try consulting feed info cache FeedFetcherCache feedCache = getRomeFetcherCache(); try { SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL())); if (feedInfo.getLastModified() != null) { long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue(); if (lastUpdatedLong != 0) { newSub.setLastUpdated(new Date(lastUpdatedLong)); } } } catch (MalformedURLException ex) { // should never happen since we check this above } } // check if feed is unchanged and bail now if so if (lastModified != null && newSub.getLastUpdated() != null && !newSub.getLastUpdated().after(lastModified)) { return null; } if (log.isDebugEnabled()) { log.debug("Subscription is: " + newSub.toString()); } // some kludge to deal with feeds w/ no entry dates // we assign arbitrary dates 
chronologically by entry starting either // from the current time or the last update time of the subscription Calendar cal = Calendar.getInstance(); if (newSub.getLastUpdated() != null) { cal.setTime(newSub.getLastUpdated()); } else { cal.setTime(new Date()); cal.add(Calendar.DATE, -1); } // add entries List<SyndEntry> feedEntries = feed.getEntries(); for (SyndEntry feedEntry : feedEntries) { SubscriptionEntry newEntry = buildEntry(feedEntry); // some kludge to handle feeds with no entry dates if (newEntry.getPubTime() == null) { log.debug("No published date, assigning fake date for " + feedURL); newEntry.setPubTime(new Timestamp(cal.getTimeInMillis())); cal.add(Calendar.DATE, -1); } if (newEntry != null) { newSub.addEntry(newEntry); } } log.debug(feedEntries.size() + " entries included"); return newSub; }