/**
 * Converts a SyndEntry into an RSS Item, copying modules, title, link, URI
 * and (when present) the description.
 */
protected Item createRSSItem(SyndEntry sEntry) {
    Item item = new Item();
    item.setModules(ModuleUtils.cloneModules(sEntry.getModules()));
    item.setTitle(sEntry.getTitle());
    item.setLink(sEntry.getLink());
    item.setUri(sEntry.getUri());
    // Entries without a description simply leave the item description unset.
    if (sEntry.getDescription() == null) {
        return item;
    }
    Description desc = new Description();
    desc.setValue(sEntry.getDescription().getValue());
    item.setDescription(desc);
    return item;
}
/**
 * Returns true if {@code feed} contains an entry whose title equals {@code title}
 * and whose description contains every string in {@code bodyPortions}.
 * A null {@code bodyPortions} matches on title alone.
 *
 * <p>Fix: entries with a null title or a null/empty description no longer throw
 * NullPointerException — they simply don't match.
 */
@SuppressWarnings("unchecked")
private boolean findFeedEntry(SyndFeed feed, String title, String[] bodyPortions) {
    List<SyndEntry> entries = feed.getEntries();
    for (SyndEntry entry : entries) {
        // Null-safe: compare from the known-non-null side.
        if (!title.equals(entry.getTitle())) {
            continue;
        }
        if (bodyPortions == null) {
            return true; // title match is sufficient
        }
        SyndContent description = entry.getDescription();
        if (description == null || description.getValue() == null) {
            continue; // cannot match body portions without a description
        }
        String value = description.getValue();
        boolean allPresent = true;
        for (String portion : bodyPortions) {
            if (!value.contains(portion)) {
                allPresent = false;
                break;
            }
        }
        if (allPresent) {
            return true;
        }
    }
    return false;
}
public static List<RssUrlBean> getRssUrlBeanListFromPage(int rssCompo_id, String url) { List<RssUrlBean> rubList = new ArrayList<RssUrlBean>(); if (url.equals("")) return rubList; try { URL feedUrl = new URL(url); // SyndFeedInput:从远程读到xml结构的内容转成SyndFeedImpl实例 SyndFeedInput input = new SyndFeedInput(); // rome按SyndFeed类型生成rss和atom的实例, // SyndFeed是rss和atom实现类SyndFeedImpl的接口 SyndFeed syndFeed = input.build(new XmlReader(feedUrl)); List<SyndEntry> entryList = syndFeed.getEntries(); for (SyndEntry entry : entryList) { RssUrlBean rub = new RssUrlBean(); rub.setRssCompo_id(rssCompo_id); rub.setTitle(entry.getTitle()); rub.setLink(entry.getUri()); rub.setPublishedDate(CommonUtil.getStandardDate(entry.getPublishedDate().toLocaleString())); rub.setDescription(entry.getDescription().getValue()); if (entry.getUpdatedDate() != null) rub.setUpdatedDate(CommonUtil.getStandardDate(entry.getUpdatedDate().toLocaleString())); rub.setAuthors(entry.getAuthor()); rubList.add(rub); } } catch (Exception ex) { ex.printStackTrace(); } return rubList; }
/**
 * Builds an Item from a ROME feed entry.
 *
 * <p>Fix: entries without a description previously threw NullPointerException;
 * the description is now passed as null in that case.
 *
 * @param source identifier of the originating feed
 * @param entry the ROME entry to copy link/title/description/date/enclosures from
 */
public Item(String source, SyndEntry entry) {
    this(
        source,
        entry.getLink(),
        entry.getTitle(),
        // Description is optional in RSS/Atom — guard before dereferencing.
        entry.getDescription() != null ? entry.getDescription().getValue() : null,
        entry.getPublishedDate(),
        entry.getEnclosures());
}
/**
 * A helper function to print out the contents of an entry to log.trace.
 *
 * <p>Fix/idiom: uses a local {@link StringBuilder} instead of the synchronized
 * {@code StringBuffer} (no concurrent access here); output is unchanged.
 *
 * @param entry the feed entry to dump
 */
public static void printEntry(SyndEntry entry) {
    if (!log.isTraceEnabled()) {
        return; // don't build the (potentially large) dump when trace is off
    }
    StringBuilder pr = new StringBuilder();
    pr.append("URI: ").append(entry.getUri()).append("\n");
    pr.append("Title: ").append(entry.getTitle()).append("\n\n");
    pr.append("Date: ").append(entry.getPublishedDate()).append("\n");
    pr.append("Modified: ").append(entry.getUpdatedDate()).append("\n");
    pr.append("Creators: \n");
    for (SyndPerson author : FeedHelper.getAuthors(entry)) {
        pr.append(" - ").append(author.getName()).append("\n");
    }
    pr.append("Links: \n");
    for (SyndLink link : FeedHelper.getLinks(entry)) {
        pr.append(" - ").append(link.getTitle()).append(": ").append(link.getHref()).append("\n");
    }
    SyndContent description = entry.getDescription();
    if (description != null) {
        pr.append("\nDescription(").append(description.getType()).append("): ")
          .append(description.getValue());
    }
    pr.append("Contents: \n");
    for (SyndContent content : FeedHelper.getContents(entry)) {
        pr.append(" Type: ").append(content.getType());
        pr.append(" Body: ").append(content.getValue());
        try {
            // Keep the concatenation inside the try so nothing is appended
            // when plain-text extraction fails (matches prior behavior).
            pr.append(
                " Body (Plain text): "
                    + PlainTextExtractor.getPlainText(content.getType(), content.getValue()));
        } catch (ParserException e) {
            pr.append("Failed to parse content");
        }
    }
    pr.append("Categories: \n");
    for (SyndCategory category : FeedHelper.getCategories(entry)) {
        pr.append(category.getName()).append("(").append(category.getTaxonomyUri()).append(")");
    }
    log.trace(pr.toString());
}
/** Asserts the i-th cached feed entry against the expected channel.item[i] values. */
protected void _testItem(int i) throws Exception {
    super._testItem(i);
    // Shared property-path prefix for this item.
    String prefix = "channel.item[" + i + "]";
    List items = getCachedSyndFeed().getEntries();
    SyndEntry entry = (SyndEntry) items.get(i);
    assertProperty(entry.getTitle(), prefix + ".title");
    assertProperty(entry.getLink(), prefix + ".link");
    assertProperty(entry.getDescription().getValue(), prefix + ".description");
    _testCategories(entry.getCategories(), prefix);
    _testEnclosures(entry.getEnclosures(), prefix);
}
/** Verifies the i-th cached feed entry against the expected channel.item[i] values. */
@Override
protected void testItem(final int i) throws Exception {
    super.testItem(i);
    // Property-path prefix reused by every assertion below.
    final String base = "channel.item[" + i + "]";
    final List<SyndEntry> items = this.getCachedSyndFeed().getEntries();
    final SyndEntry entry = items.get(i);
    assertProperty(entry.getTitle(), base + ".title");
    assertProperty(entry.getLink(), base + ".link");
    assertProperty(entry.getDescription().getValue(), base + ".description");
    testCategories(entry.getCategories(), base);
    testEnclosures(entry.getEnclosures(), base);
}
/**
 * Maps the entries of the feed behind {@code categoryLink} to News objects.
 *
 * <p>Fix: entries with no description previously threw NullPointerException;
 * they now get an empty description instead.
 *
 * @param categoryLink category feed URL passed to {@code getSyndEntrys}
 * @return one News per feed entry
 */
public static List<News> getNews(String categoryLink) {
    ArrayList<SyndEntry> syndEntrys = getSyndEntrys(categoryLink);
    List<News> listNews = new ArrayList<News>();
    for (SyndEntry entry : syndEntrys) {
        String title = entry.getTitle();
        String link = entry.getLink();
        // Description is optional in RSS — fall back to "" rather than NPE.
        String description =
            entry.getDescription() != null ? entry.getDescription().getValue() : "";
        listNews.add(new News(title, link, description));
    }
    return listNews;
}
public void mouseClicked(MouseEvent e) { Point p = e.getPoint(); int row = appWindow.feedItems.rowAtPoint(p); int column = appWindow.feedItems.columnAtPoint(p); ListFeed channel = (ListFeed) appWindow.channelsList.getSelectedValue(); SyndFeed feed = channel.feed; SyndEntry item = (SyndEntry) feed.getEntries().get(row); if (e.getClickCount() == 2) { // open in browser window } else if (e.getClickCount() == 1) { appWindow.itemDetails.setText(item.getDescription().getValue()); } }
/**
 * Extends the base conversion with a typed description and, when present,
 * the entry's first content section.
 */
protected Item createRSSItem(SyndEntry sEntry) {
    Item item = super.createRSSItem(sEntry);
    SyndContent sContent = sEntry.getDescription();
    if (sContent != null) {
        item.setDescription(createItemDescription(sContent));
    }
    // Only the first content element (if any) is carried over.
    List contents = sEntry.getContents();
    if (contents != null && !contents.isEmpty()) {
        SyndContent first = (SyndContent) contents.get(0);
        Content itemContent = new Content();
        itemContent.setValue(first.getValue());
        itemContent.setType(first.getType());
        item.setContent(itemContent);
    }
    return item;
}
/** Parses a local feed file and dumps its metadata and entries at debug level. */
@SuppressWarnings("unchecked")
public void test() throws Exception {
    final SyndFeedInput input = new SyndFeedInput(true);
    final SyndFeed feed = input.build(new File("c:\\temp\\google.xml"));
    logger.debug("Successfully parsed the RSS feed");
    logger.debug("Author = " + feed.getAuthors());
    logger.debug("Categories = " + feed.getCategories());
    final List<SyndEntry> feedEntries = feed.getEntries();
    for (final SyndEntry item : feedEntries) {
        logger.debug("Title = " + StringEscapeUtils.unescapeHtml(item.getTitle()));
        final String rawDescription = item.getDescription().getValue();
        logger.debug("Description = " + StringEscapeUtils.unescapeHtml(rawDescription));
        logger.debug(item.getUri());
        logger.debug("Updated date = " + item.getUpdatedDate());
        logger.debug("Published date = " + item.getPublishedDate());
        logger.debug("====================================================");
    }
}
/**
 * Maps a ROME entry onto an Article, concatenating all content sections into
 * the article body.
 *
 * <p>Fix: entries without a description previously threw NullPointerException;
 * they now map to an empty description.
 */
private Article mapArticle(SyndEntry syndEntry) {
    StringBuilder body = new StringBuilder();
    for (Object obj : syndEntry.getContents()) {
        // getContents() is a raw list in old ROME versions — filter defensively.
        if (obj instanceof SyndContent) {
            body.append(((SyndContent) obj).getValue());
        }
    }
    // Description is optional — fall back to "" rather than NPE.
    String description =
        syndEntry.getDescription() != null ? syndEntry.getDescription().getValue() : "";
    return Article.builder()
        .num(0)
        .title(syndEntry.getTitle())
        .content(body.toString())
        .description(description)
        .author(syndEntry.getAuthor())
        .image("")
        .writtenDate(syndEntry.getPublishedDate())
        .build();
}
/** @see com.elia.rssexample.data.NewsDao */ @SuppressWarnings("unchecked") public List<NewsItem> getNewsList() { // TODO: exception handling log.trace("Enter getNewsList()."); List<NewsItem> newsList = new ArrayList<NewsItem>(); XmlReader reader = null; try { for (String rssUrl : rssUrlList) { reader = new XmlReader(new URL(rssUrl)); SyndFeed feed = new SyndFeedInput().build(reader); for (SyndEntry entry : (List<SyndEntry>) feed.getEntries()) { NewsItem item = new NewsItem(); item.setTitle(entry.getTitle()); item.setDescription(entry.getDescription().getValue()); item.setLink(entry.getLink()); item.setPublished(entry.getPublishedDate()); newsList.add(item); } } } catch (Exception e) { log.error("Error reading feed.", e); } finally { try { reader.close(); } catch (IOException e) { log.warn("Unable to close xml reader.", e); } } return newsList; }
/**
 * Fetches the jeanmarcmorandini.com RSS feed and announces the first entry not
 * yet seen (tracked by GUID via the DAO). Returns the IRC messages to send;
 * a fallback message when nothing new was found.
 */
@SuppressWarnings("unchecked")
@Trigger("!buzz")
@Help("Fetches one of the latest posts from jeanmarcmorandini.com")
public List<String> getLatestBuzz() {
    List<String> replies = new ArrayList<String>();
    try {
        SyndFeedInput input = new SyndFeedInput();
        SyndFeed rss =
            input.build(new XmlReader(new URL("http://www.jeanmarcmorandini.com/rss.php")));
        String message = null;
        for (Iterator<SyndEntry> it = rss.getEntries().iterator(); it.hasNext(); ) {
            SyndEntry item = it.next();
            String guid = item.getUri();
            RSSFeed buzz = dao.findByGUID(guid);
            if (buzz != null) {
                continue; // already announced — look at the next entry
            }
            // New entry: remember its GUID, then announce it.
            buzz = new RSSFeed();
            buzz.setGuid(item.getUri());
            dao.save(buzz);
            String shortUrl = utilsService.bitly(item.getLink());
            String firstParagraph =
                Jsoup.parse(item.getDescription().getValue()).select("p").get(0).text();
            message = IRCUtils.bold("EXCLU!") + " " + item.getTitle() + " - " + shortUrl;
            replies.add(message);
            replies.add(firstParagraph);
            break;
        }
        if (message == null) {
            replies.add("Pas d'exclus pour le moment.");
        }
    } catch (Exception e) {
        LOG.handle(e);
    }
    return replies;
}
/**
 * Returns the entry's description text, or null when the feed entry carries no
 * description element.
 *
 * <p>Fix: previously threw NullPointerException for entries without a description.
 */
public String getDescription() {
    return entry.getDescription() == null ? null : entry.getDescription().getValue();
}
/** * Generate an ebook from an RSS DOM Document. * * @param url The URL from where the Document was fetched (used only to set the author metadata) * @param doc The DOM Document of the feed. * @return An ebook. * @throws IllegalArgumentException * @throws FeedException * @throws IOException */ private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords) throws IllegalArgumentException, FeedException, IOException { Book book = new Book(); // start parsing our feed and have the above onItem methods called SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(doc); System.out.println(feed); // Set the title book.getMetadata().addTitle(feed.getTitle()); // Add an Author String author = feed.getAuthor(); if (author == null || "".equals(author.trim())) { author = url.getHost(); } book.getMetadata().addAuthor(new Author(author)); if (feed.getPublishedDate() != null) { book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate())); } if (feed.getDescription() != null) { book.getMetadata().addDescription(feed.getDescription()); } if (feed.getCopyright() != null) { book.getMetadata().getRights().add(feed.getCopyright()); } // Set cover image - This has never worked. // if (feed.getImage() != null) { // System.out.println("There is an image for the feed"); // Promise<HttpResponse> futureImgResponse = // WS.url(feed.getImage().getUrl()).getAsync(); // HttpResponse imgResponse = await(futureImgResponse); // System.out.println("Content-type: " + imgResponse.getContentType()); // if (imgResponse.getContentType().startsWith("image/")) { // String extension = // imgResponse.getContentType().substring("image/".length()); // InputStream imageStream = imgResponse.getStream(); // book.getMetadata().setCoverImage(new Resource(imageStream, "cover." 
+ // extension)); // System.out.println("Using default cover"); // imageStream = // VirtualFile.fromRelativePath("assets/cover.png").inputstream(); // if (imageStream != null) { // System.out.println("Using default cover"); // book.getMetadata().setCoverImage(new Resource(imageStream, // "cover.png")); // } else { // System.out.println("Could not load default cover"); // } // } // } int entryNumber = 0; List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { if (matchesKeyword(entry, keywords)) { StringBuilder title = new StringBuilder(100); if (entry.getTitle() != null) { title.append(entry.getTitle()); } if (entry.getAuthor() != null) { title.append(" - ").append(entry.getAuthor()); } StringBuilder content = new StringBuilder(); // Add title inside text content.append("<h2>").append(title).append("</h2>"); if (entry.getDescription() != null) { SyndContent syndContent = (SyndContent) entry.getDescription(); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } content.append("<hr/>"); } if (entry.getContents().size() > 0) { SyndContent syndContent = (SyndContent) entry.getContents().get(0); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } } String strContent = clean(content.toString()); // Add Chapter try { entryNumber++; book.addSection( title.toString(), new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml")); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return book; }
/**
 * Builds the working list of feed entries for one harvest pass: aggregates the
 * entries of all fetched syndicate feeds, appends any source-configured "extra
 * URL" entries as synthetic SyndEntryImpls, de-duplicates by URL+title/description
 * (both within this pass and against previously harvested documents via the
 * DuplicateManager), and queues the survivors on docsToAdd / docsToUpdate up to
 * the per-source document budget.
 *
 * <p>NOTE(review): the duplicate bookkeeping below is highly order-dependent —
 * left token-for-token as found; comments only.
 */
// Build the feed list
@SuppressWarnings("unchecked")
private void buildFeedList(LinkedList<SyndFeed> syndFeeds, SourcePojo source) {
    // If there's a max number of sources to get per harvest, configure that here:
    long nWaitTime_ms = props.getWebCrawlWaitTime();
    long nMaxTime_ms = props.getMaxTimePerFeed(); // (can't override this, too easy to break the system...)
    int nMaxDocsPerSource = props.getMaxDocsPerSource();
    long nNow = new Date().getTime();
    if (null != source.getRssConfig()) {
        if (null != source.getRssConfig().getWaitTimeOverride_ms()) {
            nWaitTime_ms = source.getRssConfig().getWaitTimeOverride_ms();
        }
    }
    // Document budget: how many docs fit in the per-feed time budget at the
    // configured crawl delay, capped by the per-source maximum.
    long nMaxDocs = Long.MAX_VALUE;
    if (nWaitTime_ms > 0) {
        nMaxDocs = nMaxTime_ms / nWaitTime_ms;
    }
    if (nMaxDocs > nMaxDocsPerSource) { // (another limit, take the smaller of the 2)
        nMaxDocs = nMaxDocsPerSource;
    }
    // (end per feed configuration)

    // Aggregate entries across feeds. The first feed's list is reused as-is;
    // a defensive copy is only made once a second feed needs to be appended.
    List<SyndEntry> tmpList = null;
    boolean bCreatedAggregateList = false;
    int nRealSyndEntries = 0;
    for (SyndFeed feed : syndFeeds) {
        if (0 == nRealSyndEntries) {
            tmpList = feed.getEntries();
        } else if (!bCreatedAggregateList) {
            bCreatedAggregateList = true;
            tmpList = new LinkedList<SyndEntry>(tmpList);
            tmpList.addAll(feed.getEntries());
        } else {
            tmpList.addAll(feed.getEntries());
        }
        nRealSyndEntries += feed.getEntries().size();
    }
    if (null == tmpList) {
        tmpList = new LinkedList<SyndEntry>();
    }
    // TESTED

    // Append synthetic entries for configured extra URLs (entries past index
    // nRealSyndEntries are therefore our own fake SyndEntryImpls).
    if ((null != source.getRssConfig()) && (null != source.getRssConfig().getExtraUrls())) {
        for (ExtraUrlPojo extraUrl : source.getRssConfig().getExtraUrls()) {
            if (null == extraUrl.title) {
                continue; // (this is an RSS feed not a URL)
            } // TESTED
            SyndEntryImpl synd = new SyndEntryImpl();
            synd.setLink(extraUrl.url);
            if (null != extraUrl.description) {
                SyndContentImpl description = new SyndContentImpl();
                description.setValue(extraUrl.description);
                synd.setDescription(description);
            }
            synd.setTitle(extraUrl.title);
            if (null != extraUrl.publishedDate) {
                try {
                    synd.setPublishedDate(new Date(DateUtility.parseDate(extraUrl.publishedDate)));
                } catch (Exception e) {
                } // do nothing, will use now as pub date
            }
            tmpList.add((SyndEntry) synd);
            if (null != extraUrl.fullText) {
                // The "source" feed is abused as a container for pre-supplied full text.
                SyndFeedImpl fullTextContainer = new SyndFeedImpl();
                fullTextContainer.setDescription(extraUrl.fullText);
                synd.setSource(fullTextContainer);
            }
        }
    }

    // Then begin looping over entries
    LinkedList<String> duplicateSources = new LinkedList<String>();
    try {
        // urlDups: URL -> minimal set of entries seen at that URL this pass.
        Map<String, List<SyndEntry>> urlDups = new HashMap<String, List<SyndEntry>>();
        int nSyndEntries = 0;
        for (Object synd : tmpList) {
            nSyndEntries++; // (keep count so we know we're accessing our own fake SyndEntryImpls)
            final SyndEntry entry = (SyndEntry) synd;
            if (null != entry.getLink()) // if url returns null, skip this entry
            {
                String url = this.cleanUrlStart(entry.getLink());
                if (null != source.getRssConfig()) { // Some RSS specific logic
                    // If an include is specified, must match
                    Matcher includeMatcher = source.getRssConfig().getIncludeMatcher(url);
                    if (null != includeMatcher) {
                        if (!includeMatcher.find()) {
                            continue;
                        }
                    }
                    // If an exclude is specified, must not match
                    Matcher excludeMatcher = source.getRssConfig().getExcludeMatcher(url);
                    if (null != excludeMatcher) {
                        if (excludeMatcher.find()) {
                            continue;
                        }
                    }
                }
                // Some error checking:
                // sometimes the URL seems to have some characters in front of the HTTP - remove these
                this.nTmpDocsSubmitted++;
                if (null == url) {
                    this.nTmpHttpErrors++;
                    continue;
                }
                // Also save the title and description:
                String title = "";
                if (null != entry.getTitle()) {
                    title = entry.getTitle();
                }
                String desc = "";
                if (null != entry.getDescription()) {
                    desc = entry.getDescription().getValue();
                }
                boolean duplicate = false;
                // Look for duplicates within the current set of sources
                List<SyndEntry> possDups = null;
                if (null == (possDups = urlDups.get(url))) {
                    // (new URL)
                    possDups = new LinkedList<SyndEntry>();
                    possDups.add(entry);
                    urlDups.put(url, possDups);
                } else { // (old URL, check if this is a duplicate...)
                    int nCount = 0;
                    for (SyndEntry possDup : possDups) {
                        if (possDup.getTitle().equals(title)
                            || ((null != possDup.getDescription())
                                && possDup.getDescription().getValue().equals(desc))
                            || ((null != possDup.getDescription())
                                && (null == entry.getDescription()))) {
                            // If *either* the title or the description matches as well as the URL...
                            duplicate = true;
                            break;
                        }
                        nCount++;
                    }
                    if (!duplicate) {
                        possDups.add(entry);
                    } else {
                        // DUPLICATE: ensure we have minimal set of data to cover all cases:
                        boolean bTitleMatch = false;
                        boolean bDescMatch = false;
                        for (SyndEntry possDup : possDups) {
                            if (!bTitleMatch
                                && possDup
                                    .getTitle()
                                    .equals(title)) { // (don't bother if already have a title match)
                                bTitleMatch = true;
                            } else if (!bDescMatch) { // (don't yet have a desc match)
                                if (null != entry.getDescription()) {
                                    if (null != possDup.getDescription()) { // (neither desc is null)
                                        if (possDup.getDescription().getValue().equals(desc)) {
                                            bDescMatch = true;
                                        }
                                    }
                                } else { // curr desc is null
                                    if (null == possDup.getDescription()) { // dup desc is null
                                        bDescMatch = true;
                                    }
                                } // (end various title match/desc match/both have no desc cases)
                            } // (end if no desc match)
                            if (bTitleMatch && bDescMatch) {
                                break; // (no way can fire)
                            }
                        } // (end loop over dups)
                        if (!bTitleMatch || !bDescMatch) {
                            possDups.add(entry);
                        }
                    } // (end is duplicate, nasty logic to add minimal set to dup list to cover all
                    // titles, descs)
                }
                if (duplicate) {
                    continue;
                }
                try {
                    // Cross-harvest de-duplication (against previously stored docs).
                    DuplicateManager qr = _context.getDuplicateManager();
                    if (null != entry.getDescription()) {
                        duplicate =
                            qr.isDuplicate_UrlTitleDescription(
                                url,
                                title.replaceAll("\\<.*?\\>", "").trim(),
                                desc.replaceAll("\\<.*?\\>", "").trim(),
                                source,
                                duplicateSources);
                    } else {
                        duplicate =
                            qr.isDuplicate_UrlTitleDescription(
                                url,
                                title.replaceAll("\\<.*?\\>", "").trim(),
                                null,
                                source,
                                duplicateSources);
                        // ^^^(this is different to isDuplicate_UrlTitle because it enforces that the
                        // description be null, vs just checking the title)
                    }
                    if (duplicate
                        && (null != source.getRssConfig())
                        && (null != source.getRssConfig().getUpdateCycle_secs())) {
                        // Check modified times... a stale duplicate past the update cycle
                        // becomes an in-place update of the stored document.
                        Date dupModDate = qr.getLastDuplicateModifiedTime();
                        ObjectId dupId = qr.getLastDuplicateId();
                        if ((null != dupModDate) && (null != dupId)) {
                            if (dupModDate.getTime() + source.getRssConfig().getUpdateCycle_secs() * 1000
                                < nNow) {
                                DocumentPojo doc = buildDocument(entry, source, duplicateSources);
                                if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
                                    // (Use dummy TitleEx to create a "fake" full text block)
                                    doc.setFullText(entry.getSource().getDescription());
                                }
                                doc.setUpdateId(dupId); // (set _id to document I'm going to overwrite)
                                this.docsToUpdate.add(doc);
                                if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
                                    source.setReachedMaxDocs();
                                    break; // (that's enough documents)
                                }
                            }
                        }
                    } // TESTED (duplicates we update instead of ignoring)
                    if (!duplicate) {
                        DocumentPojo doc = buildDocument(entry, source, duplicateSources);
                        if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
                            // (Use dummy TitleEx to create a "fake" full text block)
                            doc.setFullText(entry.getSource().getDescription());
                        }
                        this.docsToAdd.add(doc);
                        if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
                            source.setReachedMaxDocs();
                            break; // (that's enough documents)
                        }
                    }
                    // Bail out early when every submission so far has errored.
                    if (this.nTmpDocsSubmitted > 20) { // (some arbitrary "significant" number)
                        if (nTmpHttpErrors == this.nTmpDocsSubmitted) {
                            break;
                        }
                    }
                } catch (Exception e) {
                    // If an exception occurs log the error
                    logger.error("Exception Message: " + e.getMessage(), e);
                }
            }
        } // (end loop over feeds in a syndicate)
    } catch (Exception e) {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
    }
}
private DocumentPojo buildDocument( SyndEntry entry, SourcePojo source, LinkedList<String> duplicateSources) { String tmpURL = this.cleanUrlStart(entry.getLink().toString()); // (can't return null because called from code which checks this) // create the feed pojo DocumentPojo doc = new DocumentPojo(); doc.setUrl(tmpURL); doc.setCreated(new Date()); doc.setModified(new Date()); // Strip out html if it is present if (entry.getTitle() != null) doc.setTitle(entry.getTitle().replaceAll("\\<.*?\\>", "").trim()); if (entry.getDescription() != null) doc.setDescription(entry.getDescription().getValue().replaceAll("\\<.*?\\>", "").trim()); if (entry.getPublishedDate() != null) { doc.setPublishedDate(entry.getPublishedDate()); } else { doc.setPublishedDate(new Date()); } // Clone from an existing source if we can: if (!duplicateSources.isEmpty() && (null == doc.getUpdateId())) { // (can't duplicate updating document) doc.setDuplicateFrom(duplicateSources.getFirst()); } // GeoRSS GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry); // currently does not handle <georss:circle> if (null != geoRSSModule) { if (null != geoRSSModule.getPosition()) { double lat = geoRSSModule.getPosition().getLatitude(); double lon = geoRSSModule.getPosition().getLongitude(); GeoPojo gp = new GeoPojo(); gp.lat = lat; gp.lon = lon; doc.setDocGeo(gp); } if (null != geoRSSModule.getGeometry()) { AbstractGeometry ag = geoRSSModule.getGeometry(); if (ag.getClass().equals(new LineString().getClass())) { // <georss:line> LineString ls = ((LineString) geoRSSModule.getGeometry()); double latAvg = 0.0; double lonAvg = 0.0; int length = ls.getPositionList().size(); for (int i = 0; i < length; i++) { latAvg += ls.getPositionList().getLatitude(i); lonAvg += ls.getPositionList().getLongitude(i); } latAvg = latAvg / length; lonAvg = lonAvg / length; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } else if (ag.getClass().equals(new Polygon().getClass())) // <georss:polygon> { 
Polygon poly = ((Polygon) geoRSSModule.getGeometry()); AbstractRing ar = poly.getExterior(); LinearRing lr = (LinearRing) ar; double latAvg = 0.0; double lonAvg = 0.0; int length = lr.getPositionList().size(); for (int i = 0; i < length; i++) { latAvg += lr.getPositionList().getLatitude(i); lonAvg += lr.getPositionList().getLongitude(i); } latAvg = latAvg / length; lonAvg = lonAvg / length; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } else if (ag.getClass().equals(new Envelope().getClass())) { // <georss:box> Envelope env = ((Envelope) geoRSSModule.getGeometry()); double latAvg = (env.getMaxLatitude() + env.getMinLatitude()) / 2; double lonAvg = (env.getMaxLongitude() + env.getMinLongitude()) / 2; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } } } // end if GeoRSS // Arbitrary other metadata: if (null != entry.getForeignMarkup()) { JSONObject rssMetadata = new JSONObject(); @SuppressWarnings("unchecked") List<Element> fms = (List<Element>) entry.getForeignMarkup(); for (Element fm : fms) { try { JSONObject subObj = XML.toJSONObject(new XMLOutputter().outputString(fm)); if (1 == subObj.length()) { for (String name : JSONObject.getNames(subObj)) { rssMetadata.put(name, subObj.get(name)); } } else { // (this will never happen in practice?) rssMetadata.put(fm.getName(), subObj); } } catch (JSONException e) { } // (do nothing just carry on) } if (!fms.isEmpty()) { doc.addToMetadata( "_FEED_METADATA_", XmlToMetadataParser.convertJsonObjectToLinkedHashMap(rssMetadata)); } } // TESTED (longs converted to string, eg edgar:assistantDirector from // "http.www.sec.gov.archives.edgar.usgaap.rss.xml") return doc; }
/**
 * Imports a single feed entry as a KiWi ContentItem: resolves/creates the item
 * (by link, URI, or anonymously), copies title/language/dates/author, sets the
 * text content (content sections preferred over the description), applies feed
 * categories, title hashtags, media-module categories and the parameter tags as
 * taggings, imports geo and media payloads, adds the parameter types plus
 * kiwi:FeedPost, and flushes the entity manager.
 *
 * <p>NOTE(review): the {@code output} parameter is never used in this method —
 * presumably callers collect created items through it; verify.
 * Ordering of the persistence calls matters; code left token-for-token as found.
 */
// @Transactional
// @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public void importEntry(
    final SyndFeed feed,
    final SyndEntry entry,
    final Set<KiWiUriResource> types,
    final Set<ContentItem> tags,
    User user,
    final Collection<ContentItem> output) {
    // Resolve the author: use a registered user matching the entry author,
    // otherwise fall back to the shared 'anonymous' user.
    if (user == null && entry.getAuthor() != null && !"".equals(entry.getAuthor())) {
        if (userService.userExists(entry.getAuthor())) {
            user = userService.getUserByLogin(entry.getAuthor());
        } else {
            // user = userService.createUser(entry.getAuthor());
            /* In my opinion, it is not ok to create a user entity
             * without asking the person if he/she wants to be
             * created and persisted in the KiWi dataset.
             * Thus I'm changing the user to 'anonymous',
             * if he/she isn't registered with the same nick that
             * is given in the rss entry. */
            user = userService.getUserByLogin("anonymous");
            kiwiEntityManager.persist(user);
        }
    }

    log.debug("feed entry: #0 (#1)", entry.getTitle(), entry.getUri());

    // create a new content item and copy all data from the feed entry
    ContentItem item;
    if (entry.getLink() != null) {
        item = contentItemService.createExternContentItem(entry.getLink());
    } else if (entry.getUri() != null) {
        try {
            // try parsing URI; if it is not valid, fall back to feed-link + fragment
            URI uri = new URI(entry.getUri());
            item = contentItemService.createExternContentItem(entry.getUri());
        } catch (URISyntaxException e) {
            item = contentItemService.createExternContentItem(feed.getLink() + "#" + entry.getUri());
        }
    } else {
        item = contentItemService.createContentItem();
    }
    contentItemService.updateTitle(item, entry.getTitle());
    if (feed.getLanguage() != null) item.setLanguage(new Locale(feed.getLanguage()));
    // Prefer published date; updated date fills in or overrides 'modified'.
    if (entry.getPublishedDate() != null) {
        item.setCreated(entry.getPublishedDate());
        item.setModified(entry.getPublishedDate());
    }
    if (entry.getUpdatedDate() != null) {
        if (entry.getPublishedDate() == null) {
            item.setCreated(entry.getUpdatedDate());
        }
        item.setModified(entry.getUpdatedDate());
    }
    item.setAuthor(user);

    // read feed content and set it as item's text content
    List<SyndContent> contents = entry.getContents();
    if (contents.size() == 1) {
        log.debug("using RSS content section provided by item");
        contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (contents.size() > 1) {
        // Only the first content section is kept when several are present.
        log.warn("feed entry contained more than one content section");
        contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (contents.size() == 0) {
        if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
            log.debug("using RSS description as no content section was available");
            contentItemService.updateTextContentItem(
                item, "<p>" + entry.getDescription().getValue() + "</p>");
        }
    }

    // save before tagging
    contentItemService.saveContentItem(item);

    // read feed categories and use them as tags; taxonomy URIs become extern
    // content items, plain names become (or reuse) local content items
    for (SyndCategory cat : (List<SyndCategory>) entry.getCategories()) {
        ContentItem _cat;
        if (!taggingService.hasTag(item, cat.getName())) {
            if (cat.getTaxonomyUri() != null) {
                _cat = contentItemService.getContentItemByUri(cat.getTaxonomyUri());
                if (_cat == null) {
                    _cat = contentItemService.createExternContentItem(cat.getTaxonomyUri());
                    contentItemService.updateTitle(_cat, cat.getName());
                    _cat.setAuthor(user);
                    contentItemService.saveContentItem(_cat);
                }
                taggingService.createTagging(cat.getName(), item, _cat, user);
            } else {
                _cat = contentItemService.getContentItemByTitle(cat.getName());
                if (_cat == null) {
                    _cat = contentItemService.createContentItem();
                    contentItemService.updateTitle(_cat, cat.getName());
                    _cat.setAuthor(user);
                    contentItemService.saveContentItem(_cat);
                }
                taggingService.createTagging(cat.getName(), item, _cat, user);
            }
        }
    }

    // scan for Twitter-style hash tags in title (e.g. #kiwiknows, see KIWI-622)
    Matcher m_hashtag = p_hashtag.matcher(entry.getTitle());
    while (m_hashtag.find()) {
        String tag_label = m_hashtag.group(1);
        if (!taggingService.hasTag(item, tag_label)) {
            ContentItem tag = contentItemService.getContentItemByTitle(tag_label);
            if (tag == null) {
                tag = contentItemService.createContentItem();
                contentItemService.updateTitle(tag, tag_label);
                tag.setAuthor(user);
                contentItemService.saveContentItem(tag);
            }
            taggingService.createTagging(tag_label, item, tag, user);
        }
    }

    // check for geo information
    GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry);
    if (geoRSSModule != null && geoRSSModule.getPosition() != null) {
        POI poi = kiwiEntityManager.createFacade(item, POI.class);
        poi.setLatitude(geoRSSModule.getPosition().getLatitude());
        poi.setLongitude(geoRSSModule.getPosition().getLongitude());
        kiwiEntityManager.persist(poi);
    }

    // check for media information (only the first media content with a URL
    // reference is imported, byte-by-byte)
    MediaEntryModule mediaModule = (MediaEntryModule) entry.getModule(MediaModule.URI);
    if (mediaModule != null) {
        MediaContent[] media = mediaModule.getMediaContents();
        if (media.length > 0) {
            MediaContent m = media[0];
            if (m.getReference() instanceof UrlReference) {
                URL url = ((UrlReference) m.getReference()).getUrl();
                String type = m.getType();
                String name = url.getFile();
                if (name.lastIndexOf("/") > 0) {
                    name = name.substring(name.lastIndexOf("/") + 1);
                }
                log.debug("importing media data from URL #0", url.toString());
                try {
                    InputStream is = url.openStream();
                    ByteArrayOutputStream bout = new ByteArrayOutputStream();
                    int c;
                    while ((c = is.read()) != -1) {
                        bout.write(c);
                    }
                    byte[] data = bout.toByteArray();
                    contentItemService.updateMediaContentItem(item, data, type, name);
                    is.close();
                    bout.close();
                } catch (IOException ex) {
                    log.error("error importing media content from RSS stream");
                }
            } else {
                log.info("RSS importer can only import media with URL references");
            }
        } else {
            log.warn("media module found without content");
        }
        // Media-module categories become tags as well (label falls back to value).
        Category[] cats = mediaModule.getMetadata().getCategories();
        for (Category cat : cats) {
            ContentItem _cat;
            String label = cat.getLabel() != null ? cat.getLabel() : cat.getValue();
            if (!taggingService.hasTag(item, label)) {
                if (cat.getScheme() != null) {
                    _cat = contentItemService.getContentItemByUri(cat.getScheme() + cat.getValue());
                    if (_cat == null) {
                        _cat =
                            contentItemService.createExternContentItem(
                                cat.getScheme() + cat.getValue());
                        contentItemService.updateTitle(_cat, label);
                        _cat.setAuthor(user);
                        contentItemService.saveContentItem(_cat);
                    }
                    taggingService.createTagging(label, item, _cat, user);
                } else {
                    _cat = contentItemService.getContentItemByTitle(label);
                    if (_cat == null) {
                        _cat = contentItemService.createContentItem();
                        contentItemService.updateTitle(_cat, label);
                        _cat.setAuthor(user);
                        contentItemService.saveContentItem(_cat);
                    }
                    taggingService.createTagging(label, item, _cat, user);
                }
            }
        }
    }

    // add parameter categories as tags
    for (ContentItem tag : tags) {
        if (!taggingService.hasTag(item, tag.getTitle())) {
            taggingService.createTagging(tag.getTitle(), item, tag, user);
        }
    }

    // add parameter types as types
    for (KiWiUriResource type : types) {
        item.addType(type);
    }

    // add kiwi:FeedPost type
    item.addType(tripleStore.createUriResource(Constants.NS_KIWI_CORE + "FeedPost"));

    /* the flush is necessary, because CIs or tags will
     * otherwise be created multiple times when they
     * appear more than once in one RSS feed */
    entityManager.flush();

    log.debug("imported content item '#0' with URI '#1'", item.getTitle(), item.getResource());
}
// build a SubscriptionEntry from Rome SyndEntry and SyndFeed private SubscriptionEntry buildEntry(SyndEntry romeEntry) { // if we don't have a permalink then we can't continue if (romeEntry.getLink() == null) { return null; } SubscriptionEntry newEntry = new SubscriptionEntry(); newEntry.setTitle(romeEntry.getTitle()); newEntry.setPermalink(romeEntry.getLink()); // Play some games to get the author DCModule entrydc = (DCModule) romeEntry.getModule(DCModule.URI); if (romeEntry.getAuthor() != null) { newEntry.setAuthor(romeEntry.getAuthor()); } else { newEntry.setAuthor(entrydc.getCreator()); // use <dc:creator> } // Play some games to get the updated date if (romeEntry.getUpdatedDate() != null) { newEntry.setUpdateTime(new Timestamp(romeEntry.getUpdatedDate().getTime())); } // TODO: should we set a default update time here? // And more games getting publish date if (romeEntry.getPublishedDate() != null) { newEntry.setPubTime(new Timestamp(romeEntry.getPublishedDate().getTime())); // use <pubDate> } else if (entrydc != null && entrydc.getDate() != null) { newEntry.setPubTime(new Timestamp(entrydc.getDate().getTime())); // use <dc:date> } else { newEntry.setPubTime(newEntry.getUpdateTime()); } // get content and unescape if it is 'text/plain' if (romeEntry.getContents().size() > 0) { SyndContent content = (SyndContent) romeEntry.getContents().get(0); if (content != null && content.getType().equals("text/plain")) { newEntry.setText(StringEscapeUtils.unescapeHtml(content.getValue())); } else if (content != null) { newEntry.setText(content.getValue()); } } // no content, try summary if (newEntry.getText() == null || newEntry.getText().trim().length() == 0) { if (romeEntry.getDescription() != null) { newEntry.setText(romeEntry.getDescription().getValue()); } } // copy categories if (romeEntry.getCategories().size() > 0) { List list = new ArrayList(); Iterator cats = romeEntry.getCategories().iterator(); while (cats.hasNext()) { SyndCategory cat = (SyndCategory) 
cats.next(); list.add(cat.getName()); } newEntry.setCategoriesString(list); } return newEntry; }