// @Transactional public int importData( final SyndFeed feed, Set<KiWiUriResource> types, Set<ContentItem> tags, final User user, final Collection<ContentItem> output) { log.info( "importing entries from #0 feed '#1' found at '#2'", feed.getFeedType(), feed.getTitle(), feed.getUri()); if (types == null) { types = new HashSet<KiWiUriResource>(); } if (tags == null) { tags = new HashSet<ContentItem>(); } final Set<ContentItem> my_tags = tags; final Set<KiWiUriResource> my_types = types; // a hack for importing facebook activity streams: if the type is kiwi:FacebookPost, // turn facebook activity stream mode on; in this mode, we will skip all entries where // the remote author name and local user name are not identical boolean facebookImport = false; String t_facebookPost = Constants.NS_KIWI_CORE + "FacebookPost"; for (KiWiUriResource r : types) { if (r.getUri().equals(t_facebookPost)) { facebookImport = true; break; } } for (final SyndEntry entry : (List<SyndEntry>) feed.getEntries()) { // facebook hack ... (see above) if (facebookImport && !entry.getAuthor().equalsIgnoreCase(user.getFirstName() + " " + user.getLastName())) { log.info("Facebook import: skipping friend post with title", entry.getTitle()); continue; } new RunAsOperation() { @Override public void execute() { importEntry(feed, entry, my_types, my_tags, user, output); } }.addRole("admin").run(); } // entityManager.flush(); log.info("#0 content items have been imported from RSS/Atom feed", feed.getEntries().size()); return feed.getEntries().size(); }
public List<Note> getEntryData() { List<Note> res = new ArrayList<>(); for (SyndEntryImpl list1 : (List<SyndEntryImpl>) feedContent.getEntries()) { if (list1 == null || list1.equals(last)) break; Note addition = new Note(); // print(addition + " " + last); addition.setTitle(list1.getTitle()); addition.setContent(list1.getLink()); res.add(addition); } last = (SyndEntryImpl) feedContent.getEntries().get(0); return res; }
public static List<RssUrlBean> getRssUrlBeanListFromPage(int rssCompo_id, String url) { List<RssUrlBean> rubList = new ArrayList<RssUrlBean>(); if (url.equals("")) return rubList; try { URL feedUrl = new URL(url); // SyndFeedInput:从远程读到xml结构的内容转成SyndFeedImpl实例 SyndFeedInput input = new SyndFeedInput(); // rome按SyndFeed类型生成rss和atom的实例, // SyndFeed是rss和atom实现类SyndFeedImpl的接口 SyndFeed syndFeed = input.build(new XmlReader(feedUrl)); List<SyndEntry> entryList = syndFeed.getEntries(); for (SyndEntry entry : entryList) { RssUrlBean rub = new RssUrlBean(); rub.setRssCompo_id(rssCompo_id); rub.setTitle(entry.getTitle()); rub.setLink(entry.getUri()); rub.setPublishedDate(CommonUtil.getStandardDate(entry.getPublishedDate().toLocaleString())); rub.setDescription(entry.getDescription().getValue()); if (entry.getUpdatedDate() != null) rub.setUpdatedDate(CommonUtil.getStandardDate(entry.getUpdatedDate().toLocaleString())); rub.setAuthors(entry.getAuthor()); rubList.add(rub); } } catch (Exception ex) { ex.printStackTrace(); } return rubList; }
/**
 * Builds the fixed sample feed: one author, feed-level metadata and a single entry with a plain
 * text description.
 *
 * @return the populated feed
 */
private SyndFeed createFeed() {
  // Author shared by the feed and its entry.
  SyndPerson author = new SyndPersonImpl();
  author.setName("Gildas Cuisinier");
  author.setEmail("*****@*****.**");

  // Feed-level metadata.
  SyndFeed result = new SyndFeedImpl();
  result.setTitle("RSS Veille Techno");
  result.setAuthors(Collections.singletonList(author));
  result.setDescription("RSS d'exemple !");
  result.setLink("http://svn.cyg.be/");
  result.setPublishedDate(new Date());
  result.setLanguage("fr");

  // Description of the single entry.
  SyndContent summary = new SyndContentImpl();
  summary.setValue("Ajout d'un projet Rome sur le SVN afin de voir comment creer un RSS");
  summary.setType("text");

  SyndEntry item = new SyndEntryImpl();
  item.setTitle("Ajout du projet Rome sur le SVN");
  item.setLink("https://rome.dev.java.net/");
  item.setDescription(summary);
  item.setAuthors(Collections.singletonList(author));

  result.getEntries().add(item);
  return result;
}
/**
 * Reads the feed at {@code url} through the configured HTTP proxy and returns its entries.
 *
 * <p>Read and parse failures are logged and result in an empty list.
 *
 * @param url location of the feed
 * @param catDetail not used by this method
 * @return the entries of the feed, or an empty list when the feed could not be read
 */
@SuppressWarnings("unchecked")
public List<FiddleSyndEntry> getFeeds(URL url, CategoryDetail catDetail) {
  List<FiddleSyndEntry> feeds = new ArrayList<FiddleSyndEntry>();
  XmlReader reader = null;
  try {
    // NOTE(review): proxy host and credentials are hard-coded and applied JVM-wide on every
    // call; they should come from configuration and the password must not live in source.
    System.setProperty("http.proxyHost", "proxy-tw.bcbsmn.com");
    System.setProperty("http.proxyPort", "9119");
    System.setProperty("http.proxyUser", "a0c5zz");
    System.setProperty("http.proxyPassword", "test0810");
    reader = new XmlReader(url);
    SyndFeed feed = new SyndFeedInput().build(reader);
    // NOTE(review): unchecked assignment; the list holds whatever entry type ROME produced -
    // confirm it is really FiddleSyndEntry.
    feeds = feed.getEntries();
  } catch (IOException e) {
    // Pass the exception along so the cause is not lost.
    log.error("Exception reading url", e);
  } catch (IllegalArgumentException e) {
    log.error("Exception reading url", e);
  } catch (FeedException e) {
    log.error("Exception reading feed", e);
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException e) {
        log.error("Unable to close Feed XmlReader", e);
      }
    }
  }
  return feeds;
}
/**
 * Looks for a feed entry with the given title whose description contains every given text
 * portion.
 *
 * @param feed feed to search
 * @param title exact title the entry must have
 * @param bodyPortions texts that must all occur in the entry description; {@code null} means
 *     the title match alone is sufficient
 * @return {@code true} if a matching entry exists
 */
@SuppressWarnings("unchecked")
private boolean findFeedEntry(SyndFeed feed, String title, String[] bodyPortions) {
  List<SyndEntry> entries = feed.getEntries();
  for (SyndEntry entry : entries) {
    String entryTitle = entry.getTitle();
    // Entries without a title can never match; skip them instead of throwing.
    if (entryTitle == null || !entryTitle.equals(title)) {
      continue;
    }
    if (bodyPortions == null) {
      return true;
    }
    SyndContent description = entry.getDescription();
    String value = (description == null) ? null : description.getValue();
    boolean missingPortion = false;
    for (String portion : bodyPortions) {
      // A missing description cannot contain any requested portion.
      if (value == null || !value.contains(portion)) {
        missingPortion = true;
        break;
      }
    }
    if (!missingPortion) {
      return true;
    }
  }
  return false;
}
/**
 * Removes the feed items belonging to the node with the given id from the RSS data stored on
 * {@code node}, then saves the feed again with a refreshed title and description.
 *
 * <p>For an "exo:topic" node its "exo:post" children are removed; for an "exo:forum" node the
 * posts of each of its topics are removed, followed by the topics themselves.
 *
 * @param itemId id of the node whose items are removed
 * @param node node holding the RSS data
 * @param description description to set on the saved feed
 * @throws Exception propagated from the repository and RSS helpers
 */
@SuppressWarnings("unchecked")
protected void removeRSSItem(String itemId, Node node, String description) throws Exception {
  RSS rss = new RSS(node);
  SyndFeed feed = rss.read();
  List<SyndEntry> entries = feed.getEntries();

  Node target = getNodeById(itemId);
  if (target.isNodeType("exo:topic")) {
    removeItem(entries, getListRemove(target, "exo:post"));
  } else if (target.isNodeType("exo:forum")) {
    List<Node> topics = getListRemove(target, "exo:topic");
    for (Node topic : topics) {
      removeItem(entries, getListRemove(topic, "exo:post"));
    }
    removeItem(entries, topics);
  }

  feed.setEntries(entries);
  feed.setTitle(new PropertyReader(node).string("exo:name", "Root"));
  feed.setDescription(description);
  rss.saveFeed(feed, FORUM_RSS_TYPE);
}
@Override public List<GmailMessage> getUnreadMessages() { final List<GmailMessage> messages = new ArrayList<GmailMessage>(); HttpGmailConnection c = null; try { // for ROME properties loader Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); c = getGmailConnection(); c.setUrl(gmailFeedUrl); final URLConnection con = c.openConnection(); final SyndFeedInput feedInput = new SyndFeedInput(); final SyndFeed gmail = feedInput.build(new XmlReader(con)); for (final Object entry : gmail.getEntries()) { if (entry instanceof SyndEntry) { messages.add(new RssGmailMessage((SyndEntry) entry)); } } if (log.isDebugEnabled()) { log.debug("Got " + messages.size() + " new messages."); } } catch (final Exception e) { throw new GmailException("Failed getting unread messages", e); } return messages; }
/**
 * Reads the feed and inserts the given entry at the first position of its entry list.
 *
 * @param newEntry entry to insert; when {@code null} the entry list is left as read
 * @return the feed that was read, carrying the (possibly extended) entry list
 */
@SuppressWarnings("unchecked")
public SyndFeed addEntry(SyndEntry newEntry) {
  SyndFeed current = read();
  List<SyndEntry> items = current.getEntries();
  if (newEntry != null) {
    items.add(0, newEntry);
  }
  current.setEntries(items);
  return current;
}
/**
 * Loads every feed named in {@code urlArray} and merges its entries into the given collections.
 *
 * <p>Feeds on the same host as {@code requestURL} are fetched through the local feed fetcher,
 * all others through the cache. For every entry the owning feed is recorded in {@code
 * feedMapping}, and its description (with a removed image, if any, stored separately in {@code
 * imgMap}) is recorded in {@code descriptionNoImage}; both maps are keyed by {@code
 * entry.toString()}. Entries not already contained in {@code entries} are appended to it.
 *
 * @param request the current request
 * @param entries receives the entries that are not already present
 * @param feedMapping receives the feed of every processed entry
 * @param imgMap receives the enclosed content of the image removed from a description
 * @param descriptionNoImage receives the description text, or {@code null} when an entry has no
 *     description
 * @param requestURL URL of the request; feed URLs are resolved relative to it
 * @param urlArray feed URLs to load
 * @throws RuntimeException if a feed cannot be read or loaded; the original failure is kept as
 *     the cause
 * @throws Exception propagated from the helpers called here
 */
void parseFeeds(
    DecoratorRequest request,
    List<SyndEntry> entries,
    Map<SyndEntry, SyndFeed> feedMapping,
    Map<String, String> imgMap,
    Map<String, String> descriptionNoImage,
    URL requestURL,
    String[] urlArray)
    throws Exception {
  for (String url : urlArray) {
    url = url.trim();
    SyndFeed tmpFeed = null;
    URL feedURL = requestURL.relativeURL(url);
    URL baseURL;
    try {
      if (feedURL.getHost().equals(requestURL.getHost())) {
        baseURL = new URL(requestURL);
        retrieveLocalResource(feedURL);
        tmpFeed = this.localFeedFetcher.getFeed(feedURL, request);
      } else {
        baseURL = new URL(feedURL);
        tmpFeed = this.cache.get(url);
      }
    } catch (Exception e) {
      String m = e.getMessage();
      if (m == null) {
        m = e.getClass().getName();
      }
      // Keep the original exception as cause so the stack trace is not lost.
      throw new RuntimeException("Could not read feed url " + url + ": " + m, e);
    }
    if (tmpFeed == null) {
      throw new RuntimeException("Unable to load feed: " + url);
    }

    @SuppressWarnings("unchecked")
    List<SyndEntry> tmpEntries = tmpFeed.getEntries();
    List<SyndEntry> filteredEntries = new ArrayList<SyndEntry>(tmpEntries);
    boolean filter = !parameterHasValue(PARAMETER_ALLOW_MARKUP, "true", request);

    for (SyndEntry entry : tmpEntries) {
      // Entries already collected from an earlier feed are not added a second time.
      if (entries.contains(entry)) {
        filteredEntries.remove(entry);
      }
      feedMapping.put(entry, tmpFeed);

      String key = entry.toString();
      HtmlFragment description = getDescription(entry, baseURL, requestURL, filter);
      if (description == null) {
        descriptionNoImage.put(key, null);
        continue;
      }
      HtmlElement image = removeImage(description);
      if (image != null) {
        imgMap.put(key, image.getEnclosedContent());
      }
      descriptionNoImage.put(key, description.getStringRepresentation());
    }
    entries.addAll(filteredEntries);
  }
}
/**
 * Prints the title of every entry of the feed to standard output.
 *
 * @param feed feed whose entry titles are printed
 */
@Override
public void parse(SyndFeed feed) throws Exception {
  for (Object item : feed.getEntries()) {
    SyndEntry current = (SyndEntry) item;
    System.out.println("Title : " + current.getTitle());
  }
}
/**
 * Test of parse method, of class com.totsp.xml.syndication.itunes.ITunesParser.
 *
 * <p>Checks the feed-level iTunes information of leshow.xml, then prints the entry-level iTunes
 * information of leshow.xml and the entry durations of rsr.xml.
 */
public void testParse() throws Exception {
  SyndFeedInput input = new SyndFeedInput();

  File leShow = new File(this.getTestFile("/test/xml/leshow.xml"));
  SyndFeed leShowFeed = input.build(new XmlReader(leShow.toURL()));

  FeedInformationImpl feedInfo =
      (FeedInformationImpl) leShowFeed.getModule(AbstractITunesObject.URI);
  assertEquals("owner", "Harry Shearer", feedInfo.getOwnerName());
  assertEquals("email", "", feedInfo.getOwnerEmailAddress());
  assertEquals(
      "image",
      "http://a1.phobos.apple.com/Music/y2005/m06/d26/h21/mcdrrifv.jpg",
      feedInfo.getImage().toExternalForm());
  assertEquals("category", "Comedy", ((Category) feedInfo.getCategories().get(0)).getName());
  assertEquals(
      "summary",
      "A weekly, hour-long romp through the worlds of media, politics, sports and show business, leavened with an eclectic mix of mysterious music, hosted by Harry Shearer.",
      feedInfo.getSummary());

  for (Object item : leShowFeed.getEntries()) {
    SyndEntry entry = (SyndEntry) item;
    EntryInformationImpl entryInfo =
        (EntryInformationImpl) entry.getModule(AbstractITunesObject.URI);
    System.out.println(entryInfo);
  }

  File rsr = new File(this.getTestFile("/test/xml/rsr.xml"));
  SyndFeed rsrFeed = input.build(new XmlReader(rsr.toURL()));
  for (Object item : rsrFeed.getEntries()) {
    SyndEntry entry = (SyndEntry) item;
    EntryInformationImpl entryInfo =
        (EntryInformationImpl) entry.getModule(AbstractITunesObject.URI);
    System.out.println(entryInfo.getDuration());
  }
}
/** Parses the bundled Atom 1.0 sample and checks its title, link and number of entries. */
public void testReadAtom1() throws FeedException {
  InputSource atomSource =
      new InputSource(getClass().getResourceAsStream("/be/hikage/xml/rome/atom_1.0.xml"));
  SyndFeed parsed = new SyndFeedInput().build(atomSource);

  assertEquals("RSS Veille Techno", parsed.getTitle());
  assertEquals("http://svn.cyg.be/", parsed.getLink());
  assertEquals(1, parsed.getEntries().size());
}
/**
 * Shows the current feed item: the titles of the current and the two preceding entries go to
 * the three "now" labels, the tidied description of the current entry goes to the display, and
 * {@code itemIndex} advances by one, wrapping to 0 at the end of the feed.
 *
 * <p>Does nothing when there is no feed, when the feed has no entries, or when paused without a
 * pending step.
 */
public void run() {
  // A missing feed can never be shown, even for a single step.
  if (feed == null || (!step && paused)) {
    return;
  }
  int size = feed.getEntries().size();
  if (size == 0) {
    return;
  }
  // The feed may have shrunk since the last run; restart from the first entry in that case.
  if (itemIndex < 0 || itemIndex >= size) {
    itemIndex = 0;
  }
  // Wrap backwards with a non-negative modulo so feeds with fewer than three entries work too.
  int previous = ((itemIndex - 1) % size + size) % size;
  int prevprev = ((itemIndex - 2) % size + size) % size;
  form.now2.setText(trim(((SyndEntry) feed.getEntries().get(prevprev)).getTitle()));
  form.now1.setText(trim(((SyndEntry) feed.getEntries().get(previous)).getTitle()));
  form.now.setText(trim(((SyndEntry) feed.getEntries().get(itemIndex)).getTitle()));
  try {
    Document dom =
        jTidyParse(
            new ByteArrayInputStream(
                ((SyndEntry) feed.getEntries().get(itemIndex))
                    .getDescription()
                    .getValue()
                    .getBytes()),
            new ByteArrayOutputStream());
    form.display.setDocument(dom);
  } catch (Exception e) {
    // Also reached when the entry has no description; the display keeps its old content.
    e.printStackTrace();
  }
  itemIndex++;
  if (itemIndex >= size) {
    itemIndex = 0;
  }
}
/**
 * Asserts that the feed link and the link of every feed entry start with the base Nexus URL,
 * and that no entry link is missing.
 *
 * @param feed feed to validate
 */
@SuppressWarnings("unchecked")
private void validateLinksInFeeds(SyndFeed feed) {
  Assert.assertTrue("Feed link is wrong", feed.getLink().startsWith(this.getBaseNexusUrl()));

  for (SyndEntry item : (List<SyndEntry>) feed.getEntries()) {
    String itemLink = item.getLink();
    Assert.assertNotNull("Feed item link is empty.", itemLink);
    Assert.assertTrue(
        "Feed item link is wrong, is: " + itemLink,
        itemLink.startsWith(this.getBaseNexusUrl()));
  }
}
/**
 * Reads the feed and removes the first entry whose URI equals the given one.
 *
 * @param uri URI of the entry to remove; {@code null} or blank leaves the entries untouched
 * @return the feed that was read, carrying the (possibly shortened) entry list
 */
@SuppressWarnings("unchecked")
public SyndFeed removeEntry(String uri) {
  SyndFeed feed = read();
  List<SyndEntry> entries = feed.getEntries();
  if (uri != null && uri.trim().length() > 0) {
    // Index loop: the list is modified, and comparing from uri tolerates entries without a URI.
    for (int i = 0; i < entries.size(); i++) {
      if (uri.equals(entries.get(i).getUri())) {
        entries.remove(i);
        break;
      }
    }
  }
  feed.setEntries(entries);
  return feed;
}
public static void main(String[] args) { boolean ok = false; if (args.length == 1) { try { URL feedUrl = new URL(args[0]); FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache); FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); fetcher.addFetcherEventListener(listener); System.err.println("Retrieving feed " + feedUrl); // Retrieve the feed. // We will get a Feed Polled Event and then a // Feed Retrieved event (assuming the feed is valid) SyndFeed feed = fetcher.retrieveFeed(feedUrl); System.err.println(feedUrl + " retrieved"); System.err.println( feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size() + " entries."); // We will now retrieve the feed again. If the feed is unmodified // and the server supports conditional gets, we will get a "Feed // Unchanged" event after the Feed Polled event System.err.println("Polling " + feedUrl + " again to test conditional get support."); SyndFeed feed2 = fetcher.retrieveFeed(feedUrl); System.err.println( "If a \"Feed Unchanged\" event fired then the server supports conditional gets."); ok = true; } catch (Exception ex) { System.out.println("ERROR: " + ex.getMessage()); ex.printStackTrace(); } } if (!ok) { System.out.println(); System.out.println("FeedReader reads and prints any RSS/Atom feed type."); System.out.println("The first parameter must be the URL of the feed to read."); System.out.println(); } }
public void mouseClicked(MouseEvent e) { Point p = e.getPoint(); int row = appWindow.feedItems.rowAtPoint(p); int column = appWindow.feedItems.columnAtPoint(p); ListFeed channel = (ListFeed) appWindow.channelsList.getSelectedValue(); SyndFeed feed = channel.feed; SyndEntry item = (SyndEntry) feed.getEntries().get(row); if (e.getClickCount() == 2) { // open in browser window } else if (e.getClickCount() == 1) { appWindow.itemDetails.setText(item.getDescription().getValue()); } }
/**
 * Parses the feed file c:\temp\google.xml and writes the feed authors and categories plus the
 * details of every entry to the debug log.
 */
@SuppressWarnings("unchecked")
public void test() throws Exception {
  final File feedFile = new File("c:\\temp\\google.xml");
  final SyndFeed parsed = new SyndFeedInput(true).build(feedFile);
  logger.debug("Successfully parsed the RSS feed");
  logger.debug("Author = " + parsed.getAuthors());
  logger.debug("Categories = " + parsed.getCategories());

  for (final SyndEntry item : (List<SyndEntry>) parsed.getEntries()) {
    final String title = StringEscapeUtils.unescapeHtml(item.getTitle());
    logger.debug("Title = " + title);
    final String description = StringEscapeUtils.unescapeHtml(item.getDescription().getValue());
    logger.debug("Description = " + description);
    logger.debug(item.getUri());
    logger.debug("Updated date = " + item.getUpdatedDate());
    logger.debug("Published date = " + item.getPublishedDate());
    logger.debug("====================================================");
  }
}
/**
 * Reads the Reuters business news feed and prints the feed title followed by the title of every
 * entry.
 *
 * @param args not used
 * @throws Exception if the feed cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
  URL url = new URL("http://feeds.reuters.com/reuters/businessNews");
  try (XmlReader reader = new XmlReader(url)) {
    SyndFeed feed = new SyndFeedInput().build(reader);
    // The label announces the title, so print the title (the original printed the author).
    System.out.println("Feed Title: " + feed.getTitle());
    for (Object item : feed.getEntries()) {
      SyndEntry entry = (SyndEntry) item;
      System.out.println(entry.getTitle());
    }
  }
}
/**
 * Creates an RSS {@code Channel} of the given type from a {@code SyndFeed}, copying modules,
 * encoding, title, link and description, plus image and items when present.
 *
 * @param type feed type passed to the {@code Channel} constructor
 * @param syndFeed feed to convert
 * @return the populated channel
 */
protected WireFeed createRealFeed(String type, SyndFeed syndFeed) {
  Channel rssChannel = new Channel(type);
  rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));
  rssChannel.setEncoding(syndFeed.getEncoding());
  rssChannel.setTitle(syndFeed.getTitle());
  rssChannel.setLink(syndFeed.getLink());
  rssChannel.setDescription(syndFeed.getDescription());

  // Image and items are optional and only converted when the feed has them.
  SyndImage feedImage = syndFeed.getImage();
  if (feedImage != null) {
    rssChannel.setImage(createRSSImage(feedImage));
  }

  List feedEntries = syndFeed.getEntries();
  if (feedEntries != null) {
    rssChannel.setItems(createRSSItems(feedEntries));
  }

  return rssChannel;
}
/** @see com.elia.rssexample.data.NewsDao */ @SuppressWarnings("unchecked") public List<NewsItem> getNewsList() { // TODO: exception handling log.trace("Enter getNewsList()."); List<NewsItem> newsList = new ArrayList<NewsItem>(); XmlReader reader = null; try { for (String rssUrl : rssUrlList) { reader = new XmlReader(new URL(rssUrl)); SyndFeed feed = new SyndFeedInput().build(reader); for (SyndEntry entry : (List<SyndEntry>) feed.getEntries()) { NewsItem item = new NewsItem(); item.setTitle(entry.getTitle()); item.setDescription(entry.getDescription().getValue()); item.setLink(entry.getLink()); item.setPublished(entry.getPublishedDate()); newsList.add(item); } } } catch (Exception e) { log.error("Error reading feed.", e); } finally { try { reader.close(); } catch (IOException e) { log.warn("Unable to close xml reader.", e); } } return newsList; }
/**
 * Reads the feed at the given URL and maps every entry to an {@code Article}.
 *
 * @param url location of the feed
 * @return the mapped articles in feed order
 * @throws RuntimeException if the feed cannot be read or parsed; the original exception is kept
 *     as the cause
 */
@Override
public List<Article> read(URL url) {
  // NOTE(review): InputStreamReader without a charset decodes with the platform default and
  // ignores the encoding declared by the feed - confirm whether that is intended.
  try (InputStreamReader reader = new InputStreamReader(url.openStream())) {
    List<Article> articles = new ArrayList<>();
    SyndFeed syndFeed = new SyndFeedInput().build(reader);
    for (Object obj : syndFeed.getEntries()) {
      if (!(obj instanceof SyndEntry)) {
        continue;
      }
      articles.add(mapArticle((SyndEntry) obj));
    }
    return articles;
  } catch (FeedException | IOException e) {
    throw new RuntimeException("피드를 읽을 수 업습니다. " + e.getMessage(), e);
  }
}
/**
 * Announces the first feed item of jeanmarcmorandini.com whose GUID has not been stored yet.
 *
 * <p>The item's GUID is saved, then a headline (title plus shortened link) and the text of the
 * first paragraph of the description are returned. When every item is already known, a single
 * "nothing new" line is returned. Failures are passed to {@code LOG.handle}.
 *
 * @return the lines to send, possibly empty after a failure
 */
@SuppressWarnings("unchecked")
@Trigger("!buzz")
@Help("Fetches one of the latest posts from jeanmarcmorandini.com")
public List<String> getLatestBuzz() {
  List<String> toReturn = new ArrayList<String>();
  try {
    URL url = new URL("http://www.jeanmarcmorandini.com/rss.php");
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed rss = input.build(new XmlReader(url));
    Iterator<SyndEntry> it = rss.getEntries().iterator();
    String message = null;
    while (it.hasNext()) {
      SyndEntry item = it.next();
      String guid = item.getUri();
      RSSFeed buzz = dao.findByGUID(guid);
      if (buzz == null) {
        buzz = new RSSFeed();
        buzz.setGuid(guid);
        dao.save(buzz);
        String urlBitly = utilsService.bitly(item.getLink());
        // eq(0) yields an empty selection instead of throwing when the description contains
        // no <p> element; a missing description is treated the same way.
        String content = "";
        if (item.getDescription() != null && item.getDescription().getValue() != null) {
          content = Jsoup.parse(item.getDescription().getValue()).select("p").eq(0).text();
        }
        message = IRCUtils.bold("EXCLU!") + " " + item.getTitle() + " - " + urlBitly;
        toReturn.add(message);
        if (!content.isEmpty()) {
          toReturn.add(content);
        }
        break;
      }
    }
    if (message == null) {
      toReturn.add("Pas d'exclus pour le moment.");
    }
  } catch (Exception e) {
    LOG.handle(e);
  }
  return toReturn;
}
@SuppressWarnings("unchecked") public static ArrayList<SyndEntry> getSyndEntrys(String link) { ArrayList<SyndEntry> listOfSyndEntrys = new ArrayList<SyndEntry>(); try { URL url = new URL(link); HttpURLConnection httpcon = (HttpURLConnection) url.openConnection(); // Reading the feed SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(new XmlReader(httpcon)); List<SyndEntry> entries = feed.getEntries(); Iterator<SyndEntry> itEntries = entries.iterator(); SyndEntry entry; while (itEntries.hasNext()) { entry = itEntries.next(); listOfSyndEntrys.add(entry); } return listOfSyndEntrys; } catch (Exception e) { System.out.println(e.getMessage()); } return listOfSyndEntrys; }
private double getTemperatureFromRSS() { double result = -1; // Here we pretend to be the google bot to fake out User-Agent // sniffing programs. try { tempFeedURL = new URL("http://www.hive13.org/isOpen/RSS.php?temp=0"); URLConnection conn = tempFeedURL.openConnection(); conn.setRequestProperty("User-Agent", BotProperties.getInstance().getUserAgentString()); // Create a feed off of the URL and get the latest news. SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(new XmlReader(conn)); // Get the feed's list of entries @SuppressWarnings("unchecked") List<SyndEntry> entryList = feed.getEntries(); Collections.sort(entryList, new SyndEntryComparator()); if (entryList.size() > 0) { // We have found an RSS feed at least, is it formatted // correctly? result = Double.parseDouble(entryList.get(0).getTitle()); } else { // We have not found an RSS feed for the hackerspace // tempreature. } } catch (IOException e) { e.printStackTrace(); } catch (IllegalArgumentException e) { e.printStackTrace(); } catch (FeedException e) { e.printStackTrace(); } return result; }
/** * Generate an ebook from an RSS DOM Document. * * @param url The URL from where the Document was fetched (used only to set the author metadata) * @param doc The DOM Document of the feed. * @return An ebook. * @throws IllegalArgumentException * @throws FeedException * @throws IOException */ private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords) throws IllegalArgumentException, FeedException, IOException { Book book = new Book(); // start parsing our feed and have the above onItem methods called SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(doc); System.out.println(feed); // Set the title book.getMetadata().addTitle(feed.getTitle()); // Add an Author String author = feed.getAuthor(); if (author == null || "".equals(author.trim())) { author = url.getHost(); } book.getMetadata().addAuthor(new Author(author)); if (feed.getPublishedDate() != null) { book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate())); } if (feed.getDescription() != null) { book.getMetadata().addDescription(feed.getDescription()); } if (feed.getCopyright() != null) { book.getMetadata().getRights().add(feed.getCopyright()); } // Set cover image - This has never worked. // if (feed.getImage() != null) { // System.out.println("There is an image for the feed"); // Promise<HttpResponse> futureImgResponse = // WS.url(feed.getImage().getUrl()).getAsync(); // HttpResponse imgResponse = await(futureImgResponse); // System.out.println("Content-type: " + imgResponse.getContentType()); // if (imgResponse.getContentType().startsWith("image/")) { // String extension = // imgResponse.getContentType().substring("image/".length()); // InputStream imageStream = imgResponse.getStream(); // book.getMetadata().setCoverImage(new Resource(imageStream, "cover." 
+ // extension)); // System.out.println("Using default cover"); // imageStream = // VirtualFile.fromRelativePath("assets/cover.png").inputstream(); // if (imageStream != null) { // System.out.println("Using default cover"); // book.getMetadata().setCoverImage(new Resource(imageStream, // "cover.png")); // } else { // System.out.println("Could not load default cover"); // } // } // } int entryNumber = 0; List<SyndEntry> entries = feed.getEntries(); for (SyndEntry entry : entries) { if (matchesKeyword(entry, keywords)) { StringBuilder title = new StringBuilder(100); if (entry.getTitle() != null) { title.append(entry.getTitle()); } if (entry.getAuthor() != null) { title.append(" - ").append(entry.getAuthor()); } StringBuilder content = new StringBuilder(); // Add title inside text content.append("<h2>").append(title).append("</h2>"); if (entry.getDescription() != null) { SyndContent syndContent = (SyndContent) entry.getDescription(); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } content.append("<hr/>"); } if (entry.getContents().size() > 0) { SyndContent syndContent = (SyndContent) entry.getContents().get(0); if (!syndContent.getType().contains("html")) { content.append("<pre>\n"); } content.append(syndContent.getValue()); if (!syndContent.getType().contains("html")) { content.append("\n</pre>"); } } String strContent = clean(content.toString()); // Add Chapter try { entryNumber++; book.addSection( title.toString(), new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml")); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return book; }
// Build the feed list
/**
 * Turns the entries of the given feeds, plus any extra URLs configured on the source, into
 * documents to add or update.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Compute the per-call document limit {@code nMaxDocs}: max time per feed divided by the
 *       wait time (when the wait time is positive), capped by the max docs per source.
 *   <li>Aggregate the entries of all feeds into one list and append one synthetic entry per
 *       configured extra URL that has a title.
 *   <li>For every entry with a link: apply the source's include/exclude matchers, drop
 *       duplicates seen earlier in this call (same URL and matching title or description),
 *       then ask the duplicate manager. New entries go to {@code docsToAdd}; known entries
 *       older than the source's update cycle go to {@code docsToUpdate}.
 * </ol>
 *
 * <p>The loop stops once {@code nMaxDocs} documents are queued. Exceptions are logged and not
 * rethrown.
 *
 * @param syndFeeds the feeds whose entries are processed
 * @param source the source being harvested; supplies the optional RSS configuration
 */
@SuppressWarnings("unchecked")
private void buildFeedList(LinkedList<SyndFeed> syndFeeds, SourcePojo source) {
  // If there's a max number of sources to get per harvest, configure that here:
  long nWaitTime_ms = props.getWebCrawlWaitTime();
  long nMaxTime_ms =
      props.getMaxTimePerFeed(); // (can't override this, too easy to break the system...)
  int nMaxDocsPerSource = props.getMaxDocsPerSource();
  long nNow = new Date().getTime();
  if (null != source.getRssConfig()) {
    if (null != source.getRssConfig().getWaitTimeOverride_ms()) {
      nWaitTime_ms = source.getRssConfig().getWaitTimeOverride_ms();
    }
  }
  long nMaxDocs = Long.MAX_VALUE;
  if (nWaitTime_ms > 0) {
    nMaxDocs = nMaxTime_ms / nWaitTime_ms;
  }
  if (nMaxDocs > nMaxDocsPerSource) { // (another limit, take the smaller of the 2)
    nMaxDocs = nMaxDocsPerSource;
  }
  // (end per feed configuration)

  // Add extra docs
  // NOTE(review): until a second feed forces a copy, tmpList is the very list returned by
  // feed.getEntries(), so the extra entries added below are added to that same list.
  List<SyndEntry> tmpList = null;
  boolean bCreatedAggregateList = false;
  int nRealSyndEntries = 0;
  for (SyndFeed feed : syndFeeds) {
    if (0 == nRealSyndEntries) {
      tmpList = feed.getEntries();
    } else if (!bCreatedAggregateList) {
      bCreatedAggregateList = true;
      tmpList = new LinkedList<SyndEntry>(tmpList);
      tmpList.addAll(feed.getEntries());
    } else {
      tmpList.addAll(feed.getEntries());
    }
    nRealSyndEntries += feed.getEntries().size();
  }
  if (null == tmpList) {
    tmpList = new LinkedList<SyndEntry>();
  }
  // TESTED

  if ((null != source.getRssConfig()) && (null != source.getRssConfig().getExtraUrls())) {
    for (ExtraUrlPojo extraUrl : source.getRssConfig().getExtraUrls()) {
      if (null == extraUrl.title) {
        continue; // (this is an RSS feed not a URL)
      }
      // TESTED
      SyndEntryImpl synd = new SyndEntryImpl();
      synd.setLink(extraUrl.url);
      if (null != extraUrl.description) {
        SyndContentImpl description = new SyndContentImpl();
        description.setValue(extraUrl.description);
        synd.setDescription(description);
      }
      synd.setTitle(extraUrl.title);
      if (null != extraUrl.publishedDate) {
        try {
          synd.setPublishedDate(new Date(DateUtility.parseDate(extraUrl.publishedDate)));
        } catch (Exception e) {
        } // do nothing, will use now as pub date
      }
      tmpList.add((SyndEntry) synd);

      // The full text travels in the description of a dummy source feed; it is read back
      // below via entry.getSource().getDescription().
      if (null != extraUrl.fullText) {
        SyndFeedImpl fullTextContainer = new SyndFeedImpl();
        fullTextContainer.setDescription(extraUrl.fullText);
        synd.setSource(fullTextContainer);
      }
    }
  }

  // Then begin looping over entries
  LinkedList<String> duplicateSources = new LinkedList<String>();
  try {
    // Entries already seen in this call, grouped by cleaned URL.
    Map<String, List<SyndEntry>> urlDups = new HashMap<String, List<SyndEntry>>();
    int nSyndEntries = 0;
    for (Object synd : tmpList) {
      nSyndEntries++; // (keep count so we know we're accessing our own fake SyndEntryImpls)
      final SyndEntry entry = (SyndEntry) synd;
      if (null != entry.getLink()) // if url returns null, skip this entry
      {
        String url = this.cleanUrlStart(entry.getLink());
        if (null != source.getRssConfig()) { // Some RSS specific logic
          // If an include is specified, must match
          Matcher includeMatcher = source.getRssConfig().getIncludeMatcher(url);
          if (null != includeMatcher) {
            if (!includeMatcher.find()) {
              continue;
            }
          }
          // If an exclude is specified, must not match
          Matcher excludeMatcher = source.getRssConfig().getExcludeMatcher(url);
          if (null != excludeMatcher) {
            if (excludeMatcher.find()) {
              continue;
            }
          }
        }
        // Some error checking:
        // sometimes the URL seems to have some characters in front of the HTTP - remove these
        this.nTmpDocsSubmitted++;
        if (null == url) {
          this.nTmpHttpErrors++;
          continue;
        }
        // Also save the title and description:
        String title = "";
        if (null != entry.getTitle()) {
          title = entry.getTitle();
        }
        String desc = "";
        if (null != entry.getDescription()) {
          desc = entry.getDescription().getValue();
        }
        boolean duplicate = false;
        // Look for duplicates within the current set of sources
        List<SyndEntry> possDups = null;
        if (null == (possDups = urlDups.get(url))) { // (new URL)
          possDups = new LinkedList<SyndEntry>();
          possDups.add(entry);
          urlDups.put(url, possDups);
        } else { // (old URL, check if this is a duplicate...)
          // NOTE(review): possDup.getTitle() is dereferenced without a null check although
          // entries with a null title can be stored in possDups - confirm. nCount is
          // incremented but never read.
          int nCount = 0;
          for (SyndEntry possDup : possDups) {
            if (possDup.getTitle().equals(title)
                || ((null != possDup.getDescription())
                    && possDup.getDescription().getValue().equals(desc))
                || ((null != possDup.getDescription()) && (null == entry.getDescription()))) {
              // If *either* the title or the description matches as well as the URL...
              duplicate = true;
              break;
            }
            nCount++;
          }
          if (!duplicate) {
            possDups.add(entry);
          } else { // DUPLICATE: ensure we have minimal set of data to cover all cases:
            boolean bTitleMatch = false;
            boolean bDescMatch = false;
            for (SyndEntry possDup : possDups) {
              if (!bTitleMatch
                  && possDup
                      .getTitle()
                      .equals(title)) { // (don't bother if already have a title match)
                bTitleMatch = true;
              } else if (!bDescMatch) { // (don't yet have a desc match)
                if (null != entry.getDescription()) {
                  if (null != possDup.getDescription()) { // (neither desc is null)
                    if (possDup.getDescription().getValue().equals(desc)) {
                      bDescMatch = true;
                    }
                  }
                } else { // curr desc is null
                  if (null == possDup.getDescription()) { // dup desc is null
                    bDescMatch = true;
                  }
                } // (end various title match/desc match/both have no desc cases
              } // (end if no desc match)
              if (bTitleMatch && bDescMatch) {
                break; // (no way can fire)
              }
            } // (end loop over dups)
            if (!bTitleMatch || !bDescMatch) {
              possDups.add(entry);
            }
          } // (end is duplicate, nasty logic to add minimal set to dup list to cover all titles,
          // descs)
        }
        if (duplicate) {
          continue;
        }
        try {
          DuplicateManager qr = _context.getDuplicateManager();
          // Title and description are compared with anything between '<' and '>' removed.
          if (null != entry.getDescription()) {
            duplicate =
                qr.isDuplicate_UrlTitleDescription(
                    url,
                    title.replaceAll("\\<.*?\\>", "").trim(),
                    desc.replaceAll("\\<.*?\\>", "").trim(),
                    source,
                    duplicateSources);
          } else {
            duplicate =
                qr.isDuplicate_UrlTitleDescription(
                    url,
                    title.replaceAll("\\<.*?\\>", "").trim(),
                    null,
                    source,
                    duplicateSources);
            // ^^^(this is different to isDuplicate_UrlTitle because it enforces that the
            // description be null, vs just checking the title)
          }
          if (duplicate
              && (null != source.getRssConfig())
              && (null != source.getRssConfig().getUpdateCycle_secs())) {
            // Check modified times...
            Date dupModDate = qr.getLastDuplicateModifiedTime();
            ObjectId dupId = qr.getLastDuplicateId();
            if ((null != dupModDate) && (null != dupId)) {
              // Only refresh a known document once its last modification is older than the
              // configured update cycle.
              if (dupModDate.getTime() + source.getRssConfig().getUpdateCycle_secs() * 1000
                  < nNow) {
                DocumentPojo doc = buildDocument(entry, source, duplicateSources);
                // Entries counted beyond the real feed entries are the synthetic extra-URL
                // entries created above.
                if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
                  // (Use dummy TitleEx to create a "fake" full text block)
                  doc.setFullText(entry.getSource().getDescription());
                }
                doc.setUpdateId(dupId); // (set _id to document I'm going to overwrite)
                this.docsToUpdate.add(doc);
                if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
                  source.setReachedMaxDocs();
                  break; // (that's enough documents)
                }
              }
            }
          } // TESTED (duplicates we update instead of ignoring)
          if (!duplicate) {
            DocumentPojo doc = buildDocument(entry, source, duplicateSources);
            if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
              // (Use dummy TitleEx to create a "fake" full text block)
              doc.setFullText(entry.getSource().getDescription());
            }
            this.docsToAdd.add(doc);
            if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
              source.setReachedMaxDocs();
              break; // (that's enough documents)
            }
          }
          // Give up when every submitted document so far was counted as an HTTP error.
          if (this.nTmpDocsSubmitted > 20) { // (some arbitrary "significant" number)
            if (nTmpHttpErrors == this.nTmpDocsSubmitted) {
              break;
            }
          }
        } catch (Exception e) {
          // If an exception occurs log the error
          logger.error("Exception Message: " + e.getMessage(), e);
        }
      }
    } // (end loop over feeds in a syndicate)
  } catch (Exception e) {
    // If an exception occurs log the error
    logger.error("Exception Message: " + e.getMessage(), e);
  }
}
// Logs a new ATOM entry public static synchronized void addATOMEntry( String title, String link, String description, File atomFile, String context) { try { if (atomFile.exists()) { // System.out.println("ATOM file found!"); /** Namespace URI for content:encoded elements */ String CONTENT_NS = "http://www.w3.org/2005/Atom"; /** Parses RSS or Atom to instantiate a SyndFeed. */ SyndFeedInput input = new SyndFeedInput(); /** Transforms SyndFeed to RSS or Atom XML. */ SyndFeedOutput output = new SyndFeedOutput(); // Load the feed, regardless of RSS or Atom type SyndFeed feed = input.build(new XmlReader(atomFile)); // Set the output format of the feed feed.setFeedType("atom_1.0"); List<SyndEntry> items = feed.getEntries(); int numItems = items.size(); if (numItems > 9) { items.remove(0); feed.setEntries(items); } SyndEntry newItem = new SyndEntryImpl(); newItem.setTitle(title); newItem.setLink(link); newItem.setUri(link); SyndContent desc = new SyndContentImpl(); desc.setType("text/html"); desc.setValue(description); newItem.setDescription(desc); desc.setType("text/html"); newItem.setPublishedDate(new java.util.Date()); List<SyndCategory> categories = new ArrayList<SyndCategory>(); if (CommonConfiguration.getProperty("htmlTitle", context) != null) { SyndCategory category2 = new SyndCategoryImpl(); category2.setName(CommonConfiguration.getProperty("htmlTitle", context)); categories.add(category2); } newItem.setCategories(categories); if (CommonConfiguration.getProperty("htmlAuthor", context) != null) { newItem.setAuthor(CommonConfiguration.getProperty("htmlAuthor", context)); } items.add(newItem); feed.setEntries(items); feed.setPublishedDate(new java.util.Date()); FileWriter writer = new FileWriter(atomFile); output.output(feed, writer); writer.toString(); } } catch (IOException ioe) { System.out.println("ERROR: Could not find the ATOM file."); ioe.printStackTrace(); } catch (Exception e) { System.out.println("Unknown exception trying to add an entry to the ATOM 
file."); e.printStackTrace(); } }
/** * Returns a weather info about the current weather. * * @param feed * @return */ private WeatherInfo getWeatherInfo(SyndFeed feed) { WeatherInfo info = new WeatherInfo(); final List<Element> currentInfo = (List) feed.getForeignMarkup(); for (Element element : currentInfo) { String name = element.getName(); if (name.equalsIgnoreCase("location")) { info.setCity(element.getAttribute("city").getValue()); info.setCountry(element.getAttribute("country").getValue()); break; } } // root elements final List<Element> channelElements = (List<Element>) feed.getForeignMarkup(); for (Element element : channelElements) { // wind if (element.getName().equalsIgnoreCase("wind")) { info.setWind(Double.parseDouble(element.getAttributeValue("speed"))); } // sunrise and sunset else if (element.getName().equalsIgnoreCase("astronomy")) { try { SimpleDateFormat format = new SimpleDateFormat("hh:mm a", Locale.US); info.setSunrise(format.parse(element.getAttributeValue("sunrise"))); info.setSunset(format.parse(element.getAttributeValue("sunset"))); } catch (ParseException e) { LOG.error( "Error retrieving sunrise time for " + element.getAttributeValue("sunrise") + ": " + e.getMessage()); } } } final List<SyndEntry> items = (List) feed.getEntries(); for (SyndEntry entry : items) { final List<Element> forecastInfo = (List) entry.getForeignMarkup(); boolean todayApplied = false; for (Element element : forecastInfo) { // get forecast if (element.getName().equalsIgnoreCase("forecast")) { WeatherInfo forecast = new WeatherInfo(); String high = element.getAttributeValue("high"); String low = element.getAttributeValue("low"); String description = element.getAttributeValue("text"); int code = Integer.parseInt(element.getAttribute("code").getValue()); String date = element.getAttribute("date").getValue(); String imageUrl = convertTypeCodeImage(code); // e.g. 
Fri, 06 Sep 2013 11:49 am CEST Date formatted = null; try { formatted = new SimpleDateFormat("dd MMM yyyy", Locale.US).parse(date); } catch (ParseException e) { LOG.error("Error parsing date '" + date + ": " + e.getMessage()); } forecast.setDescription(description); forecast.setForecastDate(formatted); forecast.setHighTemp(high); forecast.setLowTemp(low); forecast.setImageUrl(WeatherBigResourceLoader.getResource(imageUrl)); forecast.setIconWhiteUrl(WeatherSmallWhiteResourceLoader.getResource(imageUrl)); forecast.setIconBlackUrl(WeatherSmallBlackResourceLoader.getResource(imageUrl)); info.getForecast().add(forecast); } // get location else if (element.getName().equalsIgnoreCase("lat")) { info.setLatitude(element.getText()); } else if (element.getName().equalsIgnoreCase("long")) { info.setLatitude(element.getText()); } // get local time else if (element.getName().equalsIgnoreCase("condition")) { String date = element.getAttributeValue("date"); try { Date formatted = new SimpleDateFormat("EEE, dd MMM yyyy hh:mm a", Locale.US).parse(date); info.setLocalTime(formatted); info.setTemp(element.getAttributeValue("temp")); info.setDescription(element.getAttributeValue("text")); int code = Integer.parseInt(element.getAttribute("code").getValue()); String imageUrl = convertTypeCodeImage(code); // LOG.info("Weather code for " + info.getCity() + " is " + code); info.setImageUrl(WeatherBigResourceLoader.getResource(imageUrl)); info.setIconWhiteUrl(WeatherSmallWhiteResourceLoader.getResource(imageUrl)); } catch (ParseException e) { LOG.error("Error retrieving local time for " + date + ": " + e.getMessage()); } } } } // apply data of first forecast WeatherInfo todayForecast = info.getForecast().get(0); info.setForecastDate(todayForecast.getForecastDate()); info.setHighTemp(todayForecast.getHighTemp()); info.setLowTemp(todayForecast.getLowTemp()); return info; }