Esempio n. 1
0
  //	@Transactional
  /**
   * Imports every entry of the given feed by delegating to {@code importEntry}, running each
   * import as a {@code RunAsOperation} with the "admin" role.
   *
   * <p>If {@code types} contains the kiwi:FacebookPost type, Facebook activity stream mode is
   * turned on: entries whose author is not equal (ignoring case) to the user's
   * "firstName lastName" are skipped, as are entries without an author.
   *
   * @param feed the parsed feed whose entries are imported
   * @param types types handed to each imported entry; {@code null} is treated as an empty set
   * @param tags tags handed to each imported entry; {@code null} is treated as an empty set
   * @param user the user on whose behalf the entries are imported
   * @param output collection handed to {@code importEntry} for every imported entry
   * @return the total number of entries in the feed, including entries skipped in Facebook mode
   */
  public int importData(
      final SyndFeed feed,
      Set<KiWiUriResource> types,
      Set<ContentItem> tags,
      final User user,
      final Collection<ContentItem> output) {
    log.info(
        "importing entries from #0 feed '#1' found at '#2'",
        feed.getFeedType(),
        feed.getTitle(),
        feed.getUri());

    final Set<KiWiUriResource> effectiveTypes =
        (types != null) ? types : new HashSet<KiWiUriResource>();
    final Set<ContentItem> effectiveTags = (tags != null) ? tags : new HashSet<ContentItem>();

    // a hack for importing facebook activity streams: if the type is kiwi:FacebookPost,
    // turn facebook activity stream mode on; in this mode, we will skip all entries where
    // the remote author name and local user name are not identical
    boolean facebookImport = false;
    final String facebookPostType = Constants.NS_KIWI_CORE + "FacebookPost";
    for (KiWiUriResource r : effectiveTypes) {
      if (facebookPostType.equals(r.getUri())) {
        facebookImport = true;
        break;
      }
    }

    // Only needed (and only dereferences user) in facebook mode.
    final String localUserName =
        facebookImport ? user.getFirstName() + " " + user.getLastName() : null;

    for (final SyndEntry entry : (List<SyndEntry>) feed.getEntries()) {

      // facebook hack ... (see above); equalsIgnoreCase(null) is false, so an entry without an
      // author is skipped instead of raising a NullPointerException
      if (facebookImport && !localUserName.equalsIgnoreCase(entry.getAuthor())) {
        log.info("Facebook import: skipping friend post with title '#0'", entry.getTitle());
        continue;
      }

      new RunAsOperation() {
        @Override
        public void execute() {
          importEntry(feed, entry, effectiveTypes, effectiveTags, user, output);
        }
      }.addRole("admin").run();
    }

    //		entityManager.flush();

    log.info("#0 content items have been imported from RSS/Atom feed", feed.getEntries().size());

    return feed.getEntries().size();
  }
Esempio n. 2
0
 /**
  * Converts the feed entries that precede the remembered {@code last} entry into notes.
  *
  * <p>Iteration stops at the first {@code null} entry or at the entry equal to {@code last}.
  * Afterwards {@code last} is set to the first entry of the feed, unless the feed is empty.
  *
  * @return one note per collected entry, with the entry title as title and the entry link as
  *     content; never {@code null}
  */
 public List<Note> getEntryData() {
   List<Note> res = new ArrayList<>();
   List<SyndEntryImpl> entries = (List<SyndEntryImpl>) feedContent.getEntries();
   for (SyndEntryImpl entry : entries) {
     if (entry == null || entry.equals(last)) {
       break;
     }
     Note addition = new Note();
     addition.setTitle(entry.getTitle());
     addition.setContent(entry.getLink());
     res.add(addition);
   }
   // An empty feed has no first entry: keep the previous marker instead of throwing
   // IndexOutOfBoundsException.
   if (!entries.isEmpty()) {
     last = entries.get(0);
   }
   return res;
 }
Esempio n. 3
0
  /**
   * Fetches the feed at {@code url} and converts each entry into an {@link RssUrlBean}.
   *
   * <p>Any exception while fetching or parsing is printed and the beans collected so far are
   * returned. Missing published dates and descriptions are left unset on the bean.
   *
   * @param rssCompo_id identifier stored on every created bean
   * @param url address of the RSS/Atom feed; {@code null} or empty yields an empty list
   * @return the beans built from the feed entries; never {@code null}
   */
  public static List<RssUrlBean> getRssUrlBeanListFromPage(int rssCompo_id, String url) {
    List<RssUrlBean> rubList = new ArrayList<RssUrlBean>();
    if (url == null || url.equals("")) {
      return rubList;
    }
    XmlReader reader = null;
    try {
      URL feedUrl = new URL(url);
      // SyndFeedInput converts the XML content read from the remote source into a feed instance.
      SyndFeedInput input = new SyndFeedInput();
      // ROME exposes both RSS and Atom feeds through the SyndFeed interface.
      reader = new XmlReader(feedUrl);
      SyndFeed syndFeed = input.build(reader);

      List<SyndEntry> entryList = syndFeed.getEntries();
      for (SyndEntry entry : entryList) {
        RssUrlBean rub = new RssUrlBean();
        rub.setRssCompo_id(rssCompo_id);
        rub.setTitle(entry.getTitle());
        rub.setLink(entry.getUri());
        // Published date and description are optional in feeds; a missing one must not abort
        // the conversion of the remaining entries.
        if (entry.getPublishedDate() != null) {
          rub.setPublishedDate(
              CommonUtil.getStandardDate(entry.getPublishedDate().toLocaleString()));
        }
        if (entry.getDescription() != null) {
          rub.setDescription(entry.getDescription().getValue());
        }
        if (entry.getUpdatedDate() != null) {
          rub.setUpdatedDate(CommonUtil.getStandardDate(entry.getUpdatedDate().toLocaleString()));
        }
        rub.setAuthors(entry.getAuthor());
        rubList.add(rub);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (Exception ignored) {
          // nothing useful can be done if closing the feed reader fails
        }
      }
    }
    return rubList;
  }
Esempio n. 4
0
  /**
   * Builds the sample feed: French-language feed metadata, a single author, and one entry with a
   * plain-text description.
   *
   * @return the populated feed
   */
  private SyndFeed createFeed() {
    // Author shared by the feed and its entry.
    SyndPerson author = new SyndPersonImpl();
    author.setName("Gildas Cuisinier");
    author.setEmail("*****@*****.**");

    // Feed-level metadata.
    SyndFeed sampleFeed = new SyndFeedImpl();
    sampleFeed.setTitle("RSS Veille Techno");
    sampleFeed.setAuthors(Collections.singletonList(author));
    sampleFeed.setDescription("RSS d'exemple !");
    sampleFeed.setLink("http://svn.cyg.be/");
    sampleFeed.setPublishedDate(new Date());
    sampleFeed.setLanguage("fr");

    // Description of the single entry.
    SyndContent summary = new SyndContentImpl();
    summary.setValue("Ajout d'un projet Rome sur le SVN afin de voir comment creer un RSS");
    summary.setType("text");

    SyndEntry romeEntry = new SyndEntryImpl();
    romeEntry.setTitle("Ajout du projet Rome sur le SVN");
    romeEntry.setLink("https://rome.dev.java.net/");
    romeEntry.setDescription(summary);
    romeEntry.setAuthors(Collections.singletonList(author));

    sampleFeed.getEntries().add(romeEntry);
    return sampleFeed;
  }
Esempio n. 5
0
  /**
   * Reads the feed at {@code url} and returns its entries.
   *
   * <p>Failures are logged (with their cause) and result in an empty list.
   *
   * @param url location of the feed
   * @param catDetail not used by this method
   * @return the entries of the feed, or an empty list if the feed could not be read
   */
  @SuppressWarnings("unchecked")
  public List<FiddleSyndEntry> getFeeds(URL url, CategoryDetail catDetail) {
    List<FiddleSyndEntry> feeds = new ArrayList<FiddleSyndEntry>();
    XmlReader reader = null;
    SyndFeed feed = null;
    try {

      // NOTE(review): proxy host and credentials are hard-coded and written to JVM-wide system
      // properties on every call. They should come from external configuration; kept here
      // unchanged because removing them could break network access.
      System.setProperty("http.proxyHost", "proxy-tw.bcbsmn.com");
      System.setProperty("http.proxyPort", "9119");
      System.setProperty("http.proxyUser", "a0c5zz");
      System.setProperty("http.proxyPassword", "test0810");

      reader = new XmlReader(url);

      feed = new SyndFeedInput().build(reader);
      feeds = feed.getEntries();

    } catch (IOException e) {
      log.error("Exception reading url", e);
    } catch (IllegalArgumentException e) {
      log.error("Exception reading url", e);
    } catch (FeedException e) {
      log.error("Exception reading feed", e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          log.error("Unable to close Feed XmlReader", e);
        }
      }
    }

    return feeds;
  }
  /**
   * Tells whether the feed has an entry with the given title whose description contains every
   * one of the given body portions.
   *
   * @param feed the feed to search
   * @param title exact title the entry must have; entries without a title never match
   * @param bodyPortions substrings that must all occur in the entry description, or
   *     {@code null} to match on the title alone
   * @return {@code true} if a matching entry exists
   */
  @SuppressWarnings("unchecked")
  private boolean findFeedEntry(SyndFeed feed, String title, String[] bodyPortions) {
    List<SyndEntry> entries = feed.getEntries();

    for (SyndEntry entry : entries) {
      String entryTitle = entry.getTitle();
      if (entryTitle == null || !entryTitle.equals(title)) {
        continue;
      }
      if (bodyPortions == null) {
        return true;
      }

      // An entry without a description (or description value) cannot contain any portion.
      SyndContent description = entry.getDescription();
      String value = (description == null) ? null : description.getValue();

      boolean missingPortion = false;
      for (String portion : bodyPortions) {
        if (value == null || !value.contains(portion)) {
          missingPortion = true;
          break;
        }
      }

      if (!missingPortion) {
        return true;
      }
    }

    return false;
  }
Esempio n. 7
0
  /**
   * Removes from the RSS feed stored on {@code node} the items that belong to the node identified
   * by {@code itemId}, then saves the feed with a refreshed title and description.
   *
   * <p>For an "exo:topic" node the topic's "exo:post" list is removed. For an "exo:forum" node
   * the posts of each of its topics are removed, followed by the topics themselves.
   *
   * @param itemId identifier of the node whose items are removed
   * @param node node holding the RSS data
   * @param description description written to the saved feed
   * @throws Exception if reading, resolving or saving fails
   */
  @SuppressWarnings("unchecked")
  protected void removeRSSItem(String itemId, Node node, String description) throws Exception {
    RSS rss = new RSS(node);
    SyndFeed syndFeed = rss.read();
    List<SyndEntry> feedEntries = syndFeed.getEntries();

    Node target = getNodeById(itemId);
    if (target.isNodeType("exo:topic")) {
      removeItem(feedEntries, getListRemove(target, "exo:post"));
    } else if (target.isNodeType("exo:forum")) {
      List<Node> topics = getListRemove(target, "exo:topic");
      for (Node topic : topics) {
        removeItem(feedEntries, getListRemove(topic, "exo:post"));
      }
      removeItem(feedEntries, topics);
    }

    syndFeed.setEntries(feedEntries);
    syndFeed.setTitle(new PropertyReader(node).string("exo:name", "Root"));
    syndFeed.setDescription(description);
    rss.saveFeed(syndFeed, FORUM_RSS_TYPE);
  }
Esempio n. 8
0
 /**
  * Reads the feed at {@code gmailFeedUrl} and wraps every entry in an {@code RssGmailMessage}.
  *
  * @return the messages built from the feed entries; never {@code null}
  * @throws GmailException if connecting to or parsing the feed fails
  */
 @Override
 public List<GmailMessage> getUnreadMessages() {
   final List<GmailMessage> messages = new ArrayList<GmailMessage>();
   final Thread currentThread = Thread.currentThread();
   final ClassLoader previousLoader = currentThread.getContextClassLoader();
   XmlReader reader = null;
   try {
     // for ROME properties loader
     currentThread.setContextClassLoader(getClass().getClassLoader());
     final HttpGmailConnection c = getGmailConnection();
     c.setUrl(gmailFeedUrl);
     final URLConnection con = c.openConnection();
     final SyndFeedInput feedInput = new SyndFeedInput();
     reader = new XmlReader(con);
     final SyndFeed gmail = feedInput.build(reader);
     for (final Object entry : gmail.getEntries()) {
       if (entry instanceof SyndEntry) {
         messages.add(new RssGmailMessage((SyndEntry) entry));
       }
     }
     if (log.isDebugEnabled()) {
       log.debug("Got " + messages.size() + " new messages.");
     }
   } catch (final Exception e) {
     throw new GmailException("Failed getting unread messages", e);
   } finally {
     // Do not leak the swapped class loader to whatever runs next on this thread.
     currentThread.setContextClassLoader(previousLoader);
     if (reader != null) {
       try {
         reader.close();
       } catch (final Exception ignored) {
         // closing is best effort; the feed has already been read or has already failed
       }
     }
   }
   return messages;
 }
Esempio n. 9
0
 /**
  * Inserts the given entry at the head of the feed returned by {@code read()}.
  *
  * @param newEntry the entry to put first; nothing is inserted when it is {@code null}
  * @return the feed obtained from {@code read()}, with its entry list set again
  */
 @SuppressWarnings("unchecked")
 public SyndFeed addEntry(SyndEntry newEntry) {
   SyndFeed current = read();
   List<SyndEntry> items = current.getEntries();
   if (null != newEntry) {
     items.add(0, newEntry);
   }
   current.setEntries(items);
   return current;
 }
  /**
   * Loads every feed named in {@code urlArray} and merges its entries into the given
   * collections.
   *
   * <p>Feeds on the same host as {@code requestURL} are fetched through the local feed fetcher,
   * all others through the cache. For each entry the owning feed is recorded in
   * {@code feedMapping}, the first image (if any) in {@code imgMap} and the description without
   * that image in {@code descriptionNoImage}; both maps are keyed by {@code entry.toString()}.
   * Entries already present in {@code entries} are not added a second time.
   *
   * @param request the current decorator request
   * @param entries receives the entries of all feeds
   * @param feedMapping receives the feed each entry came from
   * @param imgMap receives the enclosed content of the image removed from each description
   * @param descriptionNoImage receives each description after image removal, or {@code null}
   *     when the entry has no description
   * @param requestURL URL of the current request, used to resolve relative feed URLs
   * @param urlArray feed URLs, possibly relative and padded with whitespace
   * @throws Exception if resolving a feed URL or processing a description fails
   * @throws RuntimeException if a feed cannot be read or loaded
   */
  void parseFeeds(
      DecoratorRequest request,
      List<SyndEntry> entries,
      Map<SyndEntry, SyndFeed> feedMapping,
      Map<String, String> imgMap,
      Map<String, String> descriptionNoImage,
      URL requestURL,
      String[] urlArray)
      throws Exception {

    for (String url : urlArray) {
      url = url.trim();
      SyndFeed tmpFeed = null;
      URL feedURL = requestURL.relativeURL(url);
      URL baseURL;
      try {
        if (feedURL.getHost().equals(requestURL.getHost())) {
          baseURL = new URL(requestURL);
          retrieveLocalResource(feedURL);
          tmpFeed = this.localFeedFetcher.getFeed(feedURL, request);
        } else {
          baseURL = new URL(feedURL);
          tmpFeed = this.cache.get(url);
        }
      } catch (Exception e) {
        String m = e.getMessage();
        if (m == null) {
          m = e.getClass().getName();
        }
        // Keep the original exception as cause so the stack trace is not lost.
        throw new RuntimeException("Could not read feed url " + url + ": " + m, e);
      }
      if (tmpFeed == null) {
        throw new RuntimeException("Unable to load feed: " + url);
      }
      @SuppressWarnings("unchecked")
      List<SyndEntry> tmpEntries = tmpFeed.getEntries();
      // Work on a copy so entries already collected from earlier feeds can be dropped.
      List<SyndEntry> filteredEntries = new ArrayList<SyndEntry>(tmpEntries);
      boolean filter = !parameterHasValue(PARAMETER_ALLOW_MARKUP, "true", request);
      for (SyndEntry entry : tmpEntries) {
        if (entries.contains(entry)) {
          filteredEntries.remove(entry);
        }
        feedMapping.put(entry, tmpFeed);
        HtmlFragment description = getDescription(entry, baseURL, requestURL, filter);
        if (description == null) {
          descriptionNoImage.put(entry.toString(), null);
          continue;
        }
        HtmlElement image = removeImage(description);
        if (image != null) {
          imgMap.put(entry.toString(), image.getEnclosedContent());
        }
        descriptionNoImage.put(entry.toString(), description.getStringRepresentation());
      }
      entries.addAll(filteredEntries);
    }
  }
  /**
   * Prints the title of every entry of the feed to standard output.
   *
   * @param feed the feed whose entry titles are printed
   * @throws Exception declared by the overridden method
   */
  @Override
  public void parse(SyndFeed feed) throws Exception {
    for (Object item : feed.getEntries()) {
      SyndEntry entry = (SyndEntry) item;
      System.out.println("Title :  " + entry.getTitle());
    }
  }
Esempio n. 12
0
  /**
   * Test of parse method, of class com.totsp.xml.syndication.itunes.ITunesParser.
   *
   * <p>Checks the feed-level iTunes information of leshow.xml, then prints the entry-level
   * iTunes information of leshow.xml and the entry durations of rsr.xml.
   */
  public void testParse() throws Exception {
    File feed = new File(this.getTestFile("/test/xml/leshow.xml"));
    SyndFeedInput input = new SyndFeedInput();
    // File.toURL() is deprecated because it does not escape illegal URL characters
    // (e.g. spaces in the path); go through toURI() instead.
    SyndFeed syndfeed = input.build(new XmlReader(feed.toURI().toURL()));

    Module module = syndfeed.getModule(AbstractITunesObject.URI);
    FeedInformationImpl feedInfo = (FeedInformationImpl) module;

    assertEquals("owner", "Harry Shearer", feedInfo.getOwnerName());
    assertEquals("email", "", feedInfo.getOwnerEmailAddress());
    assertEquals(
        "image",
        "http://a1.phobos.apple.com/Music/y2005/m06/d26/h21/mcdrrifv.jpg",
        feedInfo.getImage().toExternalForm());
    assertEquals("category", "Comedy", ((Category) feedInfo.getCategories().get(0)).getName());
    assertEquals(
        "summary",
        "A weekly, hour-long romp through the worlds of media, politics, sports and show business, leavened with an eclectic mix of mysterious music, hosted by Harry Shearer.",
        feedInfo.getSummary());

    for (Object item : syndfeed.getEntries()) {
      SyndEntry entry = (SyndEntry) item;
      EntryInformationImpl entryInfo =
          (EntryInformationImpl) entry.getModule(AbstractITunesObject.URI);
      System.out.println(entryInfo);
    }

    feed = new File(this.getTestFile("/test/xml/rsr.xml"));
    syndfeed = input.build(new XmlReader(feed.toURI().toURL()));

    for (Object item : syndfeed.getEntries()) {
      SyndEntry entry = (SyndEntry) item;
      EntryInformationImpl entryInfo =
          (EntryInformationImpl) entry.getModule(AbstractITunesObject.URI);
      System.out.println(entryInfo.getDuration());
    }
  }
Esempio n. 13
0
  /** Parses the bundled Atom 1.0 sample and checks its title, link and number of entries. */
  public void testReadAtom1() throws FeedException {
    InputSource atomSource =
        new InputSource(getClass().getResourceAsStream("/be/hikage/xml/rome/atom_1.0.xml"));

    SyndFeed parsed = new SyndFeedInput().build(atomSource);

    assertEquals("RSS Veille Techno", parsed.getTitle());
    assertEquals("http://svn.cyg.be/", parsed.getLink());
    assertEquals(1, parsed.getEntries().size());
  }
Esempio n. 14
0
        /**
         * Shows the current feed item together with the titles of the two items before it, then
         * advances {@code itemIndex}, wrapping around at the end of the feed.
         *
         * <p>Does nothing when there is no feed, when the feed is empty, or when the display is
         * paused and no single step was requested.
         */
        public void run() {
          if (feed == null || (!step && paused)) {
            return;
          }

          final int count = feed.getEntries().size();
          if (count == 0) {
            return;
          }
          if (itemIndex < 0 || itemIndex >= count) {
            // the feed may have shrunk since the last tick
            itemIndex = 0;
          }

          // Wrap with a modulo so feeds with fewer than three entries do not yield negative
          // indices.
          int previous = (((itemIndex - 1) % count) + count) % count;
          int prevprev = (((itemIndex - 2) % count) + count) % count;
          form.now2.setText(trim(((SyndEntry) feed.getEntries().get(prevprev)).getTitle()));
          form.now1.setText(trim(((SyndEntry) feed.getEntries().get(previous)).getTitle()));
          form.now.setText(trim(((SyndEntry) feed.getEntries().get(itemIndex)).getTitle()));
          try {
            Document dom =
                jTidyParse(
                    new ByteArrayInputStream(
                        ((SyndEntry) feed.getEntries().get(itemIndex))
                            .getDescription()
                            .getValue()
                            .getBytes()),
                    new ByteArrayOutputStream());
            form.display.setDocument(dom);
          } catch (Exception e) {
            e.printStackTrace();
          }
          itemIndex++;
          if (itemIndex >= count) {
            itemIndex = 0;
          }
        }
  /**
   * Asserts that the feed link and the link of every feed entry start with the base Nexus URL.
   *
   * @param feed the feed whose links are checked
   */
  @SuppressWarnings("unchecked")
  private void validateLinksInFeeds(SyndFeed feed) {
    Assert.assertTrue("Feed link is wrong", feed.getLink().startsWith(this.getBaseNexusUrl()));

    for (SyndEntry item : (List<SyndEntry>) feed.getEntries()) {
      String itemLink = item.getLink();
      Assert.assertNotNull("Feed item link is empty.", itemLink);
      Assert.assertTrue(
          "Feed item link is wrong, is: " + itemLink,
          itemLink.startsWith(this.getBaseNexusUrl()));
    }
  }
Esempio n. 16
0
 /**
  * Removes the first entry with the given URI from the feed returned by {@code read()}.
  *
  * @param uri URI of the entry to remove; {@code null} or blank removes nothing
  * @return the feed obtained from {@code read()}, with its entry list set again
  */
 @SuppressWarnings("unchecked")
 public SyndFeed removeEntry(String uri) {
   SyndFeed feed = read();
   List<SyndEntry> entries = feed.getEntries();
   if (uri != null && uri.trim().length() > 0) {
     for (SyndEntry syndEntry : entries) {
       // uri is known to be non-null here; comparing this way round tolerates entries that
       // have no URI.
       if (uri.equals(syndEntry.getUri())) {
         // Removing during for-each is safe only because the loop is left immediately.
         entries.remove(syndEntry);
         break;
       }
     }
   }
   feed.setEntries(entries);
   return feed;
 }
Esempio n. 17
0
  /**
   * Retrieves the feed named by the single command-line argument twice through a caching
   * fetcher and reports progress on standard error; prints usage help when the argument is
   * missing or retrieval fails.
   *
   * @param args exactly one element: the URL of the feed to read
   */
  public static void main(String[] args) {
    boolean succeeded = false;
    if (args.length == 1) {
      try {
        URL target = new URL(args[0]);
        FeedFetcherCache cache = HashMapFeedInfoCache.getInstance();
        FeedFetcher feedFetcher = new HttpURLFeedFetcher(cache);

        FetcherEventListenerImpl eventListener = new FetcherEventListenerImpl();
        feedFetcher.addFetcherEventListener(eventListener);

        System.err.println("Retrieving feed " + target);
        // First retrieval: expect a Feed Polled event followed by a Feed Retrieved event
        // (assuming the feed is valid).
        SyndFeed retrieved = feedFetcher.retrieveFeed(target);

        System.err.println(target + " retrieved");
        String summary =
            target
                + " has a title: "
                + retrieved.getTitle()
                + " and contains "
                + retrieved.getEntries().size()
                + " entries.";
        System.err.println(summary);

        // Second retrieval: if the feed is unmodified and the server supports conditional
        // gets, a "Feed Unchanged" event follows the Feed Polled event.
        System.err.println("Polling " + target + " again to test conditional get support.");
        feedFetcher.retrieveFeed(target);
        System.err.println(
            "If a \"Feed Unchanged\" event fired then the server supports conditional gets.");

        succeeded = true;
      } catch (Exception ex) {
        System.out.println("ERROR: " + ex.getMessage());
        ex.printStackTrace();
      }
    }

    if (succeeded) {
      return;
    }
    System.out.println();
    System.out.println("FeedReader reads and prints any RSS/Atom feed type.");
    System.out.println("The first parameter must be the URL of the feed to read.");
    System.out.println();
  }
Esempio n. 18
0
  /**
   * Shows the description of the clicked feed item in the details pane on a single click.
   *
   * <p>Clicks that do not hit a row, or that happen while no channel is selected, are ignored.
   * Double clicks are reserved for opening the item in a browser and currently do nothing.
   *
   * @param e the mouse event; its point is resolved against {@code appWindow.feedItems}
   */
  public void mouseClicked(MouseEvent e) {
    int row = appWindow.feedItems.rowAtPoint(e.getPoint());
    if (row < 0) {
      // presumably feedItems is a JTable, whose rowAtPoint yields -1 outside any row
      return;
    }

    ListFeed channel = (ListFeed) appWindow.channelsList.getSelectedValue();
    if (channel == null || channel.feed == null) {
      return;
    }
    SyndFeed feed = channel.feed;
    if (row >= feed.getEntries().size()) {
      // the table may be showing more rows than the selected feed has entries
      return;
    }
    SyndEntry item = (SyndEntry) feed.getEntries().get(row);

    if (e.getClickCount() == 2) {
      // open in browser window
    } else if (e.getClickCount() == 1) {
      String details = (item.getDescription() != null) ? item.getDescription().getValue() : "";
      appWindow.itemDetails.setText(details);
    }
  }
Esempio n. 19
0
  /**
   * Parses the feed stored at c:\temp\google.xml and logs its authors, categories and, for each
   * entry, the title, description, URI and dates at debug level.
   *
   * @throws Exception if the file cannot be read or parsed
   */
  @SuppressWarnings("unchecked")
  public void test() throws Exception {
    final SyndFeedInput feedInput = new SyndFeedInput(true);
    final SyndFeed parsedFeed = feedInput.build(new File("c:\\temp\\google.xml"));

    logger.debug("Successfully parsed the RSS feed");
    logger.debug("Author      = " + parsedFeed.getAuthors());
    logger.debug("Categories  = " + parsedFeed.getCategories());

    for (final SyndEntry item : (List<SyndEntry>) parsedFeed.getEntries()) {
      final String plainTitle = StringEscapeUtils.unescapeHtml(item.getTitle());
      logger.debug("Title = " + plainTitle);
      final String plainDescription =
          StringEscapeUtils.unescapeHtml(item.getDescription().getValue());
      logger.debug("Description = " + plainDescription);
      logger.debug(item.getUri());
      logger.debug("Updated date = " + item.getUpdatedDate());
      logger.debug("Published date = " + item.getPublishedDate());
      logger.debug("====================================================");
    }
  }
Esempio n. 20
0
  /**
   * Downloads the Reuters business news feed and prints the feed title followed by the title of
   * every entry.
   *
   * @param args not used
   * @throws Exception if the feed cannot be fetched or parsed
   */
  public static void main(String[] args) throws Exception {

    URL url = new URL("http://feeds.reuters.com/reuters/businessNews");
    XmlReader reader = null;

    try {

      reader = new XmlReader(url);
      SyndFeed feed = new SyndFeedInput().build(reader);
      // The label announces the title, so print the title (the original printed the author).
      System.out.println("Feed Title: " + feed.getTitle());

      for (@SuppressWarnings("unchecked") Iterator<SyndEntry> i = feed.getEntries().iterator();
          i.hasNext(); ) {
        SyndEntry entry = i.next();
        System.out.println(entry.getTitle());
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
  /**
   * Creates a {@code Channel} of the given type from the feed: cloned modules, encoding, title,
   * link and description, plus the image and items when the feed has them.
   *
   * @param type feed type handed to the {@code Channel} constructor
   * @param syndFeed the feed to convert
   * @return the populated channel
   */
  protected WireFeed createRealFeed(String type, SyndFeed syndFeed) {
    Channel rssChannel = new Channel(type);
    rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));

    rssChannel.setEncoding(syndFeed.getEncoding());

    rssChannel.setTitle(syndFeed.getTitle());
    rssChannel.setLink(syndFeed.getLink());
    rssChannel.setDescription(syndFeed.getDescription());

    // Image and items are optional on the source feed.
    SyndImage feedImage = syndFeed.getImage();
    if (null != feedImage) {
      rssChannel.setImage(createRSSImage(feedImage));
    }

    List feedEntries = syndFeed.getEntries();
    if (null != feedEntries) {
      rssChannel.setItems(createRSSItems(feedEntries));
    }
    return rssChannel;
  }
Esempio n. 22
0
  /**
   * Reads every feed in {@code rssUrlList} and converts its entries into news items.
   *
   * <p>The first feed that fails is logged and stops the import; the items collected up to that
   * point are returned.
   *
   * @return the collected news items; never {@code null}
   * @see com.elia.rssexample.data.NewsDao
   */
  @SuppressWarnings("unchecked")
  public List<NewsItem> getNewsList() {

    // TODO: exception handling

    log.trace("Enter getNewsList().");

    List<NewsItem> newsList = new ArrayList<NewsItem>();

    try {
      for (String rssUrl : rssUrlList) {

        // One reader per feed, closed before the next feed is opened, so no reader leaks and
        // there is never a null reader to close.
        XmlReader reader = new XmlReader(new URL(rssUrl));
        try {
          SyndFeed feed = new SyndFeedInput().build(reader);

          for (SyndEntry entry : (List<SyndEntry>) feed.getEntries()) {

            NewsItem item = new NewsItem();

            item.setTitle(entry.getTitle());
            // the description is optional in feeds
            item.setDescription(
                entry.getDescription() != null ? entry.getDescription().getValue() : null);
            item.setLink(entry.getLink());
            item.setPublished(entry.getPublishedDate());

            newsList.add(item);
          }
        } finally {
          try {
            reader.close();
          } catch (IOException e) {
            log.warn("Unable to close xml reader.", e);
          }
        }
      }
    } catch (Exception e) {
      log.error("Error reading feed.", e);
    }

    return newsList;
  }
Esempio n. 23
0
  /**
   * Downloads the feed at the given URL and maps every entry to an {@code Article}.
   *
   * @param url location of the feed
   * @return the mapped articles, in feed order
   * @throws RuntimeException if the feed cannot be downloaded or parsed
   */
  @Override
  public List<Article> read(URL url) {
    // try-with-resources closes the connection stream even when parsing fails.
    try (java.io.InputStream stream = url.openStream()) {
      List<Article> articles = new ArrayList<>();

      SyndFeedInput input = new SyndFeedInput();
      // Hand the raw bytes to the XML parser so it honours the encoding declared in the
      // document; an InputStreamReader without a charset would decode with the platform
      // default instead.
      SyndFeed syndFeed = input.build(new org.xml.sax.InputSource(stream));

      for (Object obj : syndFeed.getEntries()) {
        if (!(obj instanceof SyndEntry)) {
          continue;
        }
        SyndEntry syndEntry = (SyndEntry) obj;
        articles.add(mapArticle(syndEntry));
      }

      return articles;

    } catch (FeedException | IOException e) {
      throw new RuntimeException("피드를 읽을 수 업습니다. " + e.getMessage(), e);
    }
  }
Esempio n. 24
0
  /**
   * Announces the first feed post that has not been recorded yet.
   *
   * <p>The post's GUID is saved through {@code dao} so it is not announced again. The result
   * holds the announcement line followed by the text of the first paragraph of the post's
   * description (or the whole description text when it has no paragraph).
   *
   * @return the announcement lines, a single "nothing new" line when every post is already
   *     known, or an empty list when fetching failed
   */
  @SuppressWarnings("unchecked")
  @Trigger("!buzz")
  @Help("Fetches one of the latest posts from jeanmarcmorandini.com")
  public List<String> getLatestBuzz() {
    List<String> toReturn = new ArrayList<String>();
    try {
      URL url = new URL("http://www.jeanmarcmorandini.com/rss.php");
      SyndFeedInput input = new SyndFeedInput();
      SyndFeed rss = input.build(new XmlReader(url));

      Iterator<SyndEntry> it = rss.getEntries().iterator();
      String message = null;
      while (it.hasNext()) {
        SyndEntry item = it.next();
        String guid = item.getUri();
        RSSFeed buzz = dao.findByGUID(guid);
        if (buzz == null) {
          buzz = new RSSFeed();
          buzz.setGuid(guid);
          dao.save(buzz);
          String urlBitly = utilsService.bitly(item.getLink());

          // The description is optional and need not contain a <p> element; fall back to the
          // full text instead of failing after the post was already recorded as seen.
          String content = "";
          if (item.getDescription() != null && item.getDescription().getValue() != null) {
            org.jsoup.nodes.Document parsed = Jsoup.parse(item.getDescription().getValue());
            org.jsoup.select.Elements paragraphs = parsed.select("p");
            content = paragraphs.isEmpty() ? parsed.text() : paragraphs.get(0).text();
          }

          message = IRCUtils.bold("EXCLU!") + " " + item.getTitle() + " - " + urlBitly;
          toReturn.add(message);
          toReturn.add(content);
          break;
        }
      }

      if (message == null) {
        toReturn.add("Pas d'exclus pour le moment.");
      }

    } catch (Exception e) {
      LOG.handle(e);
    }

    return toReturn;
  }
Esempio n. 25
0
  /**
   * Downloads the feed at the given address and returns a copy of its entries.
   *
   * <p>On any failure the exception message is printed to standard output and an empty list is
   * returned.
   *
   * @param link address of the feed
   * @return the entries of the feed; never {@code null}
   */
  @SuppressWarnings("unchecked")
  public static ArrayList<SyndEntry> getSyndEntrys(String link) {
    ArrayList<SyndEntry> listOfSyndEntrys = new ArrayList<SyndEntry>();
    HttpURLConnection httpcon = null;
    XmlReader reader = null;
    try {
      URL url = new URL(link);
      httpcon = (HttpURLConnection) url.openConnection();
      // Reading the feed
      SyndFeedInput input = new SyndFeedInput();
      reader = new XmlReader(httpcon);
      SyndFeed feed = input.build(reader);
      listOfSyndEntrys.addAll((List<SyndEntry>) feed.getEntries());
    } catch (Exception e) {
      System.out.println(e.getMessage());
    } finally {
      // Release the stream and the connection whether or not parsing succeeded.
      if (reader != null) {
        try {
          reader.close();
        } catch (Exception ignored) {
          // closing is best effort
        }
      }
      if (httpcon != null) {
        httpcon.disconnect();
      }
    }
    return listOfSyndEntrys;
  }
Esempio n. 26
0
  /**
   * Reads the temperature feed and parses the title of the first entry (after sorting with
   * {@code SyndEntryComparator}) as a number.
   *
   * @return the parsed temperature, or -1 when the feed is unreachable, empty, or its first
   *     title is missing or not a number
   */
  private double getTemperatureFromRSS() {
    double result = -1;
    XmlReader reader = null;
    // The request carries the User-Agent configured in BotProperties
    // (the original note says this is meant to get past User-Agent sniffing).
    try {
      tempFeedURL = new URL("http://www.hive13.org/isOpen/RSS.php?temp=0");
      URLConnection conn = tempFeedURL.openConnection();
      conn.setRequestProperty("User-Agent", BotProperties.getInstance().getUserAgentString());

      // Create a feed off of the URL and get the latest news.
      SyndFeedInput input = new SyndFeedInput();
      reader = new XmlReader(conn);
      SyndFeed feed = input.build(reader);

      // Get the feed's list of entries
      @SuppressWarnings("unchecked")
      List<SyndEntry> entryList = feed.getEntries();
      Collections.sort(entryList, new SyndEntryComparator());

      if (entryList.size() > 0) {
        // We have found an RSS feed at least, is it formatted correctly?
        // A missing title would make parseDouble throw an uncaught NullPointerException.
        String title = entryList.get(0).getTitle();
        if (title != null) {
          result = Double.parseDouble(title);
        }
      }
      // (an empty list means no temperature entry was published; keep -1)
    } catch (IOException e) {
      e.printStackTrace();
    } catch (IllegalArgumentException e) {
      // also covers NumberFormatException from a malformed title
      e.printStackTrace();
    } catch (FeedException e) {
      e.printStackTrace();
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    return result;
  }
Esempio n. 27
0
  /**
   * Generate an ebook from an RSS DOM Document.
   *
   * <p>Feed title, author, published date, description and copyright become book metadata. Every
   * entry accepted by {@code matchesKeyword} becomes one section made of a heading, the entry
   * description and the first entry content.
   *
   * @param url The URL from where the Document was fetched (used only to set the author metadata
   *     when the feed names no author)
   * @param doc The DOM Document of the feed.
   * @param keywords keywords handed to {@code matchesKeyword} to select the entries
   * @return An ebook.
   * @throws IllegalArgumentException
   * @throws FeedException
   * @throws IOException
   */
  private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords)
      throws IllegalArgumentException, FeedException, IOException {
    Book book = new Book();
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed = input.build(doc);

    System.out.println(feed);

    // Set the title
    book.getMetadata().addTitle(feed.getTitle());

    // Add an Author, falling back to the host the feed was fetched from
    String author = feed.getAuthor();
    if (author == null || "".equals(author.trim())) {
      author = url.getHost();
    }
    book.getMetadata().addAuthor(new Author(author));

    if (feed.getPublishedDate() != null) {
      book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate()));
    }

    if (feed.getDescription() != null) {
      book.getMetadata().addDescription(feed.getDescription());
    }

    if (feed.getCopyright() != null) {
      book.getMetadata().getRights().add(feed.getCopyright());
    }

    // NOTE: setting a cover image from feed.getImage() was attempted here and never worked;
    // the disabled code was removed.

    int entryNumber = 0;
    List<SyndEntry> entries = feed.getEntries();

    for (SyndEntry entry : entries) {
      if (!matchesKeyword(entry, keywords)) {
        continue;
      }

      StringBuilder title = new StringBuilder(100);
      if (entry.getTitle() != null) {
        title.append(entry.getTitle());
      }
      if (entry.getAuthor() != null) {
        title.append(" - ").append(entry.getAuthor());
      }
      StringBuilder content = new StringBuilder();

      // Add title inside text
      content.append("<h2>").append(title).append("</h2>");

      if (entry.getDescription() != null) {
        appendSyndContent(content, (SyndContent) entry.getDescription());
        content.append("<hr/>");
      }

      if (entry.getContents() != null && entry.getContents().size() > 0) {
        appendSyndContent(content, (SyndContent) entry.getContents().get(0));
      }
      String strContent = clean(content.toString());
      // Add Chapter
      try {
        entryNumber++;
        book.addSection(
            title.toString(),
            new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml"));
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
    }

    return book;
  }

  /**
   * Appends the value of the given content, wrapped in a pre block unless its type mentions
   * "html". Content without a value is skipped; content without a type is treated as non-HTML.
   */
  private static void appendSyndContent(StringBuilder target, SyndContent syndContent) {
    String value = syndContent.getValue();
    if (value == null) {
      // StringBuilder.append(null) would otherwise emit the literal text "null"
      return;
    }
    String type = syndContent.getType();
    boolean html = type != null && type.contains("html");
    if (!html) {
      target.append("<pre>\n");
    }
    target.append(value);
    if (!html) {
      target.append("\n</pre>");
    }
  }
Esempio n. 28
0
  // Build the feed list
  @SuppressWarnings("unchecked")
  private void buildFeedList(LinkedList<SyndFeed> syndFeeds, SourcePojo source) {
    // If there's a max number of sources to get per harvest, configure that here:
    long nWaitTime_ms = props.getWebCrawlWaitTime();
    long nMaxTime_ms =
        props.getMaxTimePerFeed(); // (can't override this, too easy to break the system...)
    int nMaxDocsPerSource = props.getMaxDocsPerSource();
    long nNow = new Date().getTime();
    if (null != source.getRssConfig()) {
      if (null != source.getRssConfig().getWaitTimeOverride_ms()) {
        nWaitTime_ms = source.getRssConfig().getWaitTimeOverride_ms();
      }
    }
    long nMaxDocs = Long.MAX_VALUE;
    if (nWaitTime_ms > 0) {
      nMaxDocs = nMaxTime_ms / nWaitTime_ms;
    }
    if (nMaxDocs > nMaxDocsPerSource) { // (another limit, take the smaller of the 2)
      nMaxDocs = nMaxDocsPerSource;
    }
    // (end per feed configuration)

    // Add extra docs
    List<SyndEntry> tmpList = null;
    boolean bCreatedAggregateList = false;
    int nRealSyndEntries = 0;

    for (SyndFeed feed : syndFeeds) {
      if (0 == nRealSyndEntries) {
        tmpList = feed.getEntries();
      } else if (!bCreatedAggregateList) {
        bCreatedAggregateList = true;
        tmpList = new LinkedList<SyndEntry>(tmpList);
        tmpList.addAll(feed.getEntries());
      } else {
        tmpList.addAll(feed.getEntries());
      }
      nRealSyndEntries += feed.getEntries().size();
    }
    if (null == tmpList) {
      tmpList = new LinkedList<SyndEntry>();
    }
    // TESTED

    if ((null != source.getRssConfig()) && (null != source.getRssConfig().getExtraUrls())) {
      for (ExtraUrlPojo extraUrl : source.getRssConfig().getExtraUrls()) {
        if (null == extraUrl.title) {
          continue; // (this is an RSS feed not a URL)
        } // TESTED
        SyndEntryImpl synd = new SyndEntryImpl();
        synd.setLink(extraUrl.url);
        if (null != extraUrl.description) {
          SyndContentImpl description = new SyndContentImpl();
          description.setValue(extraUrl.description);
          synd.setDescription(description);
        }
        synd.setTitle(extraUrl.title);
        if (null != extraUrl.publishedDate) {
          try {
            synd.setPublishedDate(new Date(DateUtility.parseDate(extraUrl.publishedDate)));
          } catch (Exception e) {
          } // do nothign will use now as pub date
        }
        tmpList.add((SyndEntry) synd);

        if (null != extraUrl.fullText) {
          SyndFeedImpl fullTextContainer = new SyndFeedImpl();
          fullTextContainer.setDescription(extraUrl.fullText);
          synd.setSource(fullTextContainer);
        }
      }
    }

    // Then begin looping over entries

    LinkedList<String> duplicateSources = new LinkedList<String>();
    try {
      Map<String, List<SyndEntry>> urlDups = new HashMap<String, List<SyndEntry>>();
      int nSyndEntries = 0;
      for (Object synd : tmpList) {
        nSyndEntries++; // (keep count so we know we're accessing our own fake SyndEntryImpls)
        final SyndEntry entry = (SyndEntry) synd;

        if (null != entry.getLink()) // if url returns null, skip this entry
        {
          String url = this.cleanUrlStart(entry.getLink());

          if (null != source.getRssConfig()) { // Some RSS specific logic
            // If an include is specified, must match
            Matcher includeMatcher = source.getRssConfig().getIncludeMatcher(url);
            if (null != includeMatcher) {
              if (!includeMatcher.find()) {
                continue;
              }
            }
            // If an exclude is specified, must not match
            Matcher excludeMatcher = source.getRssConfig().getExcludeMatcher(url);
            if (null != excludeMatcher) {
              if (excludeMatcher.find()) {
                continue;
              }
            }
          }

          // Some error checking:
          // sometimes the URL seems to have some characters in front of the HTTP - remove these
          this.nTmpDocsSubmitted++;
          if (null == url) {
            this.nTmpHttpErrors++;
            continue;
          }

          // Also save the title and description:
          String title = "";
          if (null != entry.getTitle()) {
            title = entry.getTitle();
          }
          String desc = "";
          if (null != entry.getDescription()) {
            desc = entry.getDescription().getValue();
          }
          boolean duplicate = false;

          // Look for duplicates within the current set of sources
          List<SyndEntry> possDups = null;
          if (null == (possDups = urlDups.get(url))) { // (new URL)
            possDups = new LinkedList<SyndEntry>();
            possDups.add(entry);
            urlDups.put(url, possDups);
          } else { // (old URL, check if this is a duplicate...)
            int nCount = 0;
            for (SyndEntry possDup : possDups) {
              if (possDup.getTitle().equals(title)
                  || ((null != possDup.getDescription())
                      && possDup.getDescription().getValue().equals(desc))
                  || ((null != possDup.getDescription()) && (null == entry.getDescription()))) {
                // If *either* the title or the description matches as well as the URL...
                duplicate = true;
                break;
              }
              nCount++;
            }

            if (!duplicate) {
              possDups.add(entry);
            } else { // DUPLICATE: ensure we have minimal set of data to cover all cases:
              boolean bTitleMatch = false;
              boolean bDescMatch = false;
              for (SyndEntry possDup : possDups) {
                if (!bTitleMatch
                    && possDup
                        .getTitle()
                        .equals(title)) { // (don't bother if already have a title match)
                  bTitleMatch = true;
                } else if (!bDescMatch) { // (don't yet have a desc match(
                  if (null != entry.getDescription()) {
                    if (null != possDup.getDescription()) { // (neither desc is null)
                      if (possDup.getDescription().getValue().equals(desc)) {
                        bDescMatch = true;
                      }
                    }
                  } else { // curr desc is null
                    if (null == possDup.getDescription()) { // dup desc is null
                      bDescMatch = true;
                    }
                  } // (end various title match/desc match/both have no desc cases
                } // (end if no desc match)
                if (bTitleMatch && bDescMatch) {
                  break; // (no way can fire)
                }
              } // (end loop over dups)

              if (!bTitleMatch || !bDescMatch) {
                possDups.add(entry);
              }
            } // (end is duplicate, nasty logic to add minimal set to dup list to cover all titles,
              // descs)
          }
          if (duplicate) {
            continue;
          }

          try {
            DuplicateManager qr = _context.getDuplicateManager();
            if (null != entry.getDescription()) {
              duplicate =
                  qr.isDuplicate_UrlTitleDescription(
                      url,
                      title.replaceAll("\\<.*?\\>", "").trim(),
                      desc.replaceAll("\\<.*?\\>", "").trim(),
                      source,
                      duplicateSources);
            } else {
              duplicate =
                  qr.isDuplicate_UrlTitleDescription(
                      url,
                      title.replaceAll("\\<.*?\\>", "").trim(),
                      null,
                      source,
                      duplicateSources);
              // ^^^(this is different to isDuplicate_UrlTitle because it enforces that the
              // description be null, vs just checking the title)
            }
            if (duplicate
                && (null != source.getRssConfig())
                && (null != source.getRssConfig().getUpdateCycle_secs())) {
              // Check modified times...
              Date dupModDate = qr.getLastDuplicateModifiedTime();
              ObjectId dupId = qr.getLastDuplicateId();

              if ((null != dupModDate) && (null != dupId)) {
                if (dupModDate.getTime() + source.getRssConfig().getUpdateCycle_secs() * 1000
                    < nNow) {

                  DocumentPojo doc = buildDocument(entry, source, duplicateSources);
                  if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
                    // (Use dummy TitleEx to create a "fake" full text block)
                    doc.setFullText(entry.getSource().getDescription());
                  }
                  doc.setUpdateId(dupId); // (set _id to document I'm going to overwrite)
                  this.docsToUpdate.add(doc);

                  if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
                    source.setReachedMaxDocs();
                    break; // (that's enough documents)
                  }
                }
              }
            } // TESTED (duplicates we update instead of ignoring)

            if (!duplicate) {
              DocumentPojo doc = buildDocument(entry, source, duplicateSources);
              if ((nSyndEntries > nRealSyndEntries) && (null != entry.getSource())) {
                // (Use dummy TitleEx to create a "fake" full text block)
                doc.setFullText(entry.getSource().getDescription());
              }
              this.docsToAdd.add(doc);

              if ((this.docsToAdd.size() + this.docsToUpdate.size()) >= nMaxDocs) {
                source.setReachedMaxDocs();
                break; // (that's enough documents)
              }
            }
            if (this.nTmpDocsSubmitted > 20) { // (some arbitrary "significant" number)
              if (nTmpHttpErrors == this.nTmpDocsSubmitted) {
                break;
              }
            }
          } catch (Exception e) {
            // If an exception occurs log the error
            logger.error("Exception Message: " + e.getMessage(), e);
          }
        }
      } // (end loop over feeds in a syndicate)
    } catch (Exception e) {
      // If an exception occurs log the error
      logger.error("Exception Message: " + e.getMessage(), e);
    }
  }
Esempio n. 29
0
  /**
   * Appends a new entry to an existing ATOM feed file and rewrites the file in place.
   *
   * <p>If {@code atomFile} does not exist, nothing is done. Otherwise the feed is parsed, forced to
   * the {@code atom_1.0} output format, the oldest entry (index 0) is dropped when the feed already
   * holds more than nine entries, and the new entry is appended. The optional {@code htmlTitle} and
   * {@code htmlAuthor} configuration properties, when present, are used as the entry's category
   * and author respectively.
   *
   * <p>Failures are reported on standard output and are not propagated to the caller.
   *
   * @param title title of the new entry
   * @param link used both as the link and as the URI of the new entry
   * @param description HTML description of the new entry
   * @param atomFile the feed file to read and overwrite
   * @param context context name passed to {@code CommonConfiguration.getProperty}
   */
  public static synchronized void addATOMEntry(
      String title, String link, String description, File atomFile, String context) {
    try {
      if (!atomFile.exists()) {
        return;
      }

      // Load the feed, regardless of RSS or Atom type. The reader is closed before the same
      // file is reopened for writing below.
      SyndFeed feed;
      try (XmlReader reader = new XmlReader(atomFile)) {
        feed = new SyndFeedInput().build(reader);
      }

      // Set the output format of the feed
      feed.setFeedType("atom_1.0");

      List<SyndEntry> items = feed.getEntries();
      if (items.size() > 9) {
        // Drop the first (oldest) entry so the feed does not grow without bound.
        items.remove(0);
      }

      SyndContent desc = new SyndContentImpl();
      desc.setType("text/html");
      desc.setValue(description);

      SyndEntry newItem = new SyndEntryImpl();
      newItem.setTitle(title);
      newItem.setLink(link);
      newItem.setUri(link);
      newItem.setDescription(desc);
      newItem.setPublishedDate(new java.util.Date());

      List<SyndCategory> categories = new ArrayList<SyndCategory>();
      String htmlTitle = CommonConfiguration.getProperty("htmlTitle", context);
      if (htmlTitle != null) {
        SyndCategory category = new SyndCategoryImpl();
        category.setName(htmlTitle);
        categories.add(category);
      }
      newItem.setCategories(categories);

      String htmlAuthor = CommonConfiguration.getProperty("htmlAuthor", context);
      if (htmlAuthor != null) {
        newItem.setAuthor(htmlAuthor);
      }

      items.add(newItem);
      feed.setEntries(items);
      feed.setPublishedDate(new java.util.Date());

      // try-with-resources guarantees the writer is flushed and closed; without this the
      // serialized feed may never reach the disk and the file handle leaks.
      try (FileWriter writer = new FileWriter(atomFile)) {
        new SyndFeedOutput().output(feed, writer);
      }
    } catch (IOException ioe) {
      System.out.println("ERROR: Could not find the ATOM file.");
      ioe.printStackTrace();
    } catch (Exception e) {
      System.out.println("Unknown exception trying to add an entry to the ATOM file.");
      e.printStackTrace();
    }
  }
  /**
   * Builds a {@link WeatherInfo} describing the current weather from a weather feed.
   *
   * <p>Feed-level foreign markup supplies the location ({@code location}), wind speed ({@code
   * wind}) and sunrise/sunset times ({@code astronomy}). Entry-level foreign markup supplies the
   * forecasts ({@code forecast}), the coordinates ({@code lat}/{@code long}) and the current
   * condition ({@code condition}). The date, high and low temperature of the first forecast are
   * copied onto the returned object; when the feed contains no forecast these fields are left
   * unset.
   *
   * <p>Date parse failures are logged and the corresponding value is skipped.
   *
   * @param feed the parsed weather feed
   * @return the populated weather info, never {@code null}
   */
  private WeatherInfo getWeatherInfo(SyndFeed feed) {
    WeatherInfo info = new WeatherInfo();

    // Formatters are created once per call; SimpleDateFormat is not thread-safe, so they stay
    // method-local.
    final SimpleDateFormat astronomyFormat = new SimpleDateFormat("hh:mm a", Locale.US);
    final SimpleDateFormat forecastFormat = new SimpleDateFormat("dd MMM yyyy", Locale.US);
    final SimpleDateFormat conditionFormat =
        new SimpleDateFormat("EEE, dd MMM yyyy hh:mm a", Locale.US);

    // root elements
    @SuppressWarnings("unchecked")
    final List<Element> channelElements = (List<Element>) feed.getForeignMarkup();
    boolean locationApplied = false;
    for (Element element : channelElements) {
      String name = element.getName();
      if (name.equalsIgnoreCase("location")) {
        // only the first location element is used
        if (!locationApplied) {
          info.setCity(element.getAttribute("city").getValue());
          info.setCountry(element.getAttribute("country").getValue());
          locationApplied = true;
        }
      }
      // wind
      else if (name.equalsIgnoreCase("wind")) {
        info.setWind(Double.parseDouble(element.getAttributeValue("speed")));
      }
      // sunrise and sunset
      else if (name.equalsIgnoreCase("astronomy")) {
        try {
          info.setSunrise(astronomyFormat.parse(element.getAttributeValue("sunrise")));
          info.setSunset(astronomyFormat.parse(element.getAttributeValue("sunset")));
        } catch (ParseException e) {
          LOG.error(
              "Error retrieving sunrise time for "
                  + element.getAttributeValue("sunrise")
                  + ": "
                  + e.getMessage());
        }
      }
    }

    @SuppressWarnings("unchecked")
    final List<SyndEntry> items = (List<SyndEntry>) feed.getEntries();
    for (SyndEntry entry : items) {
      @SuppressWarnings("unchecked")
      final List<Element> entryElements = (List<Element>) entry.getForeignMarkup();
      for (Element element : entryElements) {
        String name = element.getName();
        // get forecast
        if (name.equalsIgnoreCase("forecast")) {
          WeatherInfo forecast = new WeatherInfo();
          String high = element.getAttributeValue("high");
          String low = element.getAttributeValue("low");
          String description = element.getAttributeValue("text");
          int code = Integer.parseInt(element.getAttribute("code").getValue());
          String date = element.getAttribute("date").getValue();
          String imageUrl = convertTypeCodeImage(code);

          // e.g. 06 Sep 2013
          Date formatted = null;
          try {
            formatted = forecastFormat.parse(date);
          } catch (ParseException e) {
            LOG.error("Error parsing date '" + date + ": " + e.getMessage());
          }

          forecast.setDescription(description);
          forecast.setForecastDate(formatted);
          forecast.setHighTemp(high);
          forecast.setLowTemp(low);
          forecast.setImageUrl(WeatherBigResourceLoader.getResource(imageUrl));
          forecast.setIconWhiteUrl(WeatherSmallWhiteResourceLoader.getResource(imageUrl));
          forecast.setIconBlackUrl(WeatherSmallBlackResourceLoader.getResource(imageUrl));

          info.getForecast().add(forecast);
        }
        // get location
        else if (name.equalsIgnoreCase("lat")) {
          info.setLatitude(element.getText());
        } else if (name.equalsIgnoreCase("long")) {
          // Previously this called setLatitude and overwrote the latitude with the longitude.
          // NOTE(review): assumes WeatherInfo exposes setLongitude(String) - confirm.
          info.setLongitude(element.getText());
        }
        // get local time and current condition
        else if (name.equalsIgnoreCase("condition")) {
          // e.g. Fri, 06 Sep 2013 11:49 am CEST
          String date = element.getAttributeValue("date");
          try {
            Date formatted = conditionFormat.parse(date);
            info.setLocalTime(formatted);
            info.setTemp(element.getAttributeValue("temp"));
            info.setDescription(element.getAttributeValue("text"));

            int code = Integer.parseInt(element.getAttribute("code").getValue());
            String imageUrl = convertTypeCodeImage(code);
            info.setImageUrl(WeatherBigResourceLoader.getResource(imageUrl));
            info.setIconWhiteUrl(WeatherSmallWhiteResourceLoader.getResource(imageUrl));
          } catch (ParseException e) {
            LOG.error("Error retrieving local time for " + date + ": " + e.getMessage());
          }
        }
      }
    }

    // apply data of first forecast; a feed without forecasts would otherwise fail on get(0)
    if (!info.getForecast().isEmpty()) {
      WeatherInfo todayForecast = info.getForecast().get(0);
      info.setForecastDate(todayForecast.getForecastDate());
      info.setHighTemp(todayForecast.getHighTemp());
      info.setLowTemp(todayForecast.getLowTemp());
    }
    return info;
  }