Esempio n. 1
0
  /**
   * Fetches the feed at {@code url} and converts every entry into an {@link RssUrlBean}.
   *
   * <p>Any exception raised while fetching or parsing is printed and swallowed; the beans
   * converted before the failure are still returned.
   *
   * @param rssCompo_id identifier copied onto every bean through {@code setRssCompo_id}
   * @param url address of the feed; {@code null} or the empty string yields an empty list
   * @return the converted entries, never {@code null}
   */
  public static List<RssUrlBean> getRssUrlBeanListFromPage(int rssCompo_id, String url) {
    List<RssUrlBean> rubList = new ArrayList<RssUrlBean>();
    if (url == null || url.equals("")) return rubList;
    XmlReader reader = null;
    try {
      URL feedUrl = new URL(url);
      reader = new XmlReader(feedUrl);
      // SyndFeedInput turns the XML read from the remote source into a SyndFeed instance.
      SyndFeedInput input = new SyndFeedInput();
      // ROME exposes both RSS and Atom feeds through the SyndFeed interface.
      SyndFeed syndFeed = input.build(reader);

      List<SyndEntry> entryList = syndFeed.getEntries();
      for (SyndEntry entry : entryList) {
        RssUrlBean rub = new RssUrlBean();
        rub.setRssCompo_id(rssCompo_id);
        rub.setTitle(entry.getTitle());
        rub.setLink(entry.getUri());
        // Published date and description are optional in feeds; leave the bean field unset
        // instead of failing the whole import on the first entry that lacks one.
        if (entry.getPublishedDate() != null) {
          rub.setPublishedDate(
              CommonUtil.getStandardDate(entry.getPublishedDate().toLocaleString()));
        }
        if (entry.getDescription() != null) {
          rub.setDescription(entry.getDescription().getValue());
        }
        if (entry.getUpdatedDate() != null) {
          rub.setUpdatedDate(CommonUtil.getStandardDate(entry.getUpdatedDate().toLocaleString()));
        }
        rub.setAuthors(entry.getAuthor());
        rubList.add(rub);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    } finally {
      // Release the underlying connection/stream even when parsing failed.
      if (reader != null) {
        try {
          reader.close();
        } catch (java.io.IOException ignored) {
          // nothing useful can be done if closing fails
        }
      }
    }
    return rubList;
  }
  /**
   * Writes a multi-line, human-readable dump of a feed entry to the trace log.
   *
   * <p>Returns immediately, without touching the entry, when trace logging is disabled.
   *
   * @param entry the feed entry to dump
   */
  public static void printEntry(SyndEntry entry) {
    if (!log.isTraceEnabled()) {
      return;
    }

    StringBuilder dump = new StringBuilder();
    dump.append("URI: ").append(entry.getUri()).append("\n");
    dump.append("Title: ").append(entry.getTitle()).append("\n").append("\n");
    dump.append("Date: ").append(entry.getPublishedDate()).append("\n");
    dump.append("Modified: ").append(entry.getUpdatedDate()).append("\n");

    dump.append("Creators: \n");
    for (SyndPerson creator : FeedHelper.getAuthors(entry)) {
      dump.append("  - ").append(creator.getName()).append("\n");
    }

    dump.append("Links: \n");
    for (SyndLink reference : FeedHelper.getLinks(entry)) {
      dump.append("  - ")
          .append(reference.getTitle())
          .append(": ")
          .append(reference.getHref())
          .append("\n");
    }

    SyndContent summary = entry.getDescription();
    if (summary != null) {
      dump.append("\nDescription(")
          .append(summary.getType())
          .append("): ")
          .append(summary.getValue());
    }

    dump.append("Contents: \n");
    for (SyndContent part : FeedHelper.getContents(entry)) {
      dump.append(" Type: ").append(part.getType());
      dump.append(" Body: ").append(part.getValue());
      try {
        // Built as a single string so that nothing is appended when extraction fails.
        String plainTextLine =
            " Body (Plain text): "
                + PlainTextExtractor.getPlainText(part.getType(), part.getValue());
        dump.append(plainTextLine);
      } catch (ParserException e) {
        dump.append("Failed to parse content");
      }
    }

    dump.append("Categories: \n");
    for (SyndCategory topic : FeedHelper.getCategories(entry)) {
      dump.append(topic.getName()).append("(").append(topic.getTaxonomyUri()).append(")");
    }

    log.trace(dump.toString());
  }
 /**
  * Builds an RSS {@code Item} from a generic feed entry.
  *
  * <p>Copies the cloned modules, title, link and URI; a description is attached only when the
  * entry has one.
  *
  * @param sEntry the feed entry to convert
  * @return the newly created item
  */
 protected Item createRSSItem(SyndEntry sEntry) {
   final Item rssItem = new Item();
   rssItem.setModules(ModuleUtils.cloneModules(sEntry.getModules()));
   rssItem.setTitle(sEntry.getTitle());
   rssItem.setLink(sEntry.getLink());
   rssItem.setUri(sEntry.getUri());

   if (sEntry.getDescription() == null) {
     return rssItem;
   }

   final Description rssDescription = new Description();
   rssDescription.setValue(sEntry.getDescription().getValue());
   rssItem.setDescription(rssDescription);
   return rssItem;
 }
Esempio n. 4
0
  /**
   * Announces the first feed entry that the DAO does not know yet.
   *
   * <p>The entry's GUID is saved through the DAO so it is not announced again. The result holds
   * the announcement line and, when one can be extracted, the text of the description. If every
   * entry is already known, a single "nothing new" line is returned. Any exception is handed to
   * {@code LOG.handle} and whatever was collected so far is returned.
   *
   * @return the lines to send, never {@code null}
   */
  @SuppressWarnings("unchecked")
  @Trigger("!buzz")
  @Help("Fetches one of the latest posts from jeanmarcmorandini.com")
  public List<String> getLatestBuzz() {
    List<String> toReturn = new ArrayList<String>();
    XmlReader reader = null;
    try {
      URL url = new URL("http://www.jeanmarcmorandini.com/rss.php");
      reader = new XmlReader(url);
      SyndFeedInput input = new SyndFeedInput();
      SyndFeed rss = input.build(reader);

      Iterator<SyndEntry> it = rss.getEntries().iterator();
      String message = null;
      while (it.hasNext()) {
        SyndEntry item = it.next();
        String guid = item.getUri();
        RSSFeed buzz = dao.findByGUID(guid);
        if (buzz == null) {
          buzz = new RSSFeed();
          buzz.setGuid(guid);
          dao.save(buzz);
          String urlBitly = utilsService.bitly(item.getLink());

          // The entry is already marked as seen at this point, so extracting the summary must
          // not throw: a missing description or a description without <p> would otherwise
          // lose the announcement for good.
          String content = "";
          if (item.getDescription() != null && item.getDescription().getValue() != null) {
            org.jsoup.nodes.Document html = Jsoup.parse(item.getDescription().getValue());
            org.jsoup.select.Elements paragraphs = html.select("p");
            content = paragraphs.isEmpty() ? html.text() : paragraphs.get(0).text();
          }

          message = IRCUtils.bold("EXCLU!") + " " + item.getTitle() + " - " + urlBitly;
          toReturn.add(message);
          if (content.length() > 0) {
            toReturn.add(content);
          }
          break;
        }
      }

      if (message == null) {
        toReturn.add("Pas d'exclus pour le moment.");
      }

    } catch (Exception e) {
      LOG.handle(e);
    } finally {
      // Release the HTTP stream opened by the XmlReader.
      if (reader != null) {
        try {
          reader.close();
        } catch (java.io.IOException ignored) {
          // nothing useful can be done if closing fails
        }
      }
    }

    return toReturn;
  }
Esempio n. 5
0
 /**
  * Removes the first entry whose URI equals {@code uri} from the feed returned by
  * {@code read()}.
  *
  * @param uri URI of the entry to remove; {@code null} or blank leaves the feed untouched
  * @return the feed, with the matching entry (if any) removed from its entry list
  */
 @SuppressWarnings("unchecked")
 public SyndFeed removeEntry(String uri) {
   SyndFeed feed = read();
   List<SyndEntry> entries = feed.getEntries();
   if (uri != null && uri.trim().length() > 0) {
     for (SyndEntry syndEntry : entries) {
       // Compare from the known non-null side so entries without a URI are simply skipped.
       if (uri.equals(syndEntry.getUri())) {
         // Safe inside the for-each only because the loop is left immediately afterwards.
         entries.remove(syndEntry);
         break;
       }
     }
   }
   feed.setEntries(entries);
   return feed;
 }
Esempio n. 6
0
  /**
   * Parses a feed file from a fixed local path and logs its authors, categories and, for each
   * entry, the title, description, URI and dates at debug level.
   *
   * @throws Exception if the file cannot be read or parsed
   */
  @SuppressWarnings("unchecked")
  public void test() throws Exception {
    final SyndFeedInput input = new SyndFeedInput(true);
    // NOTE(review): hard-coded, Windows-only path; this only runs where that file exists.
    final SyndFeed feed = input.build(new File("c:\\temp\\google.xml"));

    logger.debug("Successfully parsed the RSS feed");
    logger.debug("Author      = " + feed.getAuthors());
    logger.debug("Categories  = " + feed.getCategories());
    final List<SyndEntry> entries = feed.getEntries();
    for (final SyndEntry entry : entries) {
      logger.debug("Title = " + StringEscapeUtils.unescapeHtml(entry.getTitle()));
      // Entries are not required to carry a description; log "null" instead of throwing.
      final String descriptionText =
          entry.getDescription() == null
              ? null
              : StringEscapeUtils.unescapeHtml(entry.getDescription().getValue());
      logger.debug("Description = " + descriptionText);
      logger.debug(entry.getUri());
      logger.debug("Updated date = " + entry.getUpdatedDate());
      logger.debug("Published date = " + entry.getPublishedDate());
      logger.debug("====================================================");
    }
  }
Esempio n. 7
0
  /**
   * Drops from {@code entries}, in place, every entry whose URI equals the name of one of the
   * nodes in {@code listRemove}. The relative order of the surviving entries is kept.
   *
   * @param entries the entry list to filter; it is cleared and refilled with the survivors
   * @param listRemove nodes whose names identify the entries to drop
   * @throws RepositoryException if a node name cannot be read
   */
  protected void removeItem(List<SyndEntry> entries, List<Node> listRemove)
      throws RepositoryException {
    List<SyndEntry> survivors = new ArrayList<SyndEntry>();

    nextEntry:
    for (SyndEntry candidate : entries) {
      for (Node doomed : listRemove) {
        if (candidate.getUri().equals(doomed.getName())) {
          // matched a node scheduled for removal: do not keep this entry
          continue nextEntry;
        }
      }
      survivors.add(candidate);
    }

    entries.clear();
    entries.addAll(survivors);
  }
Esempio n. 8
0
  //	@Transactional
  //	@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
  /**
   * Imports a single feed entry as a content item.
   *
   * <p>Steps, in order: resolve the author, create the content item from the entry's link or
   * URI, copy title, language and dates, set the text content from the entry's content section
   * or description, save the item, then tag it from the entry categories, hashtags found in the
   * title, media-module categories and the supplied {@code tags}. Geo position and the first
   * media content (URL references only) are imported when present. Finally the supplied
   * {@code types} and the {@code FeedPost} type are added and the entity manager is flushed.
   *
   * @param feed the feed the entry belongs to; its language and link are read
   * @param entry the entry to import
   * @param types types added to the created content item
   * @param tags content items used as additional tags for the created item
   * @param user author for the created items; when {@code null} and the entry names an author,
   *     the registered user with that login is used, otherwise the "anonymous" user
   * @param output not used by this method
   */
  public void importEntry(
      final SyndFeed feed,
      final SyndEntry entry,
      final Set<KiWiUriResource> types,
      final Set<ContentItem> tags,
      User user,
      final Collection<ContentItem> output) {
    if (user == null && entry.getAuthor() != null && !"".equals(entry.getAuthor())) {
      if (userService.userExists(entry.getAuthor())) {
        user = userService.getUserByLogin(entry.getAuthor());
      } else {

        //				user = userService.createUser(entry.getAuthor());
        /* In my opinion, it is not ok to create a user entity
         * without asking the person if he/she wants to be
         * created and persisted in the KiWi dataset.
         * Thus I'm changing the user to 'anonymous',
         * if he/she isn't registered with the same nick that
         * is given in the rss entry.
         */
        user = userService.getUserByLogin("anonymous");
        kiwiEntityManager.persist(user);
      }
    }

    log.debug("feed entry: #0 (#1)", entry.getTitle(), entry.getUri());

    // create a new content item and copy all data from the feed entry
    ContentItem item;
    if (entry.getLink() != null) {
      item = contentItemService.createExternContentItem(entry.getLink());
    } else if (entry.getUri() != null) {
      try {
        // Constructed only to validate the syntax; if the entry URI is not a valid URI, fall
        // back to the feed link with the entry URI as fragment.
        new URI(entry.getUri());
        item = contentItemService.createExternContentItem(entry.getUri());
      } catch (URISyntaxException e) {
        item = contentItemService.createExternContentItem(feed.getLink() + "#" + entry.getUri());
      }
    } else {
      item = contentItemService.createContentItem();
    }
    contentItemService.updateTitle(item, entry.getTitle());

    if (feed.getLanguage() != null) item.setLanguage(new Locale(feed.getLanguage()));

    if (entry.getPublishedDate() != null) {
      item.setCreated(entry.getPublishedDate());
      item.setModified(entry.getPublishedDate());
    }

    if (entry.getUpdatedDate() != null) {
      if (entry.getPublishedDate() == null) {
        item.setCreated(entry.getUpdatedDate());
      }
      item.setModified(entry.getUpdatedDate());
    }

    item.setAuthor(user);

    // read feed content and set it as item's text content
    List<SyndContent> contents = entry.getContents();
    if (contents.size() == 1) {
      log.debug("using RSS content section provided by item");
      contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (contents.size() > 1) {
      // only the first content section is imported
      log.warn("feed entry contained more than one content section");
      contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else {
      if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
        log.debug("using RSS description as no content section was available");
        contentItemService.updateTextContentItem(
            item, "<p>" + entry.getDescription().getValue() + "</p>");
      }
    }

    // save before tagging
    contentItemService.saveContentItem(item);

    // read feed categories and use them as tags
    for (SyndCategory cat : (List<SyndCategory>) entry.getCategories()) {
      ContentItem _cat;
      if (!taggingService.hasTag(item, cat.getName())) {
        if (cat.getTaxonomyUri() != null) {
          _cat = contentItemService.getContentItemByUri(cat.getTaxonomyUri());
          if (_cat == null) {
            _cat = contentItemService.createExternContentItem(cat.getTaxonomyUri());
            contentItemService.updateTitle(_cat, cat.getName());
            _cat.setAuthor(user);
            contentItemService.saveContentItem(_cat);
          }
          taggingService.createTagging(cat.getName(), item, _cat, user);
        } else {
          _cat = contentItemService.getContentItemByTitle(cat.getName());
          if (_cat == null) {
            _cat = contentItemService.createContentItem();
            contentItemService.updateTitle(_cat, cat.getName());
            _cat.setAuthor(user);
            contentItemService.saveContentItem(_cat);
          }
          taggingService.createTagging(cat.getName(), item, _cat, user);
        }
      }
    }

    // scan for Twitter-style hash tags in title (e.g. #kiwiknows, see KIWI-622);
    // an entry without a title has nothing to scan, and matcher(null) would throw
    if (entry.getTitle() != null) {
      Matcher m_hashtag = p_hashtag.matcher(entry.getTitle());
      while (m_hashtag.find()) {
        String tag_label = m_hashtag.group(1);
        if (!taggingService.hasTag(item, tag_label)) {
          ContentItem tag = contentItemService.getContentItemByTitle(tag_label);
          if (tag == null) {
            tag = contentItemService.createContentItem();
            contentItemService.updateTitle(tag, tag_label);
            tag.setAuthor(user);
            contentItemService.saveContentItem(tag);
          }
          taggingService.createTagging(tag_label, item, tag, user);
        }
      }
    }

    // check for geo information
    GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry);
    if (geoRSSModule != null && geoRSSModule.getPosition() != null) {
      POI poi = kiwiEntityManager.createFacade(item, POI.class);
      poi.setLatitude(geoRSSModule.getPosition().getLatitude());
      poi.setLongitude(geoRSSModule.getPosition().getLongitude());
      kiwiEntityManager.persist(poi);
    }

    // check for media information
    MediaEntryModule mediaModule = (MediaEntryModule) entry.getModule(MediaModule.URI);
    if (mediaModule != null) {
      MediaContent[] media = mediaModule.getMediaContents();
      if (media.length > 0) {
        // only the first media content is imported
        MediaContent m = media[0];
        if (m.getReference() instanceof UrlReference) {
          URL url = ((UrlReference) m.getReference()).getUrl();

          String type = m.getType();
          // use the last path segment of the URL as the media file name
          String name = url.getFile();
          if (name.lastIndexOf("/") > 0) {
            name = name.substring(name.lastIndexOf("/") + 1);
          }

          log.debug("importing media data from URL #0", url.toString());

          InputStream is = null;
          try {
            is = url.openStream();

            ByteArrayOutputStream bout = new ByteArrayOutputStream();

            // read in chunks; a byte-at-a-time read on an unbuffered network stream is slow
            byte[] buffer = new byte[8192];
            int read;
            while ((read = is.read(buffer)) != -1) {
              bout.write(buffer, 0, read);
            }

            contentItemService.updateMediaContentItem(item, bout.toByteArray(), type, name);
          } catch (IOException ex) {
            log.error("error importing media content from RSS stream: #0", ex.getMessage());
          } finally {
            // close the stream on every path, including read or update failures
            if (is != null) {
              try {
                is.close();
              } catch (IOException ignored) {
                // nothing useful can be done if closing fails
              }
            }
          }
        } else {
          log.info("RSS importer can only import media with URL references");
        }
      } else {
        log.warn("media module found without content");
      }

      Category[] cats = mediaModule.getMetadata().getCategories();
      for (Category cat : cats) {
        ContentItem _cat;

        // prefer the human-readable label, fall back to the raw category value
        String label = cat.getLabel() != null ? cat.getLabel() : cat.getValue();

        if (!taggingService.hasTag(item, label)) {
          if (cat.getScheme() != null) {
            _cat = contentItemService.getContentItemByUri(cat.getScheme() + cat.getValue());
            if (_cat == null) {
              _cat = contentItemService.createExternContentItem(cat.getScheme() + cat.getValue());
              contentItemService.updateTitle(_cat, label);
              _cat.setAuthor(user);
              contentItemService.saveContentItem(_cat);
            }
            taggingService.createTagging(label, item, _cat, user);
          } else {
            _cat = contentItemService.getContentItemByTitle(label);
            if (_cat == null) {
              _cat = contentItemService.createContentItem();
              contentItemService.updateTitle(_cat, label);
              _cat.setAuthor(user);
              contentItemService.saveContentItem(_cat);
            }
            taggingService.createTagging(label, item, _cat, user);
          }
        }
      }
    }

    // add parameter categories as tags
    for (ContentItem tag : tags) {
      if (!taggingService.hasTag(item, tag.getTitle())) {
        taggingService.createTagging(tag.getTitle(), item, tag, user);
      }
    }

    // add parameter types as types
    for (KiWiUriResource type : types) {
      item.addType(type);
    }

    // add kiwi:FeedPost type
    item.addType(tripleStore.createUriResource(Constants.NS_KIWI_CORE + "FeedPost"));

    /* the flush is necessary, because CIs or tags will
     * otherwise be created multiple times when they
     * appear more than once in one RSS feed */
    entityManager.flush();
    log.debug("imported content item '#0' with URI '#1'", item.getTitle(), item.getResource());
  }
Esempio n. 9
0
  /**
   * Fetches and parses the feed at {@code feedurl} and returns its usable entries.
   *
   * <p>Parsing is attempted directly first. If that throws, the page is re-read through Jsoup
   * and parsed from the resulting document; if that raises an
   * {@code ExceptionInInitializerError}, the raw page is fetched, some quote entities are
   * replaced and the result is parsed once more. Entries whose URI does not start with "http"
   * are kept only when {@code domain} is non-empty, in which case the domain is prefixed to
   * their link and URI. Entries without a URI are skipped.
   *
   * <p>Failures are reported on the console/logger and never propagated; an empty or partial
   * result is returned instead.
   *
   * @return the collected entries, never {@code null}
   */
  public Vector<SyndEntry> crawl() {
    SyndFeedInput input = new SyndFeedInput();
    XmlReader reader = null;
    Vector<SyndEntry> ret = new Vector<SyndEntry>();
    Document doc;
    try {
      SyndFeed feed = null;
      try {
        feed = input.build(reader = new XmlReader(feedurl));
      } catch (Exception fe) {
        try {
          System.err.println("from url: " + feedurl);
          fe.printStackTrace();

          // second attempt: let Jsoup clean the page, then parse its serialized form
          doc = Jsoup.parse(feedurl, 10000);

          feed = input.build(new StringReader(doc.toString()));
          getLogger().info("Could fix it woith jsoup");
        } catch (ExceptionInInitializerError ed) {
          // NOTE(review): only this Error triggers the third attempt; IOException and
          // FeedException from the Jsoup attempt go to the outer handlers instead.
          System.err.println("from url: " + feedurl);
          ed.printStackTrace();

          try {
            URL url = feedurl;
            String feedpage = FeedCrawler.readPage(url);
            // replace typographic quote entities that are not valid in XML
            feedpage =
                feedpage
                    .replaceAll("\\&amp;ldquo;", "\"")
                    .replaceAll("&ldquo;", "\"")
                    .replaceAll("\\&amp;rdquo;", "\"")
                    .replaceAll("&rdquo;", "\"");
            System.out.println(feedpage);

            feed = input.build(new InputSource(new StringReader(feedpage)));

            getLogger().info("Could fix it with complicatedreader");

          } catch (URISyntaxException e) {
            System.err.println("from url: " + feedurl);
            e.printStackTrace();
            System.out.println("URL does not work: " + feedurl);
          } catch (FeedException e) {
            System.err.println("from url: " + feedurl);
            e.printStackTrace();
            System.out.println("URL does not work: " + feedurl);
          } catch (Exception ex) {
            System.err.println("from url: " + feedurl);
            ex.printStackTrace();
            System.out.println("URL does not work: " + feedurl);
          }
        }
      }

      // Every fallback may have failed with its exception swallowed above; without this guard
      // the loop below would throw an uncaught NullPointerException.
      if (feed == null) {
        getLogger().info("Feed error: " + feedurl);
        return ret;
      }

      for (Iterator<SyndEntry> i = feed.getEntries().iterator(); i.hasNext(); ) {
        SyndEntry entry = (SyndEntry) i.next();
        String uri = entry.getUri();
        System.out.println("\t INFOR: Entry\t" + uri);
        if (uri == null) {
          // an entry without a URI cannot be classified or repaired; skip it
          System.out.println("\t ERROR: \t" + entry.getLink());
          System.out.println("\t DEBUG: \t" + feedurl);
          continue;
        }
        if (!uri.startsWith("http")) {
          System.out.println("\t ERROR: \t" + entry.getLink());
          System.out.println("\t DEBUG: \t" + feedurl);
          System.out.println("\t FIX: \t");
          // relative references are made absolute with the configured domain, if there is one
          if (domain.length() > 0) {
            entry.setLink(domain + entry.getLink());
            entry.setUri(domain + uri);
            ret.add(entry);
          }
        } else ret.add(entry);
        getLogger().info(entry.getTitle());
      }

    } catch (IllegalArgumentException e) {
      getLogger().info("Feed error: " + feedurl);
      getLogger().info("try with jsoup");
      System.err.println("from url: " + feedurl);
      e.printStackTrace();
      try {
        // NOTE(review): the parsed document is discarded; this only re-fetches the page.
        doc = Jsoup.parse(feedurl, 10000);
      } catch (IOException e1) {
        System.err.println("from url: " + feedurl);
        e1.printStackTrace();
      }

    } catch (FeedException fe) {
      getLogger().info("Even jsoup did not work: " + feedurl);
      System.err.println("from url: " + feedurl);
      fe.printStackTrace();

    } catch (IOException e) {
      getLogger().info("Feed error: " + feedurl);
      getLogger().info("try with jsoup");
      System.err.println("from url: " + feedurl);
      e.printStackTrace();
      try {
        // NOTE(review): the parsed document is discarded; this only re-fetches the page.
        doc = Jsoup.parse(feedurl, 10000);
      } catch (IOException e2) {
        System.err.println("from url: " + feedurl);
        e2.printStackTrace();

        try {
          getLogger().info("try with jsoup");
          doc = Jsoup.parse(feedurl, 10000);
          getLogger().info(doc.toString());
        } catch (IOException e1) {
          getLogger().info("once again, jsoup cant do it");
          System.err.println("from url: " + feedurl);
          e1.printStackTrace();
        }
      }

    } finally {
      if (reader != null)
        try {

          reader.close();
        } catch (IOException eg) {
          getLogger().info("Feed error: " + feedurl);
          System.err.println("from url: " + feedurl);
          eg.printStackTrace();
        }
    }
    return ret;
  }