Esempio n. 1
0
  /**
   * Downloads the RSS/Atom feed at {@code url} and converts every entry into an
   * {@link RssUrlBean} tagged with {@code rssCompo_id}.
   *
   * <p>This is best-effort: any failure while fetching or parsing is printed to stderr and the
   * beans collected so far (possibly none) are returned.
   *
   * @param rssCompo_id identifier copied into every returned bean
   * @param url location of the feed; {@code null} or empty yields an empty list
   * @return the beans built from the feed entries, never {@code null}
   */
  public static List<RssUrlBean> getRssUrlBeanListFromPage(int rssCompo_id, String url) {
    List<RssUrlBean> rubList = new ArrayList<RssUrlBean>();
    if (url == null || url.equals("")) return rubList;
    XmlReader reader = null;
    try {
      URL feedUrl = new URL(url);
      // SyndFeedInput parses the remote XML into a SyndFeed instance; SyndFeed is the common
      // interface ROME exposes for both RSS and Atom feeds.
      SyndFeedInput input = new SyndFeedInput();
      reader = new XmlReader(feedUrl);
      SyndFeed syndFeed = input.build(reader);

      List<SyndEntry> entryList = syndFeed.getEntries();
      for (SyndEntry entry : entryList) {
        RssUrlBean rub = new RssUrlBean();
        rub.setRssCompo_id(rssCompo_id);
        rub.setTitle(entry.getTitle());
        rub.setLink(entry.getUri());
        // Published date and description are optional in feeds; skip them when absent instead
        // of failing the whole import with a NullPointerException.
        if (entry.getPublishedDate() != null) {
          rub.setPublishedDate(
              CommonUtil.getStandardDate(entry.getPublishedDate().toLocaleString()));
        }
        if (entry.getDescription() != null) {
          rub.setDescription(entry.getDescription().getValue());
        }
        if (entry.getUpdatedDate() != null) {
          rub.setUpdatedDate(CommonUtil.getStandardDate(entry.getUpdatedDate().toLocaleString()));
        }
        rub.setAuthors(entry.getAuthor());
        rubList.add(rub);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    } finally {
      // Release the underlying network stream even when parsing failed.
      if (reader != null) {
        try {
          reader.close();
        } catch (Exception ignored) {
          // nothing useful can be done if closing fails
        }
      }
    }
    return rubList;
  }
Esempio n. 2
0
  //	@Transactional
  /**
   * Imports the entries of {@code feed} by calling {@code importEntry} for each one inside a
   * {@code RunAsOperation} that carries the "admin" role.
   *
   * <p>If {@code types} contains the resource {@code Constants.NS_KIWI_CORE + "FacebookPost"},
   * entries whose author does not equal (ignoring case) the local user's
   * "firstName lastName" are skipped; entries without an author are skipped as well in that mode.
   *
   * @param feed the parsed feed to import from
   * @param types types passed on to every imported item; {@code null} is treated as empty
   * @param tags tags passed on to every imported item; {@code null} is treated as empty
   * @param user the user passed on to {@code importEntry}
   * @param output collection passed on to {@code importEntry}
   * @return the number of entries that were handed to {@code importEntry} (skipped entries are
   *     not counted)
   */
  public int importData(
      final SyndFeed feed,
      Set<KiWiUriResource> types,
      Set<ContentItem> tags,
      final User user,
      final Collection<ContentItem> output) {
    log.info(
        "importing entries from #0 feed '#1' found at '#2'",
        feed.getFeedType(),
        feed.getTitle(),
        feed.getUri());

    final Set<KiWiUriResource> my_types = types != null ? types : new HashSet<KiWiUriResource>();
    final Set<ContentItem> my_tags = tags != null ? tags : new HashSet<ContentItem>();

    // a hack for importing facebook activity streams: if the type is kiwi:FacebookPost,
    // turn facebook activity stream mode on; in this mode, we will skip all entries where
    // the remote author name and local user name are not identical
    boolean facebookImport = false;
    String t_facebookPost = Constants.NS_KIWI_CORE + "FacebookPost";
    for (KiWiUriResource r : my_types) {
      if (r.getUri().equals(t_facebookPost)) {
        facebookImport = true;
        break;
      }
    }

    int imported = 0;
    for (final SyndEntry entry : (List<SyndEntry>) feed.getEntries()) {

      // facebook hack ... (see above)
      if (facebookImport) {
        String localName = user.getFirstName() + " " + user.getLastName();
        String author = entry.getAuthor();
        // an entry without author cannot be the local user's own post
        if (author == null || !author.equalsIgnoreCase(localName)) {
          log.info("Facebook import: skipping friend post with title '#0'", entry.getTitle());
          continue;
        }
      }

      new RunAsOperation() {
        @Override
        public void execute() {
          importEntry(feed, entry, my_types, my_tags, user, output);
        }
      }.addRole("admin").run();
      imported++;
    }

    //		entityManager.flush();

    log.info("#0 content items have been imported from RSS/Atom feed", imported);

    return imported;
  }
Esempio n. 3
0
  /**
   * Converts a ROME feed entry into an {@code Article}.
   *
   * <p>The article content is the concatenation of the values of all {@code SyndContent}
   * sections of the entry; the description is taken from the entry's description when present.
   *
   * @param syndEntry the feed entry to convert
   * @return an article with {@code num} 0, an empty image and the entry's title, author and
   *     published date; the description is {@code null} when the entry has none
   */
  private Article mapArticle(SyndEntry syndEntry) {
    StringBuilder sb = new StringBuilder();
    for (Object obj : syndEntry.getContents()) {
      if (!(obj instanceof SyndContent)) {
        continue;
      }
      String value = ((SyndContent) obj).getValue();
      // StringBuilder.append(null) would add the literal text "null" to the article
      if (value != null) {
        sb.append(value);
      }
    }

    // The description is optional in feeds; avoid a NullPointerException when it is missing.
    SyndContent description = syndEntry.getDescription();
    String descriptionText = description != null ? description.getValue() : null;

    return Article.builder()
        .num(0)
        .title(syndEntry.getTitle())
        .content(sb.toString())
        .description(descriptionText)
        .author(syndEntry.getAuthor())
        .image("")
        .writtenDate(syndEntry.getPublishedDate())
        .build();
  }
Esempio n. 4
0
  //	@Transactional
  //	@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
  /**
   * Imports a single feed entry as a content item.
   *
   * <p>Steps, in order: resolve the author (when {@code user} is {@code null}), create the
   * content item from the entry's link/URI, copy title, language and dates, set the text content
   * from the first content section or the description, save the item, create taggings from the
   * entry categories, from hash tags in the title, and from media categories, attach geo and
   * media data when the corresponding modules are present, add the given {@code tags} and
   * {@code types} plus the {@code FeedPost} type, and finally flush the entity manager.
   *
   * @param feed the feed the entry belongs to (used for language and as base for invalid URIs)
   * @param entry the entry to import
   * @param types types added to the created item
   * @param tags content items used as additional tags for the created item
   * @param user author of the item; when {@code null} and the entry names an author, the user
   *     with that login is used if it exists, otherwise the "anonymous" user
   * @param output not used by this method
   */
  public void importEntry(
      final SyndFeed feed,
      final SyndEntry entry,
      final Set<KiWiUriResource> types,
      final Set<ContentItem> tags,
      User user,
      final Collection<ContentItem> output) {
    if (user == null && entry.getAuthor() != null && !"".equals(entry.getAuthor())) {
      if (userService.userExists(entry.getAuthor())) {
        user = userService.getUserByLogin(entry.getAuthor());
      } else {

        //				user = userService.createUser(entry.getAuthor());
        /* In my opinion, it is not ok to create a user entity
         * without asking the person if he/she wants to be
         * created and persisted in the KiWi dataset.
         * Thus I'm changing the user to 'anonymous',
         * if he/she isn't registered with the same nick that
         * is given in the rss entry.
         */
        user = userService.getUserByLogin("anonymous");
        kiwiEntityManager.persist(user);
      }
    }

    log.debug("feed entry: #0 (#1)", entry.getTitle(), entry.getUri());

    // create a new content item and copy all data from the feed entry
    ContentItem item;
    if (entry.getLink() != null) {
      item = contentItemService.createExternContentItem(entry.getLink());
    } else if (entry.getUri() != null) {
      try {
        // parse only to validate the URI; if it is not valid, fall back to a fragment
        // identifier relative to the feed link
        new URI(entry.getUri());
        item = contentItemService.createExternContentItem(entry.getUri());
      } catch (URISyntaxException e) {
        item = contentItemService.createExternContentItem(feed.getLink() + "#" + entry.getUri());
      }
    } else {
      item = contentItemService.createContentItem();
    }
    contentItemService.updateTitle(item, entry.getTitle());

    if (feed.getLanguage() != null) item.setLanguage(new Locale(feed.getLanguage()));

    if (entry.getPublishedDate() != null) {
      item.setCreated(entry.getPublishedDate());
      item.setModified(entry.getPublishedDate());
    }

    if (entry.getUpdatedDate() != null) {
      if (entry.getPublishedDate() == null) {
        item.setCreated(entry.getUpdatedDate());
      }
      item.setModified(entry.getUpdatedDate());
    }

    item.setAuthor(user);

    // read feed content and set it as item's text content
    List<SyndContent> contents = entry.getContents();
    if (contents.size() == 1) {
      log.debug("using RSS content section provided by item");
      contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (contents.size() > 1) {
      // only the first section is imported
      log.warn("feed entry contained more than one content section");
      contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (contents.size() == 0) {
      if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
        log.debug("using RSS description as no content section was available");
        contentItemService.updateTextContentItem(
            item, "<p>" + entry.getDescription().getValue() + "</p>");
      }
    }

    // save before tagging
    contentItemService.saveContentItem(item);

    // read feed categories and use them as tags
    for (SyndCategory cat : (List<SyndCategory>) entry.getCategories()) {
      ContentItem _cat;
      if (!taggingService.hasTag(item, cat.getName())) {
        if (cat.getTaxonomyUri() != null) {
          _cat = contentItemService.getContentItemByUri(cat.getTaxonomyUri());
          if (_cat == null) {
            _cat = contentItemService.createExternContentItem(cat.getTaxonomyUri());
            contentItemService.updateTitle(_cat, cat.getName());
            _cat.setAuthor(user);
            contentItemService.saveContentItem(_cat);
          }
          taggingService.createTagging(cat.getName(), item, _cat, user);
        } else {
          _cat = contentItemService.getContentItemByTitle(cat.getName());
          if (_cat == null) {
            _cat = contentItemService.createContentItem();
            contentItemService.updateTitle(_cat, cat.getName());
            _cat.setAuthor(user);
            contentItemService.saveContentItem(_cat);
          }
          taggingService.createTagging(cat.getName(), item, _cat, user);
        }
      }
    }

    // scan for Twitter-style hash tags in title (e.g. #kiwiknows, see KIWI-622);
    // entries without a title have nothing to scan (Pattern.matcher(null) would throw)
    if (entry.getTitle() != null) {
      Matcher m_hashtag = p_hashtag.matcher(entry.getTitle());
      while (m_hashtag.find()) {
        String tag_label = m_hashtag.group(1);
        if (!taggingService.hasTag(item, tag_label)) {
          ContentItem tag = contentItemService.getContentItemByTitle(tag_label);
          if (tag == null) {
            tag = contentItemService.createContentItem();
            contentItemService.updateTitle(tag, tag_label);
            tag.setAuthor(user);
            contentItemService.saveContentItem(tag);
          }
          taggingService.createTagging(tag_label, item, tag, user);
        }
      }
    }

    // check for geo information
    GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry);
    if (geoRSSModule != null && geoRSSModule.getPosition() != null) {
      POI poi = kiwiEntityManager.createFacade(item, POI.class);
      poi.setLatitude(geoRSSModule.getPosition().getLatitude());
      poi.setLongitude(geoRSSModule.getPosition().getLongitude());
      kiwiEntityManager.persist(poi);
    }

    // check for media information
    MediaEntryModule mediaModule = (MediaEntryModule) entry.getModule(MediaModule.URI);
    if (mediaModule != null) {
      MediaContent[] media = mediaModule.getMediaContents();
      if (media.length > 0) {
        // only the first media content is imported
        MediaContent m = media[0];
        if (m.getReference() instanceof UrlReference) {
          URL url = ((UrlReference) m.getReference()).getUrl();

          String type = m.getType();
          // use the last path segment of the URL as file name
          String name = url.getFile();
          if (name.lastIndexOf("/") > 0) {
            name = name.substring(name.lastIndexOf("/") + 1);
          }

          log.debug("importing media data from URL #0", url.toString());

          InputStream is = null;
          try {
            is = url.openStream();

            ByteArrayOutputStream bout = new ByteArrayOutputStream();

            // copy in chunks instead of one byte per read() call
            byte[] buffer = new byte[8192];
            int n;
            while ((n = is.read(buffer)) != -1) {
              bout.write(buffer, 0, n);
            }

            contentItemService.updateMediaContentItem(item, bout.toByteArray(), type, name);
          } catch (IOException ex) {
            log.error("error importing media content from RSS stream", ex);
          } finally {
            // always release the connection, also when reading or updating failed
            if (is != null) {
              try {
                is.close();
              } catch (IOException ignored) {
                // nothing useful can be done if closing fails
              }
            }
          }
        } else {
          log.info("RSS importer can only import media with URL references");
        }
      } else {
        log.warn("media module found without content");
      }

      Category[] cats = mediaModule.getMetadata().getCategories();
      for (Category cat : cats) {
        ContentItem _cat;

        // prefer the human-readable label, fall back to the raw category value
        String label = cat.getLabel() != null ? cat.getLabel() : cat.getValue();

        if (!taggingService.hasTag(item, label)) {
          if (cat.getScheme() != null) {
            _cat = contentItemService.getContentItemByUri(cat.getScheme() + cat.getValue());
            if (_cat == null) {
              _cat = contentItemService.createExternContentItem(cat.getScheme() + cat.getValue());
              contentItemService.updateTitle(_cat, label);
              _cat.setAuthor(user);
              contentItemService.saveContentItem(_cat);
            }
            taggingService.createTagging(label, item, _cat, user);
          } else {
            _cat = contentItemService.getContentItemByTitle(label);
            if (_cat == null) {
              _cat = contentItemService.createContentItem();
              contentItemService.updateTitle(_cat, label);
              _cat.setAuthor(user);
              contentItemService.saveContentItem(_cat);
            }
            taggingService.createTagging(label, item, _cat, user);
          }
        }
      }
    }

    // add parameter categories as tags
    for (ContentItem tag : tags) {
      if (!taggingService.hasTag(item, tag.getTitle())) {
        taggingService.createTagging(tag.getTitle(), item, tag, user);
      }
    }

    // add parameter types as types
    for (KiWiUriResource type : types) {
      item.addType(type);
    }

    // add kiwi:FeedPost type
    item.addType(tripleStore.createUriResource(Constants.NS_KIWI_CORE + "FeedPost"));

    /* the flush is necessary, because CIs or tags will
     * otherwise be created multiple times when they
     * appear more than once in one RSS feed */
    entityManager.flush();
    log.debug("imported content item '#0' with URI '#1'", item.getTitle(), item.getResource());
  }
Esempio n. 5
0
  /**
   * Generate an ebook from an RSS DOM Document.
   *
   * <p>The feed's title, author, published date, description and copyright are copied into the
   * book metadata. Every entry accepted by {@code matchesKeyword(entry, keywords)} becomes one
   * section containing the entry title (plus author), its description and its first content
   * section.
   *
   * @param url The URL from where the Document was fetched (used only as fallback author when
   *     the feed does not name one)
   * @param doc The DOM Document of the feed.
   * @param keywords Passed to {@code matchesKeyword} to select the entries to include.
   * @return An ebook.
   * @throws IllegalArgumentException
   * @throws FeedException
   * @throws IOException
   */
  private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords)
      throws IllegalArgumentException, FeedException, IOException {
    Book book = new Book();
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed = input.build(doc);

    // Set the title
    book.getMetadata().addTitle(feed.getTitle());

    // Add an Author; fall back to the host name when the feed does not provide one
    String author = feed.getAuthor();
    if (author == null || "".equals(author.trim())) {
      author = url.getHost();
    }
    book.getMetadata().addAuthor(new Author(author));

    if (feed.getPublishedDate() != null) {
      book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate()));
    }

    if (feed.getDescription() != null) {
      book.getMetadata().addDescription(feed.getDescription());
    }

    if (feed.getCopyright() != null) {
      book.getMetadata().getRights().add(feed.getCopyright());
    }

    // NOTE: no cover image is set; an earlier attempt to use feed.getImage() never worked.

    int entryNumber = 0;
    List<SyndEntry> entries = feed.getEntries();

    for (SyndEntry entry : entries) {
      if (!matchesKeyword(entry, keywords)) {
        continue;
      }

      StringBuilder title = new StringBuilder(100);
      if (entry.getTitle() != null) {
        title.append(entry.getTitle());
      }
      if (entry.getAuthor() != null) {
        title.append(" - ").append(entry.getAuthor());
      }
      StringBuilder content = new StringBuilder();

      // Add title inside text
      content.append("<h2>").append(title).append("</h2>");

      if (entry.getDescription() != null) {
        appendSyndContent(content, entry.getDescription());
        content.append("<hr/>");
      }

      if (entry.getContents().size() > 0) {
        appendSyndContent(content, (SyndContent) entry.getContents().get(0));
      }
      String strContent = clean(content.toString());
      // Add Chapter
      try {
        entryNumber++;
        book.addSection(
            title.toString(),
            new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml"));
      } catch (IOException e) {
        // best effort: a section that cannot be added is reported and skipped
        e.printStackTrace();
      }
    }

    return book;
  }

  /**
   * Appends the value of {@code syndContent} to {@code target}, wrapping it in a {@code <pre>}
   * element unless its type mentions "html". A missing type is treated as non-HTML.
   */
  private static void appendSyndContent(StringBuilder target, SyndContent syndContent) {
    String type = syndContent.getType();
    boolean html = type != null && type.contains("html");
    if (!html) {
      target.append("<pre>\n");
    }
    target.append(syndContent.getValue());
    if (!html) {
      target.append("\n</pre>");
    }
  }
Esempio n. 6
0
 /**
  * Returns the author as reported by {@code entry}.
  *
  * @return the result of {@code entry.getAuthor()}, passed through unchanged
  */
 public String getAuthor() {
   final String author = entry.getAuthor();
   return author;
 }
Esempio n. 7
0
  /**
   * Builds a {@code SubscriptionEntry} from a ROME {@code SyndEntry}.
   *
   * <p>Author, publish time and text each fall back to an alternative source when the primary
   * one is missing: author falls back to {@code <dc:creator>}, publish time to {@code <dc:date>}
   * and then to the update time, text to the entry description.
   *
   * @param romeEntry the feed entry to convert
   * @return the new entry, or {@code null} when {@code romeEntry} has no link (no permalink)
   */
  private SubscriptionEntry buildEntry(SyndEntry romeEntry) {

    // if we don't have a permalink then we can't continue
    if (romeEntry.getLink() == null) {
      return null;
    }

    SubscriptionEntry newEntry = new SubscriptionEntry();

    newEntry.setTitle(romeEntry.getTitle());
    newEntry.setPermalink(romeEntry.getLink());

    // Play some games to get the author; the Dublin Core module may be absent
    DCModule entrydc = (DCModule) romeEntry.getModule(DCModule.URI);
    if (romeEntry.getAuthor() != null) {
      newEntry.setAuthor(romeEntry.getAuthor());
    } else if (entrydc != null) {
      newEntry.setAuthor(entrydc.getCreator()); // use <dc:creator>
    }

    // Play some games to get the updated date
    if (romeEntry.getUpdatedDate() != null) {
      newEntry.setUpdateTime(new Timestamp(romeEntry.getUpdatedDate().getTime()));
    }
    // TODO: should we set a default update time here?

    // And more games getting publish date
    if (romeEntry.getPublishedDate() != null) {
      newEntry.setPubTime(new Timestamp(romeEntry.getPublishedDate().getTime())); // use <pubDate>
    } else if (entrydc != null && entrydc.getDate() != null) {
      newEntry.setPubTime(new Timestamp(entrydc.getDate().getTime())); // use <dc:date>
    } else {
      newEntry.setPubTime(newEntry.getUpdateTime());
    }

    // get content and unescape if it is 'text/plain'
    if (romeEntry.getContents().size() > 0) {
      SyndContent content = (SyndContent) romeEntry.getContents().get(0);
      if (content != null) {
        // constant-first equals: the content type may be null
        if ("text/plain".equals(content.getType())) {
          newEntry.setText(StringEscapeUtils.unescapeHtml(content.getValue()));
        } else {
          newEntry.setText(content.getValue());
        }
      }
    }

    // no content, try summary
    if (newEntry.getText() == null || newEntry.getText().trim().length() == 0) {
      if (romeEntry.getDescription() != null) {
        newEntry.setText(romeEntry.getDescription().getValue());
      }
    }

    // copy categories
    if (romeEntry.getCategories().size() > 0) {
      List<String> list = new ArrayList<String>();
      for (Object cat : romeEntry.getCategories()) {
        list.add(((SyndCategory) cat).getName());
      }
      newEntry.setCategoriesString(list);
    }

    return newEntry;
  }