示例#1
0
  //	@Transactional
  /**
   * Imports every entry of the given RSS/Atom feed by delegating to {@code importEntry}, each call
   * wrapped in a {@code RunAsOperation} with the "admin" role added.
   *
   * <p>Facebook activity stream hack: if one of the types has the URI {@code kiwi:FacebookPost},
   * entries whose author name is not identical (ignoring case) to "firstName lastName" of the
   * local user are skipped. Entries without an author are skipped in this mode as well.
   *
   * @param feed the parsed feed whose entries are imported
   * @param types types handed on to {@code importEntry}; {@code null} is treated as an empty set
   * @param tags tags handed on to {@code importEntry}; {@code null} is treated as an empty set
   * @param user the user handed on to {@code importEntry} and used for the Facebook author check
   * @param output collection handed on to {@code importEntry}
   * @return the number of entries in the feed (this includes entries skipped by the Facebook
   *     filter)
   */
  public int importData(
      final SyndFeed feed,
      Set<KiWiUriResource> types,
      Set<ContentItem> tags,
      final User user,
      final Collection<ContentItem> output) {
    log.info(
        "importing entries from #0 feed '#1' found at '#2'",
        feed.getFeedType(),
        feed.getTitle(),
        feed.getUri());

    // The anonymous RunAsOperation below needs final references, so normalise null arguments
    // into final locals instead of reassigning the parameters.
    final Set<KiWiUriResource> my_types =
        (types != null) ? types : new HashSet<KiWiUriResource>();
    final Set<ContentItem> my_tags = (tags != null) ? tags : new HashSet<ContentItem>();

    // a hack for importing facebook activity streams: if the type is kiwi:FacebookPost,
    // turn facebook activity stream mode on; in this mode, we will skip all entries where
    // the remote author name and local user name are not identical
    boolean facebookImport = false;
    String t_facebookPost = Constants.NS_KIWI_CORE + "FacebookPost";
    for (KiWiUriResource r : my_types) {
      if (r.getUri().equals(t_facebookPost)) {
        facebookImport = true;
        break;
      }
    }

    // Only touch the user when the Facebook filter is actually active.
    final String localUserName =
        facebookImport ? user.getFirstName() + " " + user.getLastName() : null;

    for (final SyndEntry entry : (List<SyndEntry>) feed.getEntries()) {

      // facebook hack ... (see above); calling equalsIgnoreCase on the local name keeps the
      // comparison safe when the entry carries no author at all
      if (facebookImport && !localUserName.equalsIgnoreCase(entry.getAuthor())) {
        log.info("Facebook import: skipping friend post with title '#0'", entry.getTitle());
        continue;
      }

      new RunAsOperation() {
        @Override
        public void execute() {
          importEntry(feed, entry, my_types, my_tags, user, output);
        }
      }.addRole("admin").run();
    }

    //		entityManager.flush();

    log.info("#0 content items have been imported from RSS/Atom feed", feed.getEntries().size());

    return feed.getEntries().size();
  }
示例#2
0
  /**
   * Builds a feed from the classpath resource {@code /be/hikage/xml/rome/atom_1.0.xml} and checks
   * its title, link and entry count.
   *
   * @throws FeedException if the feed cannot be built from the resource
   */
  public void testReadAtom1() throws FeedException {
    final InputSource atomSource =
        new InputSource(getClass().getResourceAsStream("/be/hikage/xml/rome/atom_1.0.xml"));
    final SyndFeed parsedFeed = new SyndFeedInput().build(atomSource);

    assertEquals("RSS Veille Techno", parsedFeed.getTitle());
    assertEquals("http://svn.cyg.be/", parsedFeed.getLink());
    assertEquals(1, parsedFeed.getEntries().size());
  }
示例#3
0
  /**
   * Command-line entry point: retrieves the feed at the URL given as the single argument twice
   * through an {@code HttpURLFeedFetcher} and reports progress on stderr. Usage help is printed
   * to stdout when the argument count is wrong or when retrieval fails.
   *
   * @param args expected to hold exactly one element, the feed URL
   */
  public static void main(String[] args) {
    boolean succeeded = false;

    if (args.length == 1) {
      try {
        final URL feedUrl = new URL(args[0]);
        final FeedFetcherCache cache = HashMapFeedInfoCache.getInstance();
        final FeedFetcher feedFetcher = new HttpURLFeedFetcher(cache);

        final FetcherEventListenerImpl eventListener = new FetcherEventListenerImpl();
        feedFetcher.addFetcherEventListener(eventListener);

        System.err.println("Retrieving feed " + feedUrl);
        // First retrieval: a Feed Polled event is expected, followed by a
        // Feed Retrieved event (assuming the feed is valid).
        final SyndFeed firstFetch = feedFetcher.retrieveFeed(feedUrl);

        System.err.println(feedUrl + " retrieved");
        final String summary =
            feedUrl
                + " has a title: "
                + firstFetch.getTitle()
                + " and contains "
                + firstFetch.getEntries().size()
                + " entries.";
        System.err.println(summary);

        // Second retrieval: if the feed is unmodified and the server supports
        // conditional gets, a "Feed Unchanged" event follows the Feed Polled event.
        // The returned feed itself is not needed.
        System.err.println("Polling " + feedUrl + " again to test conditional get support.");
        feedFetcher.retrieveFeed(feedUrl);
        System.err.println(
            "If a \"Feed Unchanged\" event fired then the server supports conditional gets.");

        succeeded = true;
      } catch (Exception ex) {
        System.out.println("ERROR: " + ex.getMessage());
        ex.printStackTrace();
      }
    }

    if (succeeded) {
      return;
    }

    System.out.println();
    System.out.println("FeedReader reads and prints any RSS/Atom feed type.");
    System.out.println("The first parameter must be the URL of the feed to read.");
    System.out.println();
  }
  /**
   * Creates a {@code Channel} of the given type and copies the modules (cloned), encoding, title,
   * link and description of the synd feed into it. The image and the entries are converted via
   * {@code createRSSImage} and {@code createRSSItems}, each only when present.
   *
   * @param type the feed type passed to the {@code Channel} constructor
   * @param syndFeed the feed to copy from
   * @return the populated channel
   */
  protected WireFeed createRealFeed(String type, SyndFeed syndFeed) {
    final Channel rssChannel = new Channel(type);

    rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));
    rssChannel.setEncoding(syndFeed.getEncoding());
    rssChannel.setTitle(syndFeed.getTitle());
    rssChannel.setLink(syndFeed.getLink());
    rssChannel.setDescription(syndFeed.getDescription());

    // The image is optional; leave the channel image untouched when absent.
    final SyndImage feedImage = syndFeed.getImage();
    if (feedImage != null) {
      rssChannel.setImage(createRSSImage(feedImage));
    }

    // Same for the entry list.
    final List feedEntries = syndFeed.getEntries();
    if (feedEntries != null) {
      rssChannel.setItems(createRSSItems(feedEntries));
    }

    return rssChannel;
  }
示例#5
0
  /**
   * Generate an ebook from an RSS DOM Document.
   *
   * <p>Feed-level title, author, published date, description and copyright become book metadata.
   * Every entry accepted by {@code matchesKeyword} becomes one section named {@code entryN.xhtml}
   * containing the entry title, its description and its first content element.
   *
   * @param url The URL from where the Document was fetched (used only to set the author metadata
   *     when the feed has no author)
   * @param doc The DOM Document of the feed.
   * @param keywords The keywords passed to {@code matchesKeyword} to select entries.
   * @return An ebook.
   * @throws IllegalArgumentException
   * @throws FeedException
   * @throws IOException
   */
  private static Book createBookFromFeed(URL url, Document doc, List<Keyword> keywords)
      throws IllegalArgumentException, FeedException, IOException {
    Book book = new Book();
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed = input.build(doc);

    System.out.println(feed);

    // Set the title
    book.getMetadata().addTitle(feed.getTitle());

    // Add an Author; fall back to the host name when the feed does not name one
    String author = feed.getAuthor();
    if (author == null || "".equals(author.trim())) {
      author = url.getHost();
    }
    book.getMetadata().addAuthor(new Author(author));

    if (feed.getPublishedDate() != null) {
      book.getMetadata().addDate(new nl.siegmann.epublib.domain.Date(feed.getPublishedDate()));
    }

    if (feed.getDescription() != null) {
      book.getMetadata().addDescription(feed.getDescription());
    }

    if (feed.getCopyright() != null) {
      book.getMetadata().getRights().add(feed.getCopyright());
    }

    // Cover image: an earlier attempt (download feed.getImage(), fall back to
    // assets/cover.png) never worked, so no cover image is set.

    int entryNumber = 0;
    List<SyndEntry> entries = feed.getEntries();

    for (SyndEntry entry : entries) {
      if (!matchesKeyword(entry, keywords)) {
        continue;
      }

      StringBuilder title = new StringBuilder(100);
      if (entry.getTitle() != null) {
        title.append(entry.getTitle());
      }
      if (entry.getAuthor() != null) {
        title.append(" - ").append(entry.getAuthor());
      }
      StringBuilder content = new StringBuilder();

      // Add title inside text
      content.append("<h2>").append(title).append("</h2>");

      if (entry.getDescription() != null) {
        appendEntryContent(content, entry.getDescription());
        content.append("<hr/>");
      }

      if (entry.getContents().size() > 0) {
        appendEntryContent(content, (SyndContent) entry.getContents().get(0));
      }
      String strContent = clean(content.toString());
      // Add Chapter
      try {
        entryNumber++;
        book.addSection(
            title.toString(),
            new Resource(new StringReader(strContent), "entry" + entryNumber + ".xhtml"));
      } catch (IOException e) {
        // Best effort: a section that cannot be created is reported and the remaining
        // entries are still processed.
        e.printStackTrace();
      }
    }

    return book;
  }

  /**
   * Appends the value of a feed content element to the buffer, wrapping it in {@code <pre>} tags
   * unless its type mentions "html". A missing (null) type is treated as non-HTML.
   */
  private static void appendEntryContent(StringBuilder target, SyndContent syndContent) {
    String type = syndContent.getType();
    boolean html = type != null && type.contains("html");
    if (!html) {
      target.append("<pre>\n");
    }
    target.append(syndContent.getValue());
    if (!html) {
      target.append("\n</pre>");
    }
  }
示例#6
0
  /**
   * Fills in the feed and entry-level metadata from DSpace objects.
   *
   * <p>Feed level: title, description, link, URI, published date (now) and, when a logo URL is
   * found, an image. Entry level (one entry per object of type ITEM): link, URI, title, dates, a
   * plain-text description built from {@code descriptionFields}, authors, an optional Dublin Core
   * module, and - when the containing collection/community handle is listed in the podcast
   * configuration - enclosures plus an iTunes module.
   *
   * @param request request
   * @param context context
   * @param dso DSpaceObject the feed is about; null for the whole site, or a search without scope
   * @param items array of objects; objects that are not of type ITEM are ignored, and when this
   *     is null no entries are set on the feed
   * @param labels label map
   */
  public void populate(
      HttpServletRequest request,
      Context context,
      DSpaceObject dso,
      List<? extends DSpaceObject> items,
      Map<String, String> labels) {
    String logoURL = null;
    String objectURL = null;
    String defaultTitle = null;
    boolean podcastFeed = false;
    this.request = request;

    // dso is null for the whole site, or a search without scope
    if (dso == null) {
      defaultTitle = ConfigurationManager.getProperty("dspace.name");
      feed.setDescription(localize(labels, MSG_FEED_DESCRIPTION));
      objectURL = resolveURL(request, null);
      logoURL = ConfigurationManager.getProperty("webui.feed.logo.url");
    } else {
      Bitstream logo = null;
      if (dso.getType() == Constants.COLLECTION) {
        Collection col = (Collection) dso;
        defaultTitle = col.getName();
        feed.setDescription(collectionService.getMetadata(col, "short_description"));
        logo = col.getLogo();
        String cols = ConfigurationManager.getProperty("webui.feed.podcast.collections");
        if (cols != null && cols.length() > 1 && cols.contains(col.getHandle())) {
          podcastFeed = true;
        }
      } else if (dso.getType() == Constants.COMMUNITY) {
        Community comm = (Community) dso;
        defaultTitle = comm.getName();
        feed.setDescription(communityService.getMetadata(comm, "short_description"));
        logo = comm.getLogo();
        String comms = ConfigurationManager.getProperty("webui.feed.podcast.communities");
        if (comms != null && comms.length() > 1 && comms.contains(comm.getHandle())) {
          podcastFeed = true;
        }
      }
      objectURL = resolveURL(request, dso);
      if (logo != null) {
        logoURL = urlOfBitstream(request, logo);
      }
    }
    feed.setTitle(
        labels.containsKey(MSG_FEED_TITLE) ? localize(labels, MSG_FEED_TITLE) : defaultTitle);
    feed.setLink(objectURL);
    feed.setPublishedDate(new Date());
    feed.setUri(objectURL);

    // add logo if we found one:
    if (logoURL != null) {
      // we use the path to the logo for this, the logo itself cannot
      // be contained in the rdf. Not all RSS-viewers show this logo.
      SyndImage image = new SyndImageImpl();
      image.setLink(objectURL);
      if (StringUtils.isNotBlank(feed.getTitle())) {
        image.setTitle(feed.getTitle());
      } else {
        image.setTitle(localize(labels, MSG_LOGO_TITLE));
      }
      image.setUrl(logoURL);
      feed.setImage(image);
    }

    // add entries for items
    if (items != null) {
      List<SyndEntry> entries = new ArrayList<SyndEntry>();
      for (DSpaceObject itemDSO : items) {
        if (itemDSO.getType() != Constants.ITEM) {
          continue;
        }
        Item item = (Item) itemDSO;
        boolean hasDate = false;
        SyndEntry entry = new SyndEntryImpl();
        entries.add(entry);

        String entryURL = resolveURL(request, item);
        entry.setLink(entryURL);
        entry.setUri(entryURL);

        String title = getOneDC(item, titleField);
        entry.setTitle(title == null ? localize(labels, MSG_UNTITLED) : title);

        // "published" date -- should be dc.date.issued
        String pubDate = getOneDC(item, dateField);
        if (pubDate != null) {
          entry.setPublishedDate((new DCDate(pubDate)).toDate());
          hasDate = true;
        }
        // date of last change to Item
        entry.setUpdatedDate(item.getLastModified());

        // Plain-text description: one "label: v1; v2" line per configured field with values
        StringBuilder db = new StringBuilder();
        for (String df : descriptionFields) {
          // Special Case: "(date)" in field name means render as date
          boolean isDate = df.indexOf("(date)") > 0;
          if (isDate) {
            df = df.replace("(date)", "");
          }

          List<MetadataValue> dcv = itemService.getMetadataByMetadataString(item, df);
          if (dcv.size() > 0) {
            String fieldLabel = labels.get(MSG_METADATA + df);
            if (fieldLabel != null && fieldLabel.length() > 0) {
              db.append(fieldLabel).append(": ");
            }
            boolean first = true;
            for (MetadataValue v : dcv) {
              if (first) {
                first = false;
              } else {
                db.append("; ");
              }
              db.append(isDate ? new DCDate(v.getValue()).toString() : v.getValue());
            }
            db.append("\n");
          }
        }
        if (db.length() > 0) {
          SyndContent desc = new SyndContentImpl();
          desc.setType("text/plain");
          desc.setValue(db.toString());
          entry.setDescription(desc);
        }

        // This gets the authors into an ATOM feed
        List<MetadataValue> authors = itemService.getMetadataByMetadataString(item, authorField);
        if (authors.size() > 0) {
          List<SyndPerson> creators = new ArrayList<SyndPerson>();
          for (MetadataValue author : authors) {
            SyndPerson sp = new SyndPersonImpl();
            sp.setName(author.getValue());
            creators.add(sp);
          }
          entry.setAuthors(creators);
        }

        // only add DC module if any DC fields are configured
        if (dcCreatorField != null || dcDateField != null || dcDescriptionField != null) {
          DCModule dc = new DCModuleImpl();
          if (dcCreatorField != null) {
            List<MetadataValue> dcAuthors =
                itemService.getMetadataByMetadataString(item, dcCreatorField);
            if (dcAuthors.size() > 0) {
              List<String> creators = new ArrayList<String>();
              for (MetadataValue author : dcAuthors) {
                creators.add(author.getValue());
              }
              dc.setCreators(creators);
            }
          }
          // the DC date is only used when no published date was set above
          if (dcDateField != null && !hasDate) {
            List<MetadataValue> v = itemService.getMetadataByMetadataString(item, dcDateField);
            if (v.size() > 0) {
              dc.setDate((new DCDate(v.get(0).getValue())).toDate());
            }
          }
          if (dcDescriptionField != null) {
            List<MetadataValue> v =
                itemService.getMetadataByMetadataString(item, dcDescriptionField);
            if (v.size() > 0) {
              StringBuilder descs = new StringBuilder();
              for (MetadataValue d : v) {
                if (descs.length() > 0) {
                  descs.append("\n\n");
                }
                descs.append(d.getValue());
              }
              dc.setDescription(descs.toString());
            }
          }
          entry.getModules().add(dc);
        }

        // iTunes Podcast Support - START
        if (podcastFeed) {
          // Add enclosure(s)
          List<SyndEnclosure> enclosures = new ArrayList<SyndEnclosure>();
          try {
            List<Bundle> bunds = itemService.getBundles(item, "ORIGINAL");
            // An item may have no ORIGINAL bundle at all; check for that before reading the
            // first one so the external media below is still picked up in that case.
            if (bunds != null && !bunds.isEmpty() && bunds.get(0) != null) {
              List<Bitstream> bits = bunds.get(0).getBitstreams();
              for (Bitstream bit : bits) {
                String mime = bit.getFormat(context).getMIMEType();
                if (ArrayUtils.contains(podcastableMIMETypes, mime)) {
                  SyndEnclosure enc = new SyndEnclosureImpl();
                  enc.setType(mime);
                  enc.setLength(bit.getSize());
                  enc.setUrl(urlOfBitstream(request, bit));
                  enclosures.add(enc);
                }
              }
            }
            // Also try to add an external value from dc.identifier.other
            // We are assuming that if this is set, then it is a media file
            List<MetadataValue> externalMedia =
                itemService.getMetadataByMetadataString(item, externalSourceField);
            for (MetadataValue anExternalMedia : externalMedia) {
              SyndEnclosure enc = new SyndEnclosureImpl();
              // We can't determine MIME of external file, so just picking one.
              enc.setType("audio/x-mpeg");
              enc.setLength(1);
              enc.setUrl(anExternalMedia.getValue());
              enclosures.add(enc);
            }

          } catch (Exception e) {
            // Best effort: keep whatever enclosures were collected before the failure.
            System.out.println(e.getMessage());
          }
          entry.setEnclosures(enclosures);

          // Get iTunes specific fields: author, subtitle, summary, duration, keywords
          EntryInformation itunes = new EntryInformationImpl();

          String author = getOneDC(item, authorField);
          if (author != null && author.length() > 0) {
            itunes.setAuthor(author); // <itunes:author>
          }

          itunes.setSubtitle(
              title == null ? localize(labels, MSG_UNTITLED) : title); // <itunes:subtitle>

          if (db.length() > 0) {
            itunes.setSummary(db.toString()); // <itunes:summary>
          }

          // assumed that user will enter this field with length of song in seconds
          String extent = getOneDC(item, "dc.format.extent");
          if (extent != null) {
            // Only the first whitespace-separated token is parsed; trimming first keeps a
            // leading blank from producing an empty token.
            String firstToken = extent.trim().split(" ")[0];
            if (firstToken.length() > 0) {
              try {
                int duration = Integer.parseInt(firstToken);
                itunes.setDuration(new Duration(duration)); // <itunes:duration>
              } catch (NumberFormatException ignored) {
                // A non-numeric extent must not abort the whole feed; the entry simply gets
                // no <itunes:duration>.
              }
            }
          }

          String subject = getOneDC(item, "dc.subject");
          if (subject != null && subject.length() > 0) {
            String[] subjects = new String[1];
            subjects[0] = subject;
            itunes.setKeywords(subjects); // <itunes:keywords>
          }

          entry.getModules().add(itunes);
        }
      }
      feed.setEntries(entries);
    }
  }
示例#7
0
  /**
   * {@inheritDoc}
   *
   * <p>Fetches the feed through the Rome fetcher and converts it into a {@code Subscription}.
   * Missing site URL, author and last-updated values are filled in from the feed URL, the title
   * and the fetcher's feed info cache respectively. Entries without a publication time receive
   * artificial dates, each one day earlier than the previous one.
   *
   * @param feedURL the URL of the feed to fetch; must not be null
   * @param lastModified if not null, and the fetched feed has a known last-updated time that is
   *     not after this date, the feed is considered unchanged
   * @return the fetched subscription, or null when the feed is unchanged
   * @throws FetcherException if the feed cannot be retrieved
   * @throws IllegalArgumentException if {@code feedURL} is null
   */
  public Subscription fetchSubscription(String feedURL, Date lastModified) throws FetcherException {

    if (feedURL == null) {
      throw new IllegalArgumentException("feed url cannot be null");
    }

    // setup Rome feed fetcher
    FeedFetcher feedFetcher = getRomeFetcher();

    // fetch the feed
    log.debug("Fetching feed: " + feedURL);
    SyndFeed feed;
    try {
      feed = feedFetcher.retrieveFeed(new URL(feedURL));
    } catch (Exception ex) {
      throw new FetcherException("Error fetching subscription - " + feedURL, ex);
    }

    log.debug("Feed pulled, extracting data into Subscription");

    // build planet subscription from fetched feed
    Subscription newSub = new Subscription();
    newSub.setFeedURL(feedURL);
    newSub.setSiteURL(feed.getLink());
    newSub.setTitle(feed.getTitle());
    newSub.setAuthor(feed.getAuthor());
    newSub.setLastUpdated(feed.getPublishedDate());

    // normalize any data that couldn't be properly extracted
    if (newSub.getSiteURL() == null) {
      // set the site url to the feed url then
      newSub.setSiteURL(newSub.getFeedURL());
    }
    if (newSub.getAuthor() == null) {
      // set the author to the title
      newSub.setAuthor(newSub.getTitle());
    }
    if (newSub.getLastUpdated() == null) {
      // no update time specified in feed, so try consulting feed info cache
      FeedFetcherCache feedCache = getRomeFetcherCache();
      try {
        SyndFeedInfo feedInfo = feedCache.getFeedInfo(new URL(newSub.getFeedURL()));
        // the cache may hold no info for this URL, so guard against a null result
        if (feedInfo != null && feedInfo.getLastModified() != null) {
          long lastUpdatedLong = ((Long) feedInfo.getLastModified()).longValue();
          if (lastUpdatedLong != 0) {
            newSub.setLastUpdated(new Date(lastUpdatedLong));
          }
        }
      } catch (MalformedURLException ignored) {
        // should never happen: the same URL string was already parsed successfully
        // when the feed was retrieved above
      }
    }

    // check if feed is unchanged and bail now if so
    if (lastModified != null
        && newSub.getLastUpdated() != null
        && !newSub.getLastUpdated().after(lastModified)) {
      return null;
    }

    if (log.isDebugEnabled()) {
      log.debug("Subscription is: " + newSub.toString());
    }

    // some kludge to deal with feeds w/ no entry dates
    // we assign arbitrary dates chronologically by entry starting either
    // from the current time or the last update time of the subscription
    Calendar cal = Calendar.getInstance();
    if (newSub.getLastUpdated() != null) {
      cal.setTime(newSub.getLastUpdated());
    } else {
      cal.setTime(new Date());
      cal.add(Calendar.DATE, -1);
    }

    // add entries
    List<SyndEntry> feedEntries = feed.getEntries();
    for (SyndEntry feedEntry : feedEntries) {
      SubscriptionEntry newEntry = buildEntry(feedEntry);

      // skip entries that could not be built; this check has to come before the entry
      // is dereferenced below
      if (newEntry == null) {
        continue;
      }

      // some kludge to handle feeds with no entry dates
      if (newEntry.getPubTime() == null) {
        log.debug("No published date, assigning fake date for " + feedURL);
        newEntry.setPubTime(new Timestamp(cal.getTimeInMillis()));
        cal.add(Calendar.DATE, -1);
      }

      newSub.addEntry(newEntry);
    }

    log.debug(feedEntries.size() + " entries included");

    return newSub;
  }