示例#1
0
    /**
     * Retrieves the text behind {@code url} through {@code pageFetcher} and parses it as a
     * syndication feed.
     *
     * @param url location passed to {@code pageFetcher.fetch}
     * @return a new {@code FeedBuilder} wrapping the parsed feed
     * @throws FeedException propagated from the underlying fetch/parse calls
     */
    public FeedBuilder fetch(String url) throws FeedException {
      final SyndFeedInput parser = new SyndFeedInput();
      final String rawFeed = pageFetcher.fetch(url);

      // The parser consumes a Reader, so the fetched text is wrapped in one.
      final StringReader feedReader = new StringReader(rawFeed);
      final SyndFeed parsedFeed = parser.build(feedReader);
      return new FeedBuilder(parsedFeed);
    }
示例#2
0
  /**
   * Parses {@code xml/custom-tags-example.xml} and checks the typed values of the custom tags
   * attached to the first entry of the feed.
   *
   * <p>Only tags whose name matches one of the branches below are asserted; every tag is logged
   * at debug level regardless.
   *
   * @throws Exception if the test file cannot be read or parsed
   */
  public void testParse() throws Exception {
    final SyndFeedInput input = new SyndFeedInput();

    final SyndFeed feed = input.build(new File(super.getTestFile("xml/custom-tags-example.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final CustomTags customTags = (CustomTags) entry.getModule(CustomTags.URI);
    final Iterator<CustomTag> it = customTags.getValues().iterator();
    while (it.hasNext()) {
      final CustomTag tag = it.next();
      LOG.debug("{}", tag);

      // Read name and value once; the branches are mutually exclusive on the tag name.
      final String name = tag.getName();
      final Object value = tag.getValue();

      if (name.equals("language_skills")) {
        Assert.assertEquals("Fluent in English and German", value);
      } else if (name.equals("prior_experience_years")) {
        // Integer.valueOf instead of the deprecated boxing constructor.
        Assert.assertEquals(Integer.valueOf(5), value);
      } else if (name.equals("start_date")) {
        final Calendar cal = Calendar.getInstance();
        // Zero the millisecond field, which set(y, m, d, h, m, s) leaves untouched.
        cal.setTimeInMillis(0);
        // Calendar months are zero-based: the original literal 10 is November.
        cal.set(2005, Calendar.NOVEMBER, 15, 0, 0, 0);
        Assert.assertEquals(cal.getTime(), value);
      } else if (name.equals("test_url")) {
        Assert.assertEquals(new URL("http://www.screaming-penguin.com"), value);
      } else if (name.equals("test_boolean")) {
        Assert.assertEquals(Boolean.TRUE, value);
      } else if (name.equals("test_intUnit")) {
        Assert.assertEquals(new IntUnit(25, "horses"), value);
      } else if (name.equals("test_floatUnit")) {
        Assert.assertEquals(new FloatUnit(2.5f, "cows"), value);
      } else if (name.equals("test_location")) {
        Assert.assertEquals(new CustomTagImpl.Location("125 Main St, Sometown, GA"), value);
      } else if (name.equals("test_dateRange")) {
        final Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(0);
        // The original used octal literals 06 / 04 (July 4th); spelled out here in decimal.
        cal.set(2005, Calendar.JULY, 4, 20, 0, 0);
        final Date start = cal.getTime();
        cal.set(2005, Calendar.JULY, 4, 23, 0, 0);
        final DateTimeRange dtr = new DateTimeRange(start, cal.getTime());
        Assert.assertEquals(dtr, value);
      }
    }
  }
  /**
   * Parses raw feed bytes and converts the feed's entries into outlinks.
   *
   * <p>For each entry the link is resolved against {@code url}, passed through {@code
   * urlFilters}, and given metadata from {@code metadataTransfer} plus the optional {@code
   * feed.title}, {@code feed.publishedDate} and {@code feed.description} values. Entries are
   * skipped when they have no link, when the link cannot be resolved, when the filters reject it,
   * or when {@code filterHoursSincePub} is not {@code -1} and the entry was published more than
   * that many hours ago.
   *
   * @param url URL the feed was fetched from; used as the base for resolving entry links
   * @param content raw bytes of the feed document
   * @param parentMetadata metadata of the feed page, handed to the filters and metadata transfer
   * @return the outlinks built from the feed; empty if the content could not be parsed
   * @throws MalformedURLException if {@code url} itself is not a valid URL
   */
  private List<Outlink> parseFeed(String url, byte[] content, Metadata parentMetadata)
      throws MalformedURLException {
    List<Outlink> links = new ArrayList<>();

    SyndFeed feed = null;
    try (ByteArrayInputStream is = new ByteArrayInputStream(content)) {
      SyndFeedInput input = new SyndFeedInput();
      feed = input.build(new InputSource(is));
    } catch (Exception e) {
      // Best effort: an unparsable feed yields no links. The exception is passed as the last
      // argument so the cause and stack trace end up in the log instead of being dropped.
      LOG.error("Exception parsing feed from DOM {}", url, e);
      return links;
    }

    URL sURL = new URL(url);

    // Entries published before this instant are dropped; null means the age filter is off.
    // Computed once because it does not depend on the individual entry.
    Date oldestAllowed = null;
    if (filterHoursSincePub != -1) {
      Calendar cutoff = Calendar.getInstance();
      cutoff.add(Calendar.HOUR, -filterHoursSincePub);
      oldestAllowed = cutoff.getTime();
    }

    List<SyndEntry> entries = feed.getEntries();
    for (SyndEntry entry : entries) {
      String targetURL = entry.getLink();

      // An entry without a link cannot produce an outlink; skip it rather than hand a
      // null/blank value to the resolver, where only MalformedURLException is handled.
      if (StringUtils.isBlank(targetURL)) {
        continue;
      }

      // build an absolute URL
      try {
        targetURL = URLUtil.resolveURL(sURL, targetURL).toExternalForm();
      } catch (MalformedURLException e) {
        LOG.debug("MalformedURLException on {}", targetURL);
        continue;
      }

      targetURL = urlFilters.filter(sURL, parentMetadata, targetURL);

      // The filters signal rejection with a blank result.
      if (StringUtils.isBlank(targetURL)) {
        continue;
      }

      Outlink newLink = new Outlink(targetURL);

      Metadata targetMD = metadataTransfer.getMetaForOutlink(targetURL, url, parentMetadata);
      newLink.setMetadata(targetMD);

      String title = entry.getTitle();
      if (StringUtils.isNotBlank(title)) {
        targetMD.setValue("feed.title", title.trim());
      }

      Date publishedDate = entry.getPublishedDate();
      if (publishedDate != null) {
        // filter based on the published date
        if (oldestAllowed != null && publishedDate.before(oldestAllowed)) {
          LOG.info(
              "{} has a published date {} which is more than {} hours old",
              targetURL,
              publishedDate.toString(),
              filterHoursSincePub);
          continue;
        }
        targetMD.setValue("feed.publishedDate", publishedDate.toString());
      }

      SyndContent description = entry.getDescription();
      if (description != null && StringUtils.isNotBlank(description.getValue())) {
        targetMD.setValue("feed.description", description.getValue());
      }

      links.add(newLink);
    }

    return links;
  }