Example #1
0
  //	@Transactional
  //	@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
  /**
   * Imports a single feed entry as a content item.
   *
   * <p>In order, the method: resolves the author, creates the content item (addressed by the
   * entry's link or URI when one is available), copies title, language, dates and text content,
   * saves the item, tags it from the feed categories, from Twitter-style hash tags in the title and
   * from the media module's categories, stores a geo position and the first media attachment when
   * present, applies the caller-supplied tags and types plus the kiwi:FeedPost type, and finally
   * flushes the entity manager.
   *
   * @param feed the feed the entry belongs to; its language and link are read
   * @param entry the feed entry to import
   * @param types types added to the created content item
   * @param tags content items applied to the created item as tags (labelled by their title)
   * @param user author of the created item and taggings; when {@code null} and the entry names an
   *     author, the user with that login is used if it exists, otherwise the "anonymous" user
   * @param output not read or written by this method
   */
  public void importEntry(
      final SyndFeed feed,
      final SyndEntry entry,
      final Set<KiWiUriResource> types,
      final Set<ContentItem> tags,
      User user,
      final Collection<ContentItem> output) {
    if (user == null && entry.getAuthor() != null && !"".equals(entry.getAuthor())) {
      if (userService.userExists(entry.getAuthor())) {
        user = userService.getUserByLogin(entry.getAuthor());
      } else {
        /* A user entity is deliberately NOT created here: a person should not be
         * persisted in the KiWi dataset without having been asked. If nobody is
         * registered under the nick given in the feed entry, the item is
         * attributed to 'anonymous' instead.
         */
        user = userService.getUserByLogin("anonymous");
        kiwiEntityManager.persist(user);
      }
    }

    log.debug("feed entry: #0 (#1)", entry.getTitle(), entry.getUri());

    // create a new content item and copy all data from the feed entry
    ContentItem item;
    if (entry.getLink() != null) {
      item = contentItemService.createExternContentItem(entry.getLink());
    } else if (entry.getUri() != null) {
      try {
        // constructed only to validate the syntax; the result itself is not needed
        new URI(entry.getUri());
        item = contentItemService.createExternContentItem(entry.getUri());
      } catch (URISyntaxException e) {
        // not a valid URI on its own: use it as a fragment of the feed's link instead
        item = contentItemService.createExternContentItem(feed.getLink() + "#" + entry.getUri());
      }
    } else {
      item = contentItemService.createContentItem();
    }
    contentItemService.updateTitle(item, entry.getTitle());

    if (feed.getLanguage() != null) {
      item.setLanguage(new Locale(feed.getLanguage()));
    }

    if (entry.getPublishedDate() != null) {
      item.setCreated(entry.getPublishedDate());
      item.setModified(entry.getPublishedDate());
    }

    // the updated date always wins for "modified"; it is used for "created" only as a fallback
    if (entry.getUpdatedDate() != null) {
      if (entry.getPublishedDate() == null) {
        item.setCreated(entry.getUpdatedDate());
      }
      item.setModified(entry.getUpdatedDate());
    }

    item.setAuthor(user);

    // read feed content and set it as item's text content
    List<SyndContent> contents = entry.getContents();
    if (!contents.isEmpty()) {
      if (contents.size() == 1) {
        log.debug("using RSS content section provided by item");
      } else {
        // only the first content section is imported
        log.warn("feed entry contained more than one content section");
      }
      contentItemService.updateTextContentItem(item, "<p>" + contents.get(0).getValue() + "</p>");
    } else if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
      log.debug("using RSS description as no content section was available");
      contentItemService.updateTextContentItem(
          item, "<p>" + entry.getDescription().getValue() + "</p>");
    }

    // save before tagging
    contentItemService.saveContentItem(item);

    // read feed categories and use them as tags
    for (SyndCategory cat : (List<SyndCategory>) entry.getCategories()) {
      tagWithLabel(item, cat.getName(), cat.getTaxonomyUri(), user);
    }

    // scan for Twitter-style hash tags in title (e.g. #kiwiknows, see KIWI-622);
    // entries without a title simply have no hash tags
    if (entry.getTitle() != null) {
      Matcher hashtagMatcher = p_hashtag.matcher(entry.getTitle());
      while (hashtagMatcher.find()) {
        tagWithLabel(item, hashtagMatcher.group(1), null, user);
      }
    }

    // check for geo information
    GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry);
    if (geoRSSModule != null && geoRSSModule.getPosition() != null) {
      POI poi = kiwiEntityManager.createFacade(item, POI.class);
      poi.setLatitude(geoRSSModule.getPosition().getLatitude());
      poi.setLongitude(geoRSSModule.getPosition().getLongitude());
      kiwiEntityManager.persist(poi);
    }

    // check for media information
    MediaEntryModule mediaModule = (MediaEntryModule) entry.getModule(MediaModule.URI);
    if (mediaModule != null) {
      MediaContent[] media = mediaModule.getMediaContents();
      if (media.length > 0) {
        // only the first media content is imported
        MediaContent m = media[0];
        if (m.getReference() instanceof UrlReference) {
          URL url = ((UrlReference) m.getReference()).getUrl();

          String type = m.getType();
          String name = url.getFile();
          // keep only what follows the last slash; ">= 0" so that a slash at index 0
          // (e.g. "/file.png") is stripped as well
          int lastSlash = name.lastIndexOf('/');
          if (lastSlash >= 0) {
            name = name.substring(lastSlash + 1);
          }

          log.debug("importing media data from URL #0", url.toString());

          try {
            InputStream is = url.openStream();
            try {
              ByteArrayOutputStream bout = new ByteArrayOutputStream();
              byte[] buffer = new byte[8192];
              int read;
              while ((read = is.read(buffer)) != -1) {
                bout.write(buffer, 0, read);
              }
              contentItemService.updateMediaContentItem(item, bout.toByteArray(), type, name);
            } finally {
              // close the stream even when reading or storing the data fails
              is.close();
            }
          } catch (IOException ex) {
            // best effort: a failed download must not abort the import of the entry
            log.error("error importing media content from RSS stream", ex);
          }
        } else {
          log.info("RSS importer can only import media with URL references");
        }
      } else {
        log.warn("media module found without content");
      }

      // media categories become tags; scheme + value form the tag's URI when a scheme is given
      for (Category cat : mediaModule.getMetadata().getCategories()) {
        String label = cat.getLabel() != null ? cat.getLabel() : cat.getValue();
        String tagUri = cat.getScheme() != null ? cat.getScheme() + cat.getValue() : null;
        tagWithLabel(item, label, tagUri, user);
      }
    }

    // add parameter categories as tags
    for (ContentItem tag : tags) {
      if (!taggingService.hasTag(item, tag.getTitle())) {
        taggingService.createTagging(tag.getTitle(), item, tag, user);
      }
    }

    // add parameter types as types
    for (KiWiUriResource type : types) {
      item.addType(type);
    }

    // add kiwi:FeedPost type
    item.addType(tripleStore.createUriResource(Constants.NS_KIWI_CORE + "FeedPost"));

    /* the flush is necessary, because CIs or tags will
     * otherwise be created multiple times when they
     * appear more than once in one RSS feed */
    entityManager.flush();
    log.debug("imported content item '#0' with URI '#1'", item.getTitle(), item.getResource());
  }

  /**
   * Tags {@code item} with {@code label} unless it already has a tag with that label.
   *
   * <p>The content item representing the tag is looked up by {@code tagUri} when one is given and
   * by title otherwise; if the lookup finds nothing, a new content item titled {@code label} and
   * authored by {@code user} is created and saved first.
   *
   * @param item the content item to tag
   * @param label the tag label
   * @param tagUri URI identifying the tag's content item, or {@code null} to identify it by title
   * @param user the user recorded as author of a newly created tag item and of the tagging
   */
  private void tagWithLabel(ContentItem item, String label, String tagUri, User user) {
    if (taggingService.hasTag(item, label)) {
      return;
    }

    ContentItem tag =
        tagUri != null
            ? contentItemService.getContentItemByUri(tagUri)
            : contentItemService.getContentItemByTitle(label);
    if (tag == null) {
      tag =
          tagUri != null
              ? contentItemService.createExternContentItem(tagUri)
              : contentItemService.createContentItem();
      contentItemService.updateTitle(tag, label);
      tag.setAuthor(user);
      contentItemService.saveContentItem(tag);
    }
    taggingService.createTagging(label, item, tag, user);
  }
Example #2
0
  /**
   * Builds a {@link DocumentPojo} from a single feed entry.
   *
   * <p>Copies the cleaned link, the HTML-stripped title and description and the published date
   * (falling back to the current time), marks the document as a duplicate of the first entry of
   * {@code duplicateSources} when possible, derives a single geo point from any GeoRSS data and
   * stores the entry's foreign markup, converted to JSON, under the "_FEED_METADATA_" key.
   *
   * @param entry the feed entry; its link must not be {@code null} (the calling code checks this)
   * @param source not read by this method
   * @param duplicateSources candidates to clone the document from; only the first one is used
   * @return the newly created document, never {@code null}
   */
  private DocumentPojo buildDocument(
      SyndEntry entry, SourcePojo source, LinkedList<String> duplicateSources) {

    String tmpURL = this.cleanUrlStart(entry.getLink());
    // (can't return null because called from code which checks this)

    // create the feed pojo
    DocumentPojo doc = new DocumentPojo();

    // one timestamp, so that created/modified cannot differ by a clock tick
    Date now = new Date();
    doc.setUrl(tmpURL);
    doc.setCreated(now);
    doc.setModified(now);

    // Strip out html if it is present
    if (entry.getTitle() != null) {
      doc.setTitle(entry.getTitle().replaceAll("\\<.*?\\>", "").trim());
    }
    // a description element may be present without carrying a value
    if (entry.getDescription() != null && entry.getDescription().getValue() != null) {
      doc.setDescription(entry.getDescription().getValue().replaceAll("\\<.*?\\>", "").trim());
    }
    if (entry.getPublishedDate() != null) {
      doc.setPublishedDate(entry.getPublishedDate());
    } else {
      doc.setPublishedDate(now);
    }

    // Clone from an existing source if we can:
    if (!duplicateSources.isEmpty()
        && (null == doc.getUpdateId())) { // (can't duplicate updating document)
      doc.setDuplicateFrom(duplicateSources.getFirst());
    }

    // GeoRSS
    GeoRSSModule geoRSSModule =
        GeoRSSUtils.getGeoRSS(entry); // currently does not handle <georss:circle>
    if (null != geoRSSModule) {
      if (null != geoRSSModule.getPosition()) {
        doc.setDocGeo(
            newGeoPojo(
                geoRSSModule.getPosition().getLatitude(),
                geoRSSModule.getPosition().getLongitude()));
      }

      // A geometry, when usable, replaces the plain position set above. Lines and polygons are
      // reduced to the average of their vertices, boxes to their centre.
      AbstractGeometry ag = geoRSSModule.getGeometry();
      if (ag instanceof LineString) { // <georss:line>
        LineString ls = (LineString) ag;

        int length = ls.getPositionList().size();
        if (length > 0) { // an empty list would average to NaN
          double latSum = 0.0;
          double lonSum = 0.0;
          for (int i = 0; i < length; i++) {
            latSum += ls.getPositionList().getLatitude(i);
            lonSum += ls.getPositionList().getLongitude(i);
          }
          doc.setDocGeo(newGeoPojo(latSum / length, lonSum / length));
        }
      } else if (ag instanceof Polygon) { // <georss:polygon>
        AbstractRing ar = ((Polygon) ag).getExterior();
        if (ar instanceof LinearRing) { // also skips a missing exterior ring
          LinearRing lr = (LinearRing) ar;

          int length = lr.getPositionList().size();
          if (length > 0) { // an empty list would average to NaN
            double latSum = 0.0;
            double lonSum = 0.0;
            for (int i = 0; i < length; i++) {
              latSum += lr.getPositionList().getLatitude(i);
              lonSum += lr.getPositionList().getLongitude(i);
            }
            doc.setDocGeo(newGeoPojo(latSum / length, lonSum / length));
          }
        }
      } else if (ag instanceof Envelope) { // <georss:box>
        Envelope env = (Envelope) ag;
        doc.setDocGeo(
            newGeoPojo(
                (env.getMaxLatitude() + env.getMinLatitude()) / 2,
                (env.getMaxLongitude() + env.getMinLongitude()) / 2));
      }
    } // end if GeoRSS

    // Arbitrary other metadata:

    @SuppressWarnings("unchecked")
    List<Element> fms = (List<Element>) entry.getForeignMarkup();
    if (null != fms) {
      JSONObject rssMetadata = new JSONObject();

      for (Element fm : fms) {
        try {
          JSONObject subObj = XML.toJSONObject(new XMLOutputter().outputString(fm));
          if (1 == subObj.length()) {
            // single top-level key: hoist it directly into the metadata object
            for (String name : JSONObject.getNames(subObj)) {
              rssMetadata.put(name, subObj.get(name));
            }
          } else { // (this will never happen in practice?)
            rssMetadata.put(fm.getName(), subObj);
          }
        } catch (JSONException ignored) {
          // deliberately best effort: an element that cannot be converted is skipped
          // and the remaining elements are still processed
        }
      }
      if (!fms.isEmpty()) {
        doc.addToMetadata(
            "_FEED_METADATA_", XmlToMetadataParser.convertJsonObjectToLinkedHashMap(rssMetadata));
      }
    } // TESTED (longs converted to string, eg edgar:assistantDirector from
      // "http.www.sec.gov.archives.edgar.usgaap.rss.xml")

    return doc;
  }

  /** Creates a {@link GeoPojo} holding the given latitude and longitude. */
  private GeoPojo newGeoPojo(double lat, double lon) {
    GeoPojo gp = new GeoPojo();
    gp.lat = lat;
    gp.lon = lon;
    return gp;
  }