Example #1
0
  private DocumentPojo buildDocument(
      SyndEntry entry, SourcePojo source, LinkedList<String> duplicateSources) {

    String tmpURL = this.cleanUrlStart(entry.getLink().toString());
    // (can't return null because called from code which checks this)

    // create the feed pojo
    DocumentPojo doc = new DocumentPojo();

    doc.setUrl(tmpURL);
    doc.setCreated(new Date());
    doc.setModified(new Date());

    // Strip out html if it is present
    if (entry.getTitle() != null) doc.setTitle(entry.getTitle().replaceAll("\\<.*?\\>", "").trim());
    if (entry.getDescription() != null)
      doc.setDescription(entry.getDescription().getValue().replaceAll("\\<.*?\\>", "").trim());
    if (entry.getPublishedDate() != null) {
      doc.setPublishedDate(entry.getPublishedDate());
    } else {
      doc.setPublishedDate(new Date());
    }

    // Clone from an existing source if we can:
    if (!duplicateSources.isEmpty()
        && (null == doc.getUpdateId())) { // (can't duplicate updating document)
      doc.setDuplicateFrom(duplicateSources.getFirst());
    }

    // GeoRSS
    GeoRSSModule geoRSSModule =
        GeoRSSUtils.getGeoRSS(entry); // currently does not handle <georss:circle>
    if (null != geoRSSModule) {
      if (null != geoRSSModule.getPosition()) {
        double lat = geoRSSModule.getPosition().getLatitude();
        double lon = geoRSSModule.getPosition().getLongitude();
        GeoPojo gp = new GeoPojo();
        gp.lat = lat;
        gp.lon = lon;
        doc.setDocGeo(gp);
      }
      if (null != geoRSSModule.getGeometry()) {
        AbstractGeometry ag = geoRSSModule.getGeometry();
        if (ag.getClass().equals(new LineString().getClass())) { // <georss:line>
          LineString ls = ((LineString) geoRSSModule.getGeometry());

          double latAvg = 0.0;
          double lonAvg = 0.0;
          int length = ls.getPositionList().size();
          for (int i = 0; i < length; i++) {
            latAvg += ls.getPositionList().getLatitude(i);
            lonAvg += ls.getPositionList().getLongitude(i);
          }
          latAvg = latAvg / length;
          lonAvg = lonAvg / length;
          GeoPojo gp = new GeoPojo();
          gp.lat = latAvg;
          gp.lon = lonAvg;
          doc.setDocGeo(gp);
        } else if (ag.getClass().equals(new Polygon().getClass())) // <georss:polygon>
        {
          Polygon poly = ((Polygon) geoRSSModule.getGeometry());
          AbstractRing ar = poly.getExterior();
          LinearRing lr = (LinearRing) ar;

          double latAvg = 0.0;
          double lonAvg = 0.0;
          int length = lr.getPositionList().size();
          for (int i = 0; i < length; i++) {
            latAvg += lr.getPositionList().getLatitude(i);
            lonAvg += lr.getPositionList().getLongitude(i);
          }
          latAvg = latAvg / length;
          lonAvg = lonAvg / length;
          GeoPojo gp = new GeoPojo();
          gp.lat = latAvg;
          gp.lon = lonAvg;
          doc.setDocGeo(gp);
        } else if (ag.getClass().equals(new Envelope().getClass())) { // <georss:box>
          Envelope env = ((Envelope) geoRSSModule.getGeometry());

          double latAvg = (env.getMaxLatitude() + env.getMinLatitude()) / 2;
          double lonAvg = (env.getMaxLongitude() + env.getMinLongitude()) / 2;

          GeoPojo gp = new GeoPojo();
          gp.lat = latAvg;
          gp.lon = lonAvg;
          doc.setDocGeo(gp);
        }
      }
    } // end if GeoRSS

    // Arbitrary other metadata:

    if (null != entry.getForeignMarkup()) {
      JSONObject rssMetadata = new JSONObject();

      @SuppressWarnings("unchecked")
      List<Element> fms = (List<Element>) entry.getForeignMarkup();
      for (Element fm : fms) {
        try {
          JSONObject subObj = XML.toJSONObject(new XMLOutputter().outputString(fm));
          if (1 == subObj.length()) {
            for (String name : JSONObject.getNames(subObj)) {
              rssMetadata.put(name, subObj.get(name));
            }
          } else { // (this will never happen in practice?)
            rssMetadata.put(fm.getName(), subObj);
          }
        } catch (JSONException e) {
        } // (do nothing just carry on)
      }
      if (!fms.isEmpty()) {
        doc.addToMetadata(
            "_FEED_METADATA_", XmlToMetadataParser.convertJsonObjectToLinkedHashMap(rssMetadata));
      }
    } // TESTED (longs converted to string, eg edgar:assistantDirector from
      // "http.www.sec.gov.archives.edgar.usgaap.rss.xml")

    return doc;
  }