private DocumentPojo buildDocument( SyndEntry entry, SourcePojo source, LinkedList<String> duplicateSources) { String tmpURL = this.cleanUrlStart(entry.getLink().toString()); // (can't return null because called from code which checks this) // create the feed pojo DocumentPojo doc = new DocumentPojo(); doc.setUrl(tmpURL); doc.setCreated(new Date()); doc.setModified(new Date()); // Strip out html if it is present if (entry.getTitle() != null) doc.setTitle(entry.getTitle().replaceAll("\\<.*?\\>", "").trim()); if (entry.getDescription() != null) doc.setDescription(entry.getDescription().getValue().replaceAll("\\<.*?\\>", "").trim()); if (entry.getPublishedDate() != null) { doc.setPublishedDate(entry.getPublishedDate()); } else { doc.setPublishedDate(new Date()); } // Clone from an existing source if we can: if (!duplicateSources.isEmpty() && (null == doc.getUpdateId())) { // (can't duplicate updating document) doc.setDuplicateFrom(duplicateSources.getFirst()); } // GeoRSS GeoRSSModule geoRSSModule = GeoRSSUtils.getGeoRSS(entry); // currently does not handle <georss:circle> if (null != geoRSSModule) { if (null != geoRSSModule.getPosition()) { double lat = geoRSSModule.getPosition().getLatitude(); double lon = geoRSSModule.getPosition().getLongitude(); GeoPojo gp = new GeoPojo(); gp.lat = lat; gp.lon = lon; doc.setDocGeo(gp); } if (null != geoRSSModule.getGeometry()) { AbstractGeometry ag = geoRSSModule.getGeometry(); if (ag.getClass().equals(new LineString().getClass())) { // <georss:line> LineString ls = ((LineString) geoRSSModule.getGeometry()); double latAvg = 0.0; double lonAvg = 0.0; int length = ls.getPositionList().size(); for (int i = 0; i < length; i++) { latAvg += ls.getPositionList().getLatitude(i); lonAvg += ls.getPositionList().getLongitude(i); } latAvg = latAvg / length; lonAvg = lonAvg / length; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } else if (ag.getClass().equals(new Polygon().getClass())) // <georss:polygon> { Polygon poly = ((Polygon) geoRSSModule.getGeometry()); AbstractRing ar = poly.getExterior(); LinearRing lr = (LinearRing) ar; double latAvg = 0.0; double lonAvg = 0.0; int length = lr.getPositionList().size(); for (int i = 0; i < length; i++) { latAvg += lr.getPositionList().getLatitude(i); lonAvg += lr.getPositionList().getLongitude(i); } latAvg = latAvg / length; lonAvg = lonAvg / length; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } else if (ag.getClass().equals(new Envelope().getClass())) { // <georss:box> Envelope env = ((Envelope) geoRSSModule.getGeometry()); double latAvg = (env.getMaxLatitude() + env.getMinLatitude()) / 2; double lonAvg = (env.getMaxLongitude() + env.getMinLongitude()) / 2; GeoPojo gp = new GeoPojo(); gp.lat = latAvg; gp.lon = lonAvg; doc.setDocGeo(gp); } } } // end if GeoRSS // Arbitrary other metadata: if (null != entry.getForeignMarkup()) { JSONObject rssMetadata = new JSONObject(); @SuppressWarnings("unchecked") List<Element> fms = (List<Element>) entry.getForeignMarkup(); for (Element fm : fms) { try { JSONObject subObj = XML.toJSONObject(new XMLOutputter().outputString(fm)); if (1 == subObj.length()) { for (String name : JSONObject.getNames(subObj)) { rssMetadata.put(name, subObj.get(name)); } } else { // (this will never happen in practice?) rssMetadata.put(fm.getName(), subObj); } } catch (JSONException e) { } // (do nothing just carry on) } if (!fms.isEmpty()) { doc.addToMetadata( "_FEED_METADATA_", XmlToMetadataParser.convertJsonObjectToLinkedHashMap(rssMetadata)); } } // TESTED (longs converted to string, eg edgar:assistantDirector from // "http.www.sec.gov.archives.edgar.usgaap.rss.xml") return doc; }