/**
 * Emits one typed blank node per embedded geo location found in {@code doc}.
 *
 * <p>Location nodes are looked up by a class name assembled from the "location" entry field
 * and the "geo" class. Each hit gets a fresh blank node typed as {@code vEntry.location}, and
 * every geo field found inside it is attached to that blank node as a string property taken
 * from the VCard vocabulary. When the field's element carries a {@code title} attribute, the
 * attribute value is used; otherwise the element's text value is used.
 *
 * <p>NOTE(review): {@code entry} is not referenced in this method, so the location blank node
 * is not linked back to the entry here; confirm whether that is intended.
 *
 * @param doc the HTML fragment to search for location nodes
 * @param entry the resource of the enclosing entry (currently unused)
 * @throws ExtractionException declared for the calls made while emitting statements
 */
private void addLocations(HTMLDocument doc, Resource entry) throws ExtractionException {
  final String geoLocationClass =
      Microformats2Prefixes.PROPERTY_PREFIX
          + entryFields[11] // "location"
          + Microformats2Prefixes.SPACE_SEPARATOR
          + Microformats2Prefixes.CLASS_PREFIX
          + "geo";

  // An empty result simply means the loop body never runs.
  for (Node geoNode : doc.findAllByClassName(geoLocationClass)) {
    final BNode location = valueFactory.createBNode();
    addURIProperty(location, RDF.TYPE, vEntry.location);

    final HTMLDocument geoFragment = new HTMLDocument(geoNode);
    for (String geoField : geoFields) {
      final HTMLDocument.TextField[] matches =
          geoFragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + geoField);
      for (HTMLDocument.TextField match : matches) {
        final Node source = match.source();
        final Node title = source.getAttributes().getNamedItem("title");
        // Prefer the title attribute over the element text when it is present.
        conditionallyAddStringProperty(
            source,
            location,
            vVCARD.getProperty(geoField),
            title == null ? match.value() : title.getNodeValue());
      }
    }
  }
}
/** * Extractor for the <a href="http://microformats.org/wiki/h-entry">h-entry</a> microformat. * * @author Nisala Nirmana */ public class HEntryExtractor extends EntityBasedMicroformatExtractor { private static final HEntry vEntry = HEntry.getInstance(); private static final VCard vVCARD = VCard.getInstance(); private static final String[] entryFields = { "name", "summary", "content", "published", "updated", "category", "url", "uid", "syndication", "in-reply-to", "author", "location", }; private static final String[] geoFields = {"latitude", "longitude", "altitude"}; @Override public ExtractorDescription getDescription() { return HEntryExtractorFactory.getDescriptionInstance(); } @Override protected String getBaseClassName() { return Microformats2Prefixes.CLASS_PREFIX + "entry"; } @Override protected void resetExtractor() { // Empty. } @Override protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException { final BNode entry = getBlankNodeFor(node); conditionallyAddResourceProperty(entry, RDF.TYPE, vEntry.Entry); final HTMLDocument fragment = new HTMLDocument(node); addName(fragment, entry); addSummary(fragment, entry); addContent(fragment, entry); addPublished(fragment, entry); addUpdated(fragment, entry); addCategories(fragment, entry); addURLs(fragment, entry); addUID(fragment, entry); addSyndications(fragment, entry); addInReplyTo(fragment, entry); addLocations(fragment, entry); addAuthors(fragment, entry); return true; } private void addAuthors(HTMLDocument doc, Resource entry) throws ExtractionException { List<Node> nodes = doc.findAllByClassName( Microformats2Prefixes.PROPERTY_PREFIX + entryFields[10] + Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card"); if (nodes.isEmpty()) return; HCardExtractorFactory factory = new HCardExtractorFactory(); HCardExtractor extractor = factory.createExtractor(); for (Node node : nodes) { BNode author = valueFactory.createBNode(); addURIProperty(author, 
RDF.TYPE, vEntry.author); extractor.extractEntityAsEmbeddedProperty( new HTMLDocument(node), author, getCurrentExtractionResult()); } } private void mapFieldWithProperty( HTMLDocument fragment, BNode entry, String fieldClass, URI property) { HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass); conditionallyAddStringProperty(title.source(), entry, property, title.value()); } private void addName(HTMLDocument fragment, BNode entry) { mapFieldWithProperty( fragment, entry, Microformats2Prefixes.PROPERTY_PREFIX + entryFields[0], vEntry.name); } private void addSummary(HTMLDocument fragment, BNode entry) { mapFieldWithProperty( fragment, entry, Microformats2Prefixes.PROPERTY_PREFIX + entryFields[1], vEntry.summary); } private void addContent(HTMLDocument fragment, BNode entry) { mapFieldWithProperty( fragment, entry, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX + entryFields[2], vEntry.content); } private void addPublished(HTMLDocument fragment, BNode entry) { final HTMLDocument.TextField[] durations = fragment.getPluralTextField(Microformats2Prefixes.TIME_PROPERTY_PREFIX + entryFields[3]); for (HTMLDocument.TextField duration : durations) { Node attribute = duration.source().getAttributes().getNamedItem("datetime"); if (attribute == null) { conditionallyAddStringProperty( duration.source(), entry, vEntry.published, duration.value()); } else { conditionallyAddStringProperty( duration.source(), entry, vEntry.published, attribute.getNodeValue()); } } } private void addUpdated(HTMLDocument fragment, BNode entry) { final HTMLDocument.TextField[] durations = fragment.getPluralTextField(Microformats2Prefixes.TIME_PROPERTY_PREFIX + entryFields[4]); for (HTMLDocument.TextField duration : durations) { Node attribute = duration.source().getAttributes().getNamedItem("datetime"); if (attribute == null) { conditionallyAddStringProperty(duration.source(), entry, vEntry.updated, duration.value()); } else { conditionallyAddStringProperty( duration.source(), 
entry, vEntry.updated, attribute.getNodeValue()); } } } private void addCategories(HTMLDocument fragment, BNode entry) { final HTMLDocument.TextField[] categories = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + entryFields[5]); for (HTMLDocument.TextField category : categories) { conditionallyAddStringProperty(category.source(), entry, vEntry.category, category.value()); } } private void addURLs(HTMLDocument fragment, BNode entry) throws ExtractionException { final HTMLDocument.TextField[] urls = fragment.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[6]); for (HTMLDocument.TextField url : urls) { addURIProperty(entry, vEntry.url, fragment.resolveURI(url.value())); } } private void addUID(HTMLDocument fragment, BNode entry) throws ExtractionException { final HTMLDocument.TextField uid = fragment.getSingularTextField(Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[7]); if (uid.source() == null) return; addURIProperty(entry, vEntry.uid, fragment.resolveURI(uid.value())); } private void addSyndications(HTMLDocument fragment, BNode entry) throws ExtractionException { final HTMLDocument.TextField[] syndications = fragment.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[8]); for (HTMLDocument.TextField syndication : syndications) { addURIProperty(entry, vEntry.syndication, fragment.resolveURI(syndication.value())); } } private void addInReplyTo(HTMLDocument fragment, BNode entry) throws ExtractionException { final HTMLDocument.TextField inReplyTo = fragment.getSingularTextField(Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[9]); if (inReplyTo.source() == null) return; addURIProperty(entry, vEntry.in_reply_to, fragment.resolveURI(inReplyTo.value())); } private void addLocations(HTMLDocument doc, Resource entry) throws ExtractionException { List<Node> nodes = doc.findAllByClassName( Microformats2Prefixes.PROPERTY_PREFIX + entryFields[11] + Microformats2Prefixes.SPACE_SEPARATOR + 
Microformats2Prefixes.CLASS_PREFIX + "geo"); if (nodes.isEmpty()) return; for (Node node : nodes) { BNode location = valueFactory.createBNode(); addURIProperty(location, RDF.TYPE, vEntry.location); HTMLDocument fragment = new HTMLDocument(node); for (String field : geoFields) { HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + field); for (HTMLDocument.TextField val : values) { Node attribute = val.source().getAttributes().getNamedItem("title"); if (attribute == null) { conditionallyAddStringProperty( val.source(), location, vVCARD.getProperty(field), val.value()); } else { conditionallyAddStringProperty( val.source(), location, vVCARD.getProperty(field), attribute.getNodeValue()); } } } } } }