/**
 * Assembles the triples for a venue page on upcoming.yahoo.com.
 *
 * <p>The result is seeded with the fixed wam:* descriptors (domain, types, URI, url). When
 * {@code url_triples} is empty, the metadata is fetched via {@code MetadataEvalFunc.exec} and
 * parsed with {@code TripleNxParser}. Selected RDF/vCard predicates are then copied onto the
 * venue URI; the DC title (only when its subject is this URI) and the vCard locality are also
 * remembered for the duplicate lookup.
 *
 * @param uri URI of the venue page
 * @param url_triples triples already extracted for the page; may be empty
 * @return a single wam:hasResource triple when {@code getDuplicate} returns a URI; otherwise
 *     all collected triples if a title was found, or an empty list if not
 * @throws URISyntaxException declared by the original signature
 */
private static List<Triple> generateTriples_ResourceVenue_upcoming(
    URI uri, List<Triple> url_triples) throws URISyntaxException {
  final String venueUri = uri.toString();

  List<Triple> collected = new ArrayList<Triple>();
  collected.add(new Triple(venueUri, "wam:domain", "upcoming.yahoo.com"));
  collected.add(new Triple(venueUri, "wam:type", "wam:Resource"));
  collected.add(new Triple(venueUri, "wam:type", "wam:Venue"));
  collected.add(new Triple(venueUri, "wam:URI", venueUri));
  collected.add(new Triple(venueUri, "wam:url", venueUri));

  List<Triple> pageTriples = url_triples;
  if (pageTriples.isEmpty()) {
    // Nothing was handed in: pull the metadata for this URI and parse it.
    String n3s = MetadataEvalFunc.exec(venueUri);
    pageTriples = new TripleNxParser().parse(n3s);
    logger.info(" GET MORE INFO for Venue : " + venueUri);
  }

  String title = "";
  String locality = "";
  for (Triple found : pageTriples) {
    // Predicates that are copied verbatim onto the venue URI.
    boolean plainCopy =
        found.predicate.equals(Constants.NS_RDF + "type")
            || found.predicate.equals(Constants.NS_VCARD + "latitude")
            || found.predicate.equals(Constants.NS_VCARD + "longitude")
            || found.predicate.equals(Constants.NS_VCARD + "fn")
            || found.predicate.equals(Constants.NS_VCARD + "street-address")
            || found.predicate.equals(Constants.NS_VCARD + "region")
            || found.predicate.equals(Constants.NS_VCARD + "postal-code");
    if (plainCopy) {
      collected.add(new Triple(venueUri, found.predicate, found.object));
    }

    // The title only counts when the triple is about this very URI.
    boolean ownTitle =
        found.subject.equals(venueUri) && found.predicate.equals(Constants.NS_DC + "title");
    if (ownTitle) {
      title = found.object;
      collected.add(new Triple(venueUri, found.predicate, found.object));
    }

    boolean isLocality = found.predicate.equals(Constants.NS_VCARD + "locality");
    if (isLocality) {
      locality = found.object;
      collected.add(new Triple(venueUri, found.predicate, found.object));
    }
  }

  List<Triple> result = new ArrayList<Triple>();
  URI knownDuplicate = getDuplicate(uri, "wam:Venue", title, locality);
  if (knownDuplicate != null) {
    // Link to the duplicate instead of emitting the collected description.
    result.add(new Triple(venueUri, "wam:hasResource", knownDuplicate.toString()));
  } else if (!title.isEmpty()) {
    result.addAll(collected);
  }
  return result;
}
/**
 * Assembles the triples for a video resource.
 *
 * <p>The result is seeded with the fixed wam:* descriptors, the DC title and, when non-empty,
 * the performer name. The page metadata is then fetched on a best-effort basis: any exception
 * raised while fetching or parsing is printed and otherwise ignored. From that metadata only
 * the XHTML keywords and description predicates are copied.
 *
 * @param uri URI of the video page
 * @param title title of the video
 * @param performer_name performer name; skipped when empty
 * @param domain value stored under wam:domain
 * @return a single wam:hasResource triple when {@code getDuplicate} returns a URI; otherwise
 *     all collected triples if {@code title} is non-empty, or an empty list if it is empty
 */
private static List<Triple> generateTriples_ResourceVideo(
    URI uri, String title, String performer_name, String domain) {
  final String videoUri = uri.toString();
  logger.info(" TRIPLES VIDEOS: " + videoUri + ", videoTitle:" + title);

  List<Triple> collected = new ArrayList<Triple>();
  collected.add(new Triple(videoUri, "wam:domain", domain));
  collected.add(new Triple(videoUri, "wam:type", "wam:Resource"));
  collected.add(new Triple(videoUri, "wam:type", "wam:Video"));
  collected.add(new Triple(videoUri, "wam:URI", videoUri));
  collected.add(new Triple(videoUri, "wam:url", videoUri));
  collected.add(new Triple(videoUri, Constants.NS_DC + "title", title));
  if (!performer_name.isEmpty()) {
    collected.add(new Triple(videoUri, "wam:performerName", performer_name));
  }

  try {
    String n3s = MetadataEvalFunc.exec(videoUri);
    List<Triple> pageTriples = new TripleNxParser().parse(n3s);
    if (!pageTriples.isEmpty()) {
      logger.info(" GET MORE INFO WITH ANY23 for VIDEO : " + videoUri);
      for (Triple found : pageTriples) {
        boolean wanted =
            found.predicate.equals(Constants.NS_XHTML + "keywords")
                || found.predicate.equals(Constants.NS_XHTML + "description");
        if (wanted) {
          collected.add(new Triple(videoUri, found.predicate, found.object));
        }
      }
    }
  } catch (Exception e) {
    // Extra metadata is optional; a failure here must not lose the basic triples.
    e.printStackTrace();
  }

  List<Triple> result = new ArrayList<Triple>();
  URI knownDuplicate = getDuplicate(uri, "wam:Video", title, "");
  if (knownDuplicate != null) {
    result.add(new Triple(videoUri, "wam:hasResource", knownDuplicate.toString()));
  } else if (!title.isEmpty()) {
    result.addAll(collected);
  }
  return result;
}
/**
 * Assembles the triples for a performer page on eventful.com.
 *
 * <p>The result is seeded with the fixed wam:* descriptors. When {@code url_triples} is empty,
 * the metadata is fetched via {@code MetadataEvalFunc.exec} and parsed with
 * {@code TripleNxParser}. The OG type, the OG url (only when its subject is this URI) and the
 * OG title are copied; the last OG title seen is used for the duplicate lookup.
 *
 * @param uri URI of the performer page
 * @param url_triples triples already extracted for the page; may be empty
 * @return a single wam:hasResource triple when {@code getDuplicate} returns a URI; otherwise
 *     all collected triples if a title was found, or an empty list if not
 * @throws URISyntaxException declared by the original signature
 */
private static List<Triple> generateTriples_ResourcePerformer_eventful(
    URI uri, List<Triple> url_triples) throws URISyntaxException {
  final String performerUri = uri.toString();

  List<Triple> collected = new ArrayList<Triple>();
  collected.add(new Triple(performerUri, "wam:domain", "eventful.com"));
  collected.add(new Triple(performerUri, "wam:type", "wam:Resource"));
  collected.add(new Triple(performerUri, "wam:type", "wam:Performer"));
  collected.add(new Triple(performerUri, "wam:URI", performerUri));
  collected.add(new Triple(performerUri, "wam:url", performerUri));

  List<Triple> pageTriples = url_triples;
  if (pageTriples.isEmpty()) {
    // Nothing was handed in: pull the metadata for this URI and parse it.
    logger.info(" GET MORE INFO WITH ANY23: " + performerUri);
    String n3s = MetadataEvalFunc.exec(performerUri);
    pageTriples = new TripleNxParser().parse(n3s);
  }

  String title = "";
  for (Triple found : pageTriples) {
    boolean typeOrOwnUrl =
        found.predicate.equals(Constants.NS_OG + "type")
            || (found.subject.equals(performerUri)
                && found.predicate.equals(Constants.NS_OG + "url"));
    if (typeOrOwnUrl) {
      collected.add(new Triple(performerUri, found.predicate, found.object));
    }

    boolean isTitle = found.predicate.equals(Constants.NS_OG + "title");
    if (isTitle) {
      title = found.object;
      collected.add(new Triple(performerUri, found.predicate, found.object));
    }
  }

  List<Triple> result = new ArrayList<Triple>();
  URI knownDuplicate = getDuplicate(uri, "wam:Performer", title, "");
  if (knownDuplicate != null) {
    result.add(new Triple(performerUri, "wam:hasResource", knownDuplicate.toString()));
  } else if (!title.isEmpty()) {
    result.addAll(collected);
  }
  return result;
}
private static List<Triple> generateTriples_ResourceEvent_upcoming( URI uri, List<Triple> url_triples) { List<Triple> valid_triples = new ArrayList<Triple>(); valid_triples.add(new Triple(uri.toString(), "wam:URI", uri.toString())); valid_triples.add(new Triple(uri.toString(), "wam:url", uri.toString())); valid_triples.add(new Triple(uri.toString(), "wam:type", "wam:Resource")); valid_triples.add(new Triple(uri.toString(), "wam:type", "wam:Event")); valid_triples.add(new Triple(uri.toString(), "wam:domain", "upcoming.yahoo.com")); String title = "", locality = "", venue = ""; for (Triple triple : url_triples) { if ((triple.subject.equals(uri.toString()) && triple.predicate.equals(Constants.NS_RDF + "type")) || triple.predicate.equals(Constants.NS_ICAL + "summary") || triple.predicate.equals(Constants.NS_ICAL + "location") || triple.predicate.equals(Constants.NS_ICAL + "dtstart") || triple.predicate.equals(Constants.NS_ICAL + "categories") || triple.predicate.equals(Constants.NS_OG + "region") || triple.predicate.equals(Constants.NS_OG + "postal-code")) { valid_triples.add(new Triple(uri.toString(), triple.predicate, triple.object)); } if ((triple.subject.equals(uri.toString()) && triple.predicate.equals(Constants.NS_DC + "title"))) { title = triple.object; valid_triples.add(new Triple(uri.toString(), triple.predicate, triple.object)); } if (triple.predicate.equals(Constants.NS_OG + "locality")) { locality = triple.object; valid_triples.add(new Triple(uri.toString(), triple.predicate, triple.object)); } /* * Get Venue */ if (triple.predicate.equals(Constants.NS_VCARD + "urlofvenue") && venue.isEmpty()) { venue = triple.object; try { /* * Check first for duplicate resources */ URI uri_venue = new URI(venue); valid_triples.add(new Triple(uri.toString(), "wam:hasVenue", uri_venue.toString())); valid_triples.addAll( generateTriples_ResourceVenue_upcoming(uri_venue, new ArrayList<Triple>())); } catch (URISyntaxException e) { e.printStackTrace(); } } } /* * Get Video of 
event (when no performer given) * First, GET Performer-URLS BY HTML-EXTRACTION */ MetadataEvalFunc extractor = new MetadataEvalFunc(); Map<String, String> performers = new HashMap<String, String>(); logger.info(" GET URLS BY HTML-EXTRACTION: " + uri.toString()); performers = extractor.scrapPage(uri.toString(), "upcoming.yahoo.com"); String performer_name = ""; String video_text_query = ""; for (String performer_url : performers.keySet()) { logger.info("performer_url: " + performer_url); try { URI uri_performer = new URI(performer_url); performer_name = performers.get(performer_url); valid_triples.add(new Triple(uri.toString(), "wam:hasPerformer", uri_performer.toString())); valid_triples.addAll( generateTriples_ResourcePerformer_eventful(uri_performer, new ArrayList<Triple>())); /* * Get Video of Performer */ video_text_query = performer_name; // logger.info(" GET VIDEOS PERFORMER WITH API: '" + performer_name+"'" +", // "+video_text_query); // YouTubeManager youtube_manager = new YouTubeManager(); // List<YouTubeVideo> videos = youtube_manager.retrieveVideos(video_text_query, // video_max_results, true, youtube_timeout); // for(YouTubeVideo video: videos){ // URI uri_video = new URI(video.getWebPlayerUrl()); // valid_triples.add(new Triple(uri_performer.toString(), "wam:hasVideo", // uri_video.toString())); // valid_triples.addAll(generateTriples_ResourceVideo(uri_video, video.getTitle(), // performer_name, "upcoming.yahoo.com")); // // } } catch (URISyntaxException e) { e.printStackTrace(); } } List<Triple> final_urltriples = new ArrayList<Triple>(); URI duplicate_uri = getDuplicate(uri, "wam:Event", title, locality); if (duplicate_uri != null) { final_urltriples.add(new Triple(uri.toString(), "wam:hasResource", duplicate_uri.toString())); } else { if (!title.isEmpty()) { final_urltriples.addAll(valid_triples); } } return final_urltriples; }
private static List<Triple> generateTriples_ResourceEvent_eventful( URI uri, List<Triple> url_triples) { List<Triple> valid_triples = new ArrayList<Triple>(); valid_triples.add(new Triple(uri.toString(), "wam:URI", uri.toString())); valid_triples.add(new Triple(uri.toString(), "wam:url", uri.toString())); valid_triples.add(new Triple(uri.toString(), "wam:type", "wam:Resource")); valid_triples.add(new Triple(uri.toString(), "wam:type", "wam:Event")); valid_triples.add(new Triple(uri.toString(), "wam:domain", "eventful.com")); String title = ""; String addressLocality = "", addressRegion = ""; String venue = ""; for (Triple triple : url_triples) { if (triple.predicate.equals(Constants.NS_OG + "type") || triple.predicate.equals(Constants.NS_OG + "title") || triple.predicate.equals(Constants.NS_OG + "url") || triple.predicate.equals(Constants.NS_XHTML + "description") || triple.predicate.equals(Constants.NS_XHTML + "keywords") || triple.predicate.equals(Constants.NS_GEO + "Point") || triple.predicate.equals(Constants.NS_GEO + "lat") || triple.predicate.equals(Constants.NS_GEO + "long")) { valid_triples.add(new Triple(uri.toString(), triple.predicate, triple.object)); } /* * Get Location */ if (triple.predicate.equals(Constants.NS_DC + "title") && title.isEmpty()) { /* * Example * subject http://purl.org/dc/terms/title Tommee Profitt in Grand Haven, MI - Jul 1, 2012 7:00 pm | Eventful */ title = triple.object.toString(); try { if (title.contains(",") && title.indexOf(",") > title.indexOf("in ")) { addressLocality = title.substring(title.lastIndexOf("in ") + 3, title.indexOf(",")).trim(); } if (title.contains("-") && title.indexOf("-") > title.indexOf(",")) { addressRegion = title.substring(title.indexOf(",") + 1, title.lastIndexOf("-")).trim(); } } catch (Exception e) { e.printStackTrace(); } valid_triples.add(new Triple(uri.toString(), triple.predicate, triple.object)); valid_triples.add( new Triple( uri.toString(), Constants.NS_SCHEMA_ORG + "addressLocality", 
addressLocality)); valid_triples.add( new Triple(uri.toString(), Constants.NS_SCHEMA_ORG + "addressRegion", addressRegion)); } /* * Get Venue */ if (triple.predicate.equals(Constants.NS_XHTML + "bookmark") && triple.object.matches(".*\\/venues\\/.*") && venue.isEmpty()) { venue = triple.object; valid_triples.add(triple); valid_triples.add(new Triple(uri.toString(), "wam:hasVenue", (triple.object))); try { URI uri_venue = new URI(triple.object); valid_triples.addAll( generateTriples_ResourceVenue_eventful(uri_venue, new ArrayList<Triple>())); } catch (URISyntaxException e) { e.printStackTrace(); } } } /* * Get Performer * GET URLS BY HTML-EXTRACTION */ MetadataEvalFunc extractor = new MetadataEvalFunc(); Map<String, String> performers = new HashMap<String, String>(); logger.info(" GET URLS BY HTML-EXTRACTION: " + uri.toString()); performers = extractor.scrapPage(uri.toString(), "eventful.com"); String performer_name = ""; String video_text_query = ""; for (String performer_url : performers.keySet()) { logger.info("performer_url: " + performer_url); try { URI uri_performer = new URI(performer_url); performer_name = performers.get(performer_url); valid_triples.add(new Triple(uri.toString(), "wam:hasPerformer", uri_performer.toString())); valid_triples.addAll( generateTriples_ResourcePerformer_eventful(uri_performer, new ArrayList<Triple>())); /* * Get Video of Performer */ video_text_query = performer_name; // logger.info(" GET VIDEOS PERFORMER WITH API: '" + performer_name+"'" +", // "+video_text_query); // YouTubeManager youtube_manager = new YouTubeManager(); // List<YouTubeVideo> videos = youtube_manager.retrieveVideos(video_text_query, // video_max_results, true, youtube_timeout); // for(YouTubeVideo video: videos){ // URI uri_video = new URI(video.getWebPlayerUrl()); // valid_triples.add(new Triple(uri_performer.toString(), "wam:hasVideo", // uri_video.toString())); // valid_triples.addAll(generateTriples_ResourceVideo(uri_video, video.getTitle(), // 
performer_name, "eventful.com")); // // } } catch (URISyntaxException e) { e.printStackTrace(); } } /* * Get Video of event (when no performer) */ // try { // if(performers.isEmpty() && !title.isEmpty()){ // video_text_query = title; // logger.info(" GET VIDEOS EVENT WITH API: " + uri.toString() +", "+video_text_query); // YouTubeManager youtube_manager = new YouTubeManager();//youtube_clientID // List<YouTubeVideo> videos_event = youtube_manager.retrieveVideos(video_text_query, // video_max_results, true, youtube_timeout); // for(YouTubeVideo video: videos_event){ // logger.info("video: " + video.getWebPlayerUrl() + ", " + video.getTitle()); // URI uri_video = new URI(video.getWebPlayerUrl()); // valid_triples.add(new Triple(uri.toString(), "wam:hasVideo", uri_video.toString())); // valid_triples.addAll(generateTriples_ResourceVideo(uri_video, video.getTitle(), "", // "eventful.com")); // // } // } // } catch (URISyntaxException e) { // e.printStackTrace(); // } catch (Exception e) { // e.printStackTrace(); // } List<Triple> final_urltriples = new ArrayList<Triple>(); URI duplicate_uri = getDuplicate(uri, "wam:Event", title, addressLocality); if (duplicate_uri != null) { final_urltriples.add(new Triple(uri.toString(), "wam:hasResource", duplicate_uri.toString())); } else { if (!title.isEmpty()) { logger.info("\t title: " + title); final_urltriples.addAll(valid_triples); } } return final_urltriples; }