/**
 * Fetches a search-result page and registers every URL found inside a
 * {@code <cite>...</cite>} element as a candidate device profile.
 *
 * <p>Each cited text is cleaned up (double quotes, {@code <b>}/{@code </b>} tags and
 * whitespace are removed) and, when it does not already start with an HTTP(S) scheme,
 * prefixed with {@code http://}. Empty citations are skipped.
 *
 * <p>Any exception raised while fetching or processing the page is logged and then
 * terminates the JVM with a non-zero exit status.
 *
 * @param pageUri the URI of the search-result page to fetch
 */
void processPage(String pageUri) {
  try {
    log.info("Calling to Google: " + pageUri);
    String inputString = UrlUtils.getURL(pageUri);
    log.info(inputString);

    Pattern cite = Pattern.compile("<cite>(.*?)</cite>");
    Matcher matcher = cite.matcher(inputString);
    while (matcher.find()) {
      String cited =
          matcher
              .group(1)
              .replace("\"", "")
              .replaceAll("<b>|</b>", "")
              .replaceAll("[ \\t\\n\\r]+", "")
              .trim();
      if (cited.isEmpty()) {
        // Nothing usable was cited; a bare "http://" is not a profile location.
        continue;
      }

      // Only add a scheme when the citation lacks one, otherwise the result
      // would be a malformed "http://https://..." style URI.
      boolean hasScheme = cited.startsWith("http://") || cited.startsWith("https://");
      String newURI = hasScheme ? cited : "http://" + cited;

      log.info(newURI);
      profiles.addDeviceIfNotAlreadyKnown(newURI);
    }
  } catch (Exception e) {
    log.error(e.toString(), e);
    // Abnormal termination: report failure to the calling process.
    System.exit(1);
  }
}
ProcessProfile(Resource device) { myARPReader.setProperty("WARN_RESOLVING_URI_AGAINST_EMPTY_BASE", "EM_IGNORE"); myARPReader.setErrorHandler( new RDFErrorHandler() { // ARP parser error handling routines public void warning(Exception e) { outputMsg("RDF parser warning:" + e.getMessage()); } public void error(Exception e) { outputMsg("RDF parser error:" + e.getMessage()); profileValidFlag = false; } public void fatalError(Exception e) { e.printStackTrace(); error(e); } }); this.deviceURI = device.getURI(); try { profile = UrlUtils.getURL(deviceURI).trim(); if (profile.contains("<html") || profile.contains("<head>")) { // this is HTML not a UAProf profile device.removeProperties(); } else if (!profile.contains("rdf") && !profile.contains("RDF")) { device.removeProperties(); } else if (!profile.contains("uaprof") && !profile.contains("UAPROF")) { device.removeProperties(); } else if (!profile.contains("openmobilealliance") && !profile.contains("wapforum")) { device.removeProperties(); } else { validate(device); profiles.fixMetadata(device, profile); } } catch (IOException io) { outputMsg("Could not retrieve " + device.getURI()); device.removeProperties(); unreachableProfiles++; } System.out.println(messages.toString()); }