public ArrayList<String> parseXML() throws Exception { ArrayList<String> ret = new ArrayList<String>(); handshake(); URL url = new URL( "http://mangaonweb.com/page.do?cdn=" + cdn + "&cpn=book.xml&crcod=" + crcod + "&rid=" + (int) (Math.random() * 10000)); String page = DownloaderUtils.getPage(url.toString(), "UTF-8", cookies); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); InputSource is = new InputSource(new StringReader(page)); Document d = builder.parse(is); Element doc = d.getDocumentElement(); NodeList pages = doc.getElementsByTagName("page"); total = pages.getLength(); for (int i = 0; i < pages.getLength(); i++) { Element e = (Element) pages.item(i); ret.add(e.getAttribute("path")); } return (ret); }
static { try { dbf.setNamespaceAware(true); db = dbf.newDocumentBuilder(); xpath.setNamespaceContext( new NamespaceContext() { @Override public Iterator<String> getPrefixes(String namespaceURI) { return Arrays.asList("md").iterator(); } @Override public String getPrefix(String namespaceURI) { return "md"; } @Override public String getNamespaceURI(String prefix) { return "http://www.osgi.org/xmlns/metatype/v1.1.0"; } }); } catch (ParserConfigurationException e) { e.printStackTrace(); throw new ExceptionInInitializerError(e); } }
private void createDOM() throws ParserConfigurationException { if (dbf == null) { dbf = DocumentBuilderFactory.newInstance(); db = dbf.newDocumentBuilder(); dil = (DOMImplementationLS) db.getDOMImplementation(); dw = dil.createDOMWriter(); } }
public WikipediaMinerAnnotator(String configFile) throws ParserConfigurationException, FileNotFoundException, SAXException, IOException, XPathExpressionException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(new FileInputStream(configFile)); url = getConfigValue("access", "url", doc); if (url.equals("")) throw new AnnotationException( "Configuration file " + configFile + " has missing value 'url'."); }
static { try { URL url = SpellCheckActivator.bundleContext.getBundle().getResource(RESOURCE_LOC); InputStream stream = url.openStream(); if (stream == null) throw new IOException(); // strict parsing options DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); // parses configuration xml /*- * Warning: Felix is unable to import the com.sun.rowset.internal * package, meaning this can't use the XmlErrorHandler. This causes * a warning and a default handler to be attached. Otherwise this * should have: builder.setErrorHandler(new XmlErrorHandler()); */ DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(stream); // iterates over nodes, parsing contents Node root = doc.getChildNodes().item(1); NodeList categories = root.getChildNodes(); for (int i = 0; i < categories.getLength(); ++i) { Node node = categories.item(i); if (node.getNodeName().equals(NODE_DEFAULTS)) { parseDefaults(node.getChildNodes()); } else if (node.getNodeName().equals(NODE_LOCALES)) { parseLocales(node.getChildNodes()); } else { logger.warn("Unrecognized category: " + node.getNodeName()); } } } catch (IOException exc) { logger.error("Unable to load spell checker parameters", exc); } catch (SAXException exc) { logger.error("Unable to parse spell checker parameters", exc); } catch (ParserConfigurationException exc) { logger.error("Unable to parse spell checker parameters", exc); } }
@Override public HashSet<ScoredAnnotation> solveSa2W(String text) throws AnnotationException { HashSet<ScoredAnnotation> res; try { res = new HashSet<ScoredAnnotation>(); lastTime = Calendar.getInstance().getTimeInMillis(); URL wikiApi = new URL(url); String parameters = "references=true&repeatMode=all&minProbability=0.0&source=" + URLEncoder.encode(text, "UTF-8"); HttpURLConnection slConnection = (HttpURLConnection) wikiApi.openConnection(); slConnection.setRequestProperty("accept", "text/xml"); slConnection.setDoOutput(true); slConnection.setDoInput(true); slConnection.setRequestMethod("POST"); slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); slConnection.setRequestProperty("charset", "utf-8"); slConnection.setRequestProperty( "Content-Length", "" + Integer.toString(parameters.getBytes().length)); slConnection.setUseCaches(false); DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream()); wr.writeBytes(parameters); wr.flush(); wr.close(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(slConnection.getInputStream()); /* URL wikiApi = new URL(url+"?references=true&repeatMode=all&minProbability=0.0&source="+URLEncoder.encode(text, "UTF-8")); URLConnection wikiConnection = wikiApi.openConnection(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(wikiConnection.getInputStream()); */ lastTime = Calendar.getInstance().getTimeInMillis() - lastTime; XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); XPathExpression idExpr = xpath.compile("//detectedTopic/@id"); XPathExpression weightExpr = xpath.compile("//detectedTopic/@weight"); XPathExpression referenceExpr = xpath.compile("//detectedTopic/references"); NodeList ids = (NodeList) idExpr.evaluate(doc, XPathConstants.NODESET); NodeList weights = (NodeList) weightExpr.evaluate(doc, XPathConstants.NODESET); NodeList references = (NodeList) referenceExpr.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < weights.getLength(); i++) { if (weights.item(i).getNodeType() != Node.TEXT_NODE) { int id = Integer.parseInt(ids.item(i).getNodeValue()); float weight = Float.parseFloat(weights.item(i).getNodeValue()); // System.out.println("ID="+ids.item(i).getNodeValue()+" weight="+weight); XPathExpression startExpr = xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@start"); XPathExpression endExpr = xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@end"); NodeList starts = (NodeList) startExpr.evaluate(references.item(i), XPathConstants.NODESET); NodeList ends = (NodeList) endExpr.evaluate(references.item(i), XPathConstants.NODESET); for (int j = 0; j < starts.getLength(); j++) { int start = Integer.parseInt(starts.item(j).getNodeValue()); int end = Integer.parseInt(ends.item(j).getNodeValue()); int len = end - start; res.add(new ScoredAnnotation(start, len, id, weight)); } } } } catch (Exception e) { e.printStackTrace(); throw new AnnotationException( "An error occurred while querying Wikipedia Miner API. Message: " + e.getMessage()); } return res; }