/**
 * Downloads the resource at {@code url} and returns its body as a byte array.
 *
 * The request carries the value of the {@code cookies} field in its
 * {@code Cookie} header.
 *
 * @param url the resource to fetch; must be an HTTP(S) URL because the
 *            connection is cast to {@link HttpURLConnection}
 * @return the bytes of the response body, or {@code null} when the server
 *         answers with HTTP 404
 * @throws IOException if the connection cannot be opened or the body cannot
 *                     be read
 */
private byte[] downloadByteArray(URL url) throws IOException {
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Cookie", cookies);
    if (conn.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
        return null;
    }

    // getContentLength() returns -1 when the server does not announce a length
    // (e.g. chunked transfer encoding), so it is only used as a sizing hint and
    // never as the final array size.
    int declaredLength = conn.getContentLength();
    java.io.ByteArrayOutputStream body =
            new java.io.ByteArrayOutputStream(declaredLength > 0 ? declaredLength : 8192);

    InputStream in = conn.getInputStream();
    try {
        byte[] chunk = new byte[8192];
        int read;
        // Reading into a non-empty chunk guarantees read() reports -1 at end of
        // stream instead of 0 for a zero-length request.
        while ((read = in.read(chunk)) != -1) {
            body.write(chunk, 0, read);
        }
    } finally {
        in.close();
    }
    return body.toByteArray();
}
private boolean handshake() throws Exception { URL homePage = new URL("http://mangaonweb.com/viewer.do?ctsn=" + ctsn); HttpURLConnection urlConn = (HttpURLConnection) homePage.openConnection(); urlConn.connect(); if (urlConn.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) return (false); // save the cookie String headerName = null; for (int i = 1; (headerName = urlConn.getHeaderFieldKey(i)) != null; i++) { if (headerName.equals("Set-Cookie")) { cookies = urlConn.getHeaderField(i); } } // save cdn and crcod String page = "", line; BufferedReader stream = new BufferedReader(new InputStreamReader(urlConn.getInputStream(), "UTF-8")); while ((line = stream.readLine()) != null) page += line; cdn = param(page, "cdn"); crcod = param(page, "crcod"); return (true); }
@Override public HashSet<ScoredAnnotation> solveSa2W(String text) throws AnnotationException { HashSet<ScoredAnnotation> res; try { res = new HashSet<ScoredAnnotation>(); lastTime = Calendar.getInstance().getTimeInMillis(); URL wikiApi = new URL(url); String parameters = "references=true&repeatMode=all&minProbability=0.0&source=" + URLEncoder.encode(text, "UTF-8"); HttpURLConnection slConnection = (HttpURLConnection) wikiApi.openConnection(); slConnection.setRequestProperty("accept", "text/xml"); slConnection.setDoOutput(true); slConnection.setDoInput(true); slConnection.setRequestMethod("POST"); slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); slConnection.setRequestProperty("charset", "utf-8"); slConnection.setRequestProperty( "Content-Length", "" + Integer.toString(parameters.getBytes().length)); slConnection.setUseCaches(false); DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream()); wr.writeBytes(parameters); wr.flush(); wr.close(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(slConnection.getInputStream()); /* URL wikiApi = new URL(url+"?references=true&repeatMode=all&minProbability=0.0&source="+URLEncoder.encode(text, "UTF-8")); URLConnection wikiConnection = wikiApi.openConnection(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(wikiConnection.getInputStream()); */ lastTime = Calendar.getInstance().getTimeInMillis() - lastTime; XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); XPathExpression idExpr = xpath.compile("//detectedTopic/@id"); XPathExpression weightExpr = xpath.compile("//detectedTopic/@weight"); XPathExpression referenceExpr = xpath.compile("//detectedTopic/references"); NodeList ids = (NodeList) idExpr.evaluate(doc, 
XPathConstants.NODESET); NodeList weights = (NodeList) weightExpr.evaluate(doc, XPathConstants.NODESET); NodeList references = (NodeList) referenceExpr.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < weights.getLength(); i++) { if (weights.item(i).getNodeType() != Node.TEXT_NODE) { int id = Integer.parseInt(ids.item(i).getNodeValue()); float weight = Float.parseFloat(weights.item(i).getNodeValue()); // System.out.println("ID="+ids.item(i).getNodeValue()+" weight="+weight); XPathExpression startExpr = xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@start"); XPathExpression endExpr = xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@end"); NodeList starts = (NodeList) startExpr.evaluate(references.item(i), XPathConstants.NODESET); NodeList ends = (NodeList) endExpr.evaluate(references.item(i), XPathConstants.NODESET); for (int j = 0; j < starts.getLength(); j++) { int start = Integer.parseInt(starts.item(j).getNodeValue()); int end = Integer.parseInt(ends.item(j).getNodeValue()); int len = end - start; res.add(new ScoredAnnotation(start, len, id, weight)); } } } } catch (Exception e) { e.printStackTrace(); throw new AnnotationException( "An error occurred while querying Wikipedia Miner API. Message: " + e.getMessage()); } return res; }