/**
 * Evaluates {@code xpathStr} against the cleaned form of {@code text} and collects every match.
 *
 * <p>A match that is a {@link TagNode} contributes its inner HTML; any other match contributes
 * its {@code toString()} value.
 *
 * @param text the markup to clean and query
 * @return the collected matches (empty when nothing matched or the XPath evaluation failed), or
 *     {@code null} when the cleaner produced no root node
 */
@Override
public List<String> selectList(String text) {
  HtmlCleaner cleaner = new HtmlCleaner();
  TagNode root = cleaner.clean(text);
  if (root == null) {
    return null;
  }

  List<String> matches = new ArrayList<String>();
  try {
    Object[] hits = root.evaluateXPath(xpathStr);
    if (hits == null || hits.length < 1) {
      return matches;
    }
    for (Object hit : hits) {
      String value =
          (hit instanceof TagNode) ? cleaner.getInnerHtml((TagNode) hit) : hit.toString();
      matches.add(value);
    }
  } catch (XPatherException e) {
    // The failure is only reported on stderr; whatever was gathered so far is returned.
    e.printStackTrace();
  }
  return matches;
}
/**
 * Appends an object-replacement placeholder to {@code builder} for an image-like node and
 * attaches the image either immediately from {@code imageCache} or later through a callback
 * registered with {@code loader}.
 *
 * <p>The image reference is taken from the first present attribute among {@code src},
 * {@code href} and {@code xlink:href}.
 *
 * @param node the node describing the image
 * @param builder the text being built; receives the placeholder character
 * @param start start offset used for the image span
 * @param end not used by this handler
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  String source = null;
  for (String attribute : new String[] {"src", "href", "xlink:href"}) {
    source = node.getAttributeByName(attribute);
    if (source != null) {
      break;
    }
  }

  // U+FFFC is the placeholder character standing in for the image.
  builder.append("\uFFFC");

  String resolvedHref = spine.resolveHref(source);
  boolean servedFromCache = imageCache.containsKey(resolvedHref) && !fakeImages;
  if (!servedFromCache) {
    LOG.debug("Loading href: " + resolvedHref);
    ImageCallback callback =
        new ImageCallback(resolvedHref, builder, start, builder.length(), fakeImages);
    loader.registerCallback(resolvedHref, callback);
    return;
  }

  Drawable cached = imageCache.get(resolvedHref);
  setImageSpan(builder, cached, start, builder.length());
  LOG.debug("Got cached href: " + resolvedHref);
}
/**
 * Inserts a hidden {@code input} carrying the flow id as the first child of {@code formNode}.
 *
 * @param flowID the flow id to store in the field's {@code value} attribute; a {@code null}
 *     value is written as the text {@code "null"}
 * @param formNode the form node that receives the hidden field
 */
private void addFlowId(String flowID, TagNode formNode) {
  TagNode hiddenField = new TagNode("input");
  // Attribute order is kept as name, type, value.
  hiddenField.setAttribute("name", Constants.FLOW_ID_FIELD_NAME);
  hiddenField.setAttribute("type", "hidden");
  hiddenField.setAttribute("value", String.valueOf(flowID));
  formNode.insertChild(0, hiddenField);
}
protected void getFileAttache( SimpleHtmlSerializer htmlSerializer, TagNode pNode, KnouNoticeInfo knouNoticeInfo) { String expressionContent = "//div[@class=\"MultiFile-list\"]"; Object[] myNodeBody = null; try { myNodeBody = pNode.evaluateXPath(expressionContent); } catch (XPatherException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (myNodeBody.length <= 0) { return; } TagNode tmpNode = (TagNode) myNodeBody[0]; TagNode[] nl = tmpNode.getChildTags(); // Log.d("HAN", "nl.length:" + nl.length); KnouNoticeFileInfo attacheFileInfo = null; for (int i = 0; i < nl.length; i++) { // 0번은 필요없는거 지움 // Log.d("HAN", "nl[i].getName():" + nl[i].getName()); // Log.d("HAN", "nl[i].getText():" + nl[i].getText()); attacheFileInfo = new KnouNoticeFileInfo(); if (nl[i].getName().trim().equals("a")) { String href = nl[i].getAttributeByName("href"); attacheFileInfo.href = "http://ep.knou.ac.kr" + href; attacheFileInfo.fileName = nl[i].getText().toString(); knouNoticeInfo.AttacheFile.add(attacheFileInfo); } } }
/**
 * Logs in if necessary, downloads the page described by {@code getFinalUrlParams()} with the
 * configured request method and parses its {@code BODY} element.
 *
 * <p>A login is attempted only when the session state is neither {@code LOGIN_OK} nor
 * {@code LOGIN_NOT_COMPLETED}. If the state is not {@code LOGIN_OK} afterwards, the session is
 * marked {@code LOGIN_FAILED}.
 *
 * @param urls not used by this method
 * @return the parsed table elements, or {@code null} when the login did not succeed, nothing
 *     was downloaded, no {@code BODY} element was found, or an {@link IOException} occurred
 */
@Override
protected List<TableElement> doInBackground(String... urls) {
  try {
    if (!trSession.getState().equals(LoginState.LOGIN_OK)
        && !trSession.getState().equals(LoginState.LOGIN_NOT_COMPLETED)) {
      trSession.processLogin();
    }

    if (trSession.getState().equals(LoginState.LOGIN_OK)) {
      TagNode result = null;
      if (getRequestMethod().equals(GET)) {
        result = DataProviderUtil.downloadGetUrl(trSession, getFinalUrlParams());
      }
      if (getRequestMethod().equals(POST)) {
        result = DataProviderUtil.downloadPostUrl(trSession, getFinalUrlParams(), null);
      }
      if (result != null) {
        TagNode elem = result.findElementByName("BODY", true);
        if (elem != null) {
          return HtmlParserUtils.parseHtml(elem);
        }
      }
    } else {
      trSession.setState(LoginState.LOGIN_FAILED);
    }
  } catch (IOException e) {
    // getMessage() may be null, which Log.e(tag, msg) does not accept; String.valueOf keeps the
    // message non-null and passing the throwable preserves the stack trace in the log.
    Log.e("Error on reading address.", String.valueOf(e.getMessage()), e);
  }
  return null;
}
/**
 * Returns the text of the first node matched by {@code xpath} in the cleaned {@code html}.
 *
 * @param html the markup to clean and query
 * @param xpath the XPath expression to evaluate against the cleaned document
 * @return the text of the first match
 * @throws Exception if the evaluation fails, nothing matches, or the first match is not a
 *     {@link TagNode}
 */
public String GetInnerTextByXpath(String html, String xpath) throws Exception {
  TagNode root = new HtmlCleaner().clean(html);
  Object[] matches = root.evaluateXPath(xpath);
  // Only the first match is of interest; an empty result fails on this access.
  TagNode firstMatch = (TagNode) matches[0];
  return firstMatch.getText().toString();
}
/**
 * Tells whether {@code tagNode} carries the attribute {@code attName} with the value
 * {@code attValue}, compared case-sensitively or not depending on {@code isCaseSensitive}.
 *
 * @param tagNode the node to test
 * @return {@code true} when the attribute value matches; {@code false} when it does not, or
 *     when the node, the attribute name or the expected value is {@code null}
 */
public boolean satisfy(TagNode tagNode) {
  if (tagNode == null || attName == null || attValue == null) {
    return false;
  }

  String actualValue = tagNode.getAttributeByName(attName);
  if (isCaseSensitive) {
    return attValue.equals(actualValue);
  }
  return attValue.equalsIgnoreCase(actualValue);
}
@Override protected void handleBody(String tagName, TagNode tagNode) { if (modelSerializer.isExplanation()) { Explanation explanation = modelSerializer.getObject(Explanation.class); tagNode.addAttribute("data-message", explanation.getMessage()); tagNode.addAttribute("data-messageDetail", explanation.getMessageDetail()); } // tagNode.addChild(modelSerializer.getPageModelScript()); tagNode.addChild(modelSerializer.getScriptContent(visitor.getScriptResource())); }
/**
 * Applies one attribute action to {@code tagNode}.
 *
 * <p>{@code REMOVE} calls {@code removeFromTree()} on the node; {@code MODIFY} calls
 * {@code addAttribute} with the action's name and replacement value. Any other type does
 * nothing.
 *
 * @param action the action to apply
 * @param tagNode the node the action is applied to
 */
private void handleAction(TagAttributeAction action, TagNode tagNode) {
  switch (action.getType()) {
    case MODIFY:
      tagNode.addAttribute(action.getName(), action.getReplace());
      return;
    case REMOVE:
      tagNode.removeFromTree();
      return;
    default:
      // Remaining action types are intentionally left without effect.
      return;
  }
}
/** * @param html * @return <code>true</code> se a página possui formulário de login. <code>false</code> caso * contrário. */ private boolean autenticaUsuario(String html) { HtmlCleaner cleaner = new HtmlCleaner(); TagNode root = cleaner.clean(html); String formulario_name = context.getString(R.string.janusmob_formulario); String campo_usuario_name = context.getString(R.string.janusmob_campo_usuario); String campo_senha_name = context.getString(R.string.janusmob_campo_senha); TagNode loginform = null; for (TagNode node : root.getElementsByName("form", true)) { if (node.getAttributeByName("name").equals(formulario_name)) { loginform = node; break; } } if (loginform == null) { return false; } // } // StringBuffer postData = new StringBuffer(String.format( // "%s=%s&%s=%s", campo_usuario_name, usuario, // campo_senha_name, senha)); // // for (TagNode node : loginform.getElementsByName("input", true)) { // if (!(node.getAttributeByName("name") // .equals(campo_usuario_name) || node.getAttributeByName( // "name").equals(campo_senha_name))) { // postData.append("&") // .append(node.getAttributeByName("name")) // .append("=") // .append(node.getAttributeByName("value")); // } // } // // webView.postUrl( // context.getString(R.string.janusmob_pagina_login), // EncodingUtils.getBytes(postData.toString(), "base64")); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_senha_name, senha)); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_usuario_name, usuario)); webView.loadUrl( String.format( "javascript:document.getElementById('%s').submit();", context.getString(R.string.janusmob_formulario))); return true; }
/**
 * Extracts the login error message from {@code html}.
 *
 * @param html the page markup to inspect
 * @return the text of the first {@code span} whose {@code id} equals the configured error-span
 *     id, or the default login error message from the resources when no such span exists
 */
private String getMensagemErro(String html) {
  TagNode document = new HtmlCleaner().clean(html);
  String errorSpanId = context.getString(R.string.janusmob_span_erro_id);

  for (TagNode span : document.getElementsByName("span", true)) {
    String spanId = span.getAttributeByName("id");
    if (!errorSpanId.equals(spanId)) {
      continue;
    }
    return span.getText().toString();
  }

  // No dedicated error span on the page: fall back to the generic message.
  return context.getString(R.string.janusmob_mensagem_erro_login);
}
/**
 * Tells whether {@code html} contains the logout link.
 *
 * @param html the page markup to inspect
 * @return {@code true} when some {@code a} element has the configured logout-link id;
 *     {@code false} otherwise
 */
private boolean usuarioLogado(String html) {
  TagNode document = new HtmlCleaner().clean(html);
  String logoutLinkId = context.getString(R.string.janusmob_link_logout_id);

  boolean found = false;
  for (TagNode anchor : document.getElementsByName("a", true)) {
    if (logoutLinkId.equals(anchor.getAttributeByName("id"))) {
      found = true;
      break;
    }
  }
  return found;
}
/**
 * Adds {@code child} to this node's children.
 *
 * <p>A {@code null} child is ignored. A {@link List} is handed to {@code addChildren}. Anything
 * else is appended directly, and a {@link TagNode} child additionally gets this node as parent.
 *
 * @param child the child to add; may be {@code null}
 */
public void addChild(Object child) {
  if (child == null) {
    return;
  }

  if (child instanceof List) {
    addChildren((List) child);
    return;
  }

  children.add(child);
  if (child instanceof TagNode) {
    ((TagNode) child).parent = this;
  }
}
/**
 * Annotates every element of {@code formNode} that has an {@code action} attribute with an
 * {@code actionType} attribute taken from the matching entry of {@code currentActions}.
 *
 * <p>Elements whose action name is unknown, or whose action has no type, are left untouched.
 *
 * @param currentActions the known actions, keyed by action name
 * @param formNode the form node whose descendants are inspected
 */
private void processActions(Map<String, FlowAction> currentActions, TagNode formNode) {
  @SuppressWarnings("unchecked")
  List<TagNode> actionNodes = formNode.getElementListHavingAttribute("action", true);

  for (TagNode actionNode : actionNodes) {
    FlowAction flowAction = currentActions.get(actionNode.getAttributeByName("action"));
    if (flowAction == null) {
      continue;
    }
    FlowActionType actionType = flowAction.getType();
    if (actionType == null) {
      continue;
    }
    actionNode.setAttribute("actionType", actionType.toString());
  }
}
public static void main(String[] args) throws Exception { try { HtmlCleaner cleaner = new HtmlCleaner(); // cleaner.clean(new File("s")); URL url = new URL("http://www.baidu.com"); TagNode node = cleaner.clean(url, "utf-8"); node.Object[] tagNodes = node.evaluateXPath("//p[@id='nv']/a"); for (Object tagNode : tagNodes) { System.out.println(((TagNode) tagNode).getText()); System.out.println(((TagNode) tagNode).getAttributeByName("href")); } } catch (Exception exception) { exception.printStackTrace(); } }
/**
 * Appends an object-replacement placeholder to {@code builder} for an image-like node and
 * registers a callback with {@code loader} for the resolved image reference.
 *
 * <p>The image reference is taken from the first present attribute among {@code src},
 * {@code href} and {@code xlink:href}.
 *
 * @param node the node describing the image
 * @param builder the text being built; receives the placeholder character
 * @param start start offset handed to the callback
 * @param end not used by this handler
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  String source = null;
  for (String attribute : new String[] {"src", "href", "xlink:href"}) {
    source = node.getAttributeByName(attribute);
    if (source != null) {
      break;
    }
  }

  // U+FFFC is the placeholder character standing in for the image.
  builder.append("\uFFFC");

  String resolvedHref = spine.resolveHref(source);
  ImageCallback callback = new ImageCallback(builder, start, builder.length());
  loader.registerCallback(resolvedHref, callback);
}
/**
 * Loads {@code /debugbar.html} from the class path, blanks its {@code {viewDataDocumentUrl}}
 * placeholder, cleans the markup and returns the first child of the second child of the cleaned
 * root node.
 *
 * @return the debug bar node
 * @throws RuntimeException if the resource is missing or cannot be read; the failure is also
 *     logged
 */
private TagNode loadDebugBar() {
  try {
    InputStream debugBarStream = FormParser.class.getResourceAsStream("/debugbar.html");
    if (debugBarStream == null) {
      // getResourceAsStream reports a missing resource by returning null.
      throw new IOException("Resource /debugbar.html not found.");
    }

    String barHtmlString;
    try {
      barHtmlString = new String(new StreamUtils().readStream(debugBarStream));
    } finally {
      // The stream was previously never closed.
      debugBarStream.close();
    }

    barHtmlString = barHtmlString.replace("{viewDataDocumentUrl}", "");
    TagNode html = htmlCleaner.clean(barHtmlString);
    // NOTE(review): index 1 of the root's children is presumably <body> - confirm.
    TagNode body = (TagNode) html.getChildren().get(1);
    TagNode div = (TagNode) body.getChildren().get(0);
    return div;
  } catch (IOException e) {
    RuntimeException runtimeException = new RuntimeException("Failed to load debugBar.", e);
    logger.error(runtimeException.getMessage(), runtimeException);
    throw runtimeException;
  }
}
@Override public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) { String href = node.getAttributeByName("href"); if (href == null) { return; } final String linkHref = href; // First check if it should be a normal URL link for (String protocol : this.externalProtocols) { if (href.toLowerCase(Locale.US).startsWith(protocol)) { builder.setSpan(new URLSpan(href), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); return; } } // If not, consider it an internal nav link. ClickableSpan span = new ClickableSpan() { @Override public void onClick(View widget) { navigateTo(spine.resolveHref(linkHref)); } }; builder.setSpan(span, start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); }
public void parseScoreSheet(String id) throws XPatherException, ParseException { ScoreSheetEntity scoreSheet = em.find(ScoreSheetEntity.class, id); TagNode html = cleaner.clean(scoreSheet.getContent()); // Race ------------------------------------------ String name = ((TagNode) html.evaluateXPath("//body//h1")[0]).getText().toString(); RaceEntity r = new RaceEntity(); r.setName(name); raceEntity = (RaceEntity) checkPossibleMatches(r, RaceEntity.class); // RaceVolume ------------------------------------------ RaceVolumeEntity rv = new RaceVolumeEntity(); String dateStr = ((TagNode) html.evaluateXPath("//body//div[@class='date']")[0]).getText().toString(); Date d = new SimpleDateFormat("dd. MM. yyyy").parse(dateStr); rv.setDate(d); rv.setRace(raceEntity); String vol = ((TagNode) html.evaluateXPath("//body//div[@class='volume']")[0]).getText().toString(); vol = vol.substring(0, vol.indexOf(".")); rv.setVolume(Integer.valueOf(vol)); raceVolume = (RaceVolumeEntity) checkPossibleMatches(rv, RaceVolumeEntity.class); man = true; TagNode menDiv = ((TagNode) html.evaluateXPath("//body//div[@id='men']")[0]); Object[] cats = menDiv.evaluateXPath("//table"); for (int i = 0; i < cats.length; i++) { TagNode c = (TagNode) cats[i]; processCategory(c); } man = false; TagNode womenDiv = ((TagNode) html.evaluateXPath("//body//div[@id='women']")[0]); cats = womenDiv.evaluateXPath("//table"); for (int i = 0; i < cats.length; i++) { TagNode c = (TagNode) cats[i]; processCategory(c); } }
public static ArrayList<String> dobisliko(TagNode node, String XPathExpression) { TagNode description_node = null; ArrayList<String> Temp = new ArrayList<String>(); NodeList nodes; try { // description_node = (TagNode) node.evaluateXPath(XPathExpression)[0]; for (int x = 0; x < node.evaluateXPath(XPathExpression).length; x++) { description_node = (TagNode) node.evaluateXPath(XPathExpression)[x]; // // System.out.println("http://www.krka.si"+description_node.getAttributeByName("src")+"\n"+"---------------------------------------"); Temp.add("http://www.krka.si" + description_node.getAttributeByName("src").toString()); } } catch (XPatherException e) { e.printStackTrace(); } return Temp; // // System.out.println(description_node.getText()+"\n"+"---------------------------------------"); }
/**
 * Records the node's {@code id} attribute (when present) in {@code anchors}, mapped to
 * {@code start}, and then delegates to the wrapped handler.
 *
 * @param node the node being handled
 * @param builder the text being built
 * @param start start offset of the node's text; stored as the anchor position
 * @param end end offset of the node's text
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  final String anchorId = node.getAttributeByName("id");
  boolean hasAnchor = anchorId != null;
  if (hasAnchor) {
    anchors.put(anchorId, start);
  }

  wrappedHandler.handleTagNode(node, builder, start, end);
}
public static ArrayList<String> dobi_opis(TagNode node, String XPathExpression) { ArrayList<String> Temp = new ArrayList<String>(); TagNode description_node = null; NodeList nodes; try { // description_node = (TagNode) node.evaluateXPath(XPathExpression)[0]; for (int x = 0; x < node.evaluateXPath(XPathExpression).length; x++) { description_node = (TagNode) node.evaluateXPath(XPathExpression)[x]; // // System.out.println(description_node.getText()+"\n"+"---------------------------------------"); Temp.add(description_node.getText().toString()); } } catch (XPatherException e) { e.printStackTrace(); } return Temp; // // System.out.println(description_node.getText()+"\n"+"---------------------------------------"); }
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); props.setAdvancedXmlEscape(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } Document myJDom = new JDomSerializer(props, true).createJDom(node); // Format format = Format.getRawFormat(); Format format = new OutputFormat(); format.setEncoding("iso-8859-1"); XMLWriter outputter = new XMLWriter(format); OutputStream os = new FileOutputStream(f); // outputter.output(myJDom,os); output.setOutputStream(os); output.write(myJDom); // sbResult.append(outputter.outputString(myJDom)); results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }
/**
 * Finds the first element among this node's children that satisfies the given condition.
 *
 * <p>Children are visited in order. With {@code isRecursive} set, a child that does not satisfy
 * the condition has its own children searched before the next sibling is examined.
 *
 * @param condition the condition to test; {@code null} yields {@code null}
 * @param isRecursive whether to descend into child elements
 * @return the first TagNode found, or {@code null} if there is no such element
 */
private TagNode findElement(ITagNodeCondition condition, boolean isRecursive) {
  if (condition == null) {
    return null;
  }

  for (int index = 0; index < children.size(); index++) {
    Object child = children.get(index);
    if (!(child instanceof TagNode)) {
      continue;
    }

    TagNode candidate = (TagNode) child;
    if (condition.satisfy(candidate)) {
      return candidate;
    }
    if (isRecursive) {
      TagNode nested = candidate.findElement(condition, isRecursive);
      if (nested != null) {
        return nested;
      }
    }
  }
  return null;
}
/**
 * Evaluates {@code xpathStr} against the cleaned form of {@code text} and returns the first
 * match.
 *
 * <p>A {@link TagNode} match is returned as its inner HTML; any other match as its
 * {@code toString()} value.
 *
 * @param text the markup to clean and query
 * @return the first match, or {@code null} when the cleaner produced no root node, nothing
 *     matched, or the XPath evaluation failed
 */
@Override
public String select(String text) {
  HtmlCleaner cleaner = new HtmlCleaner();
  TagNode root = cleaner.clean(text);
  if (root == null) {
    return null;
  }

  try {
    Object[] hits = root.evaluateXPath(xpathStr);
    if (hits == null || hits.length < 1) {
      return null;
    }
    Object first = hits[0];
    if (first instanceof TagNode) {
      return cleaner.getInnerHtml((TagNode) first);
    }
    return first.toString();
  } catch (XPatherException e) {
    // The failure is only reported on stderr; the caller sees null.
    e.printStackTrace();
  }
  return null;
}
/**
 * Scrapes the institution data table for the given year.
 *
 * <p>The page URL is built from {@code URL_PATTERN}. Every row matched by
 * {@code DATA_ROW_XPATH} becomes one {@link InstitutionDataItem} holding the year plus one entry
 * per non-ignored column; cell text is trimmed and stripped of commas. The column layout used
 * depends on the year (2008, 2010, 2016 and 2017 layouts).
 *
 * @param year the year to read
 * @return one data item per table row
 * @throws MalformedURLException declared by the original signature
 * @throws IOException declared by the original signature
 * @throws XPatherException if the row XPath cannot be evaluated
 */
public List<InstitutionDataItem> getData(int year)
    throws MalformedURLException, IOException, XPatherException {
  String url = String.format(URL_PATTERN, year);
  System.out.println("reading from " + url);

  // The column layout and the year text depend only on the year, so they are determined once
  // instead of once per row.
  String[] attributes;
  if (year >= 2017) {
    attributes = attributes2017;
  } else if (year >= 2016) {
    attributes = attributes2016;
  } else if (year >= 2010) {
    attributes = attributes2010;
  } else {
    attributes = attributes2008;
  }
  String yearText = String.valueOf(year);

  List<InstitutionDataItem> data = new ArrayList<InstitutionDataItem>();
  TagNode cleaned = ScraperUtils.getCleanedHtml(url);
  Object[] rows = cleaned.evaluateXPath(DATA_ROW_XPATH);
  for (Object row : rows) {
    TagNode tr = (TagNode) row;
    // Fetch the row's cells once rather than once per column.
    TagNode[] cells = tr.getChildTags();

    InstitutionDataItem dataItem = new InstitutionDataItem();
    dataItem.data.put("year", yearText);
    for (int i = 0; i < attributes.length; i++) {
      if (!attributes[i].equals(IGNORE)) {
        dataItem.data.put(attributes[i], cells[i].getText().toString().trim().replace(",", ""));
      }
    }
    data.add(dataItem);
  }
  return data;
}
/**
 * Registers image links: for an {@code a} element with a non-empty {@code href} and children,
 * whose first non-recursively found {@code img} element has a {@code src} attribute, the pair
 * (href, image src) is passed to {@code getLinks().setLinkByServer}.
 *
 * @param parentNode not used by this visitor
 * @param htmlNode the node being visited
 * @return always {@code true}
 */
@Override
public boolean visit(TagNode parentNode, HtmlNode htmlNode) {
  if (!(htmlNode instanceof TagNode)) {
    return true;
  }

  TagNode anchor = (TagNode) htmlNode;
  if (!anchor.getName().equalsIgnoreCase("a")) {
    return true;
  }

  String link = anchor.getAttributeByName("href");
  if (link == null || link.isEmpty() || !anchor.hasChildren()) {
    return true;
  }

  TagNode image = anchor.findElementByName("img", false);
  if (image != null && image.hasAttribute("src")) {
    getLinks().setLinkByServer(link, image.getAttributeByName("src"));
  }
  return true;
}
/**
 * Gets all elements among this node's children that satisfy the given condition.
 *
 * <p>Children are visited in order. With {@code isRecursive} set, the matches found below a
 * child are appended right after that child's own result.
 *
 * @param condition the condition to test; {@code null} yields an empty list
 * @param isRecursive whether to descend into child elements
 * @return list of the TagNode instances satisfying the condition (never {@code null})
 */
private List getElementList(ITagNodeCondition condition, boolean isRecursive) {
  List matches = new LinkedList();
  if (condition == null) {
    return matches;
  }

  for (int index = 0; index < children.size(); index++) {
    Object child = children.get(index);
    if (!(child instanceof TagNode)) {
      continue;
    }

    TagNode candidate = (TagNode) child;
    if (condition.satisfy(candidate)) {
      matches.add(candidate);
    }
    if (!isRecursive) {
      continue;
    }
    List nested = candidate.getElementList(condition, isRecursive);
    if (nested != null && !nested.isEmpty()) {
      matches.addAll(nested);
    }
  }
  return matches;
}
public static void main(String[] args) throws Exception { try { HtmlCleaner cleaner = new HtmlCleaner(); nameList = new ArrayList<String>(); URL url = new URL( "http://apps.wandoujia.com/apps/com.eg.android.AlipayGphone/versions?pos=w/popup"); TagNode node = cleaner.clean(url); Object[] tags = node.evaluateXPath("/body/div//div[@class='version-block']/div[position()<4]"); int i = 1; for (Object tag : tags) { // System.out.println(((TagNode)tagSize).getText()+""); Object[] tagVersion = node.evaluateXPath( "/body/div//div[@class='version-block']/div[" + i + "]//i[@itemprop='softwareVersion']"); String app_verison = ((TagNode) tagVersion[0]).getText() + ""; System.out.println(((TagNode) tagVersion[0]).getText() + ""); Object[] tagVersionCode = node.evaluateXPath( "/body/div//div[@class='version-block']/div[" + i + "]//span[@class='version-code']"); String app_versioncode = ((TagNode) tagVersionCode[0]).getText() + ""; System.out.println(((TagNode) tagVersionCode[0]).getText() + ""); Object[] tagFileSize = node.evaluateXPath( "/body/div//div[@class='version-block']/div[" + i + "]//span[@class='apk-size']"); String app_size = ((TagNode) tagFileSize[0]).getText() + ""; System.out.println(((TagNode) tagFileSize[0]).getText() + ""); Object[] tagDownload = node.evaluateXPath( "/body/div//div[@class='version-block']/div[" + i + "]//a[@download]"); System.out.println(((TagNode) tagDownload[0]).getAttributeByName("href")); String app_url = ((TagNode) tagDownload[0]).getAttributeByName("href"); String app_name = ((TagNode) tagDownload[0]).getAttributeByName("download"); i++; // ***写入数据库 明天写*** } } catch (Exception exception) { exception.printStackTrace(); } }
/**
 * Replaces the input-source attribute of every {@code input} that carries one with a proxy
 * path.
 *
 * <p>For each such input a {@link FieldSourceProxy} is created from the current path, the
 * field name and the source, registered on {@code formFlow}, and the input's source attribute
 * is replaced by an {@code rf.source} attribute pointing at
 * {@code rhinoforms/proxy/<proxyPath>}.
 *
 * @param formNode the form node whose inputs are processed
 * @param currentPath the path handed to the proxy factory
 * @param formFlow the flow that receives the created proxies
 * @throws XPatherException if the XPath expression cannot be evaluated
 */
private void processInputSourceFields(TagNode formNode, String currentPath, FormFlow formFlow)
    throws XPatherException {
  // Select on the same attribute that is read and removed below. The query previously used
  // Constants.SELECT_SOURCE_ATTR, so a selected node was not guaranteed to carry the source.
  Object[] inputSourceNodes =
      formNode.evaluateXPath("//input[@" + Constants.INPUT_SOURCE_ATTR + "]");

  for (Object inputSourceNodeO : inputSourceNodes) {
    TagNode inputSourceNode = (TagNode) inputSourceNodeO;
    String fieldName = inputSourceNode.getAttributeByName(Constants.NAME_ATTR);
    String source = inputSourceNode.getAttributeByName(Constants.INPUT_SOURCE_ATTR);

    FieldSourceProxy fieldSourceProxy =
        proxyFactory.createFlowProxy(currentPath, fieldName, source);
    formFlow.addFieldSourceProxy(fieldSourceProxy);

    inputSourceNode.removeAttribute(Constants.INPUT_SOURCE_ATTR);
    inputSourceNode.setAttribute(
        "rf.source", "rhinoforms/proxy/" + fieldSourceProxy.getProxyPath());
  }
}