/**
 * Extracts the label/value pairs of the student-record table from {@code html} and hands them to
 * {@code dbAdapter.setCampos}.
 *
 * <p>The table is located by comparing each {@code table} element's {@code id} attribute with the
 * string resource {@code janusmob_table_ficha_aluno_id}. Inside it, every {@code td} whose
 * {@code class} attribute equals the resource {@code janusmob_attribute_td_class_name} is treated
 * as a label cell; the value is taken from the second direct {@code td} child of the label's
 * parent element.
 *
 * @param html markup of the page to parse
 * @return {@code false} if no table with the expected id exists; {@code true} otherwise (the
 *     collected fields, possibly none, have then been passed to {@code dbAdapter})
 */
private boolean recuperaDadosAluno(String html) {
    TagNode root = new HtmlCleaner().clean(html);

    String table_name = context.getString(R.string.janusmob_table_ficha_aluno_id);
    TagNode table = null;
    for (TagNode node : root.getElementsByName("table", true)) {
        if (table_name.equals(node.getAttributeByName("id"))) {
            table = node;
            break;
        }
    }
    if (table == null) {
        return false;
    }

    String attribute_class_name = context.getString(R.string.janusmob_attribute_td_class_name);
    List<Campo> l = new LinkedList<Campo>();
    for (TagNode nodeCampo : table.getElementsByName("td", true)) {
        if (!attribute_class_name.equals(nodeCampo.getAttributeByName("class"))) {
            continue;
        }

        // A row holding only the label cell has no value to read; skip it instead of
        // failing on the missing second cell.
        TagNode[] cells = nodeCampo.getParent().getElementsByName("td", false);
        if (cells.length < 2) {
            continue;
        }

        // Keep only the text before the first ':' (same result as split(":")[0], but it
        // cannot fail when the label consists solely of colons).
        String label = nodeCampo.getText().toString();
        int colon = label.indexOf(':');
        if (colon >= 0) {
            label = label.substring(0, colon);
        }

        String value = cells[1].getText().toString().trim().replace("\"", "");
        l.add(new Campo(label, value));
    }

    dbAdapter.setCampos(l);
    return true;
}
/**
 * Reads the user's personal data (NUSP, name, e-mail, CPF) from the personal-data table in
 * {@code html}, builds a {@code Usuario} from it and stores it through
 * {@code dbAdapter.setUsuario}.
 *
 * <p>Each {@code span} whose {@code class} equals the resource
 * {@code janusmob_attribute_class_name} is treated as a field label. The span is detached from
 * its parent, and the parent's remaining text is used as the field value.
 *
 * @param html markup of the page to parse
 * @return {@code false} if the table identified by resource
 *     {@code janusmob_table_dados_aluno_id} is not present; {@code true} otherwise
 */
private boolean recuperaDadosPessoais(String html) {
    TagNode document = new HtmlCleaner().clean(html);

    final String tableId = context.getString(R.string.janusmob_table_dados_aluno_id);
    TagNode dataTable = null;
    TagNode[] allTables = document.getElementsByName("table", true);
    for (int i = 0; i < allTables.length && dataTable == null; i++) {
        if (tableId.equals(allTables[i].getAttributeByName("id"))) {
            dataTable = allTables[i];
        }
    }
    if (dataTable == null) {
        return false;
    }

    final String labelClass = context.getString(R.string.janusmob_attribute_class_name);
    final String nuspLabel = context.getString(R.string.janusmob_campo_nusp);
    final String nomeLabel = context.getString(R.string.janusmob_campo_nome);
    final String emailLabel = context.getString(R.string.janusmob_campo_email);
    final String cpfLabel = context.getString(R.string.janusmob_campo_cpf);

    Usuario user = new Usuario();
    for (TagNode span : dataTable.getElementsByName("span", true)) {
        if (!labelClass.equals(span.getAttributeByName("class"))) {
            continue;
        }

        // Detach the label first so that the owner's text is only the value.
        TagNode owner = span.getParent();
        owner.removeChild(span);

        String label = span.getText().toString().trim();
        if (label.equals(nuspLabel)) {
            user.setNusp(owner.getText().toString().trim());
        } else if (label.equals(nomeLabel)) {
            user.setNome(owner.getText().toString().trim());
        } else if (label.equals(emailLabel)) {
            user.setEmail(owner.getText().toString().trim());
        } else if (label.equals(cpfLabel)) {
            user.setCpf(owner.getText().toString().trim());
        }
    }

    // The password is kept only when the "remember password" flag is set.
    user.setSenha(lembrarSenha ? senha : null);

    dbAdapter.setUsuario(user);
    return true;
}
/** * @param html * @return <code>true</code> se a página possui formulário de login. <code>false</code> caso * contrário. */ private boolean autenticaUsuario(String html) { HtmlCleaner cleaner = new HtmlCleaner(); TagNode root = cleaner.clean(html); String formulario_name = context.getString(R.string.janusmob_formulario); String campo_usuario_name = context.getString(R.string.janusmob_campo_usuario); String campo_senha_name = context.getString(R.string.janusmob_campo_senha); TagNode loginform = null; for (TagNode node : root.getElementsByName("form", true)) { if (node.getAttributeByName("name").equals(formulario_name)) { loginform = node; break; } } if (loginform == null) { return false; } // } // StringBuffer postData = new StringBuffer(String.format( // "%s=%s&%s=%s", campo_usuario_name, usuario, // campo_senha_name, senha)); // // for (TagNode node : loginform.getElementsByName("input", true)) { // if (!(node.getAttributeByName("name") // .equals(campo_usuario_name) || node.getAttributeByName( // "name").equals(campo_senha_name))) { // postData.append("&") // .append(node.getAttributeByName("name")) // .append("=") // .append(node.getAttributeByName("value")); // } // } // // webView.postUrl( // context.getString(R.string.janusmob_pagina_login), // EncodingUtils.getBytes(postData.toString(), "base64")); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_senha_name, senha)); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_usuario_name, usuario)); webView.loadUrl( String.format( "javascript:document.getElementById('%s').submit();", context.getString(R.string.janusmob_formulario))); return true; }
/**
 * Returns the error message shown on the page.
 *
 * <p>The message is the text of the first {@code span} whose {@code id} equals the resource
 * {@code janusmob_span_erro_id}. When no such span exists, the resource
 * {@code janusmob_mensagem_erro_login} is returned instead.
 *
 * @param html markup of the page to inspect
 * @return the page's error text, or the default login error message
 */
private String getMensagemErro(String html) {
    final String errorSpanId = context.getString(R.string.janusmob_span_erro_id);

    TagNode[] spans = new HtmlCleaner().clean(html).getElementsByName("span", true);
    for (int i = 0; i < spans.length; i++) {
        TagNode span = spans[i];
        if (errorSpanId.equals(span.getAttributeByName("id"))) {
            return span.getText().toString();
        }
    }

    return context.getString(R.string.janusmob_mensagem_erro_login);
}
/**
 * Tells whether the page contains the logout link.
 *
 * @param html markup of the page to inspect
 * @return {@code true} if some {@code a} element has an {@code id} equal to the resource
 *     {@code janusmob_link_logout_id}; {@code false} otherwise
 */
private boolean usuarioLogado(String html) {
    final String logoutLinkId = context.getString(R.string.janusmob_link_logout_id);

    TagNode[] anchors = new HtmlCleaner().clean(html).getElementsByName("a", true);
    boolean found = false;
    for (int i = 0; i < anchors.length && !found; i++) {
        found = logoutLinkId.equals(anchors[i].getAttributeByName("id"));
    }
    return found;
}
public static ArrayList<ContentValues> parsePosts( TagNode aThread, int aThreadId, int unreadIndex, int opId, AwfulPreferences prefs, int startIndex) { ArrayList<ContentValues> result = new ArrayList<ContentValues>(); boolean lastReadFound = false; int index = startIndex; String update_time = new Timestamp(System.currentTimeMillis()).toString(); Log.v(TAG, "Update time: " + update_time); try { if (!Constants.isICS() || !prefs.inlineYoutube) { // skipping youtube support for now, it kinda sucks. aThread = convertVideos(aThread); } TagNode[] postNodes = aThread.getElementsByAttValue("class", "post", true, true); for (TagNode node : postNodes) { // fyad status, to prevent processing postbody twice if we are in fyad ContentValues post = new ContentValues(); post.put(THREAD_ID, aThreadId); // We'll just reuse the array of objects rather than create // a ton of them int id = Integer.parseInt(node.getAttributeByName("id").replaceAll("post", "")); post.put(ID, id); post.put(AwfulProvider.UPDATED_TIMESTAMP, update_time); post.put(POST_INDEX, index); if (index > unreadIndex) { post.put(PREVIOUSLY_READ, 0); lastReadFound = true; } else { post.put(PREVIOUSLY_READ, 1); } index++; post.put(IS_MOD, 0); post.put(IS_ADMIN, 0); TagNode[] postContent = node.getElementsHavingAttribute("class", true); for (TagNode pc : postContent) { if (pc.getAttributeByName("class").contains("author")) { post.put(USERNAME, pc.getText().toString().trim()); } if (pc.getAttributeByName("class").contains("role-mod")) { post.put(IS_MOD, 1); } if (pc.getAttributeByName("class").contains("role-admin")) { post.put(IS_ADMIN, 1); } if (pc.getAttributeByName("class").equalsIgnoreCase("title") && pc.getChildTags().length > 0) { TagNode[] avatar = pc.getElementsByName("img", true); if (avatar.length > 0) { post.put(AVATAR, avatar[0].getAttributeByName("src")); } post.put(AVATAR_TEXT, pc.getText().toString().trim()); } if (pc.getAttributeByName("class").equalsIgnoreCase("postbody") || 
pc.getAttributeByName("class").contains("complete_shit")) { TagNode[] images = pc.getElementsByName("img", true); for (TagNode img : images) { // don't alter video mock buttons if ((img.hasAttribute("class") && img.getAttributeByName("class").contains("videoPlayButton"))) { continue; } boolean dontLink = false; TagNode parent = img.getParent(); String src = img.getAttributeByName("src"); if ((parent != null && parent.getName().equals("a")) || (img.hasAttribute("class") && img.getAttributeByName("class") .contains("nolink"))) { // image is linked, don't override dontLink = true; } if (src.contains(".gif")) { img.setAttribute( "class", (img.hasAttribute("class") ? img.getAttributeByName("class") + " " : "") + "gif"); } if (img.hasAttribute("title")) { if (!prefs.showSmilies) { // kill all emotes String name = img.getAttributeByName("title"); img.setName("p"); img.addChild(new ContentNode(name)); } } else { if (!lastReadFound && prefs.hideOldImages || !prefs.imagesEnabled) { if (!dontLink) { img.setName("a"); img.setAttribute("href", src); img.addChild(new ContentNode(src)); } else { img.setName("p"); img.addChild(new ContentNode(src)); } } else { if (!dontLink) { img.setName("a"); img.setAttribute("href", src); TagNode newimg = new TagNode("img"); if (!prefs.imgurThumbnails.equals("d") && src.contains("i.imgur.com")) { int lastSlash = src.lastIndexOf('/'); if (src.length() - lastSlash <= 9) { int pos = src.length() - 4; src = src.substring(0, pos) + prefs.imgurThumbnails + src.substring(pos); } } newimg.setAttribute("src", src); img.addChild(newimg); } } } } StringBuffer fixedContent = new StringBuffer(); Matcher fixCharMatch = fixCharacters_regex.matcher(NetworkUtils.getAsString(pc)); while (fixCharMatch.find()) { fixCharMatch.appendReplacement(fixedContent, ""); } fixCharMatch.appendTail(fixedContent); post.put(CONTENT, fixedContent.toString()); } if (pc.getAttributeByName("class").equalsIgnoreCase("postdate")) { post.put( DATE, 
NetworkUtils.unencodeHtml(pc.getText().toString()) .replaceAll("[^\\w\\s:,]", "") .trim()); } if (pc.getAttributeByName("class").equalsIgnoreCase("profilelinks")) { TagNode[] links = pc.getElementsHavingAttribute("href", true); if (links.length > 0) { String href = links[0].getAttributeByName("href").trim(); String userId = href.substring(href.lastIndexOf("rid=") + 4); post.put(USER_ID, userId); if (Integer.toString(opId).equals(userId)) { // ugh post.put(IS_OP, 1); } else { post.put(IS_OP, 0); } } } if (pc.getAttributeByName("class").equalsIgnoreCase("editedby") && pc.getChildTags().length > 0) { post.put(EDITED, "<i>" + pc.getChildTags()[0].getText().toString() + "</i>"); } } TagNode[] editImgs = node.getElementsByAttValue("alt", "Edit", true, true); if (editImgs.length > 0) { post.put(EDITABLE, 1); } else { post.put(EDITABLE, 0); } result.add(post); } Log.i( TAG, Integer.toString(postNodes.length) + " posts found, " + result.size() + " posts parsed."); } catch (Exception e) { e.printStackTrace(); } return result; }
private static TagNode convertVideos(TagNode contentNode) { TagNode[] videoNodes = contentNode.getElementsByAttValue("class", "bbcode_video", true, true); TagNode[] youtubeNodes = contentNode.getElementsByAttValue("class", "youtube-player", true, true); for (TagNode youTube : youtubeNodes) { String src = youTube.getAttributeByName("src"); int height = Integer.parseInt(youTube.getAttributeByName("height")); int width = Integer.parseInt(youTube.getAttributeByName("width")); Matcher youtube = youtubeHDId_regex.matcher(src); if (youtube.find()) { String videoId = youtube.group(1); String link = "http://www.youtube.com/watch?v=" + videoId; String image = "http://img.youtube.com/vi/" + videoId + "/0.jpg"; youTube.setName("a"); youTube.setAttribute("href", link); youTube.removeAttribute("type"); youTube.removeAttribute("frameborder"); youTube.removeAttribute("src"); youTube.removeAttribute("height"); youTube.removeAttribute("width"); youTube.setAttribute( "style", "background-image:url(" + image + ");background-size:cover;background-repeat:no-repeat;background-position:center; position:relative;display:block;text-align:center; width:" + width + "; height:" + height); TagNode img = new TagNode("img"); img.setAttribute("class", "nolink videoPlayButton"); img.setAttribute("src", "file:///android_res/drawable/ic_menu_video.png"); img.setAttribute( "style", "position:absolute;top:50%;left:50%;margin-top:-16px;margin-left:-16px;"); youTube.addChild(img); } } for (TagNode node : videoNodes) { try { String src = null; int height = 0; int width = 0; TagNode[] object = node.getElementsByName("object", false); if (object.length > 0) { height = Integer.parseInt(object[0].getAttributeByName("height")); width = Integer.parseInt(object[0].getAttributeByName("width")); TagNode[] emb = object[0].getElementsByName("embed", true); if (emb.length > 0) { src = emb[0].getAttributeByName("src"); } } if (src != null && height != 0 && width != 0) { String link = null, image = null; Matcher 
youtube = youtubeId_regex.matcher(src); Matcher vimeo = vimeoId_regex.matcher(src); if (youtube .find()) { // we'll leave in the old youtube code in case something gets reverted String videoId = youtube.group(1); link = "http://www.youtube.com/watch?v=" + videoId; image = "http://img.youtube.com/vi/" + videoId + "/0.jpg"; } else if (vimeo.find()) { String videoId = vimeo.group(1); TagNode vimeoXML; try { vimeoXML = NetworkUtils.get("http://vimeo.com/api/v2/video/" + videoId + ".xml"); } catch (Exception e) { e.printStackTrace(); continue; } if (vimeoXML.findElementByName("mobile_url", true) != null) { link = vimeoXML.findElementByName("mobile_url", true).getText().toString(); } else { link = vimeoXML.findElementByName("url", true).getText().toString(); } image = vimeoXML.findElementByName("thumbnail_large", true).getText().toString(); } else { node.removeAllChildren(); TagNode ln = new TagNode("a"); ln.setAttribute("href", src); ln.addChild(new ContentNode(src)); node.addChild(ln); continue; } node.removeAllChildren(); node.setAttribute( "style", "background-image:url(" + image + ");background-size:cover;background-repeat:no-repeat;background-position:center; position:relative;text-align:center; width:" + width + "; height:" + height); node.setAttribute("onclick", "location.href=\"" + link + "\""); TagNode img = new TagNode("img"); img.setAttribute("class", "nolink videoPlayButton"); img.setAttribute("src", "file:///android_res/drawable/ic_menu_video.png"); img.setAttribute( "style", "position:absolute;top:50%;left:50%;margin-top:-23px;margin-left:-32px;"); node.addChild(img); } } catch (Exception e) { continue; // if we fail to convert the video tag, we can still display the rest. } } return contentNode; }
String getAdditionInfo(String url) { String s = ""; if (url.contains("bongda")) { try { HtmlCleaner cleaner = new HtmlCleaner(); String id = url.substring(url.lastIndexOf("/")); id = id.substring(1, id.indexOf(".")); Uri ss = Uri.parse("http://m.bongdaplus.vn/Story.aspx?sid=" + id); URI fine = new URI( ss.getScheme(), ss.getUserInfo(), ss.getHost(), ss.getPort(), ss.getPath(), ss.getQuery(), ss.getFragment()); HttpClient httpclient = new DefaultHttpClient(); httpclient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH); HttpGet httpget = new HttpGet(fine); httpget.addHeader( "user-agent", "Mozilla/5.0 (Linux; U; Android 2.3.3) Gecko/20100101 Firefox/8.0"); httpget.addHeader("accept-language", "en-us,en;q=0.5"); HttpResponse response = httpclient.execute(httpget); InputStream binaryreader = new BufferedInputStream(response.getEntity().getContent()); BufferedReader buf = new BufferedReader(new InputStreamReader(binaryreader)); String sss; StringBuilder sb = new StringBuilder(); while ((sss = buf.readLine()) != null) { sb.append(sss); } // URL u = new URL(ss.getScheme(), ss.getHost(), ss.getPort(), ss.getPath()); TagNode root = cleaner.clean(sb.toString()); TagNode div = root.findElementByAttValue("class", "story-body", true, false); TagNode[] child = div.getElementsByName("strong", true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByAttValue("class", "listing", true, false); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("ul", true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("img", true); for (int i = 0; i < child.length; i++) child[i].setAttribute("style", "width: 100%; height: auto;"); s = cleaner.getInnerHtml(div); if (s.indexOf("VideoPlaying(") > 0) { int x = s.indexOf("VideoPlaying("); int st = s.indexOf("'", x); int en = s.indexOf("'", st + 1); String urlvideo = "http://bongdaplus.vn" + 
s.substring(st + 1, en); s = s + "<a href=\"" + urlvideo + "\"> Video </a></br></br>"; } } catch (Exception e) { e.printStackTrace(); } } else if (url.contains("teamtalk")) { try { HtmlCleaner cleaner = new HtmlCleaner(); Uri ss = Uri.parse(url); URL u = new URL(ss.getScheme(), ss.getHost(), ss.getPort(), ss.getPath()); TagNode root = cleaner.clean(u); TagNode div = root.findElementByAttValue("class", "tt-article-text", true, false); TagNode[] child = div.getElementsByName("p", true); for (int i = 0; i < child.length; i++) if (child[i].hasAttribute("class") || child[i].hasAttribute("style")) child[i].removeFromTree(); child = div.getElementsByName("img", true); for (int i = 0; i < child.length; i++) child[i].setAttribute("style", "max-width: 80%; height: auto;"); s = cleaner.getInnerHtml(div); } catch (Exception e) { e.printStackTrace(); } } else if (url.contains("licheuro")) { try { HtmlCleaner cleaner = new HtmlCleaner(); Uri ss = Uri.parse(url); URL u = new URL(ss.getScheme(), ss.getHost(), ss.getPort(), ss.getPath()); TagNode root = cleaner.clean(u); TagNode div = root.findElementByAttValue("class", "entry", true, false); TagNode[] child = div.getElementsByName("span", true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("strong", true); for (int i = 0; i < child.length; i++) { child[i].getParent().addChildren(child[i].getChildren()); child[i].getParent().addChild(child[i].getText()); child[i].removeFromTree(); } child = div.getElementsByName("h1", true); for (int i = 0; i < child.length; i++) { child[i].getParent().addChildren(child[i].getChildren()); child[i].getParent().addChild(child[i].getText()); child[i].removeFromTree(); } child = div.getElementsByAttValue("class", "boxpost", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByAttValue("class", "ratingblock ", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = 
div.getElementsByName("iframe", true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("blockqoute", true); for (int i = 0; i < child.length; i++) { child[i].getParent().addChildren(child[i].getChildren()); child[i].removeFromTree(); } child = div.getElementsByAttValue("class", "dd_outer", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByAttValue("id", "fb-root", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByAttValue("id", "fbSEOComments", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByAttValue("id", "ajax_comments_wrapper", true, true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("table", true); for (int i = 0; i < child.length; i++) { child[i].setAttribute("style", "max-width: 100%; height: auto;"); child[i].setAttribute("width", "100%"); } child = div.getElementsByName("script", true); for (int i = 0; i < child.length; i++) child[i].removeFromTree(); child = div.getElementsByName("img", true); for (int i = 0; i < child.length; i++) child[i].setAttribute("style", "max-width: 80%; height: auto;"); s = cleaner.getInnerHtml(div); if (s.indexOf("\"file\":") > 0) { int x = s.indexOf("\"file\":"); int st = s.indexOf("http", x); int en = s.indexOf("\"", st + 1); String urlvideo = s.substring(st, en); s = s + "<a href=\"" + urlvideo + "\"> Video </a></br></br>"; } } catch (Exception e) { e.printStackTrace(); } } System.out.println(s); return s; }
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); // props.setAdvancedXmlEscape(false); props.setUseEmptyElementTags(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } for (int i = 1; i <= anzElements; i++) { String tag = config.getString("substitute[" + i + "]/@tag"); String att = config.getString("substitute[" + i + "]/@att"); String from = config.getString("substitute[" + i + "]/from"); String to = config.getString("substitute[" + i + "]/to"); to = subSpecial(to); TagNode[] imgs = node.getElementsByName(tag, true); for (TagNode tn : imgs) { String srcAtt = tn.getAttributeByName(att); int index = srcAtt.indexOf(from); if (index >= 0) { tn.addAttribute(att, to); } } } BrowserCompactXmlSerializer serializer = new BrowserCompactXmlSerializer(props); // PrettyXmlSerializer serializer = new PrettyXmlSerializer(props); String s = serializer.getXmlAsString(node, "ISO-8859-1"); Writer fw = null; try { fw = new FileWriter(f); fw.write(s); } catch (IOException e) { logger.error("", e); } finally { if (fw != null) try { fw.close(); } catch (IOException e) { } } results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }