/**
 * Parses the student record page and stores every label/value pair found in the record table.
 *
 * <p>The table is located by comparing its {@code id} attribute with the
 * {@code janusmob_table_ficha_aluno_id} string resource. Inside it, every {@code td} whose
 * {@code class} equals the {@code janusmob_attribute_td_class_name} resource is treated as a
 * label cell; the value is read from the second direct {@code td} child of the label's parent.
 * Rows that do not have a second cell are skipped instead of aborting the whole parse.
 *
 * @param html raw HTML of the page to parse
 * @return {@code true} if the table was found and the collected fields were passed to
 *     {@code dbAdapter.setCampos}; {@code false} if no table with the expected id exists
 */
private boolean recuperaDadosAluno(String html) {
  HtmlCleaner cleaner = new HtmlCleaner();
  TagNode root = cleaner.clean(html);

  String table_name = context.getString(R.string.janusmob_table_ficha_aluno_id);
  TagNode table = null;
  for (TagNode node : root.getElementsByName("table", true)) {
    if (table_name.equals(node.getAttributeByName("id"))) {
      table = node;
      break;
    }
  }
  if (table == null) {
    return false;
  }

  String attribute_class_name = context.getString(R.string.janusmob_attribute_td_class_name);
  List<Campo> campos = new LinkedList<Campo>();
  for (TagNode nodeCampo : table.getElementsByName("td", true)) {
    if (!attribute_class_name.equals(nodeCampo.getAttributeByName("class"))) {
      continue;
    }

    // The value lives in the second cell of the same row; a row with a single cell has no value.
    TagNode[] rowCells = nodeCampo.getParent().getElementsByName("td", false);
    if (rowCells.length < 2) {
      continue;
    }
    TagNode nodeValor = rowCells[1];

    // String.split drops trailing empty strings, so a label made only of ':' yields no parts.
    String[] labelParts = nodeCampo.getText().toString().split(":");
    String label = labelParts.length > 0 ? labelParts[0] : "";
    String value = nodeValor.getText().toString().trim().replaceAll("\"", "");

    campos.add(new Campo(label, value));
  }

  dbAdapter.setCampos(campos);
  return true;
}
private void processSelectSource(TagNode formNode, FormFlow formFlow) throws XPatherException, ResourceLoaderException { Object[] dynamicSelectNodes = formNode.evaluateXPath("//select[@" + Constants.SELECT_SOURCE_ATTR + "]"); for (Object dynamicSelectNodeO : dynamicSelectNodes) { TagNode dynamicSelectNode = (TagNode) dynamicSelectNodeO; String name = dynamicSelectNode.getAttributeByName(Constants.NAME_ATTR); String source = dynamicSelectNode.getAttributeByName(Constants.SELECT_SOURCE_ATTR); source = formFlow.resolveResourcePathIfRelative(source); String preselectFirstOption = dynamicSelectNode.getAttributeByName(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR); dynamicSelectNode.removeAttribute(Constants.SELECT_SOURCE_ATTR); dynamicSelectNode.removeAttribute(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR); logger.debug("Found dynamicSelectNode name:{}, source:{}", name, source); List<SelectOptionPojo> options = selectOptionHelper.loadOptions(source); if (!"true".equals(preselectFirstOption)) { options.add(0, new SelectOptionPojo("-- Please Select --", "")); } for (SelectOptionPojo selectOptionPojo : options) { TagNode optionNode = new TagNode("option"); String value = selectOptionPojo.getValue(); if (value != null) { optionNode.setAttribute("value", value); } optionNode.addChild(new ContentNode(selectOptionPojo.getText())); dynamicSelectNode.addChild(optionNode); } } // TODO: validate that submitted value comes from the list }
/**
 * Appends an object-replacement placeholder for an image tag and either attaches the cached
 * drawable immediately or registers a callback that will receive the image once it is loaded.
 *
 * <p>The image reference is taken from the first present attribute among {@code src},
 * {@code href} and {@code xlink:href}.
 *
 * @param node the image tag being rendered
 * @param builder text being built; the placeholder character is appended to it
 * @param start start offset of this tag's span in {@code builder}
 * @param end end offset passed by the caller (not used here)
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  String source = null;
  for (String attributeName : new String[] {"src", "href", "xlink:href"}) {
    source = node.getAttributeByName(attributeName);
    if (source != null) {
      break;
    }
  }

  // U+FFFC is the character the image span is anchored to.
  builder.append("\uFFFC");
  String resolvedHref = spine.resolveHref(source);

  boolean useCachedImage = imageCache.containsKey(resolvedHref) && !fakeImages;
  if (!useCachedImage) {
    LOG.debug("Loading href: " + resolvedHref);
    ImageCallback callback =
        new ImageCallback(resolvedHref, builder, start, builder.length(), fakeImages);
    loader.registerCallback(resolvedHref, callback);
    return;
  }

  Drawable cached = imageCache.get(resolvedHref);
  setImageSpan(builder, cached, start, builder.length());
  LOG.debug("Got cached href: " + resolvedHref);
}
/**
 * Tests whether the given tag carries the configured attribute with the configured value.
 *
 * @param tagNode tag to test; may be {@code null}
 * @return {@code false} if {@code tagNode}, {@code attName} or {@code attValue} is
 *     {@code null}; otherwise whether the tag's {@code attName} attribute equals
 *     {@code attValue}, compared case-sensitively or not according to {@code isCaseSensitive}
 */
public boolean satisfy(TagNode tagNode) {
  if (tagNode == null || attName == null || attValue == null) {
    return false;
  }

  String actualValue = tagNode.getAttributeByName(attName);
  if (isCaseSensitive) {
    return attValue.equals(actualValue);
  }
  return attValue.equalsIgnoreCase(actualValue);
}
/**
 * Parses the personal-data page, builds a {@code Usuario} from it and stores it through
 * {@code dbAdapter.setUsuario}.
 *
 * <p>Inside the table whose {@code id} equals the {@code janusmob_table_dados_aluno_id}
 * resource, every {@code span} whose class equals the {@code janusmob_attribute_class_name}
 * resource is treated as a field label. The label span is detached from its parent so that the
 * parent's remaining text is the field value.
 *
 * @param html raw HTML of the page to parse
 * @return {@code true} if the table was found and the user was stored; {@code false} if no
 *     table with the expected id exists
 */
private boolean recuperaDadosPessoais(String html) {
  TagNode root = new HtmlCleaner().clean(html);

  String tableId = context.getString(R.string.janusmob_table_dados_aluno_id);
  TagNode table = null;
  TagNode[] candidateTables = root.getElementsByName("table", true);
  for (int i = 0; i < candidateTables.length && table == null; i++) {
    if (tableId.equals(candidateTables[i].getAttributeByName("id"))) {
      table = candidateTables[i];
    }
  }
  if (table == null) {
    return false;
  }

  String labelClass = context.getString(R.string.janusmob_attribute_class_name);
  String campo_nusp = context.getString(R.string.janusmob_campo_nusp);
  String campo_nome = context.getString(R.string.janusmob_campo_nome);
  String campo_email = context.getString(R.string.janusmob_campo_email);
  String campo_cpf = context.getString(R.string.janusmob_campo_cpf);

  Usuario usuario = new Usuario();
  for (TagNode labelSpan : table.getElementsByName("span", true)) {
    if (!labelClass.equals(labelSpan.getAttributeByName("class"))) {
      continue;
    }

    // Detach the label so that the parent's text contains only the value.
    TagNode parent = labelSpan.getParent();
    parent.removeChild(labelSpan);

    String label = labelSpan.getText().toString().trim();
    String value = parent.getText().toString().trim();

    if (label.equals(campo_nusp)) {
      usuario.setNusp(value);
    } else if (label.equals(campo_nome)) {
      usuario.setNome(value);
    } else if (label.equals(campo_email)) {
      usuario.setEmail(value);
    } else if (label.equals(campo_cpf)) {
      usuario.setCpf(value);
    }
  }

  // The password is only kept when the "remember password" flag is set.
  usuario.setSenha(lembrarSenha ? senha : null);

  dbAdapter.setUsuario(usuario);
  return true;
}
/**
 * Visitor callback that records links wrapping an image.
 *
 * <p>For an {@code a} tag with a non-empty {@code href} and at least one child, the first
 * direct {@code img} child that has a {@code src} attribute is registered through
 * {@code getLinks().setLinkByServer(href, src)}.
 *
 * @param parentNode parent of the visited node (not used here)
 * @param htmlNode node being visited
 * @return always {@code true}
 */
@Override
public boolean visit(TagNode parentNode, HtmlNode htmlNode) {
  if (!(htmlNode instanceof TagNode)) {
    return true;
  }

  TagNode anchor = (TagNode) htmlNode;
  if (!anchor.getName().equalsIgnoreCase("a")) {
    return true;
  }

  String link = anchor.getAttributeByName("href");
  if (link == null || link.isEmpty() || !anchor.hasChildren()) {
    return true;
  }

  TagNode image = anchor.findElementByName("img", false);
  if (image != null && image.hasAttribute("src")) {
    getLinks().setLinkByServer(link, image.getAttributeByName("src"));
  }
  return true;
}
/**
 * Recursively replaces every include element under {@code html} with the body content of the
 * resource its {@code src} attribute points to.
 *
 * <p>The included resource is parsed, its own includes are expanded first (one level deeper),
 * and then the children of its {@code body} take the place of the include element. The
 * resource stream is closed as soon as it has been parsed.
 *
 * @param html node whose descendant include elements are expanded
 * @param depth current nesting level; the first call is expected to pass the starting level
 * @param formFlow used to resolve relative include paths
 * @throws IOException if reading or closing an included resource fails
 * @throws FormParserException if an included resource cannot be found or the nesting depth
 *     reaches {@code processIncludesMaxDepth}
 */
private void doProcessIncludes(TagNode html, int depth, FormFlow formFlow)
    throws IOException, FormParserException {
  if (depth >= processIncludesMaxDepth) {
    throw new FormParserException(
        "Exceeded maximum nested "
            + Constants.INCLUDE_ELEMENT
            + " depth of "
            + processIncludesMaxDepth);
  }

  @SuppressWarnings("unchecked")
  List<TagNode> includeNodes = html.getElementListByName(Constants.INCLUDE_ELEMENT, true);
  for (TagNode includeNode : includeNodes) {
    String srcAttribute = includeNode.getAttributeByName("src");
    srcAttribute = formFlow.resolveResourcePathIfRelative(srcAttribute);

    InputStream resourceAsStream = resourceLoader.getFormResourceAsStream(srcAttribute);
    if (resourceAsStream == null) {
      throw new FormParserException("Include file not found. Path:'" + srcAttribute + "'");
    }

    // Parse and release the stream right away so it is not leaked, even if parsing fails.
    TagNode includeHtml;
    try {
      includeHtml = htmlCleaner.clean(resourceAsStream);
    } finally {
      resourceAsStream.close();
    }

    TagNode body = includeHtml.findElementByName("body", false);
    doProcessIncludes(body, depth + 1, formFlow);

    @SuppressWarnings("unchecked")
    List<HtmlNode> bodyChildren = body.getChildren();
    // Each child is inserted directly after the include node, so inserting in reverse order
    // leaves the children in their original document order.
    Collections.reverse(bodyChildren);
    TagNode includeParent = includeNode.getParent();
    for (HtmlNode bodyChild : bodyChildren) {
      includeParent.insertChildAfter(includeNode, bodyChild);
    }
    includeParent.removeChild(includeNode);
  }
}
/**
 * Appends an object-replacement placeholder for an image tag and registers a callback that
 * will receive the image for the placeholder's range.
 *
 * <p>The image reference is taken from the first present attribute among {@code src},
 * {@code href} and {@code xlink:href}.
 *
 * @param node the image tag being rendered
 * @param builder text being built; the placeholder character is appended to it
 * @param start start offset of this tag's span in {@code builder}
 * @param end end offset passed by the caller (not used here)
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  String source = null;
  for (String attributeName : new String[] {"src", "href", "xlink:href"}) {
    source = node.getAttributeByName(attributeName);
    if (source != null) {
      break;
    }
  }

  // U+FFFC is the character the image span will be anchored to.
  builder.append("\uFFFC");

  String resolvedHref = spine.resolveHref(source);
  ImageCallback callback = new ImageCallback(builder, start, builder.length());
  loader.registerCallback(resolvedHref, callback);
}
@Override public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) { String href = node.getAttributeByName("href"); if (href == null) { return; } final String linkHref = href; // First check if it should be a normal URL link for (String protocol : this.externalProtocols) { if (href.toLowerCase(Locale.US).startsWith(protocol)) { builder.setSpan(new URLSpan(href), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); return; } } // If not, consider it an internal nav link. ClickableSpan span = new ClickableSpan() { @Override public void onClick(View widget) { navigateTo(spine.resolveHref(linkHref)); } }; builder.setSpan(span, start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); }
/**
 * Replaces the source of every {@code input} that declares one with a proxy path.
 *
 * <p>For each matching input a {@code FieldSourceProxy} is created for the current path, the
 * field name and the declared source; it is registered on the form flow, the original source
 * attribute is removed and {@code rf.source} is set to the proxy URL.
 *
 * <p>The XPath selects on {@code Constants.INPUT_SOURCE_ATTR}, the same attribute that is then
 * read and removed, so that only inputs actually carrying that attribute are processed.
 * NOTE(review): the previous expression selected on {@code Constants.SELECT_SOURCE_ATTR};
 * this is behavior-neutral only if both constants hold the same attribute name - confirm.
 *
 * @param formNode root of the form markup to search
 * @param currentPath path of the current form, passed to the proxy factory
 * @param formFlow flow that collects the created proxies
 * @throws XPatherException if the XPath lookup fails
 */
private void processInputSourceFields(TagNode formNode, String currentPath, FormFlow formFlow)
    throws XPatherException {
  Object[] autoCompleteNodes =
      formNode.evaluateXPath("//input[@" + Constants.INPUT_SOURCE_ATTR + "]");

  for (Object autoCompleteNodeO : autoCompleteNodes) {
    TagNode autoCompleteNode = (TagNode) autoCompleteNodeO;
    String fieldName = autoCompleteNode.getAttributeByName(Constants.NAME_ATTR);
    String source = autoCompleteNode.getAttributeByName(Constants.INPUT_SOURCE_ATTR);

    FieldSourceProxy fieldSourceProxy =
        proxyFactory.createFlowProxy(currentPath, fieldName, source);
    formFlow.addFieldSourceProxy(fieldSourceProxy);

    // The browser must talk to the proxy, never to the real source.
    autoCompleteNode.removeAttribute(Constants.INPUT_SOURCE_ATTR);
    autoCompleteNode.setAttribute(
        "rf.source", "rhinoforms/proxy/" + fieldSourceProxy.getProxyPath());
  }
}
/**
 * Records the text offset of any tag that has an {@code id} attribute, then delegates the
 * actual rendering to the wrapped handler.
 *
 * @param node tag being rendered
 * @param builder text being built
 * @param start start offset of the tag's content; stored in {@code anchors} under the id
 * @param end end offset of the tag's content
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
  String anchorId = node.getAttributeByName("id");
  boolean hasAnchor = anchorId != null;
  if (hasAnchor) {
    anchors.put(anchorId, start);
  }

  wrappedHandler.handleTagNode(node, builder, start, end);
}
/** * @param html * @return <code>true</code> se a página possui formulário de login. <code>false</code> caso * contrário. */ private boolean autenticaUsuario(String html) { HtmlCleaner cleaner = new HtmlCleaner(); TagNode root = cleaner.clean(html); String formulario_name = context.getString(R.string.janusmob_formulario); String campo_usuario_name = context.getString(R.string.janusmob_campo_usuario); String campo_senha_name = context.getString(R.string.janusmob_campo_senha); TagNode loginform = null; for (TagNode node : root.getElementsByName("form", true)) { if (node.getAttributeByName("name").equals(formulario_name)) { loginform = node; break; } } if (loginform == null) { return false; } // } // StringBuffer postData = new StringBuffer(String.format( // "%s=%s&%s=%s", campo_usuario_name, usuario, // campo_senha_name, senha)); // // for (TagNode node : loginform.getElementsByName("input", true)) { // if (!(node.getAttributeByName("name") // .equals(campo_usuario_name) || node.getAttributeByName( // "name").equals(campo_senha_name))) { // postData.append("&") // .append(node.getAttributeByName("name")) // .append("=") // .append(node.getAttributeByName("value")); // } // } // // webView.postUrl( // context.getString(R.string.janusmob_pagina_login), // EncodingUtils.getBytes(postData.toString(), "base64")); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_senha_name, senha)); webView.loadUrl( String.format( "javascript:document.getElementById(\"%s\").value=%s;", campo_usuario_name, usuario)); webView.loadUrl( String.format( "javascript:document.getElementById('%s').submit();", context.getString(R.string.janusmob_formulario))); return true; }
/**
 * Tells whether the page contains the logout link.
 *
 * @param html raw HTML of the page to inspect
 * @return {@code true} if any {@code a} element has an {@code id} equal to the
 *     {@code janusmob_link_logout_id} string resource; {@code false} otherwise
 */
private boolean usuarioLogado(String html) {
  HtmlCleaner cleaner = new HtmlCleaner();
  TagNode root = cleaner.clean(html);
  String logoutLinkId = context.getString(R.string.janusmob_link_logout_id);

  TagNode[] anchors = root.getElementsByName("a", true);
  for (int i = 0; i < anchors.length; i++) {
    String anchorId = anchors[i].getAttributeByName("id");
    if (logoutLinkId.equals(anchorId)) {
      return true;
    }
  }
  return false;
}
/**
 * Extracts the error message shown on the page.
 *
 * @param html raw HTML of the page to inspect
 * @return the text of the first {@code span} whose {@code id} equals the
 *     {@code janusmob_span_erro_id} string resource, or the
 *     {@code janusmob_mensagem_erro_login} resource if no such span exists
 */
private String getMensagemErro(String html) {
  HtmlCleaner cleaner = new HtmlCleaner();
  TagNode root = cleaner.clean(html);
  String errorSpanId = context.getString(R.string.janusmob_span_erro_id);

  TagNode[] spans = root.getElementsByName("span", true);
  for (int i = 0; i < spans.length; i++) {
    TagNode span = spans[i];
    if (errorSpanId.equals(span.getAttributeByName("id"))) {
      return span.getText().toString();
    }
  }

  // No error span on the page: fall back to the generic login error message.
  return context.getString(R.string.janusmob_mensagem_erro_login);
}
@Override public void getSubtitleData(TagNode rootNode, ArrayList<Subtitle> subtitles) { ArrayList<TagNode> listingTags = getTagsByClass(rootNode, "li", "listing"); if (!listingTags.isEmpty()) { TagNode liNode = listingTags.get(0); TagNode nja = (TagNode) liNode.getChildren().get(0); for (int i = 0; i < nja.getChildren().size(); i++) { TagNode titlli = (TagNode) nja.getChildren().get(i); TagNode namelink = (TagNode) titlli.getElementListByName("a", true).get(0); ArrayList<TagNode> cdTags = getTagsByClass(titlli, "span", "cd"); ArrayList<TagNode> releaseTags = getTagsByClass(titlli, "span", "release"); ArrayList<TagNode> fpsTags = getTagsByClass(titlli, "span", "fps"); ArrayList<TagNode> dwTags = getTagsByClass(titlli, "a", "button"); // naslov String ttitle = namelink.getText().toString(); // broj cd-a String tCDNumber = "0"; if (!cdTags.isEmpty()) { tCDNumber = cdTags.get(0).getText().toString().replace("CD", " ").trim(); } // verzija String release = "N/A"; if (!releaseTags.isEmpty()) { release = releaseTags.get(0).getText().toString().trim(); } // fps String fps = "N/A"; if (!fpsTags.isEmpty()) { fps = fpsTags .get(0) .getText() .toString() .substring(4, fpsTags.get(0).getText().toString().length()) .trim(); } TagNode dwTagNode = dwTags.get(0); String dwPage = dwTagNode.getAttributeByName("href"); Subtitle subtitle = new Subtitle(ttitle, release, dwPage); subtitle.setNumberOfDiscs(Integer.parseInt(tCDNumber)); subtitle.setFps(fps); subtitle.setDownloadURL(dwPage); subtitles.add(subtitle); } } }
/**
 * Annotates every element that has an {@code action} attribute with the type of the matching
 * flow action.
 *
 * <p>Elements whose action name is not in {@code currentActions}, or whose action has no
 * type, are left unchanged.
 *
 * @param currentActions flow actions keyed by action name
 * @param formNode root of the form markup to search
 */
private void processActions(Map<String, FlowAction> currentActions, TagNode formNode) {
  @SuppressWarnings("unchecked")
  List<TagNode> actionElements = formNode.getElementListHavingAttribute("action", true);

  for (TagNode actionElement : actionElements) {
    FlowAction flowAction = currentActions.get(actionElement.getAttributeByName("action"));
    if (flowAction == null) {
      continue;
    }

    FlowActionType actionType = flowAction.getType();
    if (actionType == null) {
      continue;
    }
    actionElement.setAttribute("actionType", actionType.toString());
  }
}
public static ArrayList<String> dobisliko(TagNode node, String XPathExpression) { TagNode description_node = null; ArrayList<String> Temp = new ArrayList<String>(); NodeList nodes; try { // description_node = (TagNode) node.evaluateXPath(XPathExpression)[0]; for (int x = 0; x < node.evaluateXPath(XPathExpression).length; x++) { description_node = (TagNode) node.evaluateXPath(XPathExpression)[x]; // // System.out.println("http://www.krka.si"+description_node.getAttributeByName("src")+"\n"+"---------------------------------------"); Temp.add("http://www.krka.si" + description_node.getAttributeByName("src").toString()); } } catch (XPatherException e) { e.printStackTrace(); } return Temp; // // System.out.println(description_node.getText()+"\n"+"---------------------------------------"); }
/**
 * Expands every {@code select} that declares a range into one {@code option} per number in
 * that range.
 *
 * <p>The range start and end attributes are JavaScript expressions evaluated in a working
 * scope. The range is inclusive and counts upwards when start is less than end, downwards
 * otherwise. The range and preselect attributes are removed from the element. Unless the
 * preselect-first-option attribute equals {@code "true"}, a "-- Please Select --" option with
 * an empty value is added first. A select whose start or end expression is missing or empty
 * is logged and left without options.
 *
 * @param formNode root of the form markup to search
 * @param masterScope provides the JavaScript context and working scope
 * @throws XPatherException if the XPath lookup fails
 */
private void processSelectRange(TagNode formNode, JSMasterScope masterScope)
    throws XPatherException {
  Object[] rangeSelectNodes =
      formNode.evaluateXPath("//select[@" + Constants.SELECT_RANGE_START_ATTR + "]");
  if (rangeSelectNodes.length > 0) {
    Scriptable workingScope = masterScope.createWorkingScope();
    Context context = masterScope.getCurrentContext();

    for (Object rangeSelectNodeO : rangeSelectNodes) {
      TagNode rangeSelectNode = (TagNode) rangeSelectNodeO;
      String name = rangeSelectNode.getAttributeByName(Constants.NAME_ATTR);
      String rangeStart = rangeSelectNode.getAttributeByName(Constants.SELECT_RANGE_START_ATTR);
      String rangeEnd = rangeSelectNode.getAttributeByName(Constants.SELECT_RANGE_END_ATTR);
      String preselectFirstOption =
          rangeSelectNode.getAttributeByName(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);

      // These attributes are processing instructions only; strip them from the markup.
      rangeSelectNode.removeAttribute(Constants.SELECT_RANGE_START_ATTR);
      rangeSelectNode.removeAttribute(Constants.SELECT_RANGE_END_ATTR);
      rangeSelectNode.removeAttribute(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);
      logger.debug(
          "Found rangeSelectNode name:{}, rangeStart:{}, rangeEnd:{}",
          new String[] {name, rangeStart, rangeEnd});

      boolean rangeStartValid = rangeStart != null && !rangeStart.isEmpty();
      boolean rangeEndValid = rangeEnd != null && !rangeEnd.isEmpty();
      if (rangeStartValid && rangeEndValid) {
        Object rangeStartResult =
            context.evaluateString(
                workingScope,
                "{" + rangeStart + "}",
                Constants.SELECT_RANGE_START_ATTR,
                1,
                null);
        Object rangeEndResult =
            context.evaluateString(
                workingScope, "{" + rangeEnd + "}", Constants.SELECT_RANGE_END_ATTR, 1, null);
        logger.debug(
            "RangeSelectNode name:{}, rangeStartResult:{}, rangeEndResult:{}",
            new Object[] {name, rangeStartResult, rangeEndResult});

        double rangeStartResultNumber = Context.toNumber(rangeStartResult);
        double rangeEndResultNumber = Context.toNumber(rangeEndResult);
        String comparator;
        String incrementor;
        if (rangeStartResultNumber < rangeEndResultNumber) {
          comparator = "<=";
          incrementor = "++";
        } else {
          comparator = ">=";
          incrementor = "--";
        }

        // The range is built in JavaScript and returned as a comma-separated string.
        String rangeStatement =
            "{ var range = []; for( var i = "
                + rangeStartResult
                + "; i "
                + comparator
                + " "
                + rangeEndResult
                + "; i"
                + incrementor
                + ") { range.push(i); }; '' + range; }";
        logger.debug("RangeSelectNode name:{}, rangeStatement:{}", name, rangeStatement);

        // Depending on the Rhino version, a concatenated string can come back as a
        // CharSequence that is not a java.lang.String, so convert instead of casting.
        String rangeResult =
            Context.toString(
                context.evaluateString(workingScope, rangeStatement, "Calculate range", 1, null));
        logger.debug("RangeSelectNode name:{}, rangeResult:{}", name, rangeResult);

        if (!"true".equals(preselectFirstOption)) {
          TagNode optionNode = new TagNode("option");
          optionNode.setAttribute("value", "");
          optionNode.addChild(new ContentNode("-- Please Select --"));
          rangeSelectNode.addChild(optionNode);
        }
        for (String item : rangeResult.split(",")) {
          TagNode optionNode = new TagNode("option");
          optionNode.addChild(new ContentNode(item));
          rangeSelectNode.addChild(optionNode);
        }
      } else {
        logger.warn(
            "Range select node '{}' not processed because {} is empty.",
            name,
            (rangeStartValid
                ? Constants.SELECT_RANGE_START_ATTR
                : Constants.SELECT_RANGE_END_ATTR));
      }
    }
  }
}
public static ArrayList<ContentValues> parsePosts( TagNode aThread, int aThreadId, int unreadIndex, int opId, AwfulPreferences prefs, int startIndex) { ArrayList<ContentValues> result = new ArrayList<ContentValues>(); boolean lastReadFound = false; int index = startIndex; String update_time = new Timestamp(System.currentTimeMillis()).toString(); Log.v(TAG, "Update time: " + update_time); try { if (!Constants.isICS() || !prefs.inlineYoutube) { // skipping youtube support for now, it kinda sucks. aThread = convertVideos(aThread); } TagNode[] postNodes = aThread.getElementsByAttValue("class", "post", true, true); for (TagNode node : postNodes) { // fyad status, to prevent processing postbody twice if we are in fyad ContentValues post = new ContentValues(); post.put(THREAD_ID, aThreadId); // We'll just reuse the array of objects rather than create // a ton of them int id = Integer.parseInt(node.getAttributeByName("id").replaceAll("post", "")); post.put(ID, id); post.put(AwfulProvider.UPDATED_TIMESTAMP, update_time); post.put(POST_INDEX, index); if (index > unreadIndex) { post.put(PREVIOUSLY_READ, 0); lastReadFound = true; } else { post.put(PREVIOUSLY_READ, 1); } index++; post.put(IS_MOD, 0); post.put(IS_ADMIN, 0); TagNode[] postContent = node.getElementsHavingAttribute("class", true); for (TagNode pc : postContent) { if (pc.getAttributeByName("class").contains("author")) { post.put(USERNAME, pc.getText().toString().trim()); } if (pc.getAttributeByName("class").contains("role-mod")) { post.put(IS_MOD, 1); } if (pc.getAttributeByName("class").contains("role-admin")) { post.put(IS_ADMIN, 1); } if (pc.getAttributeByName("class").equalsIgnoreCase("title") && pc.getChildTags().length > 0) { TagNode[] avatar = pc.getElementsByName("img", true); if (avatar.length > 0) { post.put(AVATAR, avatar[0].getAttributeByName("src")); } post.put(AVATAR_TEXT, pc.getText().toString().trim()); } if (pc.getAttributeByName("class").equalsIgnoreCase("postbody") || 
pc.getAttributeByName("class").contains("complete_shit")) { TagNode[] images = pc.getElementsByName("img", true); for (TagNode img : images) { // don't alter video mock buttons if ((img.hasAttribute("class") && img.getAttributeByName("class").contains("videoPlayButton"))) { continue; } boolean dontLink = false; TagNode parent = img.getParent(); String src = img.getAttributeByName("src"); if ((parent != null && parent.getName().equals("a")) || (img.hasAttribute("class") && img.getAttributeByName("class") .contains("nolink"))) { // image is linked, don't override dontLink = true; } if (src.contains(".gif")) { img.setAttribute( "class", (img.hasAttribute("class") ? img.getAttributeByName("class") + " " : "") + "gif"); } if (img.hasAttribute("title")) { if (!prefs.showSmilies) { // kill all emotes String name = img.getAttributeByName("title"); img.setName("p"); img.addChild(new ContentNode(name)); } } else { if (!lastReadFound && prefs.hideOldImages || !prefs.imagesEnabled) { if (!dontLink) { img.setName("a"); img.setAttribute("href", src); img.addChild(new ContentNode(src)); } else { img.setName("p"); img.addChild(new ContentNode(src)); } } else { if (!dontLink) { img.setName("a"); img.setAttribute("href", src); TagNode newimg = new TagNode("img"); if (!prefs.imgurThumbnails.equals("d") && src.contains("i.imgur.com")) { int lastSlash = src.lastIndexOf('/'); if (src.length() - lastSlash <= 9) { int pos = src.length() - 4; src = src.substring(0, pos) + prefs.imgurThumbnails + src.substring(pos); } } newimg.setAttribute("src", src); img.addChild(newimg); } } } } StringBuffer fixedContent = new StringBuffer(); Matcher fixCharMatch = fixCharacters_regex.matcher(NetworkUtils.getAsString(pc)); while (fixCharMatch.find()) { fixCharMatch.appendReplacement(fixedContent, ""); } fixCharMatch.appendTail(fixedContent); post.put(CONTENT, fixedContent.toString()); } if (pc.getAttributeByName("class").equalsIgnoreCase("postdate")) { post.put( DATE, 
NetworkUtils.unencodeHtml(pc.getText().toString()) .replaceAll("[^\\w\\s:,]", "") .trim()); } if (pc.getAttributeByName("class").equalsIgnoreCase("profilelinks")) { TagNode[] links = pc.getElementsHavingAttribute("href", true); if (links.length > 0) { String href = links[0].getAttributeByName("href").trim(); String userId = href.substring(href.lastIndexOf("rid=") + 4); post.put(USER_ID, userId); if (Integer.toString(opId).equals(userId)) { // ugh post.put(IS_OP, 1); } else { post.put(IS_OP, 0); } } } if (pc.getAttributeByName("class").equalsIgnoreCase("editedby") && pc.getChildTags().length > 0) { post.put(EDITED, "<i>" + pc.getChildTags()[0].getText().toString() + "</i>"); } } TagNode[] editImgs = node.getElementsByAttValue("alt", "Edit", true, true); if (editImgs.length > 0) { post.put(EDITABLE, 1); } else { post.put(EDITABLE, 0); } result.add(post); } Log.i( TAG, Integer.toString(postNodes.length) + " posts found, " + result.size() + " posts parsed."); } catch (Exception e) { e.printStackTrace(); } return result; }
private static TagNode convertVideos(TagNode contentNode) { TagNode[] videoNodes = contentNode.getElementsByAttValue("class", "bbcode_video", true, true); TagNode[] youtubeNodes = contentNode.getElementsByAttValue("class", "youtube-player", true, true); for (TagNode youTube : youtubeNodes) { String src = youTube.getAttributeByName("src"); int height = Integer.parseInt(youTube.getAttributeByName("height")); int width = Integer.parseInt(youTube.getAttributeByName("width")); Matcher youtube = youtubeHDId_regex.matcher(src); if (youtube.find()) { String videoId = youtube.group(1); String link = "http://www.youtube.com/watch?v=" + videoId; String image = "http://img.youtube.com/vi/" + videoId + "/0.jpg"; youTube.setName("a"); youTube.setAttribute("href", link); youTube.removeAttribute("type"); youTube.removeAttribute("frameborder"); youTube.removeAttribute("src"); youTube.removeAttribute("height"); youTube.removeAttribute("width"); youTube.setAttribute( "style", "background-image:url(" + image + ");background-size:cover;background-repeat:no-repeat;background-position:center; position:relative;display:block;text-align:center; width:" + width + "; height:" + height); TagNode img = new TagNode("img"); img.setAttribute("class", "nolink videoPlayButton"); img.setAttribute("src", "file:///android_res/drawable/ic_menu_video.png"); img.setAttribute( "style", "position:absolute;top:50%;left:50%;margin-top:-16px;margin-left:-16px;"); youTube.addChild(img); } } for (TagNode node : videoNodes) { try { String src = null; int height = 0; int width = 0; TagNode[] object = node.getElementsByName("object", false); if (object.length > 0) { height = Integer.parseInt(object[0].getAttributeByName("height")); width = Integer.parseInt(object[0].getAttributeByName("width")); TagNode[] emb = object[0].getElementsByName("embed", true); if (emb.length > 0) { src = emb[0].getAttributeByName("src"); } } if (src != null && height != 0 && width != 0) { String link = null, image = null; Matcher 
youtube = youtubeId_regex.matcher(src); Matcher vimeo = vimeoId_regex.matcher(src); if (youtube .find()) { // we'll leave in the old youtube code in case something gets reverted String videoId = youtube.group(1); link = "http://www.youtube.com/watch?v=" + videoId; image = "http://img.youtube.com/vi/" + videoId + "/0.jpg"; } else if (vimeo.find()) { String videoId = vimeo.group(1); TagNode vimeoXML; try { vimeoXML = NetworkUtils.get("http://vimeo.com/api/v2/video/" + videoId + ".xml"); } catch (Exception e) { e.printStackTrace(); continue; } if (vimeoXML.findElementByName("mobile_url", true) != null) { link = vimeoXML.findElementByName("mobile_url", true).getText().toString(); } else { link = vimeoXML.findElementByName("url", true).getText().toString(); } image = vimeoXML.findElementByName("thumbnail_large", true).getText().toString(); } else { node.removeAllChildren(); TagNode ln = new TagNode("a"); ln.setAttribute("href", src); ln.addChild(new ContentNode(src)); node.addChild(ln); continue; } node.removeAllChildren(); node.setAttribute( "style", "background-image:url(" + image + ");background-size:cover;background-repeat:no-repeat;background-position:center; position:relative;text-align:center; width:" + width + "; height:" + height); node.setAttribute("onclick", "location.href=\"" + link + "\""); TagNode img = new TagNode("img"); img.setAttribute("class", "nolink videoPlayButton"); img.setAttribute("src", "file:///android_res/drawable/ic_menu_video.png"); img.setAttribute( "style", "position:absolute;top:50%;left:50%;margin-top:-23px;margin-left:-32px;"); node.addChild(img); } } catch (Exception e) { continue; // if we fail to convert the video tag, we can still display the rest. } } return contentNode; }
@Override public boolean authenticate() { if (!super.authenticate()) { LOG.error( String.format( "blank username or password detected, no %s xword will be downloaded", this.getType())); return false; } final HttpUriRequest loginGet = RequestBuilder.get().setUri(NYT_LOGIN_URL).build(); final String loginPage; try (final CloseableHttpResponse getResponse = this.getHttpClient().execute(loginGet)) { loginPage = EntityUtils.toString(getResponse.getEntity()); } catch (final IOException e) { LOG.error("error while navigating to NYT login page", e); return false; } final String token; final String expires; try { final TagNode node = this.getCleaner().clean(loginPage); final Object[] foundNodes = node.evaluateXPath("//input[@name='token']"); if (foundNodes.length != 1) { this.throwLoginException( "unexpected login page, found %d hidden token input elements, expected 1", foundNodes.length); } final TagNode hiddenTokenInput = (TagNode) foundNodes[0]; token = hiddenTokenInput.getAttributeByName("value"); LOG.debug("found hidden input token {}", token); final Object[] foundExpiresNodes = node.evaluateXPath("//input[@name='expires']"); if (foundExpiresNodes.length != 1) { this.throwLoginException( "unexpected login page, found %d hidden token expiration input elements, expected 1", foundNodes.length); } final TagNode hiddenTokenExpiresInput = (TagNode) foundExpiresNodes[0]; expires = hiddenTokenExpiresInput.getAttributeByName("value"); LOG.debug("found hidden input token expiration {}", expires); } catch (LoginException | XPatherException e) { LOG.error("error while pulling login tokens from NYT login page", e); return false; } // @formatter:off final HttpUriRequest loginPost = RequestBuilder.post() .setUri("https://myaccount.nytimes.com/auth/login") .addParameter("is_continue", Boolean.FALSE.toString()) .addParameter("token", token) .addParameter("expires", expires) .addParameter("userid", this.getLoginInfo().getUsername()) .addParameter("password", 
this.getLoginInfo().getPassword()) .addParameter("remember", Boolean.TRUE.toString()) .build(); // @formatter:on try (CloseableHttpResponse postResponse = this.getHttpClient().execute(loginPost)) { // successful NYT login should give 302 status final int responseStatus = postResponse.getStatusLine().getStatusCode(); if (responseStatus != 302) { final String errorMessage = String.format("did not detect expected 302 redirect, got %d instead", responseStatus); throw new LoginException(errorMessage); } // successful NYT login redirects to the NYT homepage final Header location = postResponse.getFirstHeader("Location"); // have seen this redirect both with and without the final portion final Pattern expectedRedirectLocation = Pattern.compile("http://www.nytimes.com(\\?login=email)*"); final String actualRedirectLocation = location.getValue(); final Matcher matcher = expectedRedirectLocation.matcher(actualRedirectLocation); if (!matcher.matches()) { final String errorMessage = String.format( "redirect to unexpected URL, expected %s, found Location=%s instead", expectedRedirectLocation, actualRedirectLocation); throw new LoginException(errorMessage); } // successful NYT login should set a few cookies final Header[] cookies = postResponse.getHeaders("Set-Cookie"); if (cookies.length < 1) { throw new LoginException("no post login cookies set, login likely failed"); } } catch (final IOException | LoginException e) { LOG.error("error while logging in, e={}", e.getMessage()); return false; } LOG.info("successfully logged in to nyt"); return true; }
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); // props.setAdvancedXmlEscape(false); props.setUseEmptyElementTags(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } for (int i = 1; i <= anzElements; i++) { String tag = config.getString("substitute[" + i + "]/@tag"); String att = config.getString("substitute[" + i + "]/@att"); String from = config.getString("substitute[" + i + "]/from"); String to = config.getString("substitute[" + i + "]/to"); to = subSpecial(to); TagNode[] imgs = node.getElementsByName(tag, true); for (TagNode tn : imgs) { String srcAtt = tn.getAttributeByName(att); int index = srcAtt.indexOf(from); if (index >= 0) { tn.addAttribute(att, to); } } } BrowserCompactXmlSerializer serializer = new BrowserCompactXmlSerializer(props); // PrettyXmlSerializer serializer = new PrettyXmlSerializer(props); String s = serializer.getXmlAsString(node, "ISO-8859-1"); Writer fw = null; try { fw = new FileWriter(f); fw.write(s); } catch (IOException e) { logger.error("", e); } finally { if (fw != null) try { fw.close(); } catch (IOException e) { } } results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }
private void recordInputFields( TagNode formNode, FormFlow formFlow, Document dataDocument, String docBase) throws XPathExpressionException, XPatherException { List<InputPojo> inputPojos = new ArrayList<InputPojo>(); Map<String, InputPojo> inputPojosMap = new HashMap<String, InputPojo>(); @SuppressWarnings("unchecked") List<TagNode> inputs = formNode.getElementListByName("input", true); @SuppressWarnings("unchecked") List<TagNode> selects = formNode.getElementListByName("select", true); inputs.addAll(selects); for (TagNode inputTagNode : inputs) { String name = inputTagNode.getAttributeByName(Constants.NAME_ATTR); if (name != null) { String type; if (inputTagNode.getName().equals("select")) { type = "select"; } else { type = inputTagNode.getAttributeByName(Constants.TYPE_ATTR); } if (type != null) { if (!(type.equals("radio") && inputPojosMap.containsKey(name))) { // Collect all rf.xxx attributes Map<String, String> rfAttributes = new HashMap<String, String>(); Map<String, String> attributes = inputTagNode.getAttributes(); for (String attName : attributes.keySet()) { if (attName.startsWith("rf.")) { rfAttributes.put(attName, attributes.get(attName)); } } InputPojo inputPojo = new InputPojo(name, type, rfAttributes); inputPojosMap.put(name, inputPojo); inputPojos.add(inputPojo); } // Push values from the dataDocument into the form html. 
String inputValue = lookupValueByFieldName(dataDocument, name, docBase); if (inputValue != null) { if (type.equals("radio")) { String value = inputTagNode.getAttributeByName(Constants.VALUE_ATTR); if (inputValue.equals(value)) { inputTagNode.setAttribute(Constants.CHECKED_ATTR, Constants.CHECKED_ATTR); } } else if (type.equals("checkbox")) { if (inputValue.equals("true")) { inputTagNode.setAttribute(Constants.CHECKED_ATTR, Constants.CHECKED_ATTR); } } else if (type.equals("select")) { Object[] nodes = inputTagNode.evaluateXPath("option[@value=\"" + inputValue + "\"]"); if (nodes.length == 0) { nodes = inputTagNode.evaluateXPath("option[text()=\"" + inputValue + "\"]"); } if (nodes.length > 0) { ((TagNode) nodes[0]).setAttribute(Constants.SELECTED_ATTR, "selected"); } } else { inputTagNode.setAttribute("value", inputValue); } } } else { logger.debug("Input name:{} has no type attribute!", name); } } } formFlow.setCurrentInputPojos(inputPojos); }