/**
 * Replaces every include element below {@code html} with the body content of the
 * resource named by its {@code src} attribute.
 *
 * <p>Included documents are expanded recursively (nested includes are resolved before
 * their content is spliced into the parent), with {@code depth} tracking the nesting level.
 *
 * @param html     node whose descendant include elements are expanded in place
 * @param depth    current nesting level; must be below {@code processIncludesMaxDepth}
 * @param formFlow used to resolve relative {@code src} paths
 * @throws IOException         if the included resource cannot be read or closed
 * @throws FormParserException if an include target does not exist or the maximum
 *                             nesting depth is reached
 */
private void doProcessIncludes(TagNode html, int depth, FormFlow formFlow) throws IOException, FormParserException {
    if (depth >= processIncludesMaxDepth) {
        throw new FormParserException(
                "Exceeded maximum nested " + Constants.INCLUDE_ELEMENT + " depth of " + processIncludesMaxDepth);
    }

    @SuppressWarnings("unchecked")
    List<TagNode> includeNodes = html.getElementListByName(Constants.INCLUDE_ELEMENT, true);
    for (TagNode includeNode : includeNodes) {
        String srcAttribute = includeNode.getAttributeByName("src");
        srcAttribute = formFlow.resolveResourcePathIfRelative(srcAttribute);

        InputStream resourceAsStream = resourceLoader.getFormResourceAsStream(srcAttribute);
        if (resourceAsStream == null) {
            throw new FormParserException("Include file not found. Path:'" + srcAttribute + "'");
        }

        // Parse the included document, making sure the stream is released even when
        // parsing fails (previously the stream was never closed).
        TagNode includeHtml;
        try {
            includeHtml = htmlCleaner.clean(resourceAsStream);
        } finally {
            resourceAsStream.close();
        }

        TagNode body = includeHtml.findElementByName("body", false);
        // Resolve includes nested inside the included document first.
        doProcessIncludes(body, depth + 1, formFlow);

        @SuppressWarnings("unchecked")
        List<HtmlNode> bodyChildren = body.getChildren();
        // Each child is inserted directly after the include node, so inserting them in
        // reverse order leaves them in their original document order.
        Collections.reverse(bodyChildren);
        TagNode includeParent = includeNode.getParent();
        for (HtmlNode bodyChild : bodyChildren) {
            includeParent.insertChildAfter(includeNode, bodyChild);
        }
        includeParent.removeChild(includeNode);
    }
}
/**
 * Extracts the student's personal data from the given HTML page and stores it
 * through {@code dbAdapter}.
 *
 * <p>The data is read from the table whose {@code id} matches the configured table id.
 * Inside that table, each {@code span} carrying the configured class is treated as a
 * field label: it is detached from its parent, and the parent's remaining text is
 * taken as the field value.
 *
 * @param html raw HTML of the page to parse
 * @return {@code false} if the data table is not present, {@code true} once the user
 *         has been stored
 */
private boolean recuperaDadosPessoais(String html) {
    HtmlCleaner htmlCleaner = new HtmlCleaner();
    TagNode document = htmlCleaner.clean(html);

    // Locate the table holding the student data by its id attribute.
    String expectedTableId = context.getString(R.string.janusmob_table_dados_aluno_id);
    TagNode dataTable = null;
    for (TagNode candidate : document.getElementsByName("table", true)) {
        if (expectedTableId.equals(candidate.getAttributeByName("id"))) {
            dataTable = candidate;
            break;
        }
    }
    if (dataTable == null) {
        return false;
    }

    String labelClass = context.getString(R.string.janusmob_attribute_class_name);
    String nuspLabel = context.getString(R.string.janusmob_campo_nusp);
    String nomeLabel = context.getString(R.string.janusmob_campo_nome);
    String emailLabel = context.getString(R.string.janusmob_campo_email);
    String cpfLabel = context.getString(R.string.janusmob_campo_cpf);

    Usuario usuario = new Usuario();
    for (TagNode span : dataTable.getElementsByName("span", true)) {
        if (!labelClass.equals(span.getAttributeByName("class"))) {
            continue;
        }

        // Detach the label so that the parent's text contains only the value.
        TagNode holder = span.getParent();
        holder.removeChild(span);

        String label = span.getText().toString().trim();
        if (label.equals(nuspLabel)) {
            usuario.setNusp(holder.getText().toString().trim());
        } else if (label.equals(nomeLabel)) {
            usuario.setNome(holder.getText().toString().trim());
        } else if (label.equals(emailLabel)) {
            usuario.setEmail(holder.getText().toString().trim());
        } else if (label.equals(cpfLabel)) {
            usuario.setCpf(holder.getText().toString().trim());
        }
    }

    // The password is only kept when the user asked for it to be remembered.
    usuario.setSenha(lembrarSenha ? senha : null);
    dbAdapter.setUsuario(usuario);
    return true;
}
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); props.setAdvancedXmlEscape(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } Document myJDom = new JDomSerializer(props, true).createJDom(node); // Format format = Format.getRawFormat(); Format format = new OutputFormat(); format.setEncoding("iso-8859-1"); XMLWriter outputter = new XMLWriter(format); OutputStream os = new FileOutputStream(f); // outputter.output(myJDom,os); output.setOutputStream(os); output.write(myJDom); // sbResult.append(outputter.outputString(myJDom)); results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }
@SuppressWarnings("unchecked") protected void handleFile(File file, int depth, Collection results) { File f = new File(FilenameUtils.normalize(file.getAbsolutePath())); logger.debug(f.getAbsoluteFile()); try { HtmlCleaner cleaner = new HtmlCleaner(); cleaner.setTransformations(ct); CleanerProperties props = cleaner.getProperties(); // props.setAdvancedXmlEscape(false); props.setUseEmptyElementTags(false); // props.setTranslateSpecialEntities(false); // props.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(f); TagNode tnBody = node.getAllElements(false)[1]; List l = tnBody.getChildren(); if (l != null && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body tnBody.removeChild(l.get(0)); } for (int i = 1; i <= anzElements; i++) { String tag = config.getString("substitute[" + i + "]/@tag"); String att = config.getString("substitute[" + i + "]/@att"); String from = config.getString("substitute[" + i + "]/from"); String to = config.getString("substitute[" + i + "]/to"); to = subSpecial(to); TagNode[] imgs = node.getElementsByName(tag, true); for (TagNode tn : imgs) { String srcAtt = tn.getAttributeByName(att); int index = srcAtt.indexOf(from); if (index >= 0) { tn.addAttribute(att, to); } } } BrowserCompactXmlSerializer serializer = new BrowserCompactXmlSerializer(props); // PrettyXmlSerializer serializer = new PrettyXmlSerializer(props); String s = serializer.getXmlAsString(node, "ISO-8859-1"); Writer fw = null; try { fw = new FileWriter(f); fw.write(s); } catch (IOException e) { logger.error("", e); } finally { if (fw != null) try { fw.close(); } catch (IOException e) { } } results.add(f.getAbsoluteFile()); } catch (IOException e) { logger.error("", e); } }
/**
 * Detaches this node from its parent.
 *
 * @return the result of the parent's {@code removeChild} call, or {@code false} when
 *         this node has no parent (i.e. it is the root) and nothing is removed
 */
public boolean removeFromTree() {
    if (parent == null) {
        return false;
    }
    return parent.removeChild(this);
}
public static Boolean updateArtists(StaticDataStore db) { ArrayList<ArrayList<String>> artists = new ArrayList<ArrayList<String>>(); int numArtists; HtmlCleaner pageParser = new HtmlCleaner(); CleanerProperties props = pageParser.getProperties(); props.setAllowHtmlInsideAttributes(true); props.setAllowMultiWordAttributes(true); props.setRecognizeUnicodeChars(true); props.setOmitComments(true); try { String url = "http://www.archive.org/browse.php?field=/metadata/bandWithMP3s&collection=etree"; HttpParams params = new BasicHttpParams(); int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS); HttpConnectionParams.setConnectionTimeout(params, timeout); HttpConnectionParams.setSoTimeout(params, timeout); HttpClient client = new DefaultHttpClient(params); HttpGet request = new HttpGet(url); HttpResponse response = client.execute(request); StatusLine status = response.getStatusLine(); if (status.getStatusCode() == HttpStatus.SC_OK) { ResponseHandler<String> responseHandler = new BasicResponseHandler(); TagNode node = pageParser.clean(responseHandler.handleResponse(response)); client.getConnectionManager().shutdown(); // XPATH to get the nodes that we Want. Object[] artistsNodes = node.evaluateXPath("//tr[@valign='top']//li"); numArtists = artistsNodes.length; for (int i = 0; i < numArtists; i++) { // Cast the artistNode as a TagNode. TagNode artist = ((TagNode) artistsNodes[i]); // Grab the first child node, which is the link to the artist's page. // The inner HTML of this node will be the title. TagNode artistTitleSubNode = artist.getChildTags()[0]; // Remove the child node, so that the inner HTML of the artistNode // only contains the number of shows that the artist has. 
artist.removeChild(artistTitleSubNode); String artistTitle = pageParser.getInnerHtml(artistTitleSubNode); if (artistTitle != null) { ArrayList<String> artistPair = new ArrayList<String>(); artistPair.add( artistTitle .replace("'", "'") .replace(">", ">") .replace("<", "<") .replace(""", "\"") .replace("&", "&")); artistPair.add(pageParser.getInnerHtml(artist).trim()); /* * VibeVault.db.addArtist(artistTitle, pageParser * .getInnerHtml(artist).trim()); */ artists.add(artistPair); } } if (artists.size() > 0) { db.insertArtistBulk(artists); String s = DateFormat.format("yyyy-MM-dd", new GregorianCalendar().getTime()).toString(); db.updatePref("artistUpdate", s); } else { } } else { client.getConnectionManager().shutdown(); } } catch (Exception e) { e.printStackTrace(); } return true; }