/**
 * Builds a sanitized counterpart of {@code sourceEl}.
 *
 * <p>The new element gets the same tag name and base URI as the source. Each source
 * attribute is copied only if the whitelist reports it as safe for this tag; every
 * rejected attribute is counted. Afterwards the attributes the whitelist enforces for
 * the tag are added. This method does not copy any child nodes.
 *
 * @param sourceEl the element to derive the safe element from
 * @return the new element together with the number of attributes that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    // The element is created first and keptAttrs is filled afterwards, as in the
    // original flow; presumably Element keeps a reference to this Attributes instance.
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added last, after the filtered copies.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, dropped);
}
/**
 * Downloads the page at {@code originalUrl}, reduces its body to a single content
 * element and saves the resulting document to {@code htmlfile}.
 *
 * <p>Steps, in the order the statements appear:
 * <ol>
 *   <li>Open a connection with {@code HttpConnection.connect(originalUrl)}, call
 *       {@code followRedirects(false)} on it and fetch the document with {@code get()}.</li>
 *   <li>If the body has no child elements, log an error and return
 *       {@code PROCESS_FAILED_URL}.</li>
 *   <li>If the {@code base} lookup is empty, insert a new {@code base} element as the
 *       first child of the head, with {@code href} set to {@code head.baseUri()}.</li>
 *   <li>Take the first match for {@code div.content-main}; if there is none, log an
 *       error and return {@code PROCESS_FAILED_URL}.</li>
 *   <li>Take the first match for {@code h1.title} and remove it.</li>
 *   <li>Empty the body and insert the selected div as its only child.</li>
 *   <li>Write {@code doc.toString()} with {@code FileUtil.saveStringToFile}, trying at
 *       most twice. On success return
 *       {@code StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL)} (the second argument
 *       is presumably a fallback value; confirm against StringUtils). If both attempts
 *       fail, log an error.</li>
 * </ol>
 *
 * <p>{@code MalformedURLException} and {@code IOException} are caught; only the stack
 * trace is printed and the method falls through to return {@code PROCESS_FAILED_URL}.
 * The block comment in the body holds disabled code that read a charset from a
 * {@code meta} element; it has no effect.
 *
 * <p>NOTE(review): in this copy the three selector statements are incomplete
 * ({@code ="base")}, {@code ="div.content-main").first()}, {@code ="h1.title").first()}).
 * They look like {@code <receiver>.select(...)} calls whose receiver and method name
 * were lost. Restore them from version control; do not guess the receivers.
 *
 * <p>NOTE(review): the whole method body is on one physical line here, so the first
 * line comment lexically swallows every statement after it. The original line breaks
 * must be restored before this compiles as intended. The redirect comment also ends in
 * "example:" with nothing after it, so its example appears to be missing.
 *
 * <p>NOTE(review): the {@code h1.title} result is used without a null check, unlike the
 * {@code div.content-main} result. A page without such a heading would raise a
 * {@code NullPointerException} that neither catch block handles.
 *
 * <p>NOTE(review): the catch blocks still carry auto-generated TODO markers and use
 * {@code printStackTrace()}, while the other failure paths log with
 * {@code Log.e(TAG, ...)}.
 *
 * @param htmlfile    destination file for the processed HTML
 * @param originalUrl URL of the page to download; also included in the error log messages
 * @param intent      not referenced anywhere in this method body
 * @return the value of {@code StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL)} when
 *         the file was saved, otherwise {@code PROCESS_FAILED_URL}
 */
@Override protected String doProcess(File htmlfile, String originalUrl, Intent intent) { try { // String charset = "utf-8"; Connection coon = HttpConnection.connect(originalUrl); coon.followRedirects( false); // we don't want it be redirected to other page,example: Document doc = coon.get(); Element head = doc.head(); Element body = doc.body(); if (body.children().size() == 0) { Log.e(TAG, "body has no child with url=" + originalUrl); return PROCESS_FAILED_URL; } /* Elements meta ="meta"); if(!meta.isEmpty()){ Element m = meta.get(0); String content = m.attr("content"); String attr = content.substring(content.indexOf("charset=")+8); if(!attr.trim().isEmpty()){ charset = attr; } } */ Elements base ="base"); if (base.isEmpty()) { String b = head.baseUri(); Attributes attrs = new Attributes(); attrs.put("href", b); ArrayList<Element> a = new ArrayList<>(); a.add(new Element(Tag.valueOf("base"), b, attrs)); head.insertChildren(0, a); } Element div ="div.content-main").first(); if (div == null) { Log.e(TAG, "not found specific element with url=" + originalUrl); return PROCESS_FAILED_URL; } Element title ="h1.title").first(); title.remove(); body.empty(); ArrayList<Element> a = new ArrayList<>(); a.add(div); body.insertChildren(0, a); int g = 0; while (g < 2) { // try two times. if (FileUtil.saveStringToFile(doc.toString(), htmlfile, false)) { break; } g++; } if (g < 2) return StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL); Log.e(TAG, "save html to file failed with url=" + originalUrl); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return PROCESS_FAILED_URL; }