Beispiel #1
0
  /**
   * Builds a fresh element with the same tag name and base URI as {@code sourceEl}, carrying
   * over only the attributes that the whitelist reports as safe.
   *
   * <p>Attributes rejected by {@code whitelist.isSafeAttribute} are counted rather than copied.
   * After the copy pass, the attributes returned by {@code whitelist.getEnforcedAttributes} for
   * this tag are added to the new element's attribute set.
   *
   * @param sourceEl the element to copy from; it is only read, never modified here
   * @return an {@code ElementMeta} pairing the new element with the number of discarded
   *     attributes
   */
  private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    // The element is created around keptAttrs up front; the attribute set is filled in below.
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int droppedCount = 0;
    for (Attribute candidate : sourceEl.attributes()) {
      if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
        droppedCount++;
        continue;
      }
      keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the whitelisted ones have been copied.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, droppedCount);
  }
Beispiel #2
0
  /**
   * Downloads the page at {@code originalUrl}, reduces its body to the first
   * {@code div.content-main} element and writes the resulting document to {@code htmlfile}.
   *
   * <p>Steps performed:
   *
   * <ol>
   *   <li>Fetch the document without following redirects.
   *   <li>Give up if the body has no child elements.
   *   <li>If the head has no {@code <base>} element, insert one whose {@code href} is the head's
   *       base URI.
   *   <li>Locate the first {@code div.content-main}; give up if there is none.
   *   <li>Remove the first {@code h1.title} inside that div, if one exists.
   *   <li>Replace the body's content with that div and save the document, trying at most twice.
   * </ol>
   *
   * @param htmlfile destination file for the rewritten HTML
   * @param originalUrl URL of the page to fetch
   * @param intent not used by this implementation
   * @return the result of {@code StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL)} when the
   *     file was saved, otherwise {@code PROCESS_FAILED_URL}
   */
  @Override
  protected String doProcess(File htmlfile, String originalUrl, Intent intent) {
    try {
      Connection conn = HttpConnection.connect(originalUrl);
      // Redirects are disabled on purpose: we do not want to end up on another page
      // (example: 10.254.7.4).
      conn.followRedirects(false);
      Document doc = conn.get();
      Element head = doc.head();
      Element body = doc.body();
      if (body.children().isEmpty()) {
        Log.e(TAG, "body has no child with url=" + originalUrl);
        return PROCESS_FAILED_URL;
      }

      // Make sure the head carries a <base> element; when it is missing, add one pointing at
      // the head's base URI.
      if (head.select("base").isEmpty()) {
        String baseUri = head.baseUri();
        Attributes baseAttrs = new Attributes();
        baseAttrs.put("href", baseUri);
        ArrayList<Element> baseNodes = new ArrayList<>();
        baseNodes.add(new Element(Tag.valueOf("base"), baseUri, baseAttrs));
        head.insertChildren(0, baseNodes);
      }

      Element content = doc.select("div.content-main").first();
      if (content == null) {
        Log.e(TAG, "not found specific element with url=" + originalUrl);
        return PROCESS_FAILED_URL;
      }

      // first() returns null when nothing matches, so only remove the heading if it exists
      // instead of failing with a NullPointerException.
      Element title = content.select("h1.title").first();
      if (title != null) {
        title.remove();
      }

      // Replace everything in the body with just the content element.
      body.empty();
      ArrayList<Element> bodyNodes = new ArrayList<>();
      bodyNodes.add(content);
      body.insertChildren(0, bodyNodes);

      // Serialize once, then try to save up to two times.
      final int maxSaveAttempts = 2;
      String html = doc.toString();
      for (int attempt = 0; attempt < maxSaveAttempts; attempt++) {
        if (FileUtil.saveStringToFile(html, htmlfile, false)) {
          return StringUtils.file2Url(htmlfile, PROCESS_FAILED_URL);
        }
      }
      Log.e(TAG, "save html to file failed with url=" + originalUrl);
    } catch (IOException e) {
      // MalformedURLException is an IOException, so this single handler covers both cases.
      Log.e(TAG, "failed to fetch or process url=" + originalUrl, e);
    }
    return PROCESS_FAILED_URL;
  }