public void startElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException { if (this.baseUrl != null) { try { int attrCount = attrs != null ? attrs.getLength() : 0; for (int i = 0; i < attrCount; i++) { String aname = attrs.getQName(i); /** update by jiangjun 2011-5-24 补全file的相对地址 */ if (aname.matches("href|src|action|lsrc|real_src|dynamic-src|file")) { String avalue = attrs.getValue(i); if (null != avalue & !"".equals(avalue) & avalue.indexOf("javascript:") < 0) { attrs.setValue(i, UrlUtil.guessURL(baseUrl, avalue)); } } } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } String depth = String.valueOf(fElementDepth); attrs.addAttribute( new QName(element.localpart, DEPTH_NAME, DEPTH_NAME, element.uri), "String", depth); if (baseUrl != null && baseUrl.toString().indexOf("app.wumii.com") == -1 && STYLE.equalsIgnoreCase(element.rawname)) { if (attrs.getValue("media") != null && !"screen".equalsIgnoreCase(attrs.getValue("media"))) { notCollectCss = 1; } styleBuffer = new StringBuffer(); } else { super.startElement(element, attrs, augs); fElementDepth++; } }
@Override public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException { if ("base".equalsIgnoreCase(element.rawname)) { String hrefValue = attributes.getValue("href"); if (hrefValue != null) { try { this.baseUrl = new URL(hrefValue); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } else if (this.baseUrl != null) { try { int attrCount = attributes != null ? attributes.getLength() : 0; for (int i = 0; i < attrCount; i++) { String aname = attributes.getQName(i); /** update by jiangjun 2011-5-24 补全file的相对地址 */ if (aname.matches("href|src|action|lsrc|real_src|dynamic-src|file")) { String avalue = attributes.getValue(i); if (null != avalue & !"".equals(avalue) & avalue.indexOf("javascript:") < 0) { // 朱磊改,例如:http://www.qiushibaike.net/页面中图片相对地址为 // ../../aaa.jpg 目前已经是根目录了,上一级不存在 if (avalue.indexOf(baseUrl.getHost()) == -1 && avalue.startsWith("..")) { avalue = avalue.substring(avalue.lastIndexOf("../") + 3); } attributes.setValue(i, UrlUtil.guessURL(baseUrl, avalue)); } } } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if ("link".equalsIgnoreCase(element.rawname)) { String typeValue = attributes.getValue("type"); if (typeValue != null) { if (typeValue.indexOf("css") >= 0) { return; } } else { return; } } if (delMark == 1) { delMark = 0; return; } String depth = String.valueOf(fElementDepth); attributes.addAttribute(new QName(null, DEPTH_NAME, DEPTH_NAME, null), "String", depth); super.emptyElement(element, attributes, augs); }