/**
 * Converts the downloaded documents held in {@code this.list} into articles of the configured
 * target catalog ({@code this.config.getCatalogID()}).
 *
 * <p>Outline of the visible code:
 * <ol>
 *   <li>Calls {@code prepareList()}; everything after that runs only when
 *       {@code this.config.getType() == 1}.</li>
 *   <li>Queries the id of the catalog with {@code type=4} in the target catalog's site
 *       ({@code imageCatalogID}) and derives {@code imagePath} as
 *       {@code "upload/Image/" + alias + "/"}. If the site id of the target catalog is empty,
 *       a warning is logged and the method returns.</li>
 *   <li>Walks the list from the start. The method returns as soon as
 *       {@code this.task.checkStop()} is true; documents whose level is not
 *       {@code getUrlLevels().length - 1} are skipped. The task percentage is advanced on every
 *       remaining document from the success/failure/lost counters.</li>
 *   <li>For a text document with non-null content, the {@code "Ref1"} template parser is run on
 *       the content text. On a match every captured value is HTML-decoded and trimmed; values
 *       other than {@code Content} are first stripped with {@code tagPattern}.</li>
 *   <li>The publish date starts as {@code doc.getLastmodifiedDate()} and is replaced by the
 *       parsed {@code PublishDate} capture when both the capture and a configured date format
 *       are present (a parse failure is reported through {@code task.addError}). A date in the
 *       future is replaced by the current time.</li>
 *   <li>An empty title or empty content counts the document as lost and jumps to the status
 *       update. Otherwise the content is extended with the {@code Content} capture of every
 *       further match, optionally cleaned with the frame/style/script/link patterns when
 *       {@code isCleanLinkFlag()} is set, passed through every filter block (matches replaced
 *       by the empty string) and finally through {@code dealImage(...)}.</li>
 *   <li>The article receives author, source, refer URL, publish date, catalog id, branch code
 *       {@code "0001"} and {@code Prop2 = "FromWeb"}; the {@code "FromWeb.BeforeSave"} extension
 *       action is executed when one is registered.</li>
 *   <li>If an article with the same {@code ReferURL} and {@code CatalogID} already exists, its
 *       title and content are updated only when its stored publish date is older than
 *       {@code doc.getLastDownloadTime()}, and the document counts as a success either way.
 *       Otherwise the article is inserted through {@code ArticleAPI}; {@code insert() > 0}
 *       counts as success, anything else as failure.</li>
 *   <li>Any exception thrown while building or saving the article counts as a failure and its
 *       stack trace is printed. A document the template does not match is logged, reported via
 *       {@code task.addError} and counted as lost.</li>
 *   <li>The task's current-info text is refreshed with the three counters.</li>
 * </ol>
 *
 * <p>NOTE(review): this text looks like decompiler output and does not compile as-is:
 * {@code break label1209:} is not valid Java, the SQL string literal of the {@code PublishDate}
 * lookup is split across a physical line break, and the braces end two short of closing the
 * {@code getType() == 1} block and the method. TODO: restore from the original source.
 *
 * <p>NOTE(review): the emptiness check on the site id runs after that id has already been used
 * as a query parameter; {@code publishDateFlag} and {@code set} are written but never read in
 * the visible part; {@code publishDate.getTime()} is evaluated outside the try block, so a null
 * from {@code doc.getLastmodifiedDate()} would not be counted as a failure but would propagate
 * - confirm whether that getter can return null.
 */
public void writeArticle() { prepareList(); if (this.config.getType() == 1) { QueryBuilder imageQB = new QueryBuilder("select id from zccatalog where type=4 and siteid=?", CatalogUtil.getSiteID(this.config.getCatalogID())); String imageCatalogID = imageQB.executeString(); if (StringUtil.isEmpty(CatalogUtil.getSiteID(this.config.getCatalogID()))) { LogUtil.getLogger().warn("文档采集的目的栏目不存在:ID=" + this.config.getCatalogID()); return; } String sitePath = SiteUtil.getAbsolutePath(CatalogUtil.getSiteID(this.config.getCatalogID())); String imagePath = "upload/Image/" + CatalogUtil.getAlias(imageCatalogID) + "/"; RegexParser rp = this.config.getTemplate("Ref1"); RegexParser[] filters = this.config.getFilterBlocks(); this.list.moveFirst(); WebDocument doc = null; int cSuccess = 0; int cFailure = 0; int cLost = 0; boolean publishDateFlag = false; ZCArticleSet set = new ZCArticleSet(); while ((doc = this.list.next()) != null) { if (this.task.checkStop()) { return; } if (doc.getLevel() != this.config.getUrlLevels().length - 1) { continue; } int percent = (100 - this.task.getPercent()) * (cSuccess + cFailure + cLost) / this.list.size(); this.task.setPercent(this.task.getPercent() + percent); if ((doc.isTextContent()) && (doc.getContent() != null)) { String text = doc.getContentText(); rp.setText(text); if (rp.match()) { Mapx map = rp.getMapx(); Object[] ks = map.keyArray(); Object[] vs = map.valueArray(); for (int i = 0; i < map.size(); ++i) { String key = ks[i].toString(); String value = vs[i].toString(); if (!key.equalsIgnoreCase("Content")) { value = this.tagPattern.matcher(value).replaceAll(""); } value = StringUtil.htmlDecode(value); value = value.trim(); map.put(key, value); } String title = map.getString("Title"); String content = map.getString("Content"); String author = map.getString("Author"); String source = map.getString("Source"); String strDate = map.getString("PublishDate"); Date publishDate = doc.getLastmodifiedDate(); if ((StringUtil.isNotEmpty(strDate)) && 
(StringUtil.isNotEmpty(this.config.getPublishDateFormat()))) { try { strDate = DateUtil.convertChineseNumber(strDate); publishDate = DateUtil.parse(strDate, this.config.getPublishDateFormat()); } catch (Exception e) { this.task.addError("日期" + strDate + "不符合指定格式" + doc.getUrl()); } publishDateFlag = true; } if (publishDate.getTime() > System.currentTimeMillis()) { publishDate = new Date(); } ArticleAPI api = new ArticleAPI(); try { ZCArticleSchema article = new ZCArticleSchema(); if (StringUtil.isNotEmpty(title)) { article.setTitle(title); } else { ++cLost; break label1209: } if (StringUtil.isNotEmpty(content)) { content = content.trim(); while (rp.match()) { String html = rp.getMapx().getString("Content"); content = content + html; } if (this.config.isCleanLinkFlag()) { content = this.framePattern.matcher(content).replaceAll(""); content = this.stylePattern.matcher(content).replaceAll(""); content = this.scriptPattern.matcher(content).replaceAll(""); content = this.linkPattern.matcher(content).replaceAll("$1"); } if (filters != null) { for (int k = 0; k < filters.length; ++k) { content = filters[k].replace(content, ""); } } String str = dealImage(content, doc.getUrl(), sitePath, imagePath, imageCatalogID); article.setContent(str); } else { ++cLost; break label1209: } if (StringUtil.isNotEmpty(author)) { article.setAuthor(author); } if (StringUtil.isNotEmpty(source)) { article.setReferName(source); } article.setReferURL(doc.getUrl()); article.setPublishDate(publishDate); article.setCatalogID(this.config.getCatalogID()); article.setBranchInnerCode("0001"); article.setProp2("FromWeb"); if (ExtendManager.hasAction("FromWeb.BeforeSave")) { ExtendManager.executeAll("FromWeb.BeforeSave", new Object[] { article }); } Date date = (Date)new QueryBuilder( "select PublishDate from ZCArticle where ReferURL=? 
and CatalogID=?", doc.getUrl(), this.config.getCatalogID()).executeOneValue(); if (date != null) { if (date.getTime() < doc.getLastDownloadTime()) { QueryBuilder qb = new QueryBuilder( "update ZCArticle set Title=?,Content=? where CatalogID=? and ReferURL=?"); qb.add(article.getTitle()); qb.add(article.getContent()); qb.add(this.config.getCatalogID()); qb.add(doc.getUrl()); qb.executeNoQuery(); } ++cSuccess; } else { api.setSchema(article); set.add(article); if (api.insert() > 0L) ++cSuccess; else ++cFailure; } } catch (Exception e) { ++cFailure; e.printStackTrace(); } } else { LogUtil.getLogger().info("未能匹配" + doc.getUrl()); this.task.addError("未能匹配" + doc.getUrl()); ++cLost; } label1209: this.task.setCurrentInfo("正在转换文档, <font class='green'>" + cSuccess + "</font> 个成功, <font class='red'>" + cFailure + "</font> 个失败, <font class='green'>" + cLost + "</font> 个未匹配"); } }
/**
 * Buffers the output of the remaining filter chain, runs server-side-include processing over
 * the buffered bytes and writes the processed result to the real response.
 *
 * <p>Steps:
 * <ol>
 *   <li>Executes the {@code "BeforeSSIFilter"} extension action when one is registered.</li>
 *   <li>Applies {@code Constant.GlobalCharset} to request and response. When the configured
 *       servlet version is 2.3 the charset is set through the content type; otherwise through
 *       {@code setCharacterEncoding}.</li>
 *   <li>Invokes the chain against a {@code ResponseIncludeWrapper} that captures the output in
 *       memory.</li>
 *   <li>Feeds the captured bytes through {@code SSIProcessor}, decoding and re-encoding with the
 *       response's character encoding.</li>
 *   <li>Sets the {@code expires} header (when {@code this.expires} is configured, in seconds
 *       from now), the {@code last-modified} header (when the processor reports a positive
 *       value), the content length and the content type, then writes the body.</li>
 * </ol>
 *
 * @param request  incoming request; cast to {@code HttpServletRequest}
 * @param response outgoing response; cast to {@code HttpServletResponse}
 * @param chain    remaining filter chain whose output is SSI-processed
 * @throws IOException      if reading the buffered output or writing the response fails
 * @throws ServletException if the downstream chain fails
 */
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
        throws IOException, ServletException {
    if (ExtendManager.hasAction("BeforeSSIFilter")) {
        ExtendManager.executeAll("BeforeSSIFilter", new Object[] {request, response, chain});
    }

    HttpServletRequest req = (HttpServletRequest) request;
    HttpServletResponse res = (HttpServletResponse) response;

    if ((Config.ServletMajorVersion == 2) && (Config.ServletMinorVersion == 3)) {
        response.setContentType("text/html;charset=" + Constant.GlobalCharset);
    } else {
        response.setCharacterEncoding(Constant.GlobalCharset);
    }
    request.setCharacterEncoding(Constant.GlobalCharset);
    req.setAttribute("org.apache.catalina.ssi.SSIServlet", "true");

    // Let the rest of the chain render into memory so the output can be post-processed.
    ByteArrayServletOutputStream basos = new ByteArrayServletOutputStream();
    ResponseIncludeWrapper responseIncludeWrapper =
            new ResponseIncludeWrapper(this.config.getServletContext(), req, res, basos);
    chain.doFilter(req, responseIncludeWrapper);
    responseIncludeWrapper.flushOutputStreamOrWriter();
    byte[] bytes = basos.toByteArray();

    // The same encoding is used to decode the captured bytes and to encode the SSI result.
    String encoding = res.getCharacterEncoding();
    SSIExternalResolver ssiExternalResolver = new SSIServletExternalResolver(
            this.config.getServletContext(), req, res, this.isVirtualWebappRelative, this.debug,
            encoding);
    SSIProcessor ssiProcessor = new SSIProcessor(ssiExternalResolver, this.debug);
    Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes), encoding);
    ByteArrayOutputStream ssiout = new ByteArrayOutputStream();
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(ssiout, encoding));
    long lastModified =
            ssiProcessor.process(reader, responseIncludeWrapper.getLastModified(), writer);
    writer.flush();
    bytes = ssiout.toByteArray();

    if (this.expires != null) {
        res.setDateHeader("expires", new Date().getTime() + this.expires.longValue() * 1000L);
    }
    if (lastModified > 0L) {
        res.setDateHeader("last-modified", lastModified);
    }
    res.setContentLength(bytes.length);
    res.setContentType("text/html;charset=" + Constant.GlobalCharset);

    // Prefer the binary stream. Only the "getWriter() was already called" case falls back to
    // the writer; a genuine I/O failure while writing must surface as-is instead of being
    // masked by a second failure from getWriter().
    OutputStream out = null;
    try {
        out = res.getOutputStream();
    } catch (IllegalStateException ignored) {
        // The response is already in writer mode; handled below.
    }
    if (out != null) {
        out.write(bytes);
    } else {
        // Decode with the encoding the bytes were produced with, not the platform default.
        res.getWriter().write(new String(bytes, encoding));
    }
}