コード例 #1
0
ファイル: Crawler.java プロジェクト: dalinhuang/xdarkness
   /**
    * Converts the crawled documents held in {@code this.list} into ZCArticle records.
    *
    * The conversion only runs when {@code config.getType() == 1}. For every document whose level
    * equals the last index of {@code config.getUrlLevels()}, the "Ref1" template parser is applied
    * to the page text. The captured Title / Content / Author / Source / PublishDate values are then
    * used either to update an already stored article (same ReferURL and CatalogID) or to insert a
    * new one. Progress and the success / failure / unmatched counters are reported through
    * {@code this.task}.
    *
    * NOTE(review): this listing is truncated -- the enclosing {@code if} block and the method body
    * are not closed in the visible text -- and it contains "break label1209:" statements, which are
    * not valid Java. This looks like decompiler output; confirm against the real source before
    * changing any logic.
    */
   public void writeArticle()
   {
     // Defined elsewhere; presumably fills this.list with the documents to convert -- TODO confirm.
     prepareList();
     if (this.config.getType() == 1) {
       // Look up the id of the type=4 catalog of the target site; it is used below as the image catalog.
       QueryBuilder imageQB = new QueryBuilder("select id from zccatalog where type=4 and siteid=?", 
         CatalogUtil.getSiteID(this.config.getCatalogID()));
       String imageCatalogID = imageQB.executeString();
       // NOTE(review): the site id is only checked for emptiness here, after it has already been
       // used as a query parameter above.
       if (StringUtil.isEmpty(CatalogUtil.getSiteID(this.config.getCatalogID()))) {
         LogUtil.getLogger().warn("文档采集的目的栏目不存在:ID=" + this.config.getCatalogID());
         return;
       }
       String sitePath = SiteUtil.getAbsolutePath(CatalogUtil.getSiteID(this.config.getCatalogID()));
       // Site-relative directory for images, built from the image catalog's alias.
       String imagePath = "upload/Image/" + CatalogUtil.getAlias(imageCatalogID) + "/";
 
       // Parser for the "Ref1" template and the optional content filters, both taken from the config.
       RegexParser rp = this.config.getTemplate("Ref1");
       RegexParser[] filters = this.config.getFilterBlocks();
       this.list.moveFirst();
       WebDocument doc = null;
       // Counters: converted/updated, failed, and skipped (unmatched or missing title/content).
       int cSuccess = 0;
       int cFailure = 0;
       int cLost = 0;
 
       // NOTE(review): publishDateFlag is assigned below but never read in the visible part of the method.
       boolean publishDateFlag = false;
       // Collects the articles that go through the insert path below.
       ZCArticleSet set = new ZCArticleSet();
       while ((doc = this.list.next()) != null) {
         // Abandon the whole conversion as soon as task.checkStop() returns true.
         if (this.task.checkStop()) {
           return;
         }
         // Only documents at the last configured URL level are converted; others are skipped.
         if (doc.getLevel() != this.config.getUrlLevels().length - 1) {
           continue;
         }
         // Advance the task percentage by a share of the remaining percentage that is proportional
         // to the number of documents counted so far.
         int percent = (100 - this.task.getPercent()) * (cSuccess + cFailure + cLost) / this.list.size();
         this.task.setPercent(this.task.getPercent() + percent);
         if ((doc.isTextContent()) && (doc.getContent() != null)) {
           String text = doc.getContentText();
           rp.setText(text);
           if (rp.match()) {
             Mapx map = rp.getMapx();
             Object[] ks = map.keyArray();
             Object[] vs = map.valueArray();
             // Normalize every captured value: strip tags (via tagPattern) from everything except
             // "Content", then HTML-decode and trim, and store the result back under the same key.
             for (int i = 0; i < map.size(); ++i) {
               String key = ks[i].toString();
               String value = vs[i].toString();
               if (!key.equalsIgnoreCase("Content")) {
                 value = this.tagPattern.matcher(value).replaceAll("");
               }
               value = StringUtil.htmlDecode(value);
               value = value.trim();
               map.put(key, value);
             }
             String title = map.getString("Title");
             String content = map.getString("Content");
             String author = map.getString("Author");
             String source = map.getString("Source");
             String strDate = map.getString("PublishDate");
             // Default to the document's last-modified date; replaced below when a PublishDate
             // capture can be parsed with the configured format.
             // NOTE(review): if getLastmodifiedDate() can return null, the getTime() call further
             // down will throw a NullPointerException -- confirm.
             Date publishDate = doc.getLastmodifiedDate();
             if ((StringUtil.isNotEmpty(strDate)) && (StringUtil.isNotEmpty(this.config.getPublishDateFormat())))
             {
               try {
                 strDate = DateUtil.convertChineseNumber(strDate);
                 publishDate = DateUtil.parse(strDate, this.config.getPublishDateFormat());
               } catch (Exception e) {
                 // A date that cannot be parsed is recorded as a task error; publishDate keeps its default.
                 this.task.addError("日期" + strDate + "不符合指定格式" + doc.getUrl());
               }
               publishDateFlag = true;
             }
             // Publish dates in the future are replaced with the current time.
             if (publishDate.getTime() > System.currentTimeMillis()) {
               publishDate = new Date();
             }
             ArticleAPI api = new ArticleAPI();
             try {
               ZCArticleSchema article = new ZCArticleSchema();
               if (StringUtil.isNotEmpty(title)) {
                 article.setTitle(title);
               } else {
                 // A document without a title is counted as lost.
                 // NOTE(review): "break label1209:" is not valid Java (decompiler artifact); the
                 // intent appears to be a jump to the status update at the label -- confirm.
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(content)) {
                 content = content.trim();
                 // While the parser keeps reporting matches, append each match's "Content" capture.
                 while (rp.match()) {
                   String html = rp.getMapx().getString("Content");
                   content = content + html;
                 }
                 // With the clean-link flag set, remove whatever framePattern, stylePattern and
                 // scriptPattern match, and replace each linkPattern match with its first group.
                 if (this.config.isCleanLinkFlag()) {
                   content = this.framePattern.matcher(content).replaceAll("");
                   content = this.stylePattern.matcher(content).replaceAll("");
                   content = this.scriptPattern.matcher(content).replaceAll("");
                   content = this.linkPattern.matcher(content).replaceAll("$1");
                 }
                 // Apply each configured filter block with an empty replacement string.
                 if (filters != null) {
                   for (int k = 0; k < filters.length; ++k) {
                     content = filters[k].replace(content, "");
                   }
                 }
 
                 // dealImage is defined elsewhere; presumably it processes the images referenced
                 // by the content and returns the adjusted HTML -- TODO confirm.
                 String str = dealImage(content, doc.getUrl(), sitePath, imagePath, imageCatalogID);
                 article.setContent(str);
               } else {
                 // A document without content is counted as lost (same malformed break as above).
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(author)) {
                 article.setAuthor(author);
               }
               if (StringUtil.isNotEmpty(source)) {
                 article.setReferName(source);
               }
               article.setReferURL(doc.getUrl());
               article.setPublishDate(publishDate);
               article.setCatalogID(this.config.getCatalogID());
               article.setBranchInnerCode("0001");
               article.setProp2("FromWeb");
 
               // Let registered "FromWeb.BeforeSave" extensions see the article before it is stored.
               if (ExtendManager.hasAction("FromWeb.BeforeSave")) {
                 ExtendManager.executeAll("FromWeb.BeforeSave", new Object[] { article });
               }
 
               // Check whether an article with the same source URL already exists in this catalog.
               Date date = (Date)new QueryBuilder(
                 "select PublishDate from ZCArticle where ReferURL=? and CatalogID=?", doc.getUrl(), 
                 this.config.getCatalogID()).executeOneValue();
               if (date != null) {
                 // Existing article: refresh title and content only when the stored PublishDate is
                 // older than the document's last download time. Counted as success either way.
                 if (date.getTime() < doc.getLastDownloadTime()) {
                   QueryBuilder qb = new QueryBuilder(
                     "update ZCArticle set Title=?,Content=? where CatalogID=? and ReferURL=?");
                   qb.add(article.getTitle());
                   qb.add(article.getContent());
                   qb.add(this.config.getCatalogID());
                   qb.add(doc.getUrl());
                   qb.executeNoQuery();
                 }
                 ++cSuccess;
               } else {
                 // New article: insert through the ArticleAPI and count the outcome.
                 api.setSchema(article);
                 set.add(article);
                 if (api.insert() > 0L)
                   ++cSuccess;
                 else
                   ++cFailure;
               }
             }
             catch (Exception e) {
               // Any exception while building or storing the article counts as a failure.
               ++cFailure;
               e.printStackTrace();
             }
           } else {
             // The template did not match this page: log it, record a task error, count it as lost.
             LogUtil.getLogger().info("未能匹配" + doc.getUrl());
             this.task.addError("未能匹配" + doc.getUrl());
             ++cLost;
           }
           // Publish the running counters as the task's current status text (HTML fragment).
           label1209: this.task.setCurrentInfo("正在转换文档, <font class='green'>" + cSuccess + "</font> 个成功, <font class='red'>" + 
             cFailure + "</font> 个失败, <font class='green'>" + cLost + "</font> 个未匹配");
         }
       }
コード例 #2
0
ファイル: SSIFilter.java プロジェクト: yourfei/myfcms
  /**
   * Runs the rest of the filter chain against a buffering response wrapper, passes the buffered
   * output through the SSI processor, and writes the processed bytes to the real response.
   *
   * <p>Before anything else, registered "BeforeSSIFilter" extensions are invoked with the request,
   * response and chain. Request and response character encodings are forced to
   * {@code Constant.GlobalCharset}.
   *
   * @param request the incoming request; cast to {@code HttpServletRequest}
   * @param response the outgoing response; cast to {@code HttpServletResponse}
   * @param chain the remaining filter chain, invoked with the buffering wrapper
   * @throws IOException if reading the buffered output or writing the processed output fails
   * @throws ServletException if the downstream chain fails
   */
  public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
      throws IOException, ServletException {
    if (ExtendManager.hasAction("BeforeSSIFilter")) {
      ExtendManager.executeAll("BeforeSSIFilter", new Object[] {request, response, chain});
    }

    HttpServletRequest req = (HttpServletRequest) request;
    HttpServletResponse res = (HttpServletResponse) response;

    // ServletResponse.setCharacterEncoding only exists since Servlet 2.4, so a 2.3 container has
    // to receive the charset through the content type instead.
    if ((Config.ServletMajorVersion == 2) && (Config.ServletMinorVersion == 3)) {
      response.setContentType("text/html;charset=" + Constant.GlobalCharset);
    } else {
      response.setCharacterEncoding(Constant.GlobalCharset);
    }
    request.setCharacterEncoding(Constant.GlobalCharset);

    req.setAttribute("org.apache.catalina.ssi.SSIServlet", "true");

    // Capture everything the downstream chain writes so it can be post-processed.
    ByteArrayServletOutputStream basos = new ByteArrayServletOutputStream();
    ResponseIncludeWrapper responseIncludeWrapper =
        new ResponseIncludeWrapper(this.config.getServletContext(), req, res, basos);

    chain.doFilter(req, responseIncludeWrapper);

    responseIncludeWrapper.flushOutputStreamOrWriter();
    byte[] bytes = basos.toByteArray();

    // The same encoding is used to decode the captured bytes, to encode the processed output,
    // and (in the writer fallback below) to hand the output back to the container.
    String encoding = res.getCharacterEncoding();

    SSIExternalResolver ssiExternalResolver =
        new SSIServletExternalResolver(
            this.config.getServletContext(),
            req,
            res,
            this.isVirtualWebappRelative,
            this.debug,
            encoding);
    SSIProcessor ssiProcessor = new SSIProcessor(ssiExternalResolver, this.debug);

    Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes), encoding);
    ByteArrayOutputStream ssiout = new ByteArrayOutputStream();
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(ssiout, encoding));

    long lastModified =
        ssiProcessor.process(reader, responseIncludeWrapper.getLastModified(), writer);

    writer.flush();
    bytes = ssiout.toByteArray();

    if (this.expires != null) {
      res.setDateHeader("expires", new Date().getTime() + this.expires.longValue() * 1000L);
    }
    if (lastModified > 0L) {
      res.setDateHeader("last-modified", lastModified);
    }
    res.setContentLength(bytes.length);

    res.setContentType("text/html;charset=" + Constant.GlobalCharset);

    // getOutputStream() throws IllegalStateException when getWriter() has already been called on
    // this response; only that case falls back to the writer. Real I/O failures while writing
    // now propagate to the caller instead of being swallowed by a catch-all.
    OutputStream out = null;
    try {
      out = res.getOutputStream();
    } catch (IllegalStateException ignored) {
      // The response is already in writer mode; handled by the fallback below.
    }
    if (out != null) {
      out.write(bytes);
    } else {
      // Decode with the encoding the bytes were produced in (not the platform default), so the
      // text is not corrupted and the Content-Length set above stays consistent.
      res.getWriter().write(new String(bytes, encoding));
    }
  }