示例#1
0
 /**
  * Loads the extend-action registry from {@code WEB-INF/classes/framework.xml}.
  *
  * <p>Does nothing if {@code map} has already been initialised. Otherwise an
  * empty {@code Mapx} is created first, so a missing or unparsable file leaves
  * an empty registry behind and the file is not re-read on later calls.
  *
  * <p>Every {@code <action class="...">} element under the root's
  * {@code <extend>} element is instantiated through its no-argument
  * constructor and appended to the list stored under the key returned by
  * {@code action.getTarget()}. Entries that cannot be loaded, or that do not
  * implement {@code IExtendAction}, are reported and skipped so that one bad
  * entry does not prevent the remaining actions from being registered.
  *
  * <p>NOTE(review): no synchronisation is visible here; confirm that callers
  * invoke this from a single thread or guard it externally.
  */
 public static void loadConfig() {
   if (map == null) {
     // Assign the registry before any early exit so later calls short-circuit.
     map = new Mapx();
     String path = Config.getContextRealPath() + "WEB-INF/classes/framework.xml";
     if (!(new File(path).exists())) {
       return;
     }
     // NOTE(review): 'false' only turns off validation; confirm whether
     // DTD / external-entity processing should also be disabled for this file.
     SAXReader reader = new SAXReader(false);
     try {
       Document doc = reader.read(new File(path));
       Element root = doc.getRootElement();
       Element extend = root.element("extend");
       if (extend != null) {
         List types = extend.elements("action");
         for (int i = 0; i < types.size(); ++i) {
           Element type = (Element) types.get(i);
           String className = type.attributeValue("class");
           if (className == null || className.length() == 0) {
             // Class.forName(null) would throw an uncaught NullPointerException
             // and abort the whole load; skip the malformed entry instead.
             LogUtil.getLogger().warn("framework.xml: <action> element is missing the class attribute");
             continue;
           }
           try {
             Object obj = Class.forName(className).newInstance();
             if (!(obj instanceof IExtendAction)) {
               LogUtil.getLogger().warn("类" + className + "必须继承IExtendAction!");
               // Skip this entry: falling through to the cast below would throw
               // ClassCastException and stop all remaining actions from loading.
               continue;
             }
             IExtendAction action = (IExtendAction) obj;
             ArrayList list = (ArrayList) map.get(action.getTarget());
             if (list == null) {
               list = new ArrayList();
             }
             list.add(action);
             map.put(action.getTarget(), list);
           } catch (InstantiationException e) {
             e.printStackTrace();
           } catch (IllegalAccessException e) {
             e.printStackTrace();
           } catch (ClassNotFoundException e) {
             e.printStackTrace();
           }
         }
       }
     } catch (DocumentException e) {
       e.printStackTrace();
     }
   }
 }
示例#2
0
   public void writeArticle()
   {
     prepareList();
     if (this.config.getType() == 1) {
       QueryBuilder imageQB = new QueryBuilder("select id from zccatalog where type=4 and siteid=?", 
         CatalogUtil.getSiteID(this.config.getCatalogID()));
       String imageCatalogID = imageQB.executeString();
       if (StringUtil.isEmpty(CatalogUtil.getSiteID(this.config.getCatalogID()))) {
         LogUtil.getLogger().warn("文档采集的目的栏目不存在:ID=" + this.config.getCatalogID());
         return;
       }
       String sitePath = SiteUtil.getAbsolutePath(CatalogUtil.getSiteID(this.config.getCatalogID()));
       String imagePath = "upload/Image/" + CatalogUtil.getAlias(imageCatalogID) + "/";
 
       RegexParser rp = this.config.getTemplate("Ref1");
       RegexParser[] filters = this.config.getFilterBlocks();
       this.list.moveFirst();
       WebDocument doc = null;
       int cSuccess = 0;
       int cFailure = 0;
       int cLost = 0;
 
       boolean publishDateFlag = false;
       ZCArticleSet set = new ZCArticleSet();
       while ((doc = this.list.next()) != null) {
         if (this.task.checkStop()) {
           return;
         }
         if (doc.getLevel() != this.config.getUrlLevels().length - 1) {
           continue;
         }
         int percent = (100 - this.task.getPercent()) * (cSuccess + cFailure + cLost) / this.list.size();
         this.task.setPercent(this.task.getPercent() + percent);
         if ((doc.isTextContent()) && (doc.getContent() != null)) {
           String text = doc.getContentText();
           rp.setText(text);
           if (rp.match()) {
             Mapx map = rp.getMapx();
             Object[] ks = map.keyArray();
             Object[] vs = map.valueArray();
             for (int i = 0; i < map.size(); ++i) {
               String key = ks[i].toString();
               String value = vs[i].toString();
               if (!key.equalsIgnoreCase("Content")) {
                 value = this.tagPattern.matcher(value).replaceAll("");
               }
               value = StringUtil.htmlDecode(value);
               value = value.trim();
               map.put(key, value);
             }
             String title = map.getString("Title");
             String content = map.getString("Content");
             String author = map.getString("Author");
             String source = map.getString("Source");
             String strDate = map.getString("PublishDate");
             Date publishDate = doc.getLastmodifiedDate();
             if ((StringUtil.isNotEmpty(strDate)) && (StringUtil.isNotEmpty(this.config.getPublishDateFormat())))
             {
               try {
                 strDate = DateUtil.convertChineseNumber(strDate);
                 publishDate = DateUtil.parse(strDate, this.config.getPublishDateFormat());
               } catch (Exception e) {
                 this.task.addError("日期" + strDate + "不符合指定格式" + doc.getUrl());
               }
               publishDateFlag = true;
             }
             if (publishDate.getTime() > System.currentTimeMillis()) {
               publishDate = new Date();
             }
             ArticleAPI api = new ArticleAPI();
             try {
               ZCArticleSchema article = new ZCArticleSchema();
               if (StringUtil.isNotEmpty(title)) {
                 article.setTitle(title);
               } else {
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(content)) {
                 content = content.trim();
                 while (rp.match()) {
                   String html = rp.getMapx().getString("Content");
                   content = content + html;
                 }
                 if (this.config.isCleanLinkFlag()) {
                   content = this.framePattern.matcher(content).replaceAll("");
                   content = this.stylePattern.matcher(content).replaceAll("");
                   content = this.scriptPattern.matcher(content).replaceAll("");
                   content = this.linkPattern.matcher(content).replaceAll("$1");
                 }
                 if (filters != null) {
                   for (int k = 0; k < filters.length; ++k) {
                     content = filters[k].replace(content, "");
                   }
                 }
 
                 String str = dealImage(content, doc.getUrl(), sitePath, imagePath, imageCatalogID);
                 article.setContent(str);
               } else {
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(author)) {
                 article.setAuthor(author);
               }
               if (StringUtil.isNotEmpty(source)) {
                 article.setReferName(source);
               }
               article.setReferURL(doc.getUrl());
               article.setPublishDate(publishDate);
               article.setCatalogID(this.config.getCatalogID());
               article.setBranchInnerCode("0001");
               article.setProp2("FromWeb");
 
               if (ExtendManager.hasAction("FromWeb.BeforeSave")) {
                 ExtendManager.executeAll("FromWeb.BeforeSave", new Object[] { article });
               }
 
               Date date = (Date)new QueryBuilder(
                 "select PublishDate from ZCArticle where ReferURL=? and CatalogID=?", doc.getUrl(), 
                 this.config.getCatalogID()).executeOneValue();
               if (date != null) {
                 if (date.getTime() < doc.getLastDownloadTime()) {
                   QueryBuilder qb = new QueryBuilder(
                     "update ZCArticle set Title=?,Content=? where CatalogID=? and ReferURL=?");
                   qb.add(article.getTitle());
                   qb.add(article.getContent());
                   qb.add(this.config.getCatalogID());
                   qb.add(doc.getUrl());
                   qb.executeNoQuery();
                 }
                 ++cSuccess;
               } else {
                 api.setSchema(article);
                 set.add(article);
                 if (api.insert() > 0L)
                   ++cSuccess;
                 else
                   ++cFailure;
               }
             }
             catch (Exception e) {
               ++cFailure;
               e.printStackTrace();
             }
           } else {
             LogUtil.getLogger().info("未能匹配" + doc.getUrl());
             this.task.addError("未能匹配" + doc.getUrl());
             ++cLost;
           }
           label1209: this.task.setCurrentInfo("正在转换文档, <font class='green'>" + cSuccess + "</font> 个成功, <font class='red'>" + 
             cFailure + "</font> 个失败, <font class='green'>" + cLost + "</font> 个未匹配");
         }
       }