Esempio n. 1
0
 if (!publishDateFlag) {
   String[] lastURLs = this.config.getUrlLevels()[(this.config.getUrlLevels().length - 1)].split("\\\n", -1);
   if (lastURLs.length != 1) {
     return;
   }
   RegexParser rpUrl = new RegexParser(lastURLs[0]);
   boolean numberFlag = true;
   for (int i = 0; i < set.size(); ++i) {
     String url = set.get(i).getReferURL();
     rpUrl.setText(url);
     if (rpUrl.match()) {
       String v = rpUrl.getMapx().getString("SortID");
       set.get(i).setProp2(v);
       if (!NumberUtil.isLong(v)) {
         numberFlag = false;
       }
     }
   }
   set.sort("Prop2", "asc", numberFlag);
   for (int i = set.size() - 1; i >= 0; --i) {
     set.get(i).setOrderFlag(OrderUtil.getDefaultOrder());
     set.get(i).setProp2(null);
   }
   set.update();
 }
Esempio n. 2
0
   public void writeArticle()
   {
     prepareList();
     if (this.config.getType() == 1) {
       QueryBuilder imageQB = new QueryBuilder("select id from zccatalog where type=4 and siteid=?", 
         CatalogUtil.getSiteID(this.config.getCatalogID()));
       String imageCatalogID = imageQB.executeString();
       if (StringUtil.isEmpty(CatalogUtil.getSiteID(this.config.getCatalogID()))) {
         LogUtil.getLogger().warn("文档采集的目的栏目不存在:ID=" + this.config.getCatalogID());
         return;
       }
       String sitePath = SiteUtil.getAbsolutePath(CatalogUtil.getSiteID(this.config.getCatalogID()));
       String imagePath = "upload/Image/" + CatalogUtil.getAlias(imageCatalogID) + "/";
 
       RegexParser rp = this.config.getTemplate("Ref1");
       RegexParser[] filters = this.config.getFilterBlocks();
       this.list.moveFirst();
       WebDocument doc = null;
       int cSuccess = 0;
       int cFailure = 0;
       int cLost = 0;
 
       boolean publishDateFlag = false;
       ZCArticleSet set = new ZCArticleSet();
       while ((doc = this.list.next()) != null) {
         if (this.task.checkStop()) {
           return;
         }
         if (doc.getLevel() != this.config.getUrlLevels().length - 1) {
           continue;
         }
         int percent = (100 - this.task.getPercent()) * (cSuccess + cFailure + cLost) / this.list.size();
         this.task.setPercent(this.task.getPercent() + percent);
         if ((doc.isTextContent()) && (doc.getContent() != null)) {
           String text = doc.getContentText();
           rp.setText(text);
           if (rp.match()) {
             Mapx map = rp.getMapx();
             Object[] ks = map.keyArray();
             Object[] vs = map.valueArray();
             for (int i = 0; i < map.size(); ++i) {
               String key = ks[i].toString();
               String value = vs[i].toString();
               if (!key.equalsIgnoreCase("Content")) {
                 value = this.tagPattern.matcher(value).replaceAll("");
               }
               value = StringUtil.htmlDecode(value);
               value = value.trim();
               map.put(key, value);
             }
             String title = map.getString("Title");
             String content = map.getString("Content");
             String author = map.getString("Author");
             String source = map.getString("Source");
             String strDate = map.getString("PublishDate");
             Date publishDate = doc.getLastmodifiedDate();
             if ((StringUtil.isNotEmpty(strDate)) && (StringUtil.isNotEmpty(this.config.getPublishDateFormat())))
             {
               try {
                 strDate = DateUtil.convertChineseNumber(strDate);
                 publishDate = DateUtil.parse(strDate, this.config.getPublishDateFormat());
               } catch (Exception e) {
                 this.task.addError("日期" + strDate + "不符合指定格式" + doc.getUrl());
               }
               publishDateFlag = true;
             }
             if (publishDate.getTime() > System.currentTimeMillis()) {
               publishDate = new Date();
             }
             ArticleAPI api = new ArticleAPI();
             try {
               ZCArticleSchema article = new ZCArticleSchema();
               if (StringUtil.isNotEmpty(title)) {
                 article.setTitle(title);
               } else {
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(content)) {
                 content = content.trim();
                 while (rp.match()) {
                   String html = rp.getMapx().getString("Content");
                   content = content + html;
                 }
                 if (this.config.isCleanLinkFlag()) {
                   content = this.framePattern.matcher(content).replaceAll("");
                   content = this.stylePattern.matcher(content).replaceAll("");
                   content = this.scriptPattern.matcher(content).replaceAll("");
                   content = this.linkPattern.matcher(content).replaceAll("$1");
                 }
                 if (filters != null) {
                   for (int k = 0; k < filters.length; ++k) {
                     content = filters[k].replace(content, "");
                   }
                 }
 
                 String str = dealImage(content, doc.getUrl(), sitePath, imagePath, imageCatalogID);
                 article.setContent(str);
               } else {
                 ++cLost;
                 break label1209:
               }
               if (StringUtil.isNotEmpty(author)) {
                 article.setAuthor(author);
               }
               if (StringUtil.isNotEmpty(source)) {
                 article.setReferName(source);
               }
               article.setReferURL(doc.getUrl());
               article.setPublishDate(publishDate);
               article.setCatalogID(this.config.getCatalogID());
               article.setBranchInnerCode("0001");
               article.setProp2("FromWeb");
 
               if (ExtendManager.hasAction("FromWeb.BeforeSave")) {
                 ExtendManager.executeAll("FromWeb.BeforeSave", new Object[] { article });
               }
 
               Date date = (Date)new QueryBuilder(
                 "select PublishDate from ZCArticle where ReferURL=? and CatalogID=?", doc.getUrl(), 
                 this.config.getCatalogID()).executeOneValue();
               if (date != null) {
                 if (date.getTime() < doc.getLastDownloadTime()) {
                   QueryBuilder qb = new QueryBuilder(
                     "update ZCArticle set Title=?,Content=? where CatalogID=? and ReferURL=?");
                   qb.add(article.getTitle());
                   qb.add(article.getContent());
                   qb.add(this.config.getCatalogID());
                   qb.add(doc.getUrl());
                   qb.executeNoQuery();
                 }
                 ++cSuccess;
               } else {
                 api.setSchema(article);
                 set.add(article);
                 if (api.insert() > 0L)
                   ++cSuccess;
                 else
                   ++cFailure;
               }
             }
             catch (Exception e) {
               ++cFailure;
               e.printStackTrace();
             }
           } else {
             LogUtil.getLogger().info("未能匹配" + doc.getUrl());
             this.task.addError("未能匹配" + doc.getUrl());
             ++cLost;
           }
           label1209: this.task.setCurrentInfo("正在转换文档, <font class='green'>" + cSuccess + "</font> 个成功, <font class='red'>" + 
             cFailure + "</font> 个失败, <font class='green'>" + cLost + "</font> 个未匹配");
         }
       }