if (!publishDateFlag) { String[] lastURLs = this.config.getUrlLevels()[(this.config.getUrlLevels().length - 1)].split("\\\n", -1); if (lastURLs.length != 1) { return; } RegexParser rpUrl = new RegexParser(lastURLs[0]); boolean numberFlag = true; for (int i = 0; i < set.size(); ++i) { String url = set.get(i).getReferURL(); rpUrl.setText(url); if (rpUrl.match()) { String v = rpUrl.getMapx().getString("SortID"); set.get(i).setProp2(v); if (!NumberUtil.isLong(v)) { numberFlag = false; } } } set.sort("Prop2", "asc", numberFlag); for (int i = set.size() - 1; i >= 0; --i) { set.get(i).setOrderFlag(OrderUtil.getDefaultOrder()); set.get(i).setProp2(null); } set.update(); }
public void writeArticle() { prepareList(); if (this.config.getType() == 1) { QueryBuilder imageQB = new QueryBuilder("select id from zccatalog where type=4 and siteid=?", CatalogUtil.getSiteID(this.config.getCatalogID())); String imageCatalogID = imageQB.executeString(); if (StringUtil.isEmpty(CatalogUtil.getSiteID(this.config.getCatalogID()))) { LogUtil.getLogger().warn("文档采集的目的栏目不存在:ID=" + this.config.getCatalogID()); return; } String sitePath = SiteUtil.getAbsolutePath(CatalogUtil.getSiteID(this.config.getCatalogID())); String imagePath = "upload/Image/" + CatalogUtil.getAlias(imageCatalogID) + "/"; RegexParser rp = this.config.getTemplate("Ref1"); RegexParser[] filters = this.config.getFilterBlocks(); this.list.moveFirst(); WebDocument doc = null; int cSuccess = 0; int cFailure = 0; int cLost = 0; boolean publishDateFlag = false; ZCArticleSet set = new ZCArticleSet(); while ((doc = this.list.next()) != null) { if (this.task.checkStop()) { return; } if (doc.getLevel() != this.config.getUrlLevels().length - 1) { continue; } int percent = (100 - this.task.getPercent()) * (cSuccess + cFailure + cLost) / this.list.size(); this.task.setPercent(this.task.getPercent() + percent); if ((doc.isTextContent()) && (doc.getContent() != null)) { String text = doc.getContentText(); rp.setText(text); if (rp.match()) { Mapx map = rp.getMapx(); Object[] ks = map.keyArray(); Object[] vs = map.valueArray(); for (int i = 0; i < map.size(); ++i) { String key = ks[i].toString(); String value = vs[i].toString(); if (!key.equalsIgnoreCase("Content")) { value = this.tagPattern.matcher(value).replaceAll(""); } value = StringUtil.htmlDecode(value); value = value.trim(); map.put(key, value); } String title = map.getString("Title"); String content = map.getString("Content"); String author = map.getString("Author"); String source = map.getString("Source"); String strDate = map.getString("PublishDate"); Date publishDate = doc.getLastmodifiedDate(); if ((StringUtil.isNotEmpty(strDate)) && (StringUtil.isNotEmpty(this.config.getPublishDateFormat()))) { try { strDate = DateUtil.convertChineseNumber(strDate); publishDate = DateUtil.parse(strDate, this.config.getPublishDateFormat()); } catch (Exception e) { this.task.addError("日期" + strDate + "不符合指定格式" + doc.getUrl()); } publishDateFlag = true; } if (publishDate.getTime() > System.currentTimeMillis()) { publishDate = new Date(); } ArticleAPI api = new ArticleAPI(); try { ZCArticleSchema article = new ZCArticleSchema(); if (StringUtil.isNotEmpty(title)) { article.setTitle(title); } else { ++cLost; break label1209: } if (StringUtil.isNotEmpty(content)) { content = content.trim(); while (rp.match()) { String html = rp.getMapx().getString("Content"); content = content + html; } if (this.config.isCleanLinkFlag()) { content = this.framePattern.matcher(content).replaceAll(""); content = this.stylePattern.matcher(content).replaceAll(""); content = this.scriptPattern.matcher(content).replaceAll(""); content = this.linkPattern.matcher(content).replaceAll("$1"); } if (filters != null) { for (int k = 0; k < filters.length; ++k) { content = filters[k].replace(content, ""); } } String str = dealImage(content, doc.getUrl(), sitePath, imagePath, imageCatalogID); article.setContent(str); } else { ++cLost; break label1209: } if (StringUtil.isNotEmpty(author)) { article.setAuthor(author); } if (StringUtil.isNotEmpty(source)) { article.setReferName(source); } article.setReferURL(doc.getUrl()); article.setPublishDate(publishDate); article.setCatalogID(this.config.getCatalogID()); article.setBranchInnerCode("0001"); article.setProp2("FromWeb"); if (ExtendManager.hasAction("FromWeb.BeforeSave")) { ExtendManager.executeAll("FromWeb.BeforeSave", new Object[] { article }); } Date date = (Date)new QueryBuilder( "select PublishDate from ZCArticle where ReferURL=? and CatalogID=?", doc.getUrl(), this.config.getCatalogID()).executeOneValue(); if (date != null) { if (date.getTime() < doc.getLastDownloadTime()) { QueryBuilder qb = new QueryBuilder( "update ZCArticle set Title=?,Content=? where CatalogID=? and ReferURL=?"); qb.add(article.getTitle()); qb.add(article.getContent()); qb.add(this.config.getCatalogID()); qb.add(doc.getUrl()); qb.executeNoQuery(); } ++cSuccess; } else { api.setSchema(article); set.add(article); if (api.insert() > 0L) ++cSuccess; else ++cFailure; } } catch (Exception e) { ++cFailure; e.printStackTrace(); } } else { LogUtil.getLogger().info("未能匹配" + doc.getUrl()); this.task.addError("未能匹配" + doc.getUrl()); ++cLost; } label1209: this.task.setCurrentInfo("正在转换文档, <font class='green'>" + cSuccess + "</font> 个成功, <font class='red'>" + cFailure + "</font> 个失败, <font class='green'>" + cLost + "</font> 个未匹配"); } }