예제 #1
0
 /**
  * Collects the newly published articles found in the given page content.
  *
  * <p>{@code docByHTML} is invoked first; afterwards every entry of {@code LINKHASH} (presumably
  * filled by that call — confirm against {@code docByHTML}) whose key is not yet present in
  * {@code articleDocCache} is inserted through {@code articleDocDao}. A successfully inserted
  * article (positive id) gets status 1, is put into the cache and increments {@code docCount}.
  *
  * <p>{@code LINKHASH} is cleared on every exit path, including when an exception is thrown.
  *
  * @param webid id of the website that is stored on every inserted article
  * @param content page content handed to {@code docByHTML}
  * @throws Exception propagated unchanged from {@code docByHTML}, the cache or the DAO
  */
 public void processWithDoc(int webid, String content) throws Exception {
   try {
     docByHTML(content, "http://www.pcpop.com/doc/");
     for (Object linkKey : LINKHASH.keySet()) {
       String key1 = (String) linkKey;
       if (null != articleDocCache.get(getKey(key1))) {
         // Already cached, so this link has been stored before.
         continue;
       }
       LinkBean link = (LinkBean) LINKHASH.get(key1);
       ArticleDoc doc = new ArticleDoc();
       doc.setTitle(link.getName());
       doc.setUrl(link.getLink());
       doc.setWebId(webid);
       int id = articleDocDao.insert(doc);
       if (id > 0) {
         docCount++;
         doc.setId(id);
         doc.setStatus(1);
         articleDocCache.put(getKey(doc.getUrl()), doc);
         logger.info("processWithDoc,docCount:" + docCount);
       } else {
         logger.info("失败,\t链接名称:" + link.getName() + "\n链接地址:" + link.getLink());
       }
     }
   } finally {
     // Reset the shared link map even on failure; otherwise the links of a page that failed
     // half-way would be processed again as part of the next call.
     LINKHASH.clear();
   }
 }
예제 #2
0
 /**
  * Warms {@code articleDocCache} from the database.
  *
  * <p>For every website returned by {@code websiteDao.findByFatherId(166)} the articles matching
  * that website id are loaded, and each article whose key is not yet cached is put into the
  * cache. Any exception is logged together with its stack trace and is not rethrown.
  */
 public void init() {
   try {
     List<Website> weblist = websiteDao.findByFatherId(166);
     logger.debug("初始化数据库数据");
     // Kept as a raw HashMap because the parameter type of articleDocDao.find is not visible
     // here. The same map is reused; "webId" is overwritten for each website.
     HashMap map = new HashMap();
     for (Website bean : weblist) {
       map.put("webId", bean.getId());
       List<ArticleDoc> docList = articleDocDao.find(map);
       for (ArticleDoc doc : docList) {
         if (articleDocCache.get(getKey(doc.getUrl())) == null) {
           logger.debug("添加对象到缓存");
           articleDocCache.put(getKey(doc.getUrl()), doc);
         }
       }
     }
   } catch (Exception e) {
     // Route the failure through the logger (with its cause) instead of printing to stderr.
     logger.error("初始化缓存数据失败", e);
   }
 }
예제 #3
0
  /**
   * Updates author and publish time of every article with status 1 whose URL starts with
   * {@code http://www.pcpop.com/doc/}.
   *
   * <p>Result per article, written back through {@code articleDocDao.update}:
   *
   * <ul>
   *   <li>status 2 — author and publish time were extracted and set;
   *   <li>status 10 — a {@code FileNotFoundException} or {@code ParserException} was thrown while
   *       processing the article;
   *   <li>status 11 — any other exception was thrown; its message is stored as the content.
   * </ul>
   *
   * <p>Articles without a URL are skipped.
   *
   * @throws Exception if the query fails or an update issued from a failure handler throws
   */
  public void processWithDoc() throws Exception {
    HashMap map = new HashMap();
    map.put("status", 1);

    List<ArticleDoc> list = articleDocDao.find(map);
    for (ArticleDoc bean : list) {
      String url = bean.getUrl();
      // A record without a URL cannot be fetched; skip it rather than let a
      // NullPointerException abort the remaining records.
      if (url == null || !url.startsWith("http://www.pcpop.com/doc/")) {
        continue;
      }
      logger.debug("获取文章数据");
      try {
        String author = author(url);
        // NOTE(review): the offsets (+3, -1, -2) presumably match a fixed layout of the text
        // returned by author(...) — confirm. A missing marker ends up in the status-11 branch.
        String tmp1 = author.substring(author.lastIndexOf("作者") + 3, author.lastIndexOf("编辑") - 1);
        String tmp2 = author.substring(0, author.indexOf(":") - 2);
        bean.setAuthor(tmp1);
        bean.setPublishTime(tmp2);
        bean.setStatus(2);
        if (articleDocDao.update(bean) == 0) {
          logger.debug("更新作者失败:" + url);
        } else {
          logger.debug("[" + bean.getId() + "]更新成功:" + url);
        }
      } catch (java.io.FileNotFoundException e) {
        bean.setStatus(10);
        if (articleDocDao.update(bean) == 0) {
          logger.debug("更新作者失败:" + url, e);
        } else {
          logger.debug("[" + bean.getId() + "]更新记录状态为10[文件或地址查找找不到]:" + url, e);
        }
      } catch (org.htmlparser.util.ParserException e) {
        bean.setStatus(10);
        if (articleDocDao.update(bean) == 0) {
          logger.debug("更新作者失败:" + url, e);
        } else {
          logger.debug("[" + bean.getId() + "]更新记录状态为10[URL解析失败]:" + url, e);
        }
      } catch (Exception e) {
        bean.setStatus(11);
        bean.setContent(e.getMessage());
        if (articleDocDao.update(bean) == 0) {
          logger.debug("更新作者和文章发布时间失败:" + url, e);
        } else {
          logger.debug("[" + bean.getId() + "]更新记录状态为11[其他异常情况]:" + url, e);
        }
      }
    }
  }
예제 #4
0
  /**
   * Fetches author and publish time for every article with status 1 whose URL starts with
   * {@code http://www.pcpop.com/doc/}.
   *
   * <p>Result per article, written back through {@code articleDocDao.update}:
   *
   * <ul>
   *   <li>status 2 — author and publish time were extracted and set; {@code processCount} is
   *       incremented when the update reports a change;
   *   <li>status 10 — a {@code FileNotFoundException} or {@code ParserException} was thrown while
   *       processing the article;
   *   <li>status 11 — any other exception was thrown; its message is stored as the content.
   * </ul>
   *
   * <p>Articles without a URL are skipped.
   *
   * @throws Exception if the query fails or an update issued from a failure handler throws
   */
  public void processAuthor() throws Exception {
    HashMap map = new HashMap();
    map.put("status", 1);
    List<ArticleDoc> list = articleDocDao.find(map);
    for (ArticleDoc bean : list) {
      String url = bean.getUrl();
      // A record without a URL cannot be fetched; skip it rather than let a
      // NullPointerException abort the remaining records.
      if (url == null || !url.startsWith("http://www.pcpop.com/doc/")) {
        continue;
      }
      logger.debug("获取文章数据");
      try {
        String author = author(url);
        // NOTE(review): the offsets (+3, -1, -2) presumably match a fixed layout of the text
        // returned by author(...) — confirm. A missing marker ends up in the status-11 branch.
        String tmp1 = author.substring(author.lastIndexOf("作者") + 3, author.lastIndexOf("编辑") - 1);
        String tmp2 = author.substring(0, author.indexOf(":") - 2);
        bean.setAuthor(tmp1);
        bean.setPublishTime(tmp2);
        bean.setStatus(2);
        if (articleDocDao.update(bean) == 0) {
          logger.info("更新作者失败:" + url);
        } else {
          processCount++;
          logger.info(
              "[" + bean.getId() + "]更新成功:" + url + "\tprocessCount:" + processCount);
        }
      } catch (java.io.FileNotFoundException e) {
        bean.setStatus(10);
        if (articleDocDao.update(bean) == 0) {
          logger.error("更新作者失败:" + url, e);
        } else {
          logger.error("[" + bean.getId() + "]更新记录状态为10[文件或地址查找找不到]:" + url, e);
        }
      } catch (org.htmlparser.util.ParserException e) {
        bean.setStatus(10);
        if (articleDocDao.update(bean) == 0) {
          logger.error("更新作者失败:" + url, e);
        } else {
          logger.error("[" + bean.getId() + "]更新记录状态为10[URL解析失败]:" + url, e);
        }
      } catch (Exception e) {
        bean.setStatus(11);
        bean.setContent(e.getMessage());
        if (articleDocDao.update(bean) == 0) {
          logger.error("更新作者和文章发布时间失败:" + url, e);
        } else {
          logger.error("[" + bean.getId() + "]更新记录状态为11[其他异常情况]:" + url, e);
        }
      }
    }
  }
예제 #5
0
  /**
   * Moves every article with status 2 whose URL starts with {@code http://www.pcpop.com/doc/} to
   * status 3 and refreshes its entry in {@code client} when one exists.
   *
   * <p>If processing a single article throws, that article is written back with status 11 and the
   * exception message as its content. Any exception that escapes the loop (for example from the
   * query) is logged and not rethrown. Articles without a URL are skipped.
   */
  void contentProcess() {
    HashMap map = new HashMap();
    map.put("status", 2);
    try {
      List<ArticleDoc> list = articleDocDao.find(map);
      for (ArticleDoc bean : list) {
        String url = bean.getUrl();
        // Skip records without a URL instead of aborting the batch with a NullPointerException.
        if (url == null || !url.startsWith("http://www.pcpop.com/doc/")) {
          continue;
        }

        try {
          // Fetching the article body (a content(url) call) is disabled in this version; only
          // the status is advanced.
          bean.setStatus(3);
          if (articleDocDao.update(bean) == 0) {
            logger.debug("更新文章内容失败:" + url);
          } else {
            // Replace the cached copy only when the article is already cached.
            if (null != client.get(getKey(url))) {
              client.remove(getKey(url));
              client.put(getKey(url), bean);
            }
            logger.debug("[" + bean.getId() + "]更新文章内容成功");
          }
        } catch (Exception e) {
          bean.setStatus(11);
          bean.setContent(e.getMessage());
          if (articleDocDao.update(bean) == 0) {
            logger.debug("更新文章内容失败:" + url, e);
          } else {
            logger.debug("[" + bean.getId() + "]更新记录状态为11[其他异常情况]:" + url, e);
          }
        }
      }
    } catch (Exception e) {
      // Still best-effort (nothing is rethrown), but the failure is no longer silent.
      logger.error("处理文章内容时发生异常", e);
    }
  }