Esempio n. 1
0
/**
 * Collects page-view (PV) and distinct-IP statistics per city and per site.
 *
 * <p>Created with IntelliJ IDEA. User: DJH, Date: 14-11-11.
 */
public class CountPVData {

  private JdbcTemplate jdbcTemplateCount = HibernateCountUtil.getJdbcTemplate();

  private static final Logger logger = Logger.getLogger(CountPVData.class);

  /**
   * Counts distinct visitor IPs in {@code PV_Access_IP}, optionally restricted to one city.
   *
   * @param city city code to filter on; {@code 0} disables the city filter
   * @param start lower bound (inclusive) for {@code viewFirstTime}
   * @param end upper bound (inclusive) for {@code viewFirstTime}
   * @return the number of distinct IPs; {@code 0} if the query yields {@code null}
   */
  public Integer countIpByCity(int city, Date start, Date end) {
    return countInRange(
        "select count(distinct a.userIp) from PV_Access_IP a where  a.viewFirstTime between ? and ?",
        " and a.cityCode = ? ",
        city,
        start,
        end);
  }

  /**
   * Counts distinct visitor IPs in {@code PV_Access}, optionally restricted to one site.
   *
   * @param site site id to filter on; {@code 0} disables the site filter
   * @param start lower bound (inclusive) for {@code acceptTime}
   * @param end upper bound (inclusive) for {@code acceptTime}
   * @return the number of distinct IPs; {@code 0} if the query yields {@code null}
   */
  public Integer countIpBySite(int site, Date start, Date end) {
    return countInRange(
        "select count(distinct a.userIp) from PV_Access a where  a.acceptTime between ? and ?",
        " and a.site = ?  ",
        site,
        start,
        end);
  }

  /**
   * Counts page views by the city the visitor IP belongs to.
   *
   * <p>Not implemented yet: always returns {@code 0}. The intended query is kept below as a
   * reference.
   */
  private int countViewByCity(int city, Date start, Date end) {
    return 0; // TODO pending the addition of a cityCode column to the PV_Access table
    //        String cityStr="";
    //        List params = Lists.newArrayList();
    //        params.add(start);
    //        params.add(end);
    //        if(city!=0){
    //            cityStr=" and EXISTS(select 1 from PV_Access_IP a where a.userIp = b.userIp and
    // a.cityCode = ? )" ;
    //            params.add(city);
    //        }
    //        String sql = "select count(*) from  PV_Access b " +
    //                " where    b.acceptTime between ? and ? "+cityStr;
    //        Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(),
    // Integer.class);
    //        return (count == null) ? 0 : count;
  }

  /**
   * Counts page views (rows of {@code PV_Access}), optionally restricted to one site.
   *
   * @param site site id to filter on; {@code 0} disables the site filter
   * @param start lower bound (inclusive) for {@code acceptTime}
   * @param end upper bound (inclusive) for {@code acceptTime}
   * @return the number of rows; {@code 0} if the query yields {@code null}
   */
  private int countViewBySite(int site, Date start, Date end) {
    return countInRange(
        "select count(*) from PV_Access a where " + "  a.acceptTime between ? and ?",
        " and a.site = ? ",
        site,
        start,
        end);
  }

  /**
   * Runs a single-value count query over a time range with an optional extra filter.
   *
   * @param baseSql count statement ending in a {@code between ? and ?} time-range predicate
   * @param filterClause SQL fragment with one {@code ?} placeholder, appended only when {@code
   *     filterValue} is non-zero
   * @param filterValue value bound to the filter placeholder; {@code 0} means "no filter"
   * @param start value bound to the first range placeholder
   * @param end value bound to the second range placeholder
   * @return the count, or {@code 0} if the query yields {@code null}
   */
  private int countInRange(
      String baseSql, String filterClause, int filterValue, Date start, Date end) {
    List<Object> params = Lists.newArrayList();
    params.add(start);
    params.add(end);
    String sql = baseSql;
    if (filterValue != 0) {
      sql += filterClause;
      params.add(filterValue);
    }
    Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(), Integer.class);
    return (count == null) ? 0 : count;
  }

  /**
   * Inserts one city/site PV statistics record into {@code pv_count_city_site}.
   *
   * @param pvCountCitySite record to persist; {@code null} counters are stored as {@code 0}
   */
  private void insertPVCountCitySite(PVCountCitySite pvCountCitySite) {
    String insertSql =
        "insert into pv_count_city_site(site_id,"
            + "ip_count_city,"
            + "ip_count_site,"
            + "pv_count_city,"
            + "pv_count_site,"
            + "create_date) values(?,?,?,?,?,?)";
    jdbcTemplateCount.update(
        insertSql,
        pvCountCitySite.getSiteId(),
        pvCountCitySite.getIpCountCity() == null ? 0 : pvCountCitySite.getIpCountCity(),
        pvCountCitySite.getIpCountSite() == null ? 0 : pvCountCitySite.getIpCountSite(),
        pvCountCitySite.getPvCountCity() == null ? 0 : pvCountCitySite.getPvCountCity(),
        pvCountCitySite.getPvCountSite() == null ? 0 : pvCountCitySite.getPvCountSite(),
        pvCountCitySite.getCreateDtae());
  }

  /**
   * Computes yesterday's IP and PV counts for every {@link PvCitySiteEnum} entry and stores one
   * {@code pv_count_city_site} row per entry.
   *
   * <p>The time window runs from the start of yesterday to the last millisecond of yesterday.
   * The elapsed time is logged in whole minutes when the run finishes.
   */
  public void countCitySitePv() {
    long startTime = System.currentTimeMillis();

    // Read the clock once so both window bounds fall on the same day even if the job
    // happens to start right at midnight.
    DateTime yesterday = DateTime.now().minusDays(1);
    Date start = yesterday.withTimeAtStartOfDay().toDate();
    Date end =
        yesterday
            .hourOfDay()
            .withMaximumValue()
            .minuteOfHour()
            .withMaximumValue()
            .secondOfMinute()
            .withMaximumValue()
            .millisOfSecond()
            .withMaximumValue()
            .toDate();
    for (PvCitySiteEnum citySite : PvCitySiteEnum.values()) {
      PVCountCitySite pvCountCitySite = new PVCountCitySite();
      pvCountCitySite.setSiteId(citySite.getSiteId());
      pvCountCitySite.setCreateDtae(start);
      pvCountCitySite.setIpCountCity(countIpByCity(citySite.getCityCode(), start, end));
      pvCountCitySite.setIpCountSite(countIpBySite(citySite.getSiteId(), start, end));
      pvCountCitySite.setPvCountCity(countViewByCity(citySite.getCityCode(), start, end));
      pvCountCitySite.setPvCountSite(countViewBySite(citySite.getSiteId(), start, end));
      insertPVCountCitySite(pvCountCitySite);
    }

    long endTime = System.currentTimeMillis();
    // Normal completion is informational, not an error condition.
    logger.info("====城市PV统计结束!用时:" + ((endTime - startTime) / 1000 / 60) + " 分钟");
  }
}
/**
 * Traffic-source statistics: aggregates PV/IP figures of the home page by referrer source for a
 * single day (the "count date") and rolls them up into summary tables.
 *
 * @author pzm
 */
public class CountIndexFromSource {
  private static Logger LOG = Logger.getLogger(CountIndexFromSource.class);
  JdbcTemplate countJdbcTemplate = HibernateCountUtil.getJdbcTemplate();
  // NOTE(review): the next two templates are not used by any method visible in this class.
  JdbcTemplate baseJdbcTemplate = HibernateUtil.getJdbcTemplate();
  JdbcTemplate actionJdbcTemplate = HibernateActionUtil.getJdbcTemplate();
  /** Page size used when reading grouped PV/IP rows. */
  int processLimit = 100000;
  /** Day being counted. */
  private Date countDate;
  /** {@link #countDate} rendered as an ISO date string ({@code yyyy-MM-dd}). */
  private String countDateString;

  /**
   * Creates a counter whose count date is yesterday.
   *
   * @throws ParseException declared by {@link #setCountDate(String)}
   */
  public CountIndexFromSource() throws ParseException {
    setCountDate(new DateTime().plusDays(-1).toString("yyyy-M-d"));
  }

  /** Returns the day being counted. */
  public Date getCountDate() {
    return countDate;
  }

  /**
   * Sets the day to count.
   *
   * @param countDate date string understood by the Joda-Time {@code DateTime(Object)} constructor
   * @throws ParseException kept in the signature for existing callers; not thrown by this body
   */
  public void setCountDate(String countDate) throws ParseException {
    // Parse once and derive both representations from the same instant.
    DateTime parsed = new DateTime(countDate);
    this.countDate = parsed.toDate();
    this.countDateString = parsed.toString(ISODateTimeFormat.date());
  }

  /**
   * Rebuilds the {@code count_index_from_source_base} rows of the count date: deletes the
   * existing rows, then recomputes PV/IP per source from the access log table.
   */
  public void countIndexFromSourceBaseData() {
    log("START countIndexFromSourceBaseData");
    deleteFromSourceBase();
    countPVIPToFromSourceBase();
    log("END countIndexFromSourceBaseData");
  }

  /**
   * Rebuilds the {@code count_index_from_source} rows of the count date by summing the base
   * table per {@code from_source_id}.
   */
  public void countIndexFromSourceData() {
    log("START countIndexFromSourceData");
    String deleteSql = "DELETE FROM count_index_from_source WHERE count_date = ?";
    countJdbcTemplate.update(deleteSql, countDate);
    String insertSql =
        "INSERT INTO count_index_from_source (from_source_id,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT from_source_id,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base WHERE count_date = ? GROUP BY from_source_id";
    int updateCt = countJdbcTemplate.update(insertSql, countDate);
    log("END countIndexFromSourceData! [insert count:" + updateCt + "]");
  }

  /**
   * Rebuilds the {@code count_index_from_source_general} rows of the count date. Existing rows
   * are deleted first, then five roll-ups of the base table are inserted: search engine (organic
   * and promoted), external link (organic and promoted) and direct access.
   */
  public void countIndexFromSourceGeneralData() {
    log("START countIndexFromSourceGeneralData");
    String deleteSql = "DELETE FROM count_index_from_source_general WHERE count_date = ?";
    countJdbcTemplate.update(deleteSql, countDate);

    // Search engine - organic search
    String insertSearchEngineSql =
        "INSERT INTO count_index_from_source_general (source_type,source_domain,search_engine,search_keyword,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT source_type,source_domain,max(search_engine),search_keyword,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id"
            + " WHERE count_date = ? AND source_type = ? AND spread_item_tn='' GROUP BY source_type,source_domain,search_keyword";
    runLoggedInsert(
        "insertSearchEngineSql",
        insertSearchEngineSql,
        countDate,
        SysFromSource.SOURCE_TYPE_SEARCHENGINE);

    // Search engine - promotion column
    String insertSearchEngineSpreadSql =
        "INSERT INTO count_index_from_source_general (source_type,source_domain,search_engine,spread_item_tn,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT source_type,source_domain,max(search_engine),spread_item_tn,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id"
            + " WHERE count_date = ? AND source_type = ? AND spread_item_tn!='' GROUP BY source_type,source_domain,spread_item_tn";
    runLoggedInsert(
        "insertSearchEngineSpreadSql",
        insertSearchEngineSpreadSql,
        countDate,
        SysFromSource.SOURCE_TYPE_SEARCHENGINE);

    // External link - organic traffic
    String insertOutterlinkSql =
        "INSERT INTO count_index_from_source_general (source_type,source_domain,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT source_type,source_domain,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id"
            + " WHERE count_date = ? AND source_type = ? AND spread_item_tn='' GROUP BY source_type,source_domain";
    runLoggedInsert(
        "insertOutterlinkSql",
        insertOutterlinkSql,
        countDate,
        SysFromSource.SOURCE_TYPE_OUTTERLINK);

    // External link - promotion column
    String insertOutterlinkSpreadSql =
        "INSERT INTO count_index_from_source_general (source_type,source_domain,spread_item_tn,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT source_type,source_domain,spread_item_tn,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id"
            + " WHERE count_date = ? AND source_type = ? AND spread_item_tn!='' GROUP BY source_type,source_domain,spread_item_tn";
    runLoggedInsert(
        "insertOutterlinkSpreadSql",
        insertOutterlinkSpreadSql,
        countDate,
        SysFromSource.SOURCE_TYPE_OUTTERLINK);

    // Direct (self) access
    String insertSelfaccessSql =
        "INSERT INTO count_index_from_source_general (source_type,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)"
            + " SELECT source_type,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)"
            + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id"
            + " WHERE count_date = ? GROUP BY source_type HAVING source_type=?";
    runLoggedInsert(
        "insertSelfaccessSql",
        insertSelfaccessSql,
        countDate,
        SysFromSource.SOURCE_TYPE_SELFACCESS);

    log("END countIndexFromSourceGeneralData");
  }

  /**
   * Executes an insert statement on the count database, logging a BEGIN line before and a FINISH
   * line (with the affected-row count) after.
   *
   * @param label statement name used in the log lines
   * @param sql statement to execute
   * @param args values bound to the statement's {@code ?} placeholders, in order
   */
  private void runLoggedInsert(String label, String sql, Object... args) {
    log("BEGIN " + label);
    int insertedRows = countJdbcTemplate.update(sql, args);
    log("FINISH " + label + "! [insert count:" + insertedRows + "]");
  }

  /**
   * Resolves a referrer URL to the id of its {@code SysFromSource} record, creating the record
   * when no row with the same source domain and address exists yet.
   *
   * @param fromSourceUrl referrer URL to parse
   * @param fromTable name of the table the URL came from; used only in the warning message
   * @return the record id, or {@code -1} if the URL could not be parsed
   */
  private int saveFromSource(String fromSourceUrl, String fromTable) {
    SysFromSource sysFromSource = new SysFromSource();
    try {
      sysFromSource.parseFromSourceUrl(fromSourceUrl);
    } catch (Exception e) {
      // Best effort: an unparsable URL is skipped, not fatal for the batch.
      LOG.warn(
          "parseFromSourceUrl 失败,忽略此url!fromSourceUrl=["
              + fromSourceUrl
              + "],来源于:"
              + fromTable
              + ", Exception:"
              + e.toString());
      LOG.debug("Exception Detail", e);
      return -1;
    }
    sysFromSource.setCreateTime(new Timestamp(System.currentTimeMillis()));
    String hql =
        "FROM SysFromSource as s WHERE s.sourceDomain=:sourceDomain and s.sourceAddr=:sourceAddr";
    Query query = HibernateCountUtil.currentSession().createQuery(hql);
    query.setParameter("sourceDomain", sysFromSource.getSourceDomain());
    query.setParameter("sourceAddr", sysFromSource.getSourceAddr());
    SysFromSource existing = (SysFromSource) query.uniqueResult();
    if (existing != null) {
      return existing.getId();
    }
    HibernateCountUtil.currentSession().save(sysFromSource);
    return sysFromSource.getId();
  }

  /** Returns whether the count date falls on the current calendar day (default time zone). */
  private boolean isToday() {
    String today = new DateTime().toString(ISODateTimeFormat.date());
    return new DateTime(countDate).toString(ISODateTimeFormat.date()).equals(today);
  }

  /** Deletes all {@code count_index_from_source_base} rows of the count date. */
  private void deleteFromSourceBase() {
    countJdbcTemplate.update(
        "delete from count_index_from_source_base where count_date=?", countDate);
  }

  /**
   * Reads home-page PV and distinct-IP counts grouped by referrer URL and {@code colId} from the
   * access table, page by page, and writes one {@code count_index_from_source_base} row per
   * group (insert, falling back to update on a duplicate key). Rows whose URL cannot be parsed
   * or whose write fails are counted as failures and skipped.
   */
  private void countPVIPToFromSourceBase() {
    log("BEGIN countIndexPVIPToFromSourceBase");
    // The table name comes from internal code, not user input, so concatenation is acceptable.
    String pvTable = isToday() ? "PV_Access" : PVAccessUtil.getPVAccessTableName(countDate);
    log("pvTable is " + pvTable);

    // Shared by the data query and the row-count query so both always select the same groups.
    String groupedSource =
        " FROM "
            + pvTable
            + " WHERE DATE_FORMAT(acceptTime, '%Y-%m-%d') = ? AND (CONCAT(targetDomain,targetAddr) = 'www.job5156.com/' OR  CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?et%' OR CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?rg%') GROUP BY fromSourceUrl,colId";
    String selectPVIPSql =
        "SELECT fromSourceUrl,colId,count(*) AS PV,count(DISTINCT(userIp)) AS IP"
            + groupedSource
            + " ORDER BY fromSourceUrl,colId";
    String getPVIPRowSql = "select count(*) from (select 1 " + groupedSource + ") a";
    // Loop-invariant: build the paged statement once.
    String limitSelectPVIPSql = selectPVIPSql + " limit ?,?";

    Integer groupCount =
        countJdbcTemplate.queryForObject(getPVIPRowSql, Integer.class, countDateString);
    int PVIPRow = (groupCount == null) ? 0 : groupCount;

    String insertPVIPSql =
        "insert into count_index_from_source_base (from_source_id,spread_item_tn,count_date,ip,pv) values (?,?,?,?,?)";
    String updatePVIPSql =
        "update count_index_from_source_base set ip=?,pv=? where from_source_id=? and spread_item_tn=? and count_date=?";
    long insertCount = 0;
    long updateCount = 0;
    long failCount = 0;
    for (int offset = 0; offset < PVIPRow; offset += processLimit) {
      List<Map<String, Object>> page =
          countJdbcTemplate.queryForList(
              limitSelectPVIPSql, countDateString, offset, processLimit);
      for (Map<String, Object> row : page) {
        String fromSourceUrl = ObjectUtils.toString(row.get("fromSourceUrl"));
        String spreadItemTn = ObjectUtils.toString(row.get("colId"));
        try {
          int fromSourceId = saveFromSource(fromSourceUrl, "PV_Access");
          if (fromSourceId == -1) {
            failCount++;
            continue;
          }
          Object pvCount = row.get("PV");
          Object ipCount = row.get("IP");
          try {
            countJdbcTemplate.update(
                insertPVIPSql, fromSourceId, spreadItemTn, countDate, ipCount, pvCount);
            insertCount++;
          } catch (DuplicateKeyException e) {
            // Row already exists for this source/column/date: overwrite its counters.
            countJdbcTemplate.update(
                updatePVIPSql, ipCount, pvCount, fromSourceId, spreadItemTn, countDate);
            updateCount++;
          }
        } catch (Exception e) {
          // Pass the exception as an argument rather than splicing it into the format
          // string: a '%' in its text would otherwise make String.format throw here and
          // abort the whole batch.
          LOG.warn(
              String.format(
                  "insert pvip to index_from_source_base fail! [fromSourceUrl=%s, spreadItemTn=%s]%s",
                  fromSourceUrl, spreadItemTn, e));
          LOG.debug("Exception Detail", e);
          failCount++;
        }
      }
    }
    log(
        "FINISH countIndexPVIPToFromSourceBase! [all count:"
            + PVIPRow
            + ", insert count:"
            + insertCount
            + ", update count:"
            + updateCount
            + ", fail count:"
            + failCount
            + "]");
  }

  /** Writes an INFO line prefixed with this class's tag. */
  private void log(String log) {
    LOG.info("[CountIndexFromSource INFO]: " + log);
  }
}