/** Created with IntelliJ IDEA. User: DJH Date: 14-11-11 Time: 下午4:12 用于统计城市的pv、ip数据 */ public class CountPVData { private JdbcTemplate jdbcTemplateCount = HibernateCountUtil.getJdbcTemplate(); private static Logger logger = Logger.getLogger(CountPVData.class); /** * 根据ip所在地统计ip数 * * @param city * @param start * @param end * @return */ public Integer countIpByCity(int city, Date start, Date end) { String sql = "select count(distinct a.userIp) from PV_Access_IP a where a.viewFirstTime between ? and ?"; List params = Lists.newArrayList(); params.add(start); params.add(end); if (city != 0) { sql += " and a.cityCode = ? "; params.add(city); } Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(), Integer.class); return (count == null) ? 0 : count; } /** * 根据站点统计ip * * @param site * @param start * @param end * @return */ public Integer countIpBySite(int site, Date start, Date end) { String sql = "select count(distinct a.userIp) from PV_Access a where a.acceptTime between ? and ?"; List params = Lists.newArrayList(); params.add(start); params.add(end); if (site != 0) { sql += " and a.site = ? "; params.add(site); } Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(), Integer.class); return (count == null) ? 0 : count; } /** 根据ip所在地统计浏览量 */ private int countViewByCity(int city, Date start, Date end) { return 0; // TODO 待PV_Access表增加cityCode字段 // String cityStr=""; // List params = Lists.newArrayList(); // params.add(start); // params.add(end); // if(city!=0){ // cityStr=" and EXISTS(select 1 from PV_Access_IP a where a.userIp = b.userIp and // a.cityCode = ? )" ; // params.add(city); // } // String sql = "select count(*) from PV_Access b " + // " where b.acceptTime between ? and ? "+cityStr; // Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(), // Integer.class); // return (count == null) ? 0 : count; } /** 所在ip城市获得 */ private int countViewBySite(int site, Date start, Date end) { String sql = "select count(*) from PV_Access a where " + " a.acceptTime between ? and ?"; List params = Lists.newArrayList(); params.add(start); params.add(end); if (site != 0) { sql += " and a.site = ? "; params.add(site); } Integer count = jdbcTemplateCount.queryForObject(sql, params.toArray(), Integer.class); return (count == null) ? 0 : count; } /** * 插入城市/站点pv统计记录 * * @param pvCountCitySite */ private void insertPVCountCitySite(PVCountCitySite pvCountCitySite) { String insertSql = "insert into pv_count_city_site(site_id," + "ip_count_city," + "ip_count_site," + "pv_count_city," + "pv_count_site," + "create_date) values(?,?,?,?,?,?)"; jdbcTemplateCount.update( insertSql, pvCountCitySite.getSiteId(), pvCountCitySite.getIpCountCity() == null ? 0 : pvCountCitySite.getIpCountCity(), pvCountCitySite.getIpCountSite() == null ? 0 : pvCountCitySite.getIpCountSite(), pvCountCitySite.getPvCountCity() == null ? 0 : pvCountCitySite.getPvCountCity(), pvCountCitySite.getPvCountSite() == null ? 0 : pvCountCitySite.getPvCountSite(), pvCountCitySite.getCreateDtae()); } public void countCitySitePv() { long startTime = System.currentTimeMillis(); Date start = DateTime.now().minusDays(1).withTimeAtStartOfDay().toDate(); Date end = DateTime.now() .minusDays(1) .hourOfDay() .withMaximumValue() .minuteOfHour() .withMaximumValue() .secondOfMinute() .withMaximumValue() .millisOfSecond() .withMaximumValue() .toDate(); for (PvCitySiteEnum citySite : PvCitySiteEnum.values()) { PVCountCitySite pvCountCitySite = new PVCountCitySite(); pvCountCitySite.setSiteId(citySite.getSiteId()); pvCountCitySite.setCreateDtae(start); pvCountCitySite.setIpCountCity(countIpByCity(citySite.getCityCode(), start, end)); pvCountCitySite.setIpCountSite(countIpBySite(citySite.getSiteId(), start, end)); pvCountCitySite.setPvCountCity(countViewByCity(citySite.getCityCode(), start, end)); pvCountCitySite.setPvCountSite(countViewBySite(citySite.getSiteId(), start, end)); insertPVCountCitySite(pvCountCitySite); } long endTime = System.currentTimeMillis(); logger.error("====城市PV统计结束!用时:" + ((endTime - startTime) / 1000 / 60) + " 分钟"); } }
/** * 来源统计 * * @author pzm */ public class CountIndexFromSource { private static Logger LOG = Logger.getLogger(CountIndexFromSource.class); JdbcTemplate countJdbcTemplate = HibernateCountUtil.getJdbcTemplate(); JdbcTemplate baseJdbcTemplate = HibernateUtil.getJdbcTemplate(); JdbcTemplate actionJdbcTemplate = HibernateActionUtil.getJdbcTemplate(); int processLimit = 100000; private Date countDate; private String countDateString; public CountIndexFromSource() throws ParseException { setCountDate(new DateTime().plusDays(-1).toString("yyyy-M-d")); } public Date getCountDate() { return countDate; } public void setCountDate(String countDate) throws ParseException { this.countDate = new Date(new DateTime(countDate).getMillis()); this.countDateString = new DateTime(countDate).toString(ISODateTimeFormat.date()); } public void countIndexFromSourceBaseData() { log("START countIndexFromSourceBaseData"); deleteFromSourceBase(); countPVIPToFromSourceBase(); log("END countIndexFromSourceBaseData"); } public void countIndexFromSourceData() { log("START countIndexFromSourceData"); String deleteSql = "DELETE FROM count_index_from_source WHERE count_date = ?"; countJdbcTemplate.update(deleteSql, countDate); String insertSql = "INSERT INTO count_index_from_source (from_source_id,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT from_source_id,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base WHERE count_date = ? GROUP BY from_source_id"; int updateCt = countJdbcTemplate.update(insertSql, countDate); log("END countIndexFromSourceData! [insert count:" + updateCt + "]"); } public void countIndexFromSourceGeneralData() { log("START countIndexFromSourceData"); String deleteSql = "DELETE FROM count_index_from_source_general WHERE count_date = ?"; countJdbcTemplate.update(deleteSql, countDate); // 搜索引擎-自然搜索 String insertSearchEngineSql = "INSERT INTO count_index_from_source_general (source_type,source_domain,search_engine,search_keyword,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT source_type,source_domain,max(search_engine),search_keyword,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id" + " WHERE count_date = ? AND source_type = ? AND spread_item_tn='' GROUP BY source_type,source_domain,search_keyword"; log("BEGIN insertSearchEngineSpreadSql"); int updateCt = countJdbcTemplate.update( insertSearchEngineSql, countDate, SysFromSource.SOURCE_TYPE_SEARCHENGINE); log("FINISH insertSearchEngineSql! [insert count:" + updateCt + "]"); // 搜索引擎-推广栏目 String insertSearchEngineSpreadSql = "INSERT INTO count_index_from_source_general (source_type,source_domain,search_engine,spread_item_tn,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT source_type,source_domain,max(search_engine),spread_item_tn,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id" + " WHERE count_date = ? AND source_type = ? AND spread_item_tn!='' GROUP BY source_type,source_domain,spread_item_tn"; log("BEGIN insertSearchEngineSpreadSql"); updateCt = countJdbcTemplate.update( insertSearchEngineSpreadSql, countDate, SysFromSource.SOURCE_TYPE_SEARCHENGINE); log("FINISH insertSearchEngineSpreadSql! [insert count:" + updateCt + "]"); // 外部链接-自然流量 String insertOutterlinkSql = "INSERT INTO count_index_from_source_general (source_type,source_domain,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT source_type,source_domain,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id" + " WHERE count_date = ? AND source_type = ? AND spread_item_tn='' GROUP BY source_type,source_domain"; log("BEGIN insertOutterlinkSql"); updateCt = countJdbcTemplate.update( insertOutterlinkSql, countDate, SysFromSource.SOURCE_TYPE_OUTTERLINK); log("FINISH insertOutterlinkSql! [insert count:" + updateCt + "]"); // 外部链接-推广栏目 String insertOutterlinkSpreadSql = "INSERT INTO count_index_from_source_general (source_type,source_domain,spread_item_tn,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT source_type,source_domain,spread_item_tn,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id" + " WHERE count_date = ? AND source_type = ? AND spread_item_tn!='' GROUP BY source_type,source_domain,spread_item_tn"; log("BEGIN insertOutterlinkSpreadSql"); updateCt = countJdbcTemplate.update( insertOutterlinkSpreadSql, countDate, SysFromSource.SOURCE_TYPE_OUTTERLINK); log("FINISH insertOutterlinkSpreadSql! [insert count:" + updateCt + "]"); // 自主访问 String insertSelfaccessSql = "INSERT INTO count_index_from_source_general (source_type,count_date,ip,pv,register_count,resume_refresh_count,pos_apply_count)" + " SELECT source_type,max(count_date),sum(ip),sum(pv),sum(register_count),sum(resume_refresh_count),sum(pos_apply_count)" + " FROM count_index_from_source_base a LEFT JOIN sys_from_source b ON a.from_source_id = b.id" + " WHERE count_date = ? GROUP BY source_type HAVING source_type=?"; log("BEGIN insertSelfaccessSql"); updateCt = countJdbcTemplate.update( insertSelfaccessSql, countDate, SysFromSource.SOURCE_TYPE_SELFACCESS); log("FINISH insertSelfaccessSql! [insert count:" + updateCt + "]"); log("end countIndexFromSourceData"); } private int saveFromSource(String fromSourceUrl, String fromTable) { SysFromSource sysFromSource = new SysFromSource(); try { sysFromSource.parseFromSourceUrl(fromSourceUrl); } catch (Exception e) { LOG.warn( "parseFromSourceUrl 失败,忽略此url!fromSourceUrl=[" + fromSourceUrl + "],来源于:" + fromTable + ", Exception:" + e.toString()); LOG.debug("Exception Detail", e); return -1; } sysFromSource.setCreateTime(new Timestamp(System.currentTimeMillis())); String hql = "FROM SysFromSource as s WHERE s.sourceDomain=:sourceDomain and s.sourceAddr=:sourceAddr"; Query query = HibernateCountUtil.currentSession().createQuery(hql); query.setParameter("sourceDomain", sysFromSource.getSourceDomain()); query.setParameter("sourceAddr", sysFromSource.getSourceAddr()); SysFromSource sysFromSourceQuery = (SysFromSource) query.uniqueResult(); if (sysFromSourceQuery == null) { HibernateCountUtil.currentSession().save(sysFromSource); } else { sysFromSource = sysFromSourceQuery; } return sysFromSource.getId(); } private boolean isToday() { return new DateTime(countDate) .toString(ISODateTimeFormat.date()) .equals( new DateTime(new Date(System.currentTimeMillis())).toString(ISODateTimeFormat.date())); } private void deleteFromSourceBase() { countJdbcTemplate.update( "delete from count_index_from_source_base where count_date=?", countDate); } private void countPVIPToFromSourceBase() { log("BEGIN countIndexPVIPToFromSourceBase"); String pvTable = isToday() ? "PV_Access" : PVAccessUtil.getPVAccessTableName(countDate); log("pvTable is " + pvTable); String selectPVIPSql = "SELECT fromSourceUrl,colId,count(*) AS PV,count(DISTINCT(userIp)) AS IP" + " FROM " + pvTable + " WHERE DATE_FORMAT(acceptTime, '%Y-%m-%d') = ? AND (CONCAT(targetDomain,targetAddr) = 'www.job5156.com/' OR CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?et%' OR CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?rg%') GROUP BY fromSourceUrl,colId" + " ORDER BY fromSourceUrl,colId"; String getPVIPRowSql = "select count(*) from (select 1 " + " FROM " + pvTable + " WHERE DATE_FORMAT(acceptTime, '%Y-%m-%d') = ? AND (CONCAT(targetDomain,targetAddr) = 'www.job5156.com/' OR CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?et%' OR CONCAT(targetDomain,targetAddr) like 'www.job5156.com/?rg%') GROUP BY fromSourceUrl,colId) a"; int PVIPRow = countJdbcTemplate.queryForObject(getPVIPRowSql, Integer.class, countDateString); String insertPVIPSql = "insert into count_index_from_source_base (from_source_id,spread_item_tn,count_date,ip,pv) values (?,?,?,?,?)"; String updatePVIPSql = "update count_index_from_source_base set ip=?,pv=? where from_source_id=? and spread_item_tn=? and count_date=?"; long insertCount = 0; long updateCount = 0; long failCount = 0; for (int i = 0; i < PVIPRow; i += processLimit) { String limitSelectPVIPSql = selectPVIPSql + " limit ?,?"; List<Map<String, Object>> rs = countJdbcTemplate.queryForList(limitSelectPVIPSql, countDateString, i, processLimit); for (Map<String, Object> fromSourceRsItem : rs) { String fromSourceUrl = ObjectUtils.toString(fromSourceRsItem.get("fromSourceUrl")); String spreadItemTn = ObjectUtils.toString(fromSourceRsItem.get("colId")); try { int fromSourceId = saveFromSource(fromSourceUrl, "PV_Access"); if (fromSourceId == -1) { failCount++; continue; } Object pvCount = fromSourceRsItem.get("PV"); Object ipCount = fromSourceRsItem.get("IP"); try { countJdbcTemplate.update( insertPVIPSql, fromSourceId, spreadItemTn, countDate, ipCount, pvCount); insertCount++; } catch (DuplicateKeyException e) { countJdbcTemplate.update( updatePVIPSql, ipCount, pvCount, fromSourceId, spreadItemTn, countDate); updateCount++; } } catch (Exception e) { LOG.warn( String.format( "insert pvip to index_from_source_base fail! [fromSourceUrl=%s, spreadItemTn=%s]" + e.toString(), fromSourceUrl, spreadItemTn)); LOG.debug("Exception Detail", e); failCount++; } } } log( "FINISH countIndexPVIPToFromSourceBase! [all count:" + PVIPRow + ", insert count:" + insertCount + ", update count:" + updateCount + ", fail count:" + failCount + "]"); } private void log(String log) { LOG.info("[CountIndexFromSource INFO]: " + log); } }