private void generateVisitData() throws Exception { long counter = 0; Integer[] appIds = new Integer[] {1, 2, 3, 4, 5, 6}; Long[] userIds = new Long[] { 100011L, 100010L, 10090L, 10008L, 10007L, 10006L, 10005L, 10004L, 10003L, 10002L }; String[] visitTime = new String[] { "2014-07-05 15:15:15", "2014-07-10 15:15:15", "2014-07-15 15:15:15", "2014-07-20 15:15:15", "2014-07-25 15:15:15", "2014-07-30 15:15:15", "2014-08-04 15:15:15", "2014-08-09 15:15:15", "2014-08-14 15:15:15" }; Random rnd = new Random(); // 添加模拟的数据 for (int i = 0; i < 100; i++) { counter++; int order = rnd.nextInt(5); int uOrder = rnd.nextInt(9); String orgText = String.format( "auc\t%d\t%d\t1001\t3264456\trefer.page\tvisit.page\tvisit.param\t%s", userIds[uOrder], appIds[order].intValue(), visitTime[uOrder]); VisitLogEntry entry = new VisitLogEntry(); entry.parse(orgText); mrPageVisitor.addInput(new LongWritable(counter), entry); } }
/** * @param key * @param value * @param context * @throws IOException * @throws InterruptedException * @should write to context */ @Override protected void map(LongWritable key, VisitLogEntry value, Context context) throws IOException, InterruptedException { // 如果用户的ID小于等于0, 直接忽略掉 if (value.getUserId() <= 0) { return; } // 去掉注册日期中的时间部分 Calendar cal = Calendar.getInstance(); cal.setTimeInMillis(value.getVisitTime()); GregorianCalendar gCal = new GregorianCalendar( cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DATE)); // 设置键中的各个字段信息 writableKey.setStatDate(gCal.getTimeInMillis()); writableKey.setAppId(value.getAppId()); writableKey.setTerminalCode(value.getTerminalCode()); writableKey.setUserId(value.getUserId()); context.write(writableKey, writableValue); }