/**
 * Builds the item-source tracking report from a result file and writes it as XML to
 * {@code args[0] + ".xml"}.
 *
 * <p>The report contains: one table with the total per source, one table per source broken down
 * by item status, and (when the input has a {@code "cat"} section) one table with the counts
 * rolled up to root categories. Nothing is written when the input file does not exist or when
 * {@code getMap} returns {@code null}.
 *
 * @param args {@code args[0]} is the path of the input file
 * @throws java.io.IOException if reading the input or writing the XML report fails
 */
public static void main(String[] args) throws IOException {
  String input = args[0];
  if (!new File(input).exists()) {
    return;
  }
  Map<String, Long> statusMap = getMap(input);
  if (statusMap == null) {
    return;
  }
  Report report = Report.newReport("闲置新发布商品来源跟踪");

  // Table 1: total number of items per source.
  Table sourceTable = report.newGroupTable("all", "商品发布来源分布", "来源", "数量");
  for (String key : SOURCE) {
    sourceTable.addCol(key, sum(statusMap, key));
  }

  // Table 2..n: per source, the number of items in each status (missing entries count as 0).
  for (String key : SOURCE) {
    Table statusTable = report.newGroupTable("lite_" + key, key + "发布来源分布", "状态", "数量");
    for (String status : ItemUtils.allStatus) {
      Long num = statusMap.get(key + "^" + status);
      if (num == null) {
        num = 0L;
      }
      statusTable.addCol(status, ItemUtils.getItemStatusName(status), String.valueOf(num));
    }
  }

  // Last table: counts per category, folded into the root category when the category is known.
  Map<String, List<String[]>> today = MapUtils.map(Utils.read(input, (String[]) null));
  if (today.containsKey("cat")) {
    Map<String, Long> countByRootCat = new HashMap<String, Long>();
    for (String[] array : today.get("cat")) {
      String catId = array[0];
      if (null != Category.getCategory(catId)) {
        catId = Category.getCategory(catId).getRootId();
      }
      long value = NumberUtils.toLong(array[1]);
      Long current = countByRootCat.get(catId);
      if (current == null) {
        current = 0L;
      }
      countByRootCat.put(catId, current + value);
    }
    Table catTable = report.newGroupTable("cat", "今天发布商品的各个类目分布");
    for (Map.Entry<String, Long> entry : countByRootCat.entrySet()) {
      catTable.addCol(
          entry.getKey(),
          Category.getCategoryName(entry.getKey()),
          String.valueOf(entry.getValue()));
    }
    catTable.sort(Table.SORT_VALUE);
  }

  // Close the stream explicitly so the file handle is released and the content is flushed even
  // if dump() throws.
  FileOutputStream out = new FileOutputStream(args[0] + ".xml");
  try {
    XmlReportFactory.dump(report, out);
  } finally {
    out.close();
  }
}
@Override protected void doWork(String line, OutputCollector<Text, LongWritable> output) throws IOException { String[] _allCols = StringUtils.splitPreserveAllTokens(line, TAB); if (_allCols.length < 47) { return; } String queryDate = DateStringUtils.format(inputArgs[0]); String gmtCreated = _allCols[TcBizOrder.GMT_CREATE]; String payTime = _allCols[TcBizOrder.PAY_TIME]; String gmtModified = _allCols[TcBizOrder.GMT_MODIFIED]; if (Utils.isSameDay(queryDate, gmtCreated)) { output.collect(Utils.mergeKey("t", "sys", "created"), ONE); output.collect(Utils.mergeKey("d", "sys", "created", "m", getMinutes(gmtCreated)), ONE); output.collect(Utils.mergeKey("d", "sys", "created", "s", gmtCreated), ONE); } if (Utils.isSameDay(queryDate, gmtModified)) { output.collect(Utils.mergeKey("t", "sys", "modified"), ONE); } // filter not effective order if (!TcBizOrder.isEffective(_allCols)) { return; } if (TcBizOrder.isMain(_allCols)) { commonMonitor("all", _allCols, output, queryDate); if (TcBizOrder.isB2C(_allCols)) { commonMonitor("b2c", _allCols, output, queryDate); } else { commonMonitor("c2c", _allCols, output, queryDate); } if (TcBizOrder.isFromTgroupon(_allCols)) { commonMonitor("jhs", _allCols, output, queryDate); } if (Utils.isSameDay(payTime, queryDate)) { long fee = TcBizOrder.getTotalFee(_allCols); String key = alipayTradeArea.getArea(fee); output.collect(Utils.mergeKey("t", "alipay_area", key), ONE); } } if (TcBizOrder.isDetail(_allCols)) { String rootCatId = Utils.getValue(_allCols[TcBizOrder.ATTRIBUTES], "realRootCat", "NULL"); commonTotalMonitor("cat", rootCatId, _allCols, output, queryDate); } }
protected String[] getInputPath(String[] args) { // String[] input = new String[]{HadoopTable.atpanel(args[0])}; String[] input = new String[] {"/group/tbdev/xiaodu/suoni/user_view_muli_host_setp_two/" + args[0]}; if (args.length > 1 && "false".equals(args[1])) { input = new String[] {"/group/tbdev/xiaodu/suoni/user_view_muli_host_setp_two_all/" + args[0]}; } System.out.println("input path => " + Utils.asString(input)); return input; }
/** * @param args * @throws java.io.IOException */ public static void main(String[] args) throws IOException { String input = args[0]; if (!new File(input).exists()) { System.out.println("File Not Exist ! => " + input); return; } String name = ""; if (args.length > 1) { name = args[1]; } Map<String, List<String[]>> today = MapUtils.map(Utils.read(input), CTRL_A); Map<String, String[]> lsMap = MapUtils.toMap(today.get("ls")); // Map<String, String[]> ipvMap = MapUtils.toMap(today.get("ipv")); Report report = Report.newReport(name + "统计"); Map<String, Table> marketMap = new HashMap<String, Table>(); for (Entry<String, String[]> entry : lsMap.entrySet()) { String key = entry.getKey(); // eg: 针织-针织衫市场 String[] ls = entry.getValue(); // value[0]:pv,value[1]:uv,value[2]:mid; String[] keys = StringUtils.split(key, "-"); Table market = marketMap.get(keys[1]); if (market == null) { market = report.newViewTable(keys[1], keys[1]); market .addCol("活动页面") .addCol("PV") .addCol("UV") .addCol("UV(MID)") .addCol(Report.BREAK_VALUE); marketMap.put(keys[1], market); } market.addCol(keys[0]).addCol(ls[0]).addCol(ls[1]).addCol(ls[2]).addCol(Report.BREAK_VALUE); } XmlReportFactory.dump(report, new FileOutputStream(args[0] + ".xml")); }
public int run(String[] args) throws Exception { if (args.length < 1) { args = new String[] {DateStringUtils.now()}; System.out.println( "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now()); } JobConf config = new JobConf(getConf(), getClass()); config.set("user.args", Utils.asString(args)); config.setJobName(getClass() + "-" + System.currentTimeMillis()); config.setNumReduceTasks(100); config.setMapperClass(getClass()); config.setReducerClass(getClass()); config.setInputFormat(getInputFormat()); config.setMapOutputKeyClass(Text.class); config.setMapOutputValueClass(Text.class); // add input paths for (String path : getInputPath(args)) { if (TextInputFormat.class.equals(getInputFormat())) { TextInputFormat.addInputPath(config, new Path(path)); } else if (SequenceFileInputFormat.class.equals(getInputFormat())) { SequenceFileInputFormat.addInputPath(config, new Path(path)); } } config.setOutputKeyClass(Text.class); config.setOutputValueClass(Text.class); // if output path exists then return FileSystem fs = FileSystem.get(config); Path outputPath = new Path(getOutputPath(args)); FileOutputFormat.setOutputPath(config, outputPath); if (!fs.exists(outputPath)) { JobClient.runJob(config); } else { System.out.println("You has finished this job today ! " + outputPath); } return JobClient.SUCCESS; }
private static void commonTotalMonitor( String firstKey, String secondKey, String[] _allCols, OutputCollector<Text, LongWritable> output, String queryDate) throws IOException { String gmtCreated = _allCols[TcBizOrder.GMT_CREATE]; String payTime = _allCols[TcBizOrder.PAY_TIME]; String gmtModified = _allCols[TcBizOrder.GMT_MODIFIED]; if (TcBizOrder.isDetail(_allCols)) { long fee = TcBizOrder.getTotalFee(_allCols); LongWritable totalFee = new LongWritable(fee); // 统计GMV 和支付宝交易 if (Utils.isSameDay(gmtCreated, queryDate)) { output.collect(Utils.mergeKey("t", firstKey, "gmv", secondKey), totalFee); output.collect(Utils.mergeKey("t", firstKey, "gmv_num", secondKey), ONE); } if (Utils.isSameDay(payTime, queryDate)) { output.collect(Utils.mergeKey("t", firstKey, "alipay", secondKey), totalFee); output.collect(Utils.mergeKey("t", firstKey, "alipay_num", secondKey), ONE); } } }
/**
 * Returns the input paths of the order-delta table for the given argument and prints them.
 *
 * @param args {@code args[0]} is passed to {@code HadoopTable.orderDelta}
 * @return the input paths reported by that table
 */
@Override
protected String[] getInputPath(String[] args) {
  final String[] paths = HadoopTable.orderDelta(args[0]).getInputPath();
  System.out.println("input path => " + Utils.asString(paths));
  return paths;
}
/** * @Title: main @Description: TODO(这里用一句话描述这个方法的作用) * * @param @param args 设定文件 * @return void 返回类型 * @throws */ public static void main(String[] args) throws IOException { // String input = "d:\\20111202"; //文件路径 // String mainName ="淘宝房产"; //报表名称 String input = args[0]; // 文件路径 String mainName = args[1]; // 报表名称 String tableName = "fangpvuv"; String tableTitle = "PVUV"; String zufang = "zufang", ershoufang = "ershoufang", all = "all", detail = "detail", qmfq = "qmfq", alluv = "unrepeatuv"; if (!new File(input).exists()) { System.out.println("File Not Exist ! => " + input); return; } Report report = Report.newReport(mainName + "每日PV、UV数据"); List<String> lines = Utils.readWithCharset(input, "utf-8"); Table table = null; int allPV = 0; if (lines != null && lines.size() > 0) { for (String line : lines) { if (line.startsWith(zufang)) { tableName = "zufangpvuv"; tableTitle = "租房频道当日PV、UV"; } else if (line.startsWith(ershoufang)) { tableName = "ershoufangpvuv"; tableTitle = "二手房频道当日PV、UV"; } else if (line.startsWith(all)) { tableName = "houseall"; tableTitle = "House下当日PV、UV"; } else if (line.startsWith(detail)) { tableName = "detailpvuv"; tableTitle = "Detail当日PV、UV"; } else if (line.startsWith(qmfq)) { tableName = "qmfqpvuv"; tableTitle = "全民疯抢当日PV、UV"; } else if (line.startsWith(alluv)) { tableName = "allpvuv"; tableTitle = "整站去重UV"; } String[] _cols = StringUtils.splitPreserveAllTokens(line, CTRL_A); if (_cols.length > 0) { if (tableName.equals("allpvuv")) { table = report.newTable(tableName, mainName + "总PV、UV(去重)"); table.addCol("allpv", "总PV", String.valueOf(allPV)); table.addCol("vpv", "访客UV", _cols[1]); table.addCol("uuv", "会员UV", _cols[2]); table.addCol( "alluv", "总UV", String.valueOf(Integer.valueOf(_cols[1]) + Integer.valueOf(_cols[2]))); break; } else { table = report.newTable(tableName, mainName + tableTitle); table.addCol("vpv", "访客PV", _cols[1]); table.addCol("upv", "会员PV", _cols[2]); if (!tableName.equals("qmfqpvuv") && !tableName.equals("alluv")) { 
allPV = allPV + Integer.valueOf(_cols[1]) + Integer.valueOf(_cols[2]); } table.addCol("vuv", "访客UV", _cols[3]); table.addCol("uuv", "会员UV", _cols[4]); } } table.sort(Table.SORT_VALUE); } } XmlReportFactory.dump(report, new FileOutputStream(input + ".xml")); }
/**
 * Restores the job arguments from the {@code "user.args"} property of the job configuration.
 *
 * @param job the job configuration to read from
 */
public void configure(JobConf job) {
  final String rawArgs = job.get("user.args");
  this.inputArgs = Utils.asArray(rawArgs);
}
public void reduce( Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { List<String[]> _temp = new ArrayList<String[]>(); int count = 0; while (values.hasNext()) { Text _out = values.next(); String[] tokens = StringUtils.splitPreserveAllTokens(_out.toString(), TAB); _temp.add(tokens); if (count++ > 100000) break; } if (count > 10000) { Set<String> ipSet = new HashSet<String>(); for (int posI = 0; posI < _temp.size(); posI++) { String[] array = _temp.get(posI); if (array == null) continue; String mid = array[2]; String ip = array[3]; ipSet.add(ip); } output.collect( key, Utils.mergeKey(String.valueOf(ipSet.size()), StringUtils.join(ipSet, '|'))); return; } /** * ·Ö×éËã·¨ FOREACH ALL_DATA IF IN INDEX THEN UPDATE INDEX AND INSERT DATA ELSE FOREACH SUB_DATA * MAKE INDEX AND SET FIND'S DATA AS NULL */ // List<List<String[]>> dataList = new ArrayList<List<String[]>>(); List<StringBuffer> indexList = new ArrayList<StringBuffer>(); Set<String> ipSet = new HashSet<String>(); boolean muliHost = false; for (int posI = 0; posI < _temp.size(); posI++) { String[] array = _temp.get(posI); if (array == null) continue; String mid = array[2]; String ip = array[3]; ipSet.add(ip); boolean hasIndex = false; for (int i = 0; i < indexList.size(); i++) { StringBuffer index = indexList.get(i); if (index.indexOf("|" + mid + "|") >= 0 || index.indexOf("|" + ip + "|") >= 0) { if (index.indexOf("|" + mid + "|") < 0) { index.append('|').append(mid).append('|'); } if (index.indexOf("|" + ip + "|") < 0) { index.append('|').append(ip).append('|'); } // dataList.get(i).add(array); hasIndex = true; break; } } if (!hasIndex) { StringBuffer index = new StringBuffer("|" + mid + "|" + ip + "|"); // List<String[]> _tmp = new ArrayList<String[]>(); // _tmp.add(array); for (int k = posI + 1; k < _temp.size(); k++) { String[] _newArray = _temp.get(k); if (_newArray == null) { continue; } String _mid = _newArray[2]; String _ip = _newArray[3]; if 
(index.indexOf("|" + _mid + "|") >= 0 || index.indexOf("|" + _ip + "|") >= 0) { if (index.indexOf("|" + _mid + "|") < 0) { index.append('|').append(_mid).append('|'); } if (index.indexOf("|" + _ip + "|") < 0) { index.append('|').append(_ip).append('|'); } // _tmp.add(_newArray); _temp.set(k, null); } } indexList.add(index); // dataList.add(_tmp); } } // for(String[] _array : _temp){ // output.collect(key,Utils.mergeKey(_array[1],_array[2],_array[3],_array[4])); // } StringBuffer allIndex = new StringBuffer(); for (StringBuffer index : indexList) { allIndex.append(index).append(';'); } if (allIndex.length() > 0) { allIndex.deleteCharAt(allIndex.length() - 1); } output.collect( key, Utils.mergeKey(String.valueOf(indexList.size()), StringUtils.join(ipSet, '|'))); }