示例#1
0
  @Override
  public int run(String[] args) {
    if (args.length != 2) {
      System.err.println("need input path argument and output path argument.");
      return 1;
    }

    JobConf conf = new JobConf(getConf(), AccessLogJob.class);

    conf.setJobName("csp-access-log-job");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(10);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    try {
      FileSystem fstm = FileSystem.get(conf);
      Path outDir = new Path(args[1]);
      fstm.delete(outDir, true);

      List<Path> inpaths = new LinkedList<Path>();
      Path inputPath = new Path(args[0]);
      LocalUtil.getAllFiles(conf, inputPath, inpaths); // 填充输入文件

      FileInputFormat.setInputPaths(conf, inpaths.toArray(new Path[0]));
      FileOutputFormat.setOutputPath(conf, new Path(args[1]));
      JobClient.runJob(conf);
    } catch (Exception e) {
      System.err.print("job run error:\n" + LocalUtil.getThrowableTrace(e));
    }

    return 0;
  }
示例#2
0
    public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String appHost = key.toString();
      String line = value.toString();

      try {
        String appName = appHost.split(":")[0];
        String hostIp = appHost.split(":")[1];

        PvLogFormatEnum pvLogFormatType = PvLogFormatCache.getPvLogFormat(appName, line);
        String collectTimeStr = parseLogLineCollectTime(line, pvLogFormatType);
        if (collectTimeStr == null) {
          return;
        }

        String useTimeStr = "";
        String requestUrl = "";
        String httpCode = " ";
        String sourseUrl = "";
        if (pvLogFormatType == PvLogFormatEnum.GALAXY) {
          String[] tokens = line.split("\\|\\|");
          int i = tokens.length;
          if (i == 22 || i == 19 || i == 25) {
            useTimeStr = tokens[1];
            requestUrl = LocalUtil.cleanUrl(tokens[4].split(" ")[1]);
            httpCode = tokens[5];
            sourseUrl = LocalUtil.cleanUrl(tokens[7]);
          } else {
            return;
          }
        } else if (pvLogFormatType == PvLogFormatEnum.LOGIN) {
          String[] tokens = line.split("\"");
          if (tokens.length >= 4) {
            String[] info = tokens[0].split(" ");
            useTimeStr = info[1];
            requestUrl = LocalUtil.cleanUrl(tokens[1].split(" ")[1]);
            httpCode = info[2];
            sourseUrl = LocalUtil.cleanUrl(tokens[3]);
          } else if (tokens.length == 3) {
            String[] info = tokens[0].split(" ");
            useTimeStr = info[1];
            requestUrl = LocalUtil.cleanUrl(tokens[1].split(" ")[1]);
            httpCode = info[2];
            sourseUrl = "null";
          } else {
            return;
          }
        } else {
          String[] tokens = line.split("\"");
          useTimeStr = tokens[0].split(" ")[1];
          requestUrl = LocalUtil.cleanUrl(tokens[1].split(" ")[1]);
          httpCode = tokens[2].trim().split(" ")[0];
          sourseUrl = LocalUtil.cleanUrl(tokens[3]);
        }

        // 如果状态码不为3个数字,则剔除
        if (httpCode.length() != 3 || !StringUtils.isNumeric(httpCode)) {
          httpCode = " ";
        }

        long useTime = 0L;
        // 如果存在小数点,,则将其转化为微秒单位
        try {
          if (!useTimeStr.equals("-")) {
            if (useTimeStr.indexOf(".") > -1) {
              useTime = (long) (Float.parseFloat(useTimeStr) * 1000 * 1000);
            } else {
              useTime = Long.parseLong(useTimeStr);
            }
          }
        } catch (NumberFormatException e) {
          logger.error("日志装换错误,日志行:" + line, e);
          return;
        }

        // 主信息
        Text mainKey =
            new Text(
                AccessReduceTypeEnum.MAIN + spliter + appName + hostIp + spliter + collectTimeStr);
        Text mainValye = new Text(String.valueOf(useTime));
        output.collect(mainKey, mainValye);

        // httpcode信息
        Text codeKey = new Text(AccessReduceTypeEnum.CODE + spliter + appName + spliter + httpCode);
        Text codeValye = new Text("1");
        output.collect(codeKey, codeValye);

        // source url信息
        Text sourceKey =
            new Text(AccessReduceTypeEnum.SOURCE + spliter + appName + spliter + sourseUrl);
        Text sourceValye = new Text("1");
        output.collect(sourceKey, sourceValye);

        // request url信息
        Text requestKey =
            new Text(AccessReduceTypeEnum.REQUEST + spliter + appName + spliter + requestUrl);
        Text requestValye = new Text("1");
        output.collect(requestKey, requestValye);

      } catch (Exception e) {
        logger.warn("appName + line", e);
      }
    }