public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set("I", args[3]); // number of rows (= number of columns)
    conf.set("IB", args[4]); // row block size of the matrix

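    // Note: the Job(Configuration, String) constructor is deprecated in Hadoop 2+;
    // Job.getInstance(conf, "CalculateCC") is the preferred way to create the job.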
    Job job = new Job(conf, "CalculateCC");

    job.setJarByClass(CorrelationCoefficient.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Use a different input file (and Mapper) for each input path.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapAll.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapDiag.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    boolean success = job.waitForCompletion(true);
    System.out.println(success);
  }
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(TestMultipleDriver.class);

    job.setJobName("Word Count");

    job.setMapperClass(TestKeyValueMapper.class);
    job.setReducerClass(TestMultipleReducer.class);

    // Define the InputFormat type for each input path

    MultipleInputs.addInputPath(
        job,
        new Path("pruebas/score.txt"),
        KeyValueTextInputFormat.class,
        TestKeyValueMapper.class);
    MultipleInputs.addInputPath(
        job,
        new Path("pruebas/poemasequencefile"),
        SequenceFileInputFormat.class,
        TestSeqFileMapper.class);

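    // Note: this FileInputFormat.setInputPaths call is most likely redundant, since
    // MultipleInputs above already registers the input paths and switches the job to
    // DelegatingInputFormat, which ignores the plain FileInputFormat input directory.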
    FileInputFormat.setInputPaths(job, new Path("pruebas/score.txt"));
    FileOutputFormat.setOutputPath(job, new Path("outkeyvalue2"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
  }
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Repartition Join (projection by text)");
    job.setJarByClass(ReplicatedJoinBasic.class);

    // Input parameters
    Path donationsPath = new Path(args[0]);
    Path projectsPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    // Mappers configuration
    MultipleInputs.addInputPath(
        job, donationsPath, SequenceFileInputFormat.class, DonationsMapper.class);
    MultipleInputs.addInputPath(
        job, projectsPath, SequenceFileInputFormat.class, ProjectsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer configuration
    job.setNumReduceTasks(3);
    job.setReducerClass(JoinReducer.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example No. 4
  public static void main(String[] args)
      throws URISyntaxException, IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
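    // Note: HADOOP_USER_NAME is typically picked up from the environment or a JVM
    // system property; setting it on the Configuration may not take effect.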
    conf.set("HADOOP_USER_NAME", "hadoop");
    FileSystem fs = FileSystem.get(new URI(OUTPATH), conf);
    if (fs.exists(new Path(OUTPATH))) {
      fs.delete(new Path(OUTPATH), true);
    }
    Job job = new Job(conf, LinkTest.class.getName());

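    // Join type passed through the job configuration; presumably UserReduce reads
    // "joinType" to choose between inner and outer join semantics.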
    job.getConfiguration().set("joinType", "allOuter");

    MultipleInputs.addInputPath(
        job, new Path(INPUTPATH1), TextInputFormat.class, UserInfoMapper.class);
    MultipleInputs.addInputPath(
        job, new Path(INPUTPATH2), TextInputFormat.class, UserLogMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(UserLog.class);

    job.setReducerClass(UserReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(OUTPATH));
    job.setOutputFormatClass(TextOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example No. 5
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 3) {
      System.err.println("Usage: query1 <HDFS input file1> <HDFS input file2> <HDFS output file>");
      System.exit(2);
    }

    int flag = JOptionPane.showConfirmDialog(null, "Do you want to enter a rectangle?");
    if (flag == JOptionPane.YES_OPTION) {
      x1 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'X1':"));
      y1 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'Y1':"));
      x2 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'X2':"));
      y2 = Integer.parseInt(JOptionPane.showInputDialog("Enter a positive int value for 'Y2':"));
      if (x1 > x2) { // make sure x1 < x2
        int temp = x1;
        x1 = x2;
        x2 = temp;
      }
      if (y1 > y2) { // make sure y1 < y2
        int ttp = y1;
        y1 = y2;
        y2 = ttp;
      }
    } else {
      x1 = x2 = y1 = y2 = -1;
    }

    conf.setInt("mx1", x1);
    conf.setInt("mx2", x2);
    conf.setInt("my1", y1);
    conf.setInt("my2", y2);

    Job job = new Job(conf, "spatialjoin");
    job.setJarByClass(SpatialJoin.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(FloatSumReducer.class);
    job.setReducerClass(TransTotalReducer.class);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

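    // Note: the MultipleInputs calls below switch the job's input format to
    // DelegatingInputFormat, so this explicit TextInputFormat setting is effectively
    // overridden by the per-path formats registered with MultipleInputs.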
    job.setInputFormatClass(TextInputFormat.class);
    MultipleInputs.addInputPath(
        job, new Path(args[0]), TextInputFormat.class, TokenizerMapper.class);
    MultipleInputs.addInputPath(
        job, new Path(args[1]), TextInputFormat.class, TokenizerMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    // Run the job once and reuse the result instead of calling waitForCompletion twice.
    boolean success = job.waitForCompletion(true);
    if (success) {
      System.out.println("=============Here is Your Input Rectangle=========================");
      System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
    }
    System.exit(success ? 0 : 1);
  }
Example No. 6
  @Override
  protected int setJobInputData(Configuration config, Job job) throws InferenciaException {
    try {
      // Retrieve the files to be processed and add them
      // as input data
      final FileSystem fs = FileSystem.get(new URI(InferenciaCte.hdfsUri), config);

      // Retrieve the data from the source path (data/*.bz2)
      FileStatus[] glob = fs.globStatus(new Path(getRutaFicheros()));

      // If we have data...
      if (null != glob) {
        if (glob.length > 0) {
          for (FileStatus fileStatus : glob) {
            Path pFich = fileStatus.getPath();
            MultipleInputs.addInputPath(job, pFich, SequenceFileInputFormat.class, LoadMap.class);
          }
        } else {
          return noDataFound();
        }
      }
    } catch (IOException e) {
      throw new InferenciaException(e, e.getMessage());
    } catch (URISyntaxException e) {
      throw new InferenciaException(e, e.getMessage());
    }
    return InferenciaCte.SUCCESS;
  }
Example No. 7
  private static void ResultJob() throws IOException, InterruptedException, ClassNotFoundException {

    /*depth = 9;
    wasError = true;*/

    conf = new Configuration();
    conf.setLong("my.vertex.num", num);
    if (isErrorOccurred) {
      conf.setBoolean("my.error.was", true);
    }
    fs = FileSystem.get(conf);
    job = Job.getInstance(conf, "Levelized Nested Dissection Result");

    job.setJarByClass(LevNestDissectJob.class);
    job.setReducerClass(LNDResultReducer.class);

    /*out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
    out_start = out.suffix("/" + outPath_start);*/

    if (wasError) {
      in = out.suffix("/" + outPath_count);
      MultipleInputs.addInputPath(job, in, SequenceFileInputFormat.class, StartVertexMapper.class);
      in_start = out_start;
      MultipleInputs.addInputPath(
          job, in_start, SequenceFileInputFormat.class, LNDResultMapper.class);
    }
    in_vertex = out.suffix("/" + outPath_vertex);
    MultipleInputs.addInputPath(
        job, in_vertex, SequenceFileInputFormat.class, LNDResultMapper.class);

    out =
        new Path(outPath == null ? (FILES_OUT + "result") : (outPath + "/" + "depth_" + "result"));
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);
  }
Example No. 8
  public static void main(String[] args) throws Exception {
    // Sources:
    // Multiple inputs: http://www.lichun.cc/blog/2012/05/hadoop-multipleinputs-usage/
    // Custom classes:
    // http://www.cs.bgu.ac.il/~dsp112/Forum?action=show-thread&id=5a15ede6df2520f2b68db15f2ce752fa

    Path firstPath = new Path(args[0]);
    Path secondPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    Configuration conf = new Configuration();

    Job job = new Job(conf);
    job.setJarByClass(lin1_exercise1.class);
    job.setJobName("avg books");

    // conf.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setCombinerClass(MyCombiner.class);

    // output format for mapper
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Pair.class);

    // output format for reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Use MultipleInputs to assign a different Mapper (and InputFormat) to each input path
    MultipleInputs.addInputPath(job, firstPath, TextInputFormat.class, MyMapper1.class);
    MultipleInputs.addInputPath(job, secondPath, TextInputFormat.class, MyMapper2.class);
    // FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // conf.setInputFormat(TextInputFormat.class);
    // conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    // conf.setNumReduceTasks(5);
    // conf.setPartitionerClass(MusicPartitioner.class

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example No. 9
  public int run(String[] args) throws Exception {

    Job job = new Job(getConf(), "ResysTest MergeSimJob");

    job.setJarByClass(MergeSimJob.class);

    Configuration conf = job.getConfiguration();

    conf.set(
        "io.compression.codecs",
        "org.apache.hadoop.io.compress.DefaultCodec,"
            + "org.apache.hadoop.io.compress.GzipCodec,"
            + "org.apache.hadoop.io.compress.BZip2Codec,"
            + "org.apache.hadoop.io.compress.SnappyCodec,"
            + "com.hadoop.compression.lzo.LzopCodec,"
            + "com.hadoop.compression.lzo.LzoCodec");
    String ifOutputCompress = conf.get("if_output_compress");
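    // Output compression is enabled unless if_output_compress is explicitly set to "false".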
    if (!"false".equals(ifOutputCompress)) {
      conf.set("mapred.output.compress", "true");
      String codec = conf.get("codec");
      if (codec != null) {
        conf.set("mapred.output.compression.codec", codec);
      } else {
        conf.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");
      }
    }
    String ifMapCompress = conf.get("if_map_compress");
    if (!"false".equals(ifMapCompress)) {
      conf.set("mapred.compress.map.output", "true");
      conf.set("mapred.map.output.compression.codec", "com.hadoop.compression.lzo.LzoCodec");
    }
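    // A timeout of 0 disables the MapReduce task timeout entirely.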
    conf.set("mapred.task.timeout", "0");

    String inputPath1 = conf.get("input1");
    String inputPath2 = conf.get("input2");
    String outputPath = conf.get("output");
    int numReduceTasks = Integer.parseInt(conf.get("num_reduce_tasks"));

    String ifIndexed = conf.get("if_indexed");
    if (!"false".equals(ifIndexed)) {
      MultipleInputs.addInputPath(
          job, new Path(inputPath1), LzoTextInputFormat.class, MergeSimMapper4ab.class);
      MultipleInputs.addInputPath(
          job, new Path(inputPath2), LzoTextInputFormat.class, MergeSimMapper4ba.class);
    } else {
      MultipleInputs.addInputPath(
          job, new Path(inputPath1), TextInputFormat.class, MergeSimMapper4ab.class);
      MultipleInputs.addInputPath(
          job, new Path(inputPath2), TextInputFormat.class, MergeSimMapper4ba.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setNumReduceTasks(numReduceTasks);

    job.setReducerClass(MergeSimReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    int result = job.waitForCompletion(true) ? 0 : 1;
    if (!"false".equals(ifOutputCompress)) {
      String ifIndex = conf.get("if_index");
      if (!"false".equals(ifIndex)) {
        System.gc();
        DistributedLzoIndexer lzoIndexer = new DistributedLzoIndexer();
        Configuration indexConf = new Configuration();
        indexConf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
        lzoIndexer.setConf(indexConf);
        lzoIndexer.run(new String[] {outputPath});
      }
    }
    System.exit(result);
    return 0;
  }
Example No. 10
  public int run(String[] args) throws Exception {

    // read job conf from xml
    Configuration conf = new Configuration();
    conf.addResource("configuration.xml");
    conf.set("mapred.job.queue.name", "amap");
    conf.set("mapred.queue.name", "amap");
    conf.set("mapred.job.priority", JobPriority.VERY_HIGH.toString());
    conf.set("mapred.textoutputformat.separatorText", ",");

    // Parse the date and get/set the job prefix
    Date date = new Date();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String dateString = "";
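    // Accept zero or one argument (an optional report date); anything else is an error.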
    if (0 != otherArgs.length && 1 != otherArgs.length) {

      System.exit(-1);

    } else if (0 == otherArgs.length) {

      // No argument given: use the current date (InputDate is left empty)
      conf.set("InputDate", dateString);

    } else if (1 == otherArgs.length) {

      dateString = otherArgs[0];
      conf.set("InputDate", dateString);
      SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMdd");
      try {
        date = simpleDateFormat.parse(dateString);
      } catch (ParseException e) {
        try {
          simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
          date = simpleDateFormat.parse(dateString);
        } catch (ParseException pe) {
          pe.printStackTrace();
          System.out.println("ERROR. Parameter 'date' cannot be parsed. Please check it.");
          System.exit(-2);
        }
      }
    }

    Job job = new Job(conf, "stat_allnavi_job" + ConstantsParseDate.outputDate(date));
    job.setJarByClass(StatAllNaviJob.class);
    job.setReducerClass(StatAllNaviReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(4);

    String aosbusrouPathString =
        conf.get("mapred.job.loadaosbusroujob.input.datapath.template", "");
    if ("".equalsIgnoreCase(aosbusrouPathString)) {
      System.exit(-3);
    }
    aosbusrouPathString = ConstantsParseDate.parseDay(aosbusrouPathString, date);

    String aoswalkrouPathString =
        conf.get("mapred.job.loadaoswalkroujob.input.datapath.template", "");
    if ("".equalsIgnoreCase(aoswalkrouPathString)) {
      System.exit(-3);
    }
    aoswalkrouPathString = ConstantsParseDate.parseDay(aoswalkrouPathString, date);

    String wsnaviautoPathString =
        conf.get("mapred.job.loadwsnaviautojob.input.datapath.template", "");
    if ("".equalsIgnoreCase(wsnaviautoPathString)) {

      System.exit(-3);
    }
    wsnaviautoPathString = ConstantsParseDate.parseDay(wsnaviautoPathString, date);

    String wsnavibusPathString =
        conf.get("mapred.job.loadwsnavibusjob.input.datapath.template", "");
    if ("".equalsIgnoreCase(wsnavibusPathString)) {

      System.exit(-3);
    }
    wsnavibusPathString = ConstantsParseDate.parseDay(wsnavibusPathString, date);

    MultipleInputs.addInputPath(
        job, new Path(aosbusrouPathString), TextInputFormat.class, StatBusRouteMapper.class);
    MultipleInputs.addInputPath(
        job, new Path(aoswalkrouPathString), TextInputFormat.class, StatWalkRouteMapper.class);
    MultipleInputs.addInputPath(
        job, new Path(wsnaviautoPathString), TextInputFormat.class, StatNaviAutoMapper.class);
    MultipleInputs.addInputPath(
        job, new Path(wsnavibusPathString), TextInputFormat.class, StatNaviBusMapper.class);

    String outputPathString = conf.get("mapred.job.statallnavi.output.datapath.template", "");

    if ("".equalsIgnoreCase(outputPathString)) {
      System.out.println("ERROR. Job output path template is not configured.");
      System.exit(-3);
    }
    outputPathString = ConstantsParseDate.parseDay(outputPathString, date);
    Path outputPath = new Path(outputPathString);
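    // Remove any previous output so FileOutputFormat does not fail because the
    // output directory already exists.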
    FileSystem fileSystem = FileSystem.get(URI.create(outputPathString), job.getConfiguration());
    fileSystem.delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(
        job, "text", TextOutputFormat.class, Text.class, NullWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example No. 11
  private static void DissectionJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.set("my.out.path.vertex", outPath_vertex);
    conf.set("my.out.path.count", outPath_count);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection " + depth);

    job.setJarByClass(LevNestDissectJob.class);
    job.setReducerClass(LevNestDissectReducer.class);

    if (wasStart) {
      in_start = out_start;
      MultipleInputs.addInputPath(
          job, in_start, SequenceFileInputFormat.class, LevNestDissectMapper.class);
    }
    in_vertex = out.suffix("/" + outPath_vertex);
    MultipleInputs.addInputPath(
        job, in_vertex, SequenceFileInputFormat.class, LevNestDissectMapper.class);

    out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);
    MultipleOutputs.addNamedOutput(
        job, "vertex", SequenceFileOutputFormat.class, LongWritable.class, VertexWritable.class);
    MultipleOutputs.addNamedOutput(
        job, "count", SequenceFileOutputFormat.class, LongWritable.class, LongWritable.class);

    job.setMapOutputValueClass(VertexWritable.class);

    job.waitForCompletion(true);

    updated =
        job.getCounters().findCounter(LevNestDissectReducer.UpdatedCounter.UPDATED).getValue();
    // Workaround for an unexplained error
    if (notNumbered > 0 && updated == 0) {
      notNumbered_tmp =
          job.getCounters()
              .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
              .getValue();
      if (notNumbered_tmp > 0) {
        notNumbered = notNumbered_tmp;
        wasError = false;
        nextDepth();
      } else {
        wasError = true;
        isErrorOccurred = true;
        depth--;
        out =
            new Path(
                outPath == null
                    ? (FILES_OUT + (depth - 1))
                    : (outPath + "/" + "depth_" + (depth - 1)));
        /*depth -= 2;
        out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));*/
      }
    } else {
      wasError = false;
      notNumbered =
          job.getCounters()
              .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
              .getValue();
      if (notNumbered > 0) {
        nextDepth();
      }
    }
  }