Example #1
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<DoubleWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   // Each input line holds a single number; parse it once instead of twice.
   double number = Double.parseDouble(value.toString());
   DoubleWritable clave = new DoubleWritable();
   DoubleWritable valor = new DoubleWritable();
   // Emit (number, sqrt(number)) as the output pair.
   clave.set(number);
   valor.set(Math.sqrt(number));
   output.collect(clave, valor);
 }
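For context, a minimal old-API driver that could run this mapper might look like the sketch below. The class name SqrtMapper, the job name, and the use of a map-only job are assumptions for illustration, not part of the original.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SqrtDriver {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(SqrtDriver.class);
    conf.setJobName("sqrt"); // assumed job name
    // The mapper emits (number, sqrt(number)), both as DoubleWritable.
    conf.setOutputKeyClass(DoubleWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);
    // Hypothetical class wrapping the map() shown above.
    conf.setMapperClass(SqrtMapper.class);
    conf.setNumReduceTasks(0); // map-only: pairs go straight to output files
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
  }
}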
    /*
     * Your output should be in the form of (DoubleWritable score, Text word),
     * where score is the co-occurrence value for the word. Your output should be
     * sorted from largest co-occurrence to smallest co-occurrence.
     */
    @Override
    public void reduce(DoubleWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      // Keys are expected to arrive negated by the mapper (see the sketch
      // after this method), so the framework's ascending key sort delivers the
      // largest scores first; Math.abs restores the true co-occurrence score.
      int amount = 0;
      for (Text gram : values) {
        key.set(Math.abs(key.get()));
        context.write(key, gram);
        amount++;

        // Stop once N_TO_OUTPUT pairs have been emitted.
        if (amount == N_TO_OUTPUT) break;
      }
    }
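The Math.abs call above only does useful work if the scores arrive negative. A common pattern, and a plausible reading here, is for the preceding mapper to negate each score so that Hadoop's ascending key sort yields a descending ordering at the reducer. A hedged sketch of such a mapper follows; the class name SortMapper and the input key/value types are assumptions.

import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch (assumption): negate each score so the shuffle's ascending sort
// produces a largest-to-smallest ordering; the reducer restores it with abs.
public class SortMapper extends Mapper<Text, DoubleWritable, DoubleWritable, Text> {
  private final DoubleWritable negated = new DoubleWritable();

  @Override
  public void map(Text word, DoubleWritable score, Context context)
      throws IOException, InterruptedException {
    negated.set(-score.get());
    context.write(negated, word);
  }
}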
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    /*
     * Implements the mapper: it outputs each weight's index together with its
     * updated value.
     *
     * Note that the intermediate output format is <IntWritable, DoubleWritable>,
     * because the key is the weight's index (an integer) and the value is the
     * weight's updated value (a double).
     */
    inputData = value.toString();

    // go through the process
    initialize();
    getposphase();
    getnegphase();
    update();

    // Output the intermediate data as <weightID, weightUpdate> pairs
    // (a hedged reducer sketch follows this method).
    double[][] vishidinc_array = vishidinc.getArray();
    for (int i = 0; i < numdims; i++) {
      for (int j = 0; j < numhid; j++) {
        // Flatten the (i, j) matrix position into a single linear weight ID.
        weightPos.set(i * numhid + j);
        weightValue.set(vishidinc_array[i][j]);
        output.collect(weightPos, weightValue);
      }
    }
  }
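The reducer for this job is not shown. Below is a minimal sketch under the assumption that the per-mapper updates for each weight index are simply averaged; the averaging rule, the class name, and the absence of a learning-rate term are all assumptions, not from the original.

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical reducer: averages the updates emitted for each weight index.
public class WeightUpdateReducer extends MapReduceBase
    implements Reducer<IntWritable, DoubleWritable, IntWritable, DoubleWritable> {
  @Override
  public void reduce(
      IntWritable key,
      Iterator<DoubleWritable> values,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    double sum = 0.0;
    int count = 0;
    while (values.hasNext()) { // one value per mapper that touched this weight
      sum += values.next().get();
      count++;
    }
    // Emit the mean update for this weight (averaging is an assumption).
    output.collect(key, new DoubleWritable(sum / count));
  }
}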
  public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    // Seed a Random so the identical key/value sequence can be regenerated
    // when verifying the output below.
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    FileOutputFormat.setOutputPath(job, outdir);

    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    // Each Writable is serialized into outbuf, and the raw bytes are wrapped
    // in a BytesWritable for the binary output format.
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      for (int i = 0; i < RECORDS; ++i) {
        iwritable = new IntWritable(r.nextInt());
        iwritable.write(outbuf);
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        dwritable = new DoubleWritable(r.nextDouble());
        dwritable.write(outbuf);
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        writer.write(bkey, bval);
      }
    } finally {
      writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read the data back with a typed SequenceFileInputFormat and check each
    // record against the regenerated pseudo-random sequence.
    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
              job.getConfiguration(),
              context.getTaskAttemptID(),
              reader,
              null,
              null,
              MapReduceTestUtil.createDummyReporter(),
              split);
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          assertEquals(
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              sourceInt,
              iwritable.get());
          assertTrue(
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
          ++count;
        }
      } finally {
        reader.close();
      }
    }
    assertEquals("Some records not found", RECORDS, count);
  }
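The recurring idiom in this test is serializing a Writable into a reusable DataOutputBuffer and wrapping the valid byte prefix in a BytesWritable. A small hedged helper that factors this out is sketched below; the helper name toRawBytes is an assumption, and the imports would sit at the top of the test file.

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

// Hypothetical helper: capture a Writable's serialized form as raw bytes.
private static void toRawBytes(Writable w, DataOutputBuffer buf, BytesWritable dest)
    throws IOException {
  buf.reset();                                  // discard previous contents
  w.write(buf);                                 // Writable serializes itself
  dest.set(buf.getData(), 0, buf.getLength());  // copy only the valid prefix
}

With this in place, the write loop above reduces to toRawBytes(iwritable, outbuf, bkey) followed by toRawBytes(dwritable, outbuf, bval).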