public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    FileOutputFormat.setOutputPath(job, outdir);

    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);

    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();

    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
      for (int i = 0; i < RECORDS; ++i) {
        iwritable = new IntWritable(r.nextInt());
        iwritable.write(outbuf);
        bkey.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        dwritable = new DoubleWritable(r.nextDouble());
        dwritable.write(outbuf);
        bval.set(outbuf.getData(), 0, outbuf.getLength());
        outbuf.reset();
        writer.write(bkey, bval);
      }
    } finally {
      writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
      RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
      MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
          new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
              job.getConfiguration(),
              context.getTaskAttemptID(),
              reader,
              null,
              null,
              MapReduceTestUtil.createDummyReporter(),
              split);
      reader.initialize(split, mcontext);
      try {
        int sourceInt;
        double sourceDouble;
        while (reader.nextKeyValue()) {
          sourceInt = r.nextInt();
          sourceDouble = r.nextDouble();
          iwritable = reader.getCurrentKey();
          dwritable = reader.getCurrentValue();
          assertEquals(
              "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
              sourceInt,
              iwritable.get());
          assertTrue(
              "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
              Double.compare(dwritable.get(), sourceDouble) == 0);
          ++count;
        }
      } finally {
        reader.close();
      }
    }
    assertEquals("Some records not found", RECORDS, count);
  }
  public static List<String> getText(BytesWritable value, Boolean tokenizep)
      throws InterruptedException {
    Session s = Session.getDefaultInstance(new Properties());
    InputStream is = new ByteArrayInputStream(value.getBytes());
    List<String> out = new ArrayList<String>();
    try {
      MimeMessage message = new MimeMessage(s, is);
      message.getAllHeaderLines();

      Analyzer standard_analyzer = new StandardAnalyzer(Version.LUCENE_43);
      Analyzer email_analyzer = new UAX29URLEmailAnalyzer(Version.LUCENE_43);

      Address[] fromAddrs = message.getFrom();
      String fromAddrstr = "";
      if (fromAddrs != null) {
        for (Address addr : fromAddrs) {
          fromAddrstr += (addr.toString() + " ");
        }
      }

      Address[] toAddrs = message.getAllRecipients();
      String toAddrstr = "";
      if (toAddrs != null) {
        for (Address addr : toAddrs) {
          toAddrstr += (addr.toString() + " ");
        }
      }

      String subject = message.getSubject();

      String body = "";
      try {
        Object content = message.getContent();
        // System.err.println(content.getContentType());
        if (content instanceof String) {
          body = (String) content;
        } else if (content instanceof Multipart) {
          Multipart mp = (Multipart) content;
          for (int i = 0; i < mp.getCount(); i++) {
            BodyPart bp = mp.getBodyPart(i);
            // System.err.println(bp.getContentType());
            Object c = bp.getContent();
            if (c instanceof String) {
              body = (String) c;
            }
          }
        }
        // people do really evil things with email, we're not sorting through it all now
      } catch (DecodingException e) {
        System.err.println("DecodingException");
      } catch (UnsupportedEncodingException e) {
        System.err.println("UnsuportedEncodingException");
      } catch (IOException e) {
        System.err.println("IOException");
      }

      if (tokenizep) {
        List<String> fromData = new ArrayList<String>();
        List<String> toData = new ArrayList<String>();
        List<String> subjectData = new ArrayList<String>();
        List<String> bodyData = new ArrayList<String>();

        if (fromAddrstr != null) {
          fromData = tokenizeString(email_analyzer, fromAddrstr);
        }
        if (toAddrstr != null) {
          toData = tokenizeString(email_analyzer, toAddrstr);
        }
        if (subject != null) {
          subjectData = tokenizeString(standard_analyzer, subject);
        }
        if (body != null) {
          bodyData = tokenizeString(standard_analyzer, body);
        }

        out.add("FROM ");
        out.addAll(fromData);

        out.add("TO ");
        out.addAll(toData);

        out.add("SUBJECT ");
        out.addAll(subjectData);

        out.add("BODY ");
        out.addAll(bodyData);
      } else {
        // if not tokenizep, return list with from and subject fields only
        out.add(fromAddrstr);
        out.add(subject);
      }

    } catch (MessagingException e) {
      System.err.println("MessagineException");
    }

    return out;
  }