Пример #1
0
  /**
   * Entry point: fingerprints the reference photo, then configures and runs the
   * "similar photo" job with MongoDB as both input and output.
   *
   * <p>The JVM exits with status 0 when the job reports success and 1 otherwise.
   *
   * @param args command-line arguments (not read)
   * @throws Exception propagated from any of the setup or job-execution calls made here
   */
  public static void main(String[] args) throws Exception {
    // Both values are stored in variables that are not declared in this method.
    // NOTE(review): if map tasks run in a different JVM they will not see values
    // assigned here -- confirm how PhotoMapper obtains the fingerprint.
    sourcePhoto = "/home/hduser/workspace/images/source.jpg";
    sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto);

    // MongoDB collections used as job input and output.
    final Configuration configuration = new Configuration();
    MongoConfigUtil.setInputURI(configuration, "mongodb://localhost/photo.fingerprint");
    MongoConfigUtil.setOutputURI(configuration, "mongodb://localhost/photo.handsomeOut");
    System.out.println("Conf: " + configuration);

    final Job similarPhotoJob = new Job(configuration, "similar photo");
    similarPhotoJob.setJarByClass(MdbSimilarPhoto.class);

    // Input and output formats.
    similarPhotoJob.setInputFormatClass(MongoInputFormat.class);
    similarPhotoJob.setOutputFormatClass(MongoOutputFormat.class);

    // Mapper, plus one class registered as both combiner and reducer.
    similarPhotoJob.setMapperClass(PhotoMapper.class);
    similarPhotoJob.setCombinerClass(SimilarityReducer.class);
    similarPhotoJob.setReducerClass(SimilarityReducer.class);

    // Job output key/value types.
    similarPhotoJob.setOutputKeyClass(Text.class);
    similarPhotoJob.setOutputValueClass(DoubleWritable.class);

    // Block until the job finishes (verbose = true) and map the result to an exit code.
    final boolean succeeded = similarPhotoJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }
Пример #2
0
  /**
   * Prepares {@code conf} for reading job input through {@code MongoInputFormat}.
   *
   * <p>Sets the input format, registers this instance's identifier as the input path,
   * sets the input URI from {@code mongoUri}, enables reading splits from shards, and
   * applies {@code query}.
   *
   * @param process the flow process (not read by this method)
   * @param tap the tap being initialised (not read by this method)
   * @param conf the job configuration to populate
   */
  @Override
  public void sourceConfInit(
      FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    // Input format and input path.
    conf.setInputFormat(MongoInputFormat.class);
    FileInputFormat.setInputPaths(conf, this.getIdentifier());

    // MongoDB-specific input settings.
    MongoConfigUtil.setInputURI(conf, this.mongoUri);
    MongoConfigUtil.setReadSplitsFromShards(conf, true);

    // The query is applied unconditionally; an isEmpty() guard on it is currently disabled.
    MongoConfigUtil.setQuery(conf, this.query);

    // TODO: restrict returned fields via MongoConfigUtil.setFields(conf, fields / fieldsBson).
  }
Пример #3
0
  /**
   * Prepares {@code conf} for writing job output through {@code MongoOutputFormat}.
   *
   * <p>Sets the output URI from {@code mongoUri}, the output format, and the output path
   * returned by {@code getPath()}.
   *
   * @param process the flow process (not read by this method)
   * @param tap the tap being initialised (not read by this method)
   * @param conf the job configuration to populate
   */
  @Override
  public void sinkConfInit(
      FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    // Destination URI first, then the format that writes to it.
    MongoConfigUtil.setOutputURI(conf, this.mongoUri);
    conf.setOutputFormat(MongoOutputFormat.class);

    // An output path is registered as well, taken from this instance.
    FileOutputFormat.setOutputPath(conf, this.getPath());
  }
Пример #4
0
 /**
  * Builds the job configuration: BSON file input read from {@code /messages}, output
  * written to the {@code mongo_hadoop.message_pairs} collection, with {@code MailPair}
  * keys and {@code IntWritable} values for both the mapper output and the job output.
  */
 public EnronMail() {
   final JobConf jobConf = new JobConf(new Configuration());

   // Choose the input/output format classes according to MongoTool.isMapRedV1().
   if (!MongoTool.isMapRedV1()) {
     MongoConfigUtil.setInputFormat(jobConf, BSONFileInputFormat.class);
     MongoConfigUtil.setOutputFormat(jobConf, MongoOutputFormat.class);
   } else {
     MapredMongoConfigUtil.setInputFormat(
         jobConf, com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
     MapredMongoConfigUtil.setOutputFormat(
         jobConf, com.mongodb.hadoop.mapred.MongoOutputFormat.class);
   }

   FileInputFormat.addInputPath(jobConf, new Path("/messages"));

   // The remaining settings are applied through a MongoConfig built on jobConf.
   final MongoConfig mongoConfig = new MongoConfig(jobConf);
   mongoConfig.setInputKey("headers.From");

   mongoConfig.setMapper(EnronMailMapper.class);
   mongoConfig.setReducer(EnronMailReducer.class);

   mongoConfig.setMapperOutputKey(MailPair.class);
   mongoConfig.setMapperOutputValue(IntWritable.class);

   mongoConfig.setOutputKey(MailPair.class);
   mongoConfig.setOutputValue(IntWritable.class);
   mongoConfig.setOutputURI("mongodb://localhost:27017/mongo_hadoop.message_pairs");

   // Publish the finished configuration on this instance.
   setConf(jobConf);
 }
Пример #5
0
  public HadoopTest() throws UnknownHostException {

    setConf(new Configuration());

    if (MongoTool.isMapRedV1()) {
      MapredMongoConfigUtil.setInputFormat(
          getConf(), com.mongodb.hadoop.mapred.MongoInputFormat.class);
      MapredMongoConfigUtil.setOutputFormat(
          getConf(), com.mongodb.hadoop.mapred.MongoOutputFormat.class);
    } else {
      MongoConfigUtil.setInputFormat(getConf(), MongoInputFormat.class);
      MongoConfigUtil.setOutputFormat(getConf(), MongoOutputFormat.class);
    }

    MongoConfigUtil.setInputURI(getConf(), "mongodb://localhost:27017/lolmaster.playerMatch");
    MongoConfigUtil.setOutputURI(getConf(), "mongodb://localhost:27017/lolmaster.out");

    MongoConfigUtil.setMapper(getConf(), HadoopMapper.class);
    MongoConfigUtil.setReducer(getConf(), HadoopReducer.class);
    MongoConfigUtil.setMapperOutputKey(getConf(), Text.class);
    MongoConfigUtil.setMapperOutputValue(getConf(), Text.class);
    MongoConfigUtil.setOutputKey(getConf(), IntWritable.class);
    MongoConfigUtil.setOutputValue(getConf(), BSONWritable.class);
  }
  /**
   * Opens the task's {@code .bson} work file and returns a record writer for it.
   *
   * <p>When {@code MongoConfigUtil.getBSONOutputBuildSplits} is enabled, a hidden sibling
   * file named {@code .<output-name>.splits} is created as well and handed to the writer;
   * otherwise the writer receives {@code null} for the splits stream.
   *
   * <p>If anything fails after the output stream has been opened, every stream opened
   * here is closed before the exception propagates, so a failed call does not leak open
   * file handles. Files already created are not deleted.
   *
   * @param context the task attempt whose configuration and work directory are used
   * @return a {@code BSONFileRecordWriter} over the opened stream(s)
   * @throws IOException if a file cannot be created or the split size cannot be read
   */
  @Override
  public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);
    FSDataOutputStream splitFile = null;

    // Flipped only once the writer has been built; until then this method still owns
    // the streams and must close them on any failure.
    boolean handedOff = false;
    try {
      if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
      }

      long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
      RecordWriter<K, V> writer = new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
      handedOff = true;
      return writer;
    } finally {
      if (!handedOff) {
        closeAfterFailure(splitFile);
        closeAfterFailure(outFile);
      }
    }
  }

  /** Closes {@code stream} if non-null, logging (not throwing) any close failure. */
  private void closeAfterFailure(final FSDataOutputStream stream) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException e) {
      // Swallowed on purpose: the exception that triggered the cleanup is the one the
      // caller needs to see. LOG.info is the only logger method visible in this file.
      LOG.info("failed to close output stream after error: " + e);
    }
  }