public static void main(String[] args) throws Exception { sourcePhoto = "/home/hduser/workspace/images/source.jpg"; sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto); final Configuration conf = new Configuration(); MongoConfigUtil.setInputURI(conf, "mongodb://localhost/photo.fingerprint"); MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/photo.handsomeOut"); System.out.println("Conf: " + conf); final Job job = new Job(conf, "similar photo"); job.setJarByClass(MdbSimilarPhoto.class); // Mapper,Reduce and Combiner type definition job.setMapperClass(PhotoMapper.class); job.setCombinerClass(SimilarityReducer.class); job.setReducerClass(SimilarityReducer.class); // output key/value type definition job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // InputFormat and OutputFormat type definition job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }
/** * @param process * @param tap * @param conf */ @Override public void sourceConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { MongoConfigUtil.setReadSplitsFromShards(conf, true); MongoConfigUtil.setInputURI(conf, this.mongoUri); FileInputFormat.setInputPaths(conf, this.getIdentifier()); conf.setInputFormat(MongoInputFormat.class); // TODO: MongoConfigUtil.setFields(conf, fieldsBson); // if (!this.query.isEmpty()) MongoConfigUtil.setQuery(conf, this.query); // TODO: MongoConfigUtil.setFields(conf, fields); }
/**
 * Prepares {@code conf} so that this scheme is written through {@link MongoOutputFormat}.
 *
 * <p>Points the Mongo output URI at {@code mongoUri}, registers {@code getPath()} as the
 * output path, and selects the output format.
 *
 * @param process the current flow process; not read by this method
 * @param tap the tap being configured; not read by this method
 * @param conf the job configuration that receives the output settings
 */
@Override
public void sinkConfInit(
        FlowProcess<JobConf> process,
        Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf conf) {
    // Mongo-specific destination.
    MongoConfigUtil.setOutputURI(conf, mongoUri);

    // Generic Hadoop output wiring.
    FileOutputFormat.setOutputPath(conf, getPath());
    conf.setOutputFormat(MongoOutputFormat.class);
}
public EnronMail() { JobConf conf = new JobConf(new Configuration()); if (MongoTool.isMapRedV1()) { MapredMongoConfigUtil.setInputFormat( conf, com.mongodb.hadoop.mapred.BSONFileInputFormat.class); MapredMongoConfigUtil.setOutputFormat( conf, com.mongodb.hadoop.mapred.MongoOutputFormat.class); } else { MongoConfigUtil.setInputFormat(conf, BSONFileInputFormat.class); MongoConfigUtil.setOutputFormat(conf, MongoOutputFormat.class); } FileInputFormat.addInputPath(conf, new Path("/messages")); MongoConfig config = new MongoConfig(conf); config.setInputKey("headers.From"); config.setMapper(EnronMailMapper.class); config.setReducer(EnronMailReducer.class); config.setMapperOutputKey(MailPair.class); config.setMapperOutputValue(IntWritable.class); config.setOutputKey(MailPair.class); config.setOutputValue(IntWritable.class); config.setOutputURI("mongodb://localhost:27017/mongo_hadoop.message_pairs"); setConf(conf); }
public HadoopTest() throws UnknownHostException { setConf(new Configuration()); if (MongoTool.isMapRedV1()) { MapredMongoConfigUtil.setInputFormat( getConf(), com.mongodb.hadoop.mapred.MongoInputFormat.class); MapredMongoConfigUtil.setOutputFormat( getConf(), com.mongodb.hadoop.mapred.MongoOutputFormat.class); } else { MongoConfigUtil.setInputFormat(getConf(), MongoInputFormat.class); MongoConfigUtil.setOutputFormat(getConf(), MongoOutputFormat.class); } MongoConfigUtil.setInputURI(getConf(), "mongodb://localhost:27017/lolmaster.playerMatch"); MongoConfigUtil.setOutputURI(getConf(), "mongodb://localhost:27017/lolmaster.out"); MongoConfigUtil.setMapper(getConf(), HadoopMapper.class); MongoConfigUtil.setReducer(getConf(), HadoopReducer.class); MongoConfigUtil.setMapperOutputKey(getConf(), Text.class); MongoConfigUtil.setMapperOutputValue(getConf(), Text.class); MongoConfigUtil.setOutputKey(getConf(), IntWritable.class); MongoConfigUtil.setOutputValue(getConf(), BSONWritable.class); }
@Override public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException { // Open data output stream Path outPath = getDefaultWorkFile(context, ".bson"); LOG.info("output going into " + outPath); FileSystem fs = outPath.getFileSystem(context.getConfiguration()); FSDataOutputStream outFile = fs.create(outPath); FSDataOutputStream splitFile = null; if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) { Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits"); splitFile = fs.create(splitPath); } long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null); return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize); }