/**
 * Emits one (docid, docno) pair for the given document.
 *
 * <p>Increments the {@code Count.DOCS} counter by one, copies the document's
 * docid into the reusable {@code docid} key, resolves the matching docno via
 * {@code docMapping}, and collects the pair.
 *
 * @param key      input key; not read by this method
 * @param doc      document whose docid is mapped to a docno
 * @param output   collector that receives the (docid, docno) pair
 * @param reporter used to increment the {@code Count.DOCS} counter
 * @throws IOException declared by the signature; presumably propagated from
 *                     {@code output.collect} -- verify against the collector
 */
public void map(
    LongWritable key,
    TrecDocument doc,
    OutputCollector<Text, IntWritable> output,
    Reporter reporter) throws IOException {
  // One counter tick per document seen.
  reporter.incrCounter(Count.DOCS, 1);

  // Output key: the document's docid.
  docid.set(doc.getDocid());

  // Output value: despite its name, the "one" field carries the docno
  // looked up for this docid, not a constant 1.
  final int docno = docMapping.getDocno(doc.getDocid());
  one.set(docno);

  output.collect(docid, one);
}
public void configure(JobConf job) { try { Path[] localFiles = DistributedCache.getLocalCacheFiles(job); // Instead of hard-coding the actual concrete DocnoMapping class, have the name of the // class passed in as a property; this makes the mapper more general. docMapping = (DocnoMapping) Class.forName(job.get("DocnoMappingClass")).newInstance(); // Simply assume that the mappings file is the only file in the distributed cache. docMapping.loadMapping(localFiles[0], FileSystem.getLocal(job)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Error initializing DocnoMapping!"); } }