예제 #1
0
    /**
     * Emits one (docid, docno) pair for the given document and increments the
     * {@code Count.DOCS} counter by one.
     *
     * <p>The {@code docid} and {@code one} fields are reused as the output key and value;
     * despite its name, {@code one} carries the docno looked up through {@code docMapping}.
     *
     * @param key input key; not used by this method
     * @param doc document whose docid is emitted and mapped to a docno
     * @param output collector that receives the (docid, docno) pair
     * @param reporter used to increment the document counter
     * @throws IOException declared by the signature; presumably propagated from the collector
     */
    public void map(LongWritable key, TrecDocument doc,
        OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
      // Count every document handed to this mapper.
      reporter.incrCounter(Count.DOCS, 1);

      // Output key: the document's docid.
      docid.set(doc.getDocid());

      // Output value: the docno that the mapping assigns to this docid.
      final int docno = docMapping.getDocno(doc.getDocid());
      one.set(docno);

      output.collect(docid, one);
    }
예제 #2
0
    /**
     * Initializes {@code docMapping} from the job configuration.
     *
     * <p>The concrete {@code DocnoMapping} class is instantiated reflectively from the class
     * name stored in the {@code "DocnoMappingClass"} job property, and its mapping data is
     * loaded from the first file in the distributed cache via the local file system.
     *
     * @param job job configuration supplying the mapping class name and the cached files
     * @throws RuntimeException if the cache is empty, the property is unset, or instantiating
     *     or loading the mapping fails; the underlying failure is attached as the cause
     */
    public void configure(JobConf job) {
      try {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null || localFiles.length == 0) {
          throw new IllegalStateException(
              "No files in the distributed cache; expected the docno mapping file.");
        }

        // Instead of hard-coding the actual concrete DocnoMapping class, have the name of the
        // class passed in as a property; this makes the mapper more general.
        String mappingClassName = job.get("DocnoMappingClass");
        if (mappingClassName == null) {
          throw new IllegalStateException("Job property \"DocnoMappingClass\" is not set.");
        }
        docMapping = (DocnoMapping) Class.forName(mappingClassName).newInstance();

        // Simply assume that the mappings file is the only file in the distributed cache.
        docMapping.loadMapping(localFiles[0], FileSystem.getLocal(job));
      } catch (Exception e) {
        // Chain the cause so the real failure (and its stack trace) reaches the task logs
        // through the thrown exception rather than a separate stderr dump.
        throw new RuntimeException("Error initializing DocnoMapping!", e);
      }
    }