@Override public void map( Writable key, Indexable doc, Mapper<Writable, Indexable, ContextPatternWritable, ContextPatternStatsWritable>.Context context) throws IOException, InterruptedException { // set current document mExtractor.setDocument(doc); // extract example counts while (mExtractor.getNextPair(mPair)) { if (mPatternTarget) { if (mExampleStats.containsKey(mPair.getPattern())) { mExampleStats.increment(mPair.getPattern()); } } else if (mContextTarget) { if (mExampleStats.containsKey(mPair.getContext())) { mExampleStats.increment(mPair.getContext()); } } // increment number of pairs mTotalTerms++; } }
/**
 * Resets the example statistics and registers every example listed in the
 * given file with an initial count of zero.
 *
 * <p>Each input line is split on tab characters. The column used as the
 * example is {@code ContextPatternWritable.PATTERN_FIELD} when
 * {@code mPatternTarget} is set, otherwise
 * {@code ContextPatternWritable.CONTEXT_FIELD} when {@code mContextTarget}
 * is set. Examples already present in {@code mExampleStats} are left
 * untouched. {@code mTotalTerms} is reset to zero as well.
 *
 * <p>The reader is closed even if reading or parsing a line fails.
 *
 * @param examplesPath path passed to {@code MavunoUtils.getBufferedReader}
 * @param conf         configuration passed to {@code MavunoUtils.getBufferedReader}
 * @throws IOException propagated from opening, reading, or closing the reader
 */
private void loadExample(String examplesPath, Configuration conf) throws IOException {
  mExampleStats.clear();
  mTotalTerms = 0L;

  // reusable lookup buffer; a fresh copy is stored only when a new key is added
  final Text example = new Text();

  BufferedReader reader = MavunoUtils.getBufferedReader(conf, examplesPath);
  try {
    String input;
    while ((input = reader.readLine()) != null) {
      String[] cols = input.split("\t");

      // NOTE(review): stays null when neither target flag is set — confirm
      // callers always enable exactly one of them
      String exampleStr = null;
      if (mPatternTarget) {
        exampleStr = cols[ContextPatternWritable.PATTERN_FIELD];
      } else if (mContextTarget) {
        exampleStr = cols[ContextPatternWritable.CONTEXT_FIELD];
      }

      example.set(exampleStr);
      if (!mExampleStats.containsKey(example)) {
        // copy the buffer: 'example' is overwritten on the next iteration
        mExampleStats.put(new Text(example), 0);
      }
    }
  } finally {
    // release the underlying stream even when a line fails to read or parse
    reader.close();
  }
}