/**
 * Parses one tab-separated input line and emits its temperature keyed by year and month.
 *
 * <p>The first four fields of the line are read, in order, as year, month,
 * temperature and quality. The record is forwarded only when the quality is
 * at least {@code MINIMUM_QUALITY}; every other record is dropped.
 *
 * @param pactNull  input key; not read by this method
 * @param line      the raw input line
 * @param collector receives the (year/month, temperature) pair
 * @throws NumberFormatException          if one of the four fields cannot be parsed
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
 */
@Override
public void map(PactNull pactNull, PactString line, Collector<YearMonthKey, PactInteger> collector) {
    final String[] columns = line.toString().split("\t");

    // Fields are parsed in positional order, before the quality check is applied.
    final short parsedYear = Short.parseShort(columns[0]);
    final short parsedMonth = Short.parseShort(columns[1]);
    final int parsedTemperature = Integer.parseInt(columns[2]);
    final double parsedQuality = Double.parseDouble(columns[3]);

    // Written as a negated ">=" so that a NaN quality is rejected as well.
    if (!(parsedQuality >= MINIMUM_QUALITY)) {
        return;
    }

    final YearMonthKey outputKey = new YearMonthKey(parsedYear, parsedMonth);
    final PactInteger outputValue = new PactInteger(parsedTemperature);
    collector.collect(outputKey, outputValue);
}
@Override public void map(PactNull key, PactString value, Collector<PactString, PactString> out) { String[] items = value.toString().split(Delimiter); if (items.length == 2) // it's a docs-tuple { boolean ContainAllKeywords = true; for (String keyword : keywords) { if (!items[1].contains(keyword)) // check whether content contains keyword { ContainAllKeywords = false; } } if (ContainAllKeywords) { out.collect(new PactString(items[docs_KeyPosition]), value); } } else if (items.length == 3) // it's a ranks-tuple { if (Integer.parseInt(items[0]) > rank) // check whether the rank is bigger enough { out.collect(new PactString(items[ranks_KeyPosition]), value); } } };
@Override public void reduce( PactString key, Iterator<PactString> tuples, Collector<PactNull, PactString> out) { boolean rank_tuple_found = false; boolean doc_tuple_found = false; PactString rank_tuple = new PactString(); while (tuples.hasNext()) { PactString tuple = tuples.next(); if (tuple.toString().split(Delimiter).length == 3) { rank_tuple_found = true; rank_tuple.setValue(tuple.getValue()); } else if (tuple.toString().split(Delimiter).length == 2) { doc_tuple_found = true; } } if (rank_tuple_found && doc_tuple_found) { // emit the rank tuple out.collect(PactNull.getInstance(), rank_tuple); } }