@Override public void map( IntWritable vectorID, VectorComponentArrayWritable value, OutputCollector<GenericKey, GenericValue> output, Reporter reporter) throws IOException { // vectors sort before pairs using a secondary key < MINIMUM_ID for (int i = 1; i <= nstripes; i++) { outKey.set(vectorID.get(), Preprocesser.MINIMUM_ID - i); outValue.set(value); output.collect(outKey, outValue); } }
@Override public void map( LongWritable key, IndexItemArrayWritable value, OutputCollector<GenericKey, GenericValue> output, Reporter reporter) throws IOException { IndexItem[] postingList = value.toIndexItemArray(); for (int i = 1; i < postingList.length; i++) { for (int j = 0; j < i; j++) { IndexItem x = postingList[i]; IndexItem y = postingList[j]; // |y| >= t / maxweight(x) && |x| >= t / maxweight(y) if (compare(x.vectorLength(), Math.ceil(threshold / y.vectorMaxWeight())) >= 0 && compare(y.vectorLength(), Math.ceil(threshold / x.vectorMaxWeight())) >= 0 // tight upper bound on similarity score && compare( min(x.vectorMaxWeight() * y.vectorSum(), y.vectorMaxWeight() * x.vectorSum()), threshold) >= 0) { // positional filter // && compare( // min(x.positionalMaxWeight() * y.positionalSum(), // y.positionalMaxWeight() * x.positionalSum()) // + x.getWeight() * y.getWeight(), threshold) >= 0) if (j % REPORTER_INTERVAL == 0) reporter.progress(); int lpv = IndexItem.getLeastPrunedVectorID(x, y); int mpv = IndexItem.getMostPrunedVectorID(x, y); float psim = (float) (x.getWeight() * y.getWeight()); outKey.set(lpv, mpv); payload.set(mpv, psim); outValue.set(payload); output.collect(outKey, outValue); reporter.incrCounter(APS.ADDEND, 1); } } } }