@Override public void map(IntWritable nid, PersonalizedPageRankNode node, Context context) throws IOException, InterruptedException { for (int sourceIndex = 0; sourceIndex < sources.size(); sourceIndex++) { float p = node.getPageRank(sourceIndex); float jump = 0.0f; float link = 0.0f; // float jump = (float) (Math.log(ALPHA) - Math.log(nodeCnt)); // IS A SOURCE NODE if (sources.get(sourceIndex) == node.getNodeId()) { jump = (float) Math.log(ALPHA); link = (float) Math.log(1.0f - ALPHA) + sumLogProbs(p, (float) (Math.log(missingMass[sourceIndex]))); } else { // NOT A SOURCE NODE jump = (float) Math.log(0); link = (float) Math.log(1.0f - ALPHA) + p; } p = sumLogProbs(jump, link); node.setPageRank(sourceIndex, p); } context.write(nid, node); }
@Override public void reduce(IntWritable nid, Iterable<PersonalizedPageRankNode> values, Context context) throws IOException, InterruptedException { int massMessages = 0; // Remember, PageRank mass is stored as a log prob. float[] mass = new float[sources.size()]; for (int i = 0; i < sources.size(); i++) { mass[i] = Float.NEGATIVE_INFINITY; } for (PersonalizedPageRankNode n : values) { if (n.getType() == PersonalizedPageRankNode.Type.Structure) { // Simply pass along node structure. context.write(nid, n); } else { // Accumulate PageRank mass contributions. for (int j = 0; j < sources.size(); j++) { mass[j] = sumLogProbs(mass[j], n.getPageRank(j)); } massMessages++; } } // Emit aggregated results. if (massMessages > 0) { intermediateMass.setNodeId(nid.get()); intermediateMass.setType(PersonalizedPageRankNode.Type.Mass); for (int i = 0; i < sources.size(); i++) { intermediateMass.setPageRank(i, mass[i]); } context.write(nid, intermediateMass); } }
@Override public void map(IntWritable nid, PersonalizedPageRankNode node, Context context) throws IOException, InterruptedException { // Pass along node structure. intermediateStructure.setNodeId(node.getNodeId()); intermediateStructure.setType(PersonalizedPageRankNode.Type.Structure); intermediateStructure.setAdjacencyList(node.getAdjacenyList()); context.write(nid, intermediateStructure); int massMessages = 0; // Distribute PageRank mass to neighbors (along outgoing edges). if (node.getAdjacenyList().size() > 0) { // Each neighbor gets an equal share of PageRank mass. ArrayListOfIntsWritable list = node.getAdjacenyList(); float mass[] = new float[sources.size()]; for (int i = 0; i < sources.size(); i++) { mass[i] = node.getPageRank(i) - (float) StrictMath.log(list.size()); } context.getCounter(PageRank.edges).increment(list.size()); // Iterate over neighbors. for (int i = 0; i < list.size(); i++) { neighbor.set(list.get(i)); intermediateMass.setNodeId(list.get(i)); intermediateMass.setType(PersonalizedPageRankNode.Type.Mass); for (int j = 0; j < sources.size(); j++) { intermediateMass.setPageRank(j, mass[j]); } // Emit messages with PageRank mass to neighbors. context.write(neighbor, intermediateMass); massMessages++; } } // Bookkeeping. context.getCounter(PageRank.nodes).increment(1); context.getCounter(PageRank.massMessages).increment(massMessages); }
@Override public void reduce( IntWritable nid, Iterable<PersonalizedPageRankNode> iterable, Context context) throws IOException, InterruptedException { Iterator<PersonalizedPageRankNode> values = iterable.iterator(); // Create the node structure that we're going to assemble back together from shuffled pieces. PersonalizedPageRankNode node = new PersonalizedPageRankNode(); node.setType(PersonalizedPageRankNode.Type.Complete); node.setNodeId(nid.get()); int massMessagesReceived = 0; int structureReceived = 0; float[] mass = new float[sources.size()]; for (int i = 0; i < sources.size(); i++) { mass[i] = Float.NEGATIVE_INFINITY; } while (values.hasNext()) { PersonalizedPageRankNode n = values.next(); if (n.getType().equals(PersonalizedPageRankNode.Type.Structure)) { // This is the structure; update accordingly. ArrayListOfIntsWritable list = n.getAdjacenyList(); structureReceived++; node.setAdjacencyList(list); } else { // This is a message that contains PageRank mass; accumulate. for (int i = 0; i < sources.size(); i++) { mass[i] = sumLogProbs(mass[i], n.getPageRank(i)); } massMessagesReceived++; } } // Update the final accumulated PageRank mass. for (int i = 0; i < sources.size(); i++) { node.setPageRank(i, mass[i]); } context.getCounter(PageRank.massMessagesReceived).increment(massMessagesReceived); // Error checking. if (structureReceived == 1) { // Everything checks out, emit final node structure with updated PageRank value. context.write(nid, node); // Keep track of total PageRank mass. for (int i = 0; i < sources.size(); i++) { totalMass[i] = sumLogProbs(totalMass[i], mass[i]); } } else if (structureReceived == 0) { // We get into this situation if there exists an edge pointing to a node which has no // corresponding node structure (i.e., PageRank mass was passed to a non-existent node)... // log and count but move on. context.getCounter(PageRank.missingStructure).increment(1); LOG.warn( "No structure received for nodeid: " + nid.get() + " mass: " + massMessagesReceived); // It's important to note that we don't add the PageRank mass to total... if PageRank mass // was sent to a non-existent node, it should simply vanish. } else { // This shouldn't happen! throw new RuntimeException( "Multiple structure received for nodeid: " + nid.get() + " mass: " + massMessagesReceived + " struct: " + structureReceived); } }