@Override public void map(IntWritable nid, PersonalizedPageRankNode node, Context context) throws IOException, InterruptedException { // Pass along node structure. intermediateStructure.setNodeId(node.getNodeId()); intermediateStructure.setType(PersonalizedPageRankNode.Type.Structure); intermediateStructure.setAdjacencyList(node.getAdjacenyList()); context.write(nid, intermediateStructure); int massMessages = 0; // Distribute PageRank mass to neighbors (along outgoing edges). if (node.getAdjacenyList().size() > 0) { // Each neighbor gets an equal share of PageRank mass. ArrayListOfIntsWritable list = node.getAdjacenyList(); float mass[] = new float[sources.size()]; for (int i = 0; i < sources.size(); i++) { mass[i] = node.getPageRank(i) - (float) StrictMath.log(list.size()); } context.getCounter(PageRank.edges).increment(list.size()); // Iterate over neighbors. for (int i = 0; i < list.size(); i++) { neighbor.set(list.get(i)); intermediateMass.setNodeId(list.get(i)); intermediateMass.setType(PersonalizedPageRankNode.Type.Mass); for (int j = 0; j < sources.size(); j++) { intermediateMass.setPageRank(j, mass[j]); } // Emit messages with PageRank mass to neighbors. context.write(neighbor, intermediateMass); massMessages++; } } // Bookkeeping. context.getCounter(PageRank.nodes).increment(1); context.getCounter(PageRank.massMessages).increment(massMessages); }
@SuppressWarnings("static-access") @Override public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("output path") .create(matchOutput)); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("output path") .create(nomatchOutput)); options.addOption( OptionBuilder.withArgName("integer") .hasArg() .withDescription("number of samples") .create(nSamplesOption)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(matchOutput) || !cmdline.hasOption(nomatchOutput) || !cmdline.hasOption(nSamplesOption)) { HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String matchOutputPath = cmdline.getOptionValue(matchOutput); String nomatchOutputPath = cmdline.getOptionValue(nomatchOutput); String nSamplesIn = cmdline.getOptionValue(nSamplesOption); LOG.info("Tool name: " + this.getClass().getName()); // LOG.info(" - input file: " + inputPath); // LOG.info(" - output file: " + outputPath); JobConf conf = new JobConf(getConf(), JaccardCompare.class); conf.setJobName(String.format("JaccardCompare")); // FileInputFormat.setInputPaths(conf, new Path(inputPath)); // FileOutputFormat.setOutputPath(conf, new Path(outputPath)); int nSentences = 1000; int nSamples = Integer.parseInt(nSamplesIn); try { File matchFile = new File(matchOutputPath); File nomatchFile = new File(nomatchOutputPath); FileOutputStream fosM = null, fosNM = null; BufferedWriter dosM = null, dosNM = null; fosM = new FileOutputStream(matchFile); fosNM = new FileOutputStream(nomatchFile); dosM = new BufferedWriter(new OutputStreamWriter(fosM)); dosNM = new BufferedWriter(new OutputStreamWriter(fosNM)); MapFile.Reader id2sentenceReader = new MapFile.Reader(new Path("id2sentence.map/part-00000"), conf); HashMap<Integer, ArrayListWritable<Text>> id2sentence = new HashMap<Integer, ArrayListWritable<Text>>(); IntWritable key = new IntWritable(); ArrayListWritable<Text> val = new ArrayListWritable<Text>(); while (id2sentenceReader.next(key, val)) { id2sentence.put(key.get(), val); val = new ArrayListWritable<Text>(); } MapFile.Reader sentence2translationReader = new MapFile.Reader(new Path("sentence2translation.map/part-00000"), conf); HashMap<Integer, ArrayListOfIntsWritable> sentence2translation = new HashMap<Integer, ArrayListOfIntsWritable>(); IntWritable key2 = new IntWritable(); ArrayListOfIntsWritable val2 = new ArrayListOfIntsWritable(); while (sentence2translationReader.next(key2, val2)) { sentence2translation.put(key2.get(), val2); val2 = new ArrayListOfIntsWritable(); } MapFile.Reader sentencematchReader = new MapFile.Reader(new Path("sentencematchpairs.map/part-00000"), conf); HashSet<PairOfInts> sentencematchpairs = new HashSet<PairOfInts>(); PairOfInts key3 = new PairOfInts(); IntWritable val3 = new IntWritable(); while (sentencematchReader.next(key3, val3)) { sentencematchpairs.add(key3); key3 = new PairOfInts(); } System.out.println("Done reading"); PairOfInts p = new PairOfInts(); IntWritable match; IntWritable eLineNum = new IntWritable(); IntWritable eLineId = new IntWritable(); ArrayListWritable<Text> eSentence = new ArrayListWritable<Text>(); for (int i = 0; i < nSentences; i++) { if (i % 100 == 0) System.out.println("eLine " + i); // eLineNum.set(2*i); ArrayListOfIntsWritable transIdList = sentence2translation.get(2 * i); // ArrayListOfIntsWritable transIdList = new ArrayListOfIntsWritable(); // sentence2translationReader.get(eLineNum, transIdList); // System.out.println("transIdList " + transIdList); for (int j = 0; j < nSentences; j++) { // System.out.println("fLine " + j); ArrayListWritable<Text> fSentence = id2sentence.get((2 * j + 1) * nSamples); // ArrayListWritable<Text> fSentence = new ArrayListWritable<Text>(); // IntWritable fLineId = new IntWritable(); // fLineId.set((2*j+1)*nSamples); // id2sentenceReader.get(fLineId, fSentence); // System.out.println("fLineId " + (2*j+1)*nSamples + " FSentence " + fSentence); float jsimMax = -1.0f; float jsimAvg = 0.0f; for (int id : transIdList) { eSentence = id2sentence.get(id); // eLineId.set(id); // id2sentenceReader.get(eLineId, eSentence); float jsim = JaccardSim.jaccardSim(eSentence, fSentence); // System.out.println("\teSentence " + eSentence + " " + jsim); jsimAvg += jsim; if (jsim > jsimMax) { jsimMax = jsim; } } jsimAvg = jsimAvg / transIdList.size(); if (2 * i < 2 * j + 1) { p.set(2 * i, 2 * j + 1); } else { p.set(2 * j + 1, 2 * i); } // match = new IntWritable(); // match = (IntWritable) sentencematchReader.get(p, match); // if(match != null){ if (sentencematchpairs.contains(p)) { if (jsimMax < .5) { System.out.println("Low match: "); System.out.println("\teSentence: " + i + " " + eSentence); System.out.println("\tfSentence: " + j + " " + fSentence); } // System.out.println("match"); dosM.write(Float.toString(jsimMax)); // dosM.write(Float.toString(jsimAvg)); dosM.write("\n"); } else { // System.out.println("no match"); dosNM.write(Float.toString(jsimMax)); // dosNM.write(Float.toString(jsimAvg)); dosNM.write("\n"); } } } sentencematchReader.close(); sentence2translationReader.close(); id2sentenceReader.close(); dosM.close(); dosNM.close(); } catch (IOException e2) { // TODO Auto-generated catch block e2.printStackTrace(); } // Delete the output directory if it exists already. // Path outputDir = new Path(outputPath); // FileSystem.get(conf).delete(outputDir, true); // JobClient.runJob(conf); return 0; }