@Override
    /**
     * Emits one Structure record for the node itself, then one Mass record per outgoing edge.
     *
     * @param nid key under which the node's structure record is re-emitted
     * @param node node whose adjacency list and per-source PageRank values are read
     * @param context sink for emitted records and job counters
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    public void map(IntWritable nid, PersonalizedPageRankNode node, Context context)
        throws IOException, InterruptedException {
      final ArrayListOfIntsWritable outLinks = node.getAdjacenyList();

      // Re-emit the node's id and adjacency list as a Structure-typed record.
      intermediateStructure.setNodeId(node.getNodeId());
      intermediateStructure.setType(PersonalizedPageRankNode.Type.Structure);
      intermediateStructure.setAdjacencyList(outLinks);
      context.write(nid, intermediateStructure);

      int emitted = 0;
      final int fanOut = outLinks.size();

      // Only nodes with outgoing edges have mass to hand out.
      if (fanOut > 0) {
        final int sourceCount = sources.size();

        // Every neighbor receives the same amount: the node's value minus log(fanOut).
        // NOTE(review): subtracting a log suggests the values are kept in log space, which
        // would make this a division by the out-degree; confirm.
        final float logFanOut = (float) StrictMath.log(fanOut);
        final float[] share = new float[sourceCount];
        for (int s = 0; s < sourceCount; s++) {
          share[s] = node.getPageRank(s) - logFanOut;
        }

        context.getCounter(PageRank.edges).increment(fanOut);

        // One Mass-typed message per neighbor, keyed by the neighbor's id.
        for (int k = 0; k < fanOut; k++) {
          final int target = outLinks.get(k);
          neighbor.set(target);
          intermediateMass.setNodeId(target);
          intermediateMass.setType(PersonalizedPageRankNode.Type.Mass);

          for (int s = 0; s < sourceCount; s++) {
            intermediateMass.setPageRank(s, share[s]);
          }

          context.write(neighbor, intermediateMass);
          emitted++;
        }
      }

      // Per-call counters: one node processed, plus however many mass messages were sent.
      context.getCounter(PageRank.nodes).increment(1);
      context.getCounter(PageRank.massMessages).increment(emitted);
    }
// Example no. 2
// 0
  /**
   * Compares sentence pairs by Jaccard similarity and writes the scores to two local files.
   *
   * <p>Three MapFiles are loaded fully into memory from fixed relative paths
   * ({@code id2sentence.map}, {@code sentence2translation.map}, {@code sentencematchpairs.map}).
   * For each of the first 1000 even ids {@code 2*i} and each of the first 1000 odd ids
   * {@code 2*j+1}, the maximum similarity between any sentence listed for {@code 2*i} and the
   * sentence stored under {@code (2*j+1) * nSamples} is computed. The score is appended to the
   * match file when the id pair is present in the match-pair set, otherwise to the no-match file.
   *
   * @param args command-line arguments; the match output path, the no-match output path and the
   *     number of samples are all required
   * @return 0 on success; -1 if the arguments are missing or malformed, or if an I/O error occurs
   * @throws Exception if job configuration or similarity computation fails unexpectedly
   */
  @SuppressWarnings("static-access")
  @Override
  public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
        OptionBuilder.withArgName("path")
            .hasArg()
            .withDescription("output path")
            .create(matchOutput));
    options.addOption(
        OptionBuilder.withArgName("path")
            .hasArg()
            .withDescription("output path")
            .create(nomatchOutput));
    options.addOption(
        OptionBuilder.withArgName("integer")
            .hasArg()
            .withDescription("number of samples")
            .create(nSamplesOption));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
      cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
      System.err.println("Error parsing command line: " + exp.getMessage());
      return -1;
    }

    if (!cmdline.hasOption(matchOutput)
        || !cmdline.hasOption(nomatchOutput)
        || !cmdline.hasOption(nSamplesOption)) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.setWidth(120);
      formatter.printHelp(this.getClass().getName(), options);
      ToolRunner.printGenericCommandUsage(System.out);
      return -1;
    }

    String matchOutputPath = cmdline.getOptionValue(matchOutput);
    String nomatchOutputPath = cmdline.getOptionValue(nomatchOutput);
    String nSamplesIn = cmdline.getOptionValue(nSamplesOption);

    // Reject a malformed sample count up front instead of letting NumberFormatException escape.
    int nSamples;
    try {
      nSamples = Integer.parseInt(nSamplesIn);
    } catch (NumberFormatException e) {
      System.err.println("Invalid number of samples: " + nSamplesIn);
      return -1;
    }

    LOG.info("Tool name: " + this.getClass().getName());

    JobConf conf = new JobConf(getConf(), JaccardCompare.class);
    conf.setJobName("JaccardCompare");

    int nSentences = 1000;

    // Declared outside the try so the finally block can release whatever was opened.
    BufferedWriter dosM = null;
    BufferedWriter dosNM = null;
    MapFile.Reader id2sentenceReader = null;
    MapFile.Reader sentence2translationReader = null;
    MapFile.Reader sentencematchReader = null;
    try {
      dosM = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(matchOutputPath)));
      dosNM = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(nomatchOutputPath)));

      // A fresh value object is allocated per entry because the map stores the reference.
      id2sentenceReader = new MapFile.Reader(new Path("id2sentence.map/part-00000"), conf);
      HashMap<Integer, ArrayListWritable<Text>> id2sentence =
          new HashMap<Integer, ArrayListWritable<Text>>();
      IntWritable key = new IntWritable();
      ArrayListWritable<Text> val = new ArrayListWritable<Text>();
      while (id2sentenceReader.next(key, val)) {
        id2sentence.put(key.get(), val);
        val = new ArrayListWritable<Text>();
      }

      sentence2translationReader =
          new MapFile.Reader(new Path("sentence2translation.map/part-00000"), conf);
      HashMap<Integer, ArrayListOfIntsWritable> sentence2translation =
          new HashMap<Integer, ArrayListOfIntsWritable>();
      IntWritable key2 = new IntWritable();
      ArrayListOfIntsWritable val2 = new ArrayListOfIntsWritable();
      while (sentence2translationReader.next(key2, val2)) {
        sentence2translation.put(key2.get(), val2);
        val2 = new ArrayListOfIntsWritable();
      }

      sentencematchReader =
          new MapFile.Reader(new Path("sentencematchpairs.map/part-00000"), conf);
      HashSet<PairOfInts> sentencematchpairs = new HashSet<PairOfInts>();
      PairOfInts key3 = new PairOfInts();
      IntWritable val3 = new IntWritable();
      while (sentencematchReader.next(key3, val3)) {
        sentencematchpairs.add(key3);
        key3 = new PairOfInts();
      }

      System.out.println("Done reading");

      // Reused as a lookup probe only; it is never stored in the set.
      PairOfInts p = new PairOfInts();
      for (int i = 0; i < nSentences; i++) {
        if (i % 100 == 0) {
          System.out.println("eLine " + i);
        }
        ArrayListOfIntsWritable transIdList = sentence2translation.get(2 * i);
        if (transIdList == null) {
          // Iterating a missing entry would throw NullPointerException; skip it instead.
          LOG.warn("No translation list for sentence id " + (2 * i) + "; skipping");
          continue;
        }
        for (int j = 0; j < nSentences; j++) {
          ArrayListWritable<Text> fSentence = id2sentence.get((2 * j + 1) * nSamples);

          // Track the best-scoring candidate so the diagnostic below reports the sentence that
          // actually produced jsimMax, not whichever one happened to be compared last.
          float jsimMax = -1.0f;
          ArrayListWritable<Text> bestESentence = null;
          for (int id : transIdList) {
            ArrayListWritable<Text> eSentence = id2sentence.get(id);
            float jsim = JaccardSim.jaccardSim(eSentence, fSentence);
            if (jsim > jsimMax) {
              jsimMax = jsim;
              bestESentence = eSentence;
            }
          }

          // The probe pair is always set as (smaller id, larger id).
          if (2 * i < 2 * j + 1) {
            p.set(2 * i, 2 * j + 1);
          } else {
            p.set(2 * j + 1, 2 * i);
          }

          if (sentencematchpairs.contains(p)) {
            if (jsimMax < .5) {
              System.out.println("Low match: ");
              System.out.println("\teSentence: " + i + " " + bestESentence);
              System.out.println("\tfSentence: " + j + " " + fSentence);
            }
            dosM.write(Float.toString(jsimMax));
            dosM.write("\n");
          } else {
            dosNM.write(Float.toString(jsimMax));
            dosNM.write("\n");
          }
        }
      }

      // Flush here so that a failed write is reported as a failure rather than being lost in
      // the best-effort close below.
      dosM.flush();
      dosNM.flush();
    } catch (IOException e) {
      // Report failure to the caller instead of printing a stack trace and returning success.
      LOG.error("Jaccard comparison failed", e);
      return -1;
    } finally {
      closeQuietly(sentencematchReader);
      closeQuietly(sentence2translationReader);
      closeQuietly(id2sentenceReader);
      closeQuietly(dosM);
      closeQuietly(dosNM);
    }

    return 0;
  }

  /** Closes {@code resource} if it is non-null, logging a close failure instead of propagating it. */
  private void closeQuietly(java.io.Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (IOException e) {
      LOG.error("Failed to close resource", e);
    }
  }