Example #1
0
  /**
   * Runs PRA once for every relation listed in {@code relations_to_run.tsv} inside the splits
   * directory.
   *
   * <p>The method first normalizes the directory arguments so they end with a separator and
   * creates the output directory. It then writes {@code settings.txt} (the inputs used plus a copy
   * of the parameter file), processes each relation in turn, and finally appends the total wall
   * time to {@code timings.txt} and prints it to standard output.
   *
   * <p>When online augmentation is enabled, the KB is initialized once up front, and for every
   * relation the KB is augmented, the graph is resharded and the graph files are re-read before
   * PRA runs. Otherwise the graph files are read and the graph is sharded exactly once.
   *
   * @param kbDirectory directory holding the KB files (optionally including {@code node_names.tsv})
   * @param graphDirectory directory holding the graph files, including {@code graph_chi/edges.tsv}
   * @param splitsDirectory directory holding {@code relations_to_run.tsv}
   * @param parameterFile path of the PRA parameter file
   * @param topK integer as a string; only parsed when online augmentation is enabled
   * @param pathlength integer as a string; only parsed when online augmentation is enabled
   * @param isOA {@code "yes"} (case-insensitive) enables online augmentation; any other value,
   *     including {@code null}, disables it
   * @param outputBase directory that receives all output
   * @throws IOException if reading an input file or writing an output file fails
   * @throws NumberFormatException if online augmentation is enabled and {@code topK} or {@code
   *     pathlength} is not a valid integer
   */
  public void runPra(
      String kbDirectory,
      String graphDirectory,
      String splitsDirectory,
      String parameterFile,
      String topK,
      String pathlength,
      String isOA,
      String outputBase)
      throws IOException, InterruptedException, ClassNotFoundException, Exception {

    outputBase = fileUtil.addDirectorySeparatorIfNecessary(outputBase);
    kbDirectory = fileUtil.addDirectorySeparatorIfNecessary(kbDirectory);
    graphDirectory = fileUtil.addDirectorySeparatorIfNecessary(graphDirectory);
    splitsDirectory = fileUtil.addDirectorySeparatorIfNecessary(splitsDirectory);

    fileUtil.mkdirOrDie(outputBase);
    // Constant-first comparison so a null flag simply means "no augmentation" instead of an NPE.
    boolean isOnlineAug = "yes".equalsIgnoreCase(isOA);

    KB kb = null;
    PraConfig baseConfig = null;
    if (isOnlineAug) {
      logger.info("Initializing SVO Graph");
      long initStart = System.currentTimeMillis();
      kb =
          OnlineAugment.init(
              kbDirectory,
              graphDirectory,
              splitsDirectory,
              outputBase,
              Integer.parseInt(topK),
              Integer.parseInt(pathlength));
      long initEnd = System.currentTimeMillis();
      logger.info("Initialization took " + (initEnd - initStart) / 1000.00 + " seconds");
      // baseConfig stays null here; it is built per relation after each augmentation step.
    } else {
      baseConfig = loadBaseConfigFromGraph(kbDirectory, graphDirectory, parameterFile);
      reshardGraphChi(graphDirectory, outputBase);
    }

    long start = System.currentTimeMillis();
    FileWriter writer = fileUtil.getFileWriter(outputBase + "settings.txt");
    try {
      writer.write("KB used: " + kbDirectory + "\n");
      writer.write("Graph used: " + graphDirectory + "\n");
      writer.write("Splits used: " + splitsDirectory + "\n");
      writer.write("Parameter file used: " + parameterFile + "\n");
      writer.write("Parameters:\n");
      fileUtil.copyLines(fileUtil.getBufferedReader(parameterFile), writer);
      writer.write("End of parameters\n");
    } finally {
      // Close even when a write fails so the file handle is not leaked.
      writer.close();
    }

    String relationsFile = splitsDirectory + "relations_to_run.tsv";
    BufferedReader reader = fileUtil.getBufferedReader(relationsFile);
    try {
      String relation;
      // Each line of the relations file is used verbatim as a relation name.
      while ((relation = reader.readLine()) != null) {
        // Started before augmentation, so augmentation time counts towards training time.
        long startTrainTime = System.currentTimeMillis();
        if (isOnlineAug) {
          logger.info("Augmenting during training time");
          // The test code modifies the directory, so reset it before every augmentation.
          kb.setOutputDir(outputBase);
          kb = Corpus.startTrainAugmentation(kb, relation, true);

          // Reshard and then re-read all the graph files, in that order, after augmentation.
          reshardGraphChi(graphDirectory, outputBase);
          baseConfig = loadBaseConfigFromGraph(kbDirectory, graphDirectory, parameterFile);
        }

        PraConfig.Builder builder = new PraConfig.Builder(baseConfig);
        logger.info("\n\n\n\nRunning PRA for relation " + relation);

        parseKbFiles(kbDirectory, relation, builder, outputBase, fileUtil);

        String outdir = fileUtil.addDirectorySeparatorIfNecessary(outputBase + relation);
        fileUtil.mkdirs(outdir);
        builder.setOutputBase(outdir);

        initializeSplit(
            splitsDirectory, kbDirectory, relation, builder, new DatasetFactory(), fileUtil);

        PraConfig config = builder.build();

        // Cross-validate when the split populated allData; otherwise do a train/test run.
        boolean doCrossValidation = config.allData != null;
        if (doCrossValidation) {
          new PraTrainAndTester().crossValidate(config, kb, isOnlineAug, relation, startTrainTime);
        } else {
          new PraTrainAndTester().trainAndTest(config, kb, isOnlineAug, relation, startTrainTime);
        }
      }
    } finally {
      // The relations reader was previously never closed.
      reader.close();
    }

    long millis = System.currentTimeMillis() - start;
    int seconds = (int) (millis / 1000);
    int minutes = seconds / 60;
    seconds = seconds - minutes * 60;
    String summary = "Took " + minutes + " minutes and " + seconds + " seconds";

    // outputBase already ends with a separator, so no extra "/" is needed. Opened in append mode.
    BufferedWriter out = new BufferedWriter(new FileWriter(outputBase + "timings.txt", true));
    try {
      out.write(summary + "\n");
    } finally {
      out.close();
    }
    // NOTE(review): a kb.closeDB() call was commented out here in the original; confirm whether
    // the KB holds a resource that needs an explicit close.
    System.out.println(summary);
  }

  /**
   * Reads the graph files and the parameter file into a fresh builder, attaches an outputter, and
   * builds the resulting base configuration.
   *
   * <p>Declared with a broad {@code throws} clause to mirror {@link #runPra}, because the checked
   * exceptions of the project helpers called here are not visible from this method.
   */
  private PraConfig loadBaseConfigFromGraph(
      String kbDirectory, String graphDirectory, String parameterFile) throws Exception {
    PraConfig.Builder baseBuilder = new PraConfig.Builder();
    parseGraphFiles(graphDirectory, baseBuilder);
    baseBuilder.setFromParamFile(fileUtil.getBufferedReader(parameterFile));

    // The Outputter potentially uses the edge dictionary that is set in parseGraphFiles, so it
    // MUST be created after parseGraphFiles or things will break with really weird errors.
    // TODO(matt): I really should write a test for this...
    Map<String, String> nodeNames = null;
    String nodeNamesFile = kbDirectory + "node_names.tsv";
    if (fileUtil.fileExists(nodeNamesFile)) {
      nodeNames = fileUtil.readMapFromTsvFile(nodeNamesFile, true);
    }
    baseBuilder.setOutputter(new Outputter(baseBuilder.nodeDict, baseBuilder.edgeDict, nodeNames));
    return baseBuilder.build();
  }

  /**
   * Deletes the existing shards for {@code graph_chi} and shards {@code graph_chi/edges.tsv} again.
   * The number of shards is fixed at 2.
   */
  private void reshardGraphChi(String graphDirectory, String outputBase) throws Exception {
    Utils.deleteShards(graphDirectory + "graph_chi");
    GraphCreator gc = new GraphCreator(outputBase, false);
    gc.shardGraph(graphDirectory + "graph_chi/edges.tsv", 2);
  }