public void runPra( String kbDirectory, String graphDirectory, String splitsDirectory, String parameterFile, String topK, String pathlength, String isOA, String outputBase) throws IOException, InterruptedException, ClassNotFoundException, Exception { outputBase = fileUtil.addDirectorySeparatorIfNecessary(outputBase); kbDirectory = fileUtil.addDirectorySeparatorIfNecessary(kbDirectory); graphDirectory = fileUtil.addDirectorySeparatorIfNecessary(graphDirectory); splitsDirectory = fileUtil.addDirectorySeparatorIfNecessary(splitsDirectory); fileUtil.mkdirOrDie(outputBase); boolean isOnlineAug = false; if (isOA.equalsIgnoreCase("yes")) isOnlineAug = true; KB kb = null; PraConfig baseConfig = null; PraConfig.Builder baseBuilder = null; if (isOnlineAug) { logger.info("Initializing SVO Graph"); long initStart = System.currentTimeMillis(); kb = OnlineAugment.init( kbDirectory, graphDirectory, splitsDirectory, outputBase, Integer.parseInt(topK), Integer.parseInt(pathlength)); long initEnd = System.currentTimeMillis(); logger.info("Initialization took " + (initEnd - initStart) / 1000.00 + " seconds"); // ImportDriver svo = new ImportDriver(); // svo.readSVOGraph(); // kb.setSVONodeDict(svo.getSVONodeDict()); // kb.setSVOAdjList(svo.getSVOAdjList()); } else { baseBuilder = new PraConfig.Builder(); parseGraphFiles(graphDirectory, baseBuilder); baseBuilder.setFromParamFile(fileUtil.getBufferedReader(parameterFile)); // This call potentially uses the edge dictionary that's set in // parseGraphFiles - this MUST be // called after parseGraphFiles, or things will break with really // weird // errors. TODO(matt): I // really should write a test for this... Map<String, String> nodeNames = null; if (fileUtil.fileExists(kbDirectory + "node_names.tsv")) { nodeNames = fileUtil.readMapFromTsvFile(kbDirectory + "node_names.tsv", true); } Outputter outputter = new Outputter(baseBuilder.nodeDict, baseBuilder.edgeDict, nodeNames); baseBuilder.setOutputter(outputter); baseConfig = baseBuilder.build(); Utils.deleteShards(graphDirectory + "graph_chi"); GraphCreator gc = new GraphCreator(outputBase, false); gc.shardGraph(graphDirectory + "graph_chi/edges.tsv", 2); } long start = System.currentTimeMillis(); FileWriter writer = fileUtil.getFileWriter(outputBase + "settings.txt"); writer.write("KB used: " + kbDirectory + "\n"); writer.write("Graph used: " + graphDirectory + "\n"); writer.write("Splits used: " + splitsDirectory + "\n"); writer.write("Parameter file used: " + parameterFile + "\n"); writer.write("Parameters:\n"); fileUtil.copyLines(fileUtil.getBufferedReader(parameterFile), writer); writer.write("End of parameters\n"); writer.close(); String relationsFile = splitsDirectory + "relations_to_run.tsv"; String line; BufferedReader reader = fileUtil.getBufferedReader(relationsFile); while ((line = reader.readLine()) != null) { String relation = line; long startTrainTime = System.currentTimeMillis(); if (isOnlineAug) { // ******************* // Online Augmentation - Training Time logger.info("Augmenting during training time"); // Augment during training time // The test code modifies the directory kb.setOutputDir(outputBase); kb = Corpus.startTrainAugmentation(kb, relation, true); // Bhushan, shard the graph. Num shards fixed at 2 Utils.deleteShards(graphDirectory + "graph_chi"); GraphCreator gc = new GraphCreator(outputBase, false); gc.shardGraph(graphDirectory + "graph_chi/edges.tsv", 2); // ******************* /* Reread all the graph files */ baseBuilder = new PraConfig.Builder(); parseGraphFiles(graphDirectory, baseBuilder); baseBuilder.setFromParamFile(fileUtil.getBufferedReader(parameterFile)); Map<String, String> nodeNames = null; if (fileUtil.fileExists(kbDirectory + "node_names.tsv")) { nodeNames = fileUtil.readMapFromTsvFile(kbDirectory + "node_names.tsv", true); } Outputter outputter = new Outputter(baseBuilder.nodeDict, baseBuilder.edgeDict, nodeNames); baseBuilder.setOutputter(outputter); baseConfig = baseBuilder.build(); /* Finished Rereading the graph files */ } PraConfig.Builder builder = new PraConfig.Builder(baseConfig); logger.info("\n\n\n\nRunning PRA for relation " + relation); boolean doCrossValidation = false; parseKbFiles(kbDirectory, relation, builder, outputBase, fileUtil); String outdir = fileUtil.addDirectorySeparatorIfNecessary(outputBase + relation); fileUtil.mkdirs(outdir); builder.setOutputBase(outdir); initializeSplit( splitsDirectory, kbDirectory, relation, builder, new DatasetFactory(), fileUtil); PraConfig config = builder.build(); if (config.allData != null) { doCrossValidation = true; } // Run PRA if (doCrossValidation) { new PraTrainAndTester().crossValidate(config, kb, isOnlineAug, relation, startTrainTime); } else { new PraTrainAndTester().trainAndTest(config, kb, isOnlineAug, relation, startTrainTime); } } long end = System.currentTimeMillis(); long millis = end - start; int seconds = (int) (millis / 1000); int minutes = seconds / 60; seconds = seconds - minutes * 60; BufferedWriter out = new BufferedWriter(new FileWriter(outputBase + "/timings.txt", true)); out.write("Took " + minutes + " minutes and " + seconds + " seconds\n"); out.flush(); out.close(); // kb.closeDB(); System.out.println("Took " + minutes + " minutes and " + seconds + " seconds"); writer.close(); }