// Just for testing. Recommend instead is mallet/bin/vectors2topics public static void main(String[] args) { InstanceList ilist = InstanceList.load(new File(args[0])); int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000; int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20; System.out.println("Data loaded."); TopicalNGrams tng = new TopicalNGrams(10); tng.estimate(ilist, 200, 1, 0, null, new Randoms()); tng.printTopWords(60, true); }
// Recommended to use mallet/bin/vectors2topics instead. public static void main(String[] args) throws IOException { InstanceList ilist = InstanceList.load(new File(args[0])); int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000; int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20; System.out.println("Data loaded."); LDA lda = new LDA(10); lda.estimate(ilist, numIterations, 50, 0, null, new Randoms()); // should be 1100 lda.printTopWords(numTopWords, true); lda.printDocumentTopics(new File(args[0] + ".lda")); }
// Command-line entry point: load an instance list and report per-feature counts.
public static void main(String[] args) throws Exception {
    CommandOption.setSummary(
        FeatureCountTool.class,
        "Print feature counts and instances per feature (eg document frequencies) in an instance list");
    CommandOption.process(FeatureCountTool.class, args);

    // inputFile is a CommandOption field populated by process() above.
    File input = new File(inputFile.value);
    FeatureCountTool tool = new FeatureCountTool(InstanceList.load(input));
    tool.count();
    tool.printCounts();
}
public static void main(String[] args) { // String malletFile = "dataset/vlc_lectures.all.en.f8.mallet"; // String simFile = "dataset/vlc/sim5p.csv"; // String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt"; // String queryFile = "dataset/task1_query.en.f8.txt"; // String targetFile = "dataset/task1_target.en.f8.txt"; String malletFile = "dataset/vlc/folds/all.0.4189.mallet"; String trainMalletFile = "dataset/vlc/folds/training.0.mallet"; String testMalletFile = "dataset/vlc/folds/test.0.mallet"; String queryFile = "dataset/vlc/folds/query.0.csv"; String linkFile = "dataset/vlc/folds/trainingPairs.0.csv"; String targetFile = "dataset/vlc/folds/target.0.csv"; String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt"; int numTopics = 160; int numIterations = 200; double alpha = 0.0016; double beta = 0.0001; InstanceList train = InstanceList.load(new File(trainMalletFile)); InstanceList test = InstanceList.load(new File(testMalletFile)); SeparateParallelLda spl = new SeparateParallelLda(train, test); spl.trainDocuments(numTopics, numIterations, alpha, beta); spl.generateTestInference(); spl.lda.printTopWords(System.out, 10, true); BasicTask1Solution solver = new Task1SolutionWithSeparateData(spl); double precision; try { solver.retrieveTask1Solution(queryFile, solutionFile); precision = Task1Solution.evaluateResult(targetFile, solutionFile); System.out.println( String.format( "SeparateParallelLda: iteration: %d, precisoion: %f", numIterations, precision)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
// Entry point: train a ParallelTopicModel and optionally dump diagnostics as XML.
// args: <instance-list file> <numTopics> [diagnostics XML output file]
public static void main(String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);

    // 5.0 / 0.01 are the alphaSum / beta priors passed through unchanged.
    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);
    model.estimate();

    // 20 = number of top words per topic examined by the diagnostics.
    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
        // FIX: try-with-resources guarantees the writer is flushed and closed
        // even if toXML() throws (the original leaked it on that path).
        try (PrintWriter out = new PrintWriter(args[2])) {
            out.println(diagnostics.toXML());
        }
    }
}