Example 1
 // Just for testing. Recommended instead: mallet/bin/vectors2topics
 public static void main(String[] args) {
   InstanceList ilist = InstanceList.load(new File(args[0]));
   int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000;
   int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20;
   System.out.println("Data loaded.");
   TopicalNGrams tng = new TopicalNGrams(10);
   // estimate(documents, numIterations, showTopicsInterval, outputModelInterval, outputModelFilename, random)
   tng.estimate(ilist, numIterations, 1, 0, null, new Randoms());
   tng.printTopWords(numTopWords, true);
 }
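The main above expects a serialized InstanceList (a .mallet file) as args[0], as do the other examples on this page. As a reference point, here is a minimal sketch of building and saving such a file with MALLET's standard importing pipes; the class name BuildInstanceList, the input file documents.txt, the stoplist path stoplists/en.txt, and the name/label/text line layout are assumptions for illustration.

import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.CsvIterator;
import cc.mallet.types.InstanceList;

import java.io.*;
import java.util.ArrayList;
import java.util.regex.Pattern;

public class BuildInstanceList { // hypothetical helper for illustration
  public static void main(String[] args) throws Exception {
    // Lowercase, tokenize, remove stopwords, and map tokens to feature sequences.
    ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
    pipeList.add(new CharSequenceLowercase());
    pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")));
    pipeList.add(new TokenSequenceRemoveStopwords(new File("stoplists/en.txt"), "UTF-8", false, false, false));
    pipeList.add(new TokenSequence2FeatureSequence());

    InstanceList instances = new InstanceList(new SerialPipes(pipeList));

    // One document per line: <name> <label> <text...>
    Reader reader = new InputStreamReader(new FileInputStream(new File("documents.txt")), "UTF-8");
    instances.addThruPipe(new CsvIterator(reader,
        Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"),
        3, 2, 1)); // group 3 = data, group 2 = label, group 1 = name

    instances.save(new File("documents.mallet")); // load later with InstanceList.load(...)
  }
}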
Example 2
 // Recommended to use mallet/bin/vectors2topics instead.
 public static void main(String[] args) throws IOException {
   InstanceList ilist = InstanceList.load(new File(args[0]));
   int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000;
   int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20;
   System.out.println("Data loaded.");
   LDA lda = new LDA(10);
   lda.estimate(ilist, numIterations, 50, 0, null, new Randoms()); // should be 1100
   lda.printTopWords(numTopWords, true);
   lda.printDocumentTopics(new File(args[0] + ".lda"));
 }
Example 3
  public static void main(String[] args) throws Exception {
    CommandOption.setSummary(
        FeatureCountTool.class,
        "Print feature counts and instances per feature (eg document frequencies) in an instance list");
    CommandOption.process(FeatureCountTool.class, args);

    InstanceList instances = InstanceList.load(new File(inputFile.value));
    FeatureCountTool counter = new FeatureCountTool(instances);
    counter.count();
    counter.printCounts();
  }
Example 4
  public static void main(String[] args) {
    //		String malletFile = "dataset/vlc_lectures.all.en.f8.mallet";
    //		String simFile = "dataset/vlc/sim5p.csv";
    //		String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt";
    //		String queryFile = "dataset/task1_query.en.f8.txt";
    //		String targetFile = "dataset/task1_target.en.f8.txt";

    String malletFile = "dataset/vlc/folds/all.0.4189.mallet";
    String trainMalletFile = "dataset/vlc/folds/training.0.mallet";
    String testMalletFile = "dataset/vlc/folds/test.0.mallet";
    String queryFile = "dataset/vlc/folds/query.0.csv";
    String linkFile = "dataset/vlc/folds/trainingPairs.0.csv";
    String targetFile = "dataset/vlc/folds/target.0.csv";
    String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt";

    int numTopics = 160;
    int numIterations = 200;
    double alpha = 0.0016;
    double beta = 0.0001;

    InstanceList train = InstanceList.load(new File(trainMalletFile));
    InstanceList test = InstanceList.load(new File(testMalletFile));
    SeparateParallelLda spl = new SeparateParallelLda(train, test);
    spl.trainDocuments(numTopics, numIterations, alpha, beta);
    spl.generateTestInference();
    spl.lda.printTopWords(System.out, 10, true);
    BasicTask1Solution solver = new Task1SolutionWithSeparateData(spl);

    double precision;
    try {
      solver.retrieveTask1Solution(queryFile, solutionFile);
      precision = Task1Solution.evaluateResult(targetFile, solutionFile);
      System.out.println(
          String.format(
              "SeparateParallelLda: iteration: %d, precisoion: %f", numIterations, precision));
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
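The fold files loaded above (training.0.mallet, test.0.mallet, ...) come from the experiment's own preprocessing, which is not shown here. Purely as an illustration of one way such a train/test split can be produced, here is a minimal sketch using InstanceList.split; the 80/20 proportions, the fixed seed, and the helper class name MakeFolds are assumptions.

import cc.mallet.types.InstanceList;
import java.io.File;
import java.util.Random;

public class MakeFolds { // hypothetical helper, not part of the original experiment
  public static void main(String[] args) {
    InstanceList all = InstanceList.load(new File("dataset/vlc/folds/all.0.4189.mallet"));
    // Split into 80% training / 20% test with a fixed seed so the folds are reproducible.
    InstanceList[] folds = all.split(new Random(0), new double[] { 0.8, 0.2 });
    folds[0].save(new File("dataset/vlc/folds/training.0.mallet"));
    folds[1].save(new File("dataset/vlc/folds/test.0.mallet"));
  }
}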
Example 5
  public static void main(String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);
    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);

    model.estimate();

    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    // Write an XML diagnostics report if an output file was given.
    if (args.length == 3) {
      PrintWriter out = new PrintWriter(args[2]);
      out.println(diagnostics.toXML());
      out.close();
    }
  }
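After estimate() completes, the trained ParallelTopicModel can also score held-out documents through its TopicInferencer. A minimal sketch that could follow the estimate() call above, assuming a heldout.mallet file built with the same pipe as the training data (the filename and the sampling settings are illustrative):

    // Requires: import cc.mallet.topics.TopicInferencer;
    InstanceList heldout = InstanceList.load(new File("heldout.mallet"));
    TopicInferencer inferencer = model.getInferencer();
    for (int i = 0; i < heldout.size(); i++) {
      // 100 sampling iterations, thinning 10, burn-in 10.
      double[] topicDistribution = inferencer.getSampledDistribution(heldout.get(i), 100, 10, 10);
      System.out.println(java.util.Arrays.toString(topicDistribution));
    }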