Esempio n. 1
0
  public void doInference() {

    try {

      ParallelTopicModel model = ParallelTopicModel.read(new File(inferencerFile));
      TopicInferencer inferencer = model.getInferencer();

      // TopicInferencer inferencer =
      //    TopicInferencer.read(new File(inferencerFile));

      // InstanceList testing = readFile();
      readFile();
      InstanceList testing = generateInstanceList(); // readFile();

      for (int i = 0; i < testing.size(); i++) {

        StringBuilder probabilities = new StringBuilder();
        double[] testProbabilities = inferencer.getSampledDistribution(testing.get(i), 10, 1, 5);

        ArrayList probabilityList = new ArrayList();

        for (int j = 0; j < testProbabilities.length; j++) {
          probabilityList.add(new Pair<Integer, Double>(j, testProbabilities[j]));
        }

        Collections.sort(probabilityList, new CustomComparator());

        for (int j = 0; j < testProbabilities.length && j < topN; j++) {
          if (j > 0) probabilities.append(" ");
          probabilities.append(
              ((Pair<Integer, Double>) probabilityList.get(j)).getFirst().toString()
                  + ","
                  + ((Pair<Integer, Double>) probabilityList.get(j)).getSecond().toString());
        }

        System.out.println(docIds.get(i) + "," + probabilities.toString());
      }

    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }
  public static void main(String[] args) {
    //		String malletFile = "dataset/vlc_lectures.all.en.f8.mallet";
    //		String simFile = "dataset/vlc/sim5p.csv";
    //		String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt";
    //		String queryFile = "dataset/task1_query.en.f8.txt";
    //		String targetFile = "dataset/task1_target.en.f8.txt";

    String malletFile = "dataset/vlc/folds/all.0.4189.mallet";
    String trainMalletFile = "dataset/vlc/folds/training.0.mallet";
    String testMalletFile = "dataset/vlc/folds/test.0.mallet";
    String queryFile = "dataset/vlc/folds/query.0.csv";
    String linkFile = "dataset/vlc/folds/trainingPairs.0.csv";
    String targetFile = "dataset/vlc/folds/target.0.csv";
    String solutionFile = "dataset/vlc/task1_solution.en.f8.lm.txt";

    int numTopics = 160;
    int numIterations = 200;
    double alpha = 0.0016;
    double beta = 0.0001;

    InstanceList train = InstanceList.load(new File(trainMalletFile));
    InstanceList test = InstanceList.load(new File(testMalletFile));
    SeparateParallelLda spl = new SeparateParallelLda(train, test);
    spl.trainDocuments(numTopics, numIterations, alpha, beta);
    spl.generateTestInference();
    spl.lda.printTopWords(System.out, 10, true);
    BasicTask1Solution solver = new Task1SolutionWithSeparateData(spl);

    double precision;
    try {
      solver.retrieveTask1Solution(queryFile, solutionFile);
      precision = Task1Solution.evaluateResult(targetFile, solutionFile);
      System.out.println(
          String.format(
              "SeparateParallelLda: iteration: %d, precisoion: %f", numIterations, precision));
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }