Пример #1
0
  /**
   * Scores the given test documents using the external lda-c inference binary.
   *
   * <p>The documents are written to {@code lda/test.dat}, the {@code lda inf} command is run
   * against the model stored under {@code lda/final}, and the matrix read back from
   * {@code lda/output-gamma.dat} has the alpha value from {@code lda/final.other} subtracted
   * from every entry. That adjusted matrix is then multiplied by {@code betaMatrix}.
   *
   * @param testDocs the documents to score
   * @return the product (adjusted gammas) x (beta) as a plain two-dimensional array, with one
   *     row per row of the gamma file and one column per column of {@code betaMatrix}
   */
  @Override
  public double[][] predict(List<PredictionPaper> testDocs) {
    final String testData = "lda/test.dat";
    createLdaInputTest(testData, testDocs);

    // NOTE: the double space after "inf" is part of the original command line; keep it.
    final String inferenceCommand =
        "lib/lda-c-dist/lda inf  lib/lda-c-dist/settings.txt lda/final "
            + testData
            + " lda/output";
    Utils.runCommand(inferenceCommand, false);

    double[][] gammaRows = Utils.readMatrix("lda/output-gamma.dat", false);
    final double alpha = Utils.readAlpha("lda/final.other");

    // Subtract alpha from every gamma entry, in place.
    for (double[] gammaRow : gammaRows) {
      for (int k = 0; k < gammaRow.length; k++) {
        gammaRow[k] -= alpha;
      }
    }

    SimpleMatrix product = new SimpleMatrix(gammaRows).mult(new SimpleMatrix(betaMatrix));

    // Unpack the matrix product into a plain array for the caller.
    final int rowCount = product.numRows();
    final int colCount = product.numCols();
    double[][] scores = new double[rowCount][colCount];
    for (int r = 0; r < rowCount; r++) {
      double[] scoreRow = scores[r];
      for (int c = 0; c < colCount; c++) {
        scoreRow[c] = product.get(r, c);
      }
    }
    return scores;
  }
Пример #2
0
 /**
  * Trains the model by clustering the training set with the external k-means script and
  * publishing the resulting centers as the LDA beta file.
  *
  * <p>Steps, in order:
  * <ol>
  *   <li>ensure the {@code kmeans} working directory exists;</li>
  *   <li>write the training set to {@code kmeans/documents};</li>
  *   <li>run {@code src/plusone/clustering/kmeans.py} with {@code numTopics} clusters;</li>
  *   <li>read {@code kmeans/centers} into {@code betaMatrix};</li>
  *   <li>copy the centers to {@code lda/final.beta};</li>
  *   <li>create {@code kmeans/final.other} and copy it into {@code lda}.</li>
  * </ol>
  *
  * <p>NOTE(review): the {@code lda} directory is assumed to exist already; this method does not
  * create it. The meaning of the boolean passed to {@code Utils.runCommand} is not visible
  * here, so the original values are preserved as-is.
  *
  * @throws IllegalStateException if the {@code kmeans} directory is missing and cannot be
  *     created
  */
 private void train() {
   File kmeansDir = new File("kmeans");
   // File.mkdir() signals failure only through its return value. Check it, and re-check
   // isDirectory() afterwards so a directory created concurrently is not treated as an error.
   // Failing here gives a clear message instead of an obscure failure in the commands below.
   if (!kmeansDir.isDirectory() && !kmeansDir.mkdir() && !kmeansDir.isDirectory()) {
     throw new IllegalStateException(
         "could not create working directory: " + kmeansDir.getAbsolutePath());
   }

   createKmeansInput("kmeans/documents", trainingSet);
   Utils.runCommand(
       "python src/plusone/clustering/kmeans.py "
           + "kmeans/documents -k "
           + numTopics
           + " -m cosine "
           + "-w kmeans -i 50 -q",
       true);
   betaMatrix = Utils.readMatrix("kmeans/centers", false);

   System.out.print("moving kmeans beta to lda folder...");
   // Copy the centers into lda/, then rename the copy to the file name used for the LDA beta.
   Utils.runCommand("cp kmeans/centers lda", false);
   Utils.runCommand("mv lda/centers lda/final.beta", true);
   // Generate the companion ".other" info file and place it next to the beta file.
   Utils.createLdaInfo("kmeans/final.other", numTopics, terms.size(), synthetic);
   Utils.runCommand("cp kmeans/final.other lda", true);
   System.out.println("done.");
 }