Ejemplo n.º 1
0
 /**
  * Writes the k-means training input file: one line per paper, holding one space-terminated
  * value per vocabulary term (terms.size() values per line).
  *
  * <p>Each line is the paper's training term-frequency vector divided by the sum of its entries,
  * i.e. entry (i, j) is the fraction of document i that is word j. A paper whose training
  * frequencies sum to zero (for example, one with no training words) is written as an all-zero
  * row; dividing by that zero sum would otherwise fill the row with NaN.
  *
  * <p>Rows are built and written one at a time, so the output file is opened before the first
  * row is computed and only a single row is held in memory.
  *
  * <p>TODO move this to Utils.java because other classes have a similar method.
  *
  * @param filename The name of the file to be created
  * @param papers The list of documents to be used for training
  */
 private void createKmeansInput(String filename, List<TrainingPaper> papers) {
   System.out.print("creating kmeans input: " + filename + " ... ");
   int vocabularySize = terms.size();
   PlusoneFileWriter fileWriter = new PlusoneFileWriter(filename);
   try {
     for (TrainingPaper paper : papers) {
       // Word ids are used directly as column indices into the row.
       double[] row = new double[vocabularySize];
       double sum = 0;
       for (int word : paper.getTrainingWords()) {
         double wordFrequency = paper.getTrainingTf(word);
         row[word] = wordFrequency;
         sum += wordFrequency;
       }
       // Skip normalization for a zero sum: 0 / 0 is NaN in floating point.
       boolean normalize = sum != 0;
       for (int col = 0; col < row.length; col++) {
         double value = normalize ? row[col] / sum : row[col];
         fileWriter.write(value + " ");
       }
       fileWriter.write("\n");
     }
   } finally {
     // Release the writer even if building or writing a row fails.
     fileWriter.close();
   }
   System.out.println("done.");
 }
Ejemplo n.º 2
0
  /**
   * Writes a list of PredictionPaper documents to file according to the format specified by
   * lda-c-dist.
   *
   * <p>One line is written per paper: the number of entries in the paper's training words,
   * followed by a space-separated {@code word:frequency} pair for each training word, where the
   * frequency is the value of getTrainingTf for that word. Every pair, including the last one on
   * a line, is followed by a single space.
   *
   * <p>The writer is closed even if writing a paper fails part-way through.
   *
   * <p>TODO This method exists in several classes--move to Utils?
   *
   * @param filename name of the file to be created (presumably overwritten if it already
   *     exists; that depends on PlusoneFileWriter -- TODO confirm)
   * @param papers list of papers to be written to file
   */
  private void createLdaInputTest(String filename, List<PredictionPaper> papers) {

    System.out.print("creating lda test input in file: " + filename + " ... ");
    PlusoneFileWriter fileWriter = new PlusoneFileWriter(filename);

    try {
      for (PredictionPaper paper : papers) {
        // Leading count: number of word:frequency pairs that follow on this line.
        fileWriter.write(paper.getTrainingWords().size() + " ");

        for (int word : paper.getTrainingWords()) {
          fileWriter.write(word + ":" + paper.getTrainingTf(word) + " ");
        }
        fileWriter.write("\n");
      }
    } finally {
      // Release the writer even if reading a paper or writing a line throws.
      fileWriter.close();
    }
    System.out.println("done.");
  }