コード例 #1
0
  /**
   * Trains an LDA topic model on the serialized training instances and writes the topic output
   * files.
   *
   * <p>The training instance list is read from {@code TopicConstants.INPUTDIRPATH}; the model is
   * built with the topic count, alpha, beta and iteration count configured in {@code
   * TopicConstants}; the resulting model is passed to {@code generateTopicOutputFiles} together
   * with {@code TopicConstants.OUTPUTDIRPATH}.
   *
   * <p>Only the training instances are loaded. The "all" and "testing" instance files were
   * previously deserialized here as well but never used, so those reads have been dropped.
   *
   * @throws SQLException kept in the signature for compatibility with existing callers
   * @throws FileNotFoundException propagated from the helpers called here
   * @throws IOException propagated from the helpers called here
   * @throws ClassNotFoundException propagated from the helpers called here
   */
  private static void generateTopicFromAbstractsInDB()
      throws SQLException, FileNotFoundException, IOException, ClassNotFoundException {
    TopicTrainingModel ldaTrainModel = new TopicTrainingModel();

    InstanceList trainingInstances =
        Utilities.readInstancesFromMalletFile(
            TopicConstants.INPUTDIRPATH + File.separator + TopicConstants.TRAININGMALLETFILENAME);

    // Extraction of topics from the training data using the configured parameters.
    // NOTE(review): alpha is passed as NUMTOPICS * ALPHA, presumably because the underlying model
    // expects the sum of alpha over all topics - confirm against buildTopicModelUsingLDA.
    ldaTrainModel.buildTopicModelUsingLDA(
        trainingInstances,
        TopicConstants.NUMTOPICS,
        TopicConstants.NUMTOPICS * TopicConstants.ALPHA,
        TopicConstants.BETA,
        TopicConstants.NUMITERATIONS);

    ParallelTopicModel topicModel = ldaTrainModel.getTopicModel();
    ldaTrainModel.generateTopicOutputFiles(topicModel, TopicConstants.OUTPUTDIRPATH);
  }
コード例 #2
0
  /**
   * Runs the LDA parameter selection over the candidate values defined below and prints the
   * string returned by {@code selectOptimalParameters} to standard output.
   *
   * <p>Best values for SE: 80 topics, 20000 iterations, alpha = 0.001, beta = 0.01.
   * NOTE(review): the topic-size candidate currently in the code is 90, not 80 - confirm which
   * one is intended.
   *
   * <p>A wider candidate grid, kept here for reference (currently disabled):
   *
   * <ul>
   *   <li>alphas: 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1
   *   <li>betas: 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1
   *   <li>topic sizes: 3, 5, 10, 15, 20, 30, 40, 50, 60, 80, 100
   *   <li>iterations: 5, 10, 50, 100, 500, 1000, 2000, 2500, 3000
   * </ul>
   *
   * <p>Per-parameter variants ({@code selectOptimalAlpha}, {@code selectOptimalBeta}, {@code
   * selectOptimalIterationsSize}, {@code selectOptimalTopicSize}) were also present as
   * commented-out calls and are not invoked.
   *
   * @throws IOException propagated from the helpers called here
   * @throws ClassNotFoundException propagated from the helpers called here
   */
  private static void optimizeLDAParameters() throws IOException, ClassNotFoundException {
    // Candidate values handed to the parameter selection.
    final double[] alphaCandidates = new double[] {0.001};
    final double[] betaCandidates = new double[] {0.01};
    final int[] topicSizeCandidates = new int[] {90};
    final int[] iterationCandidates = new int[] {20000};

    // The selection runs on the complete instance file, not on the training split.
    final String allInstancesFile =
        TopicConstants.INPUTDIRPATH + File.separator + TopicConstants.ALLMALLETFILENAME;
    final InstanceList instances = Utilities.readInstancesFromMalletFile(allInstancesFile);

    final TopicTrainingModel trainer = new TopicTrainingModel();
    final String result =
        trainer.selectOptimalParameters(
            instances,
            TopicConstants.OUTPUTDIRPATH,
            alphaCandidates,
            betaCandidates,
            iterationCandidates,
            topicSizeCandidates);
    System.out.println(result);
  }
コード例 #3
0
 /**
  * Reads paper abstracts from the database, splits the resulting instances, and writes the
  * complete, training and testing instance lists to their MALLET files under {@code
  * TopicConstants.INPUTDIRPATH}.
  *
  * <p>A directory-based alternative ({@code readAbstractsFromDirectory}) existed as a
  * commented-out call and is not used.
  *
  * @param mgr data manager handed to {@code readAbstractsFromDatabase}
  * @param venueId value set as the reader's domain id before reading
  * @return the array returned by {@code readAbstractsFromDatabase}
  * @throws SQLException propagated from the helpers called here
  * @throws FileNotFoundException propagated from the helpers called here
  * @throws IOException propagated from the helpers called here
  */
 private static int[] createMalletInstances(DataManager mgr, int venueId)
     throws SQLException, FileNotFoundException, IOException {
   PaperAbstractReader reader = new PaperAbstractReader();
   reader.setDomainId(venueId);

   // Every path used below lives directly under the input directory.
   final String inputDirPrefix = TopicConstants.INPUTDIRPATH + File.separator;

   final int[] counts =
       reader.readAbstractsFromDatabase(mgr, inputDirPrefix + "papers", true, true);
   reader.splitInstances(0.8, 0.0, TopicConstants.INPUTDIRPATH);

   // Write order is unchanged: complete list, then training, then testing.
   Utilities.writeInstancesToMalletFile(
       reader.getAllInstances(), inputDirPrefix + TopicConstants.ALLMALLETFILENAME);
   Utilities.writeInstancesToMalletFile(
       reader.getTrainingInstances(), inputDirPrefix + TopicConstants.TRAININGMALLETFILENAME);
   Utilities.writeInstancesToMalletFile(
       reader.getTestingInstances(), inputDirPrefix + TopicConstants.TESTINGMALLETFILENAME);

   return counts;
 }