Ejemplo n.º 1
0
  /**
   * Fits two LDA models over the same tokenized corpus on separate threads and prints the
   * per-index similarity scores between the two resulting models.
   *
   * <p>Steps performed:
   *
   * <ol>
   *   <li>Read the corpus from the file named by {@code args[0]} via
   *       {@code LdaWormbase.readCorpus}.
   *   <li>Tokenize every document with {@code LdaWormbase.WORMBASE_TOKENIZER_FACTORY} into a
   *       shared symbol table.
   *   <li>Build two {@code LdaRunnable} instances over the same token arrays, each with its own
   *       reporting handler and its own unseeded {@link Random}.
   *   <li>Run both on their own threads, wait for both to finish, then compare the two models
   *       with {@code similarity} and print one line per score.
   * </ol>
   *
   * @param args command-line arguments; {@code args[0]} must be the path of the corpus file
   * @throws IllegalArgumentException if no corpus file argument is supplied
   * @throws Exception if reading the corpus fails or the main thread is interrupted while
   *     waiting for the worker threads
   */
  public static void main(String[] args) throws Exception {
    // Fail fast with an actionable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
      throw new IllegalArgumentException(
          "Missing argument: expected the corpus file path as the first command-line argument");
    }

    // Passed straight through to tokenizeDocuments; presumably the minimum corpus frequency a
    // token needs in order to be kept -- confirm against the LatentDirichletAllocation docs.
    int minTokenCount = 5;
    File corpusFile = new File(args[0]);
    CharSequence[] articleTexts = LdaWormbase.readCorpus(corpusFile);
    SymbolTable symbolTable = new MapSymbolTable();
    TokenizerFactory tokenizerFactory = LdaWormbase.WORMBASE_TOKENIZER_FACTORY;
    int[][] docTokens =
        LatentDirichletAllocation.tokenizeDocuments(
            articleTexts, tokenizerFactory, symbolTable, minTokenCount);

    // Both runs see the same token arrays and symbol table; only the handler and the
    // random source are created per run.
    LdaRunnable runnable1 =
        new LdaRunnable(docTokens, new LdaReportingHandler(symbolTable), new Random());

    LdaRunnable runnable2 =
        new LdaRunnable(docTokens, new LdaReportingHandler(symbolTable), new Random());

    Thread thread1 = new Thread(runnable1);
    Thread thread2 = new Thread(runnable2);

    thread1.start();
    thread2.start();

    // Wait for both runs before reading their results.
    thread1.join();
    thread2.join();

    LatentDirichletAllocation lda0 = runnable1.mLda;

    LatentDirichletAllocation lda1 = runnable2.mLda;

    System.out.println("\nComputing Greedy Aligned Symmetrized KL Divergences");
    double[] scores = similarity(lda0, lda1);
    for (int i = 0; i < scores.length; ++i) {
      System.out.printf("%4d %15.3f\n", i, scores[i]);
    }
  }