Java LanguageModel.generateSentenceの例

プログラミング言語: Java

名前空間/パッケージ名: nlp.langmodel

クラス/型: LanguageModel

メソッド/関数: generateSentence

hotexamples.comのコード掲載数: 1

Java LanguageModel.generateSentence - 1件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたJavaのnlp.langmodel.LanguageModel.generateSentenceの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

getSentenceProbability(3)

generateSentence(1)

コード例 #1

ファイルを表示

ファイル: LanguageModelTester.java プロジェクト: jocelynk/CS7540-NLP-HW1

  /**
   * Command-line entry point: loads the training data and speech n-best lists, builds the
   * requested language model, and prints its perplexity, word error rate and ten generated
   * sentences to standard output.
   *
   * <p>Flags read from the parsed argument map:
   *
   * <ul>
   *   <li>{@code -path}: base directory of the data files (default {@code "."})
   *   <li>{@code -model}: model descriptor, matched case-insensitively (default
   *       {@code "baseline"})
   *   <li>{@code -sri}: value handed to the {@code SriLanguageModel} constructor when the model
   *       is {@code sri}
   *   <li>{@code -verbose} / {@code -quiet}: toggle the verbose flag passed to the word error
   *       rate calculation; {@code -quiet} wins when both are present
   * </ul>
   *
   * @param args raw command-line arguments
   * @throws IOException propagated from the calls made here (presumably the data readers)
   * @throws IllegalStateException if the {@code katz-trigram} model is requested
   * @throws RuntimeException if the model descriptor matches none of the known names
   */
  public static void main(String[] args) throws IOException {
    final Map<String, String> flags = CommandLineUtils.simpleCommandLineParser(args);

    // Base directory of the assignment data; stays "." unless -path is supplied.
    final String dataRoot = flags.containsKey("-path") ? flags.get("-path") : ".";
    System.out.println("Using base path: " + dataRoot);

    // Descriptor of the model to build; stays "baseline" unless -model is supplied.
    final String modelName = flags.containsKey("-model") ? flags.get("-model") : "baseline";
    System.out.println("Using model: " + modelName);

    // Individual speech errors are printed only with -verbose, and -quiet overrides it.
    final boolean printErrors = flags.containsKey("-verbose") && !flags.containsKey("-quiet");

    // Load the training sentences, derive the vocabulary, then load the n-best lists.
    final Collection<List<String>> trainingSentences =
        SentenceCollection.Reader.readSentenceCollection(
            dataRoot + "/treebank-sentences-spoken-train.txt");
    final Set<String> vocabulary = extractVocabulary(trainingSentences);
    final List<SpeechNBestList> nBestLists =
        SpeechNBestList.Reader.readSpeechNBestLists(dataRoot + "/wsj_n_bst", vocabulary);

    // Loading of the validation and test sentence sets is currently disabled:
    // Collection<List<String>> validationSentenceCollection =
    //     SentenceCollection.Reader.readSentenceCollection(
    //         basePath + "/treebank-sentences-spoken-validate.txt");
    // Collection<List<String>> testSentenceCollection =
    //     SentenceCollection.Reader.readSentenceCollection(
    //         basePath + "/treebank-sentences-spoken-test.txt");

    // Pick the model implementation; every branch either assigns the model or throws.
    final LanguageModel lm;
    if (modelName.equalsIgnoreCase("baseline")) {
      lm = new EmpiricalUnigramLanguageModel(trainingSentences);
    } else if (modelName.equalsIgnoreCase("sri")) {
      lm = new SriLanguageModel(flags.get("-sri"));
    } else if (modelName.equalsIgnoreCase("bigram")) {
      lm = new EmpiricalBigramLanguageModel(trainingSentences);
    } else if (modelName.equalsIgnoreCase("trigram")) {
      lm = new EmpiricalTrigramLanguageModel(trainingSentences);
    } else if (modelName.equalsIgnoreCase("katz-bigram")) {
      lm = new KatzBigramLanguageModel(trainingSentences);
    } else if (modelName.equalsIgnoreCase("katz-bigram-pp")) {
      lm = new KatzPPBigramLanguageModel(trainingSentences);
    } else if (modelName.equalsIgnoreCase("katz-trigram")) {
      // Once implemented, replace this throw with:
      //   new KatzTrigramLanguageModel(trainingSentenceCollection)
      throw new IllegalStateException(
          "Katz trigram model not fully implemented -- remove exception and uncomment next line if implemented");
    } else {
      throw new RuntimeException("Unknown model descriptor: " + modelName);
    }

    // Perplexity over the correct sentences of the n-best lists. The WSJ test-set
    // perplexity is disabled together with the test-set loading above.
    final double hubPerplexity = calculatePerplexity(lm, extractCorrectSentenceList(nBestLists));
    System.out.println("HUB Perplexity:  " + hubPerplexity);

    // Reference word error rates, printed before the model's own rate.
    System.out.println("WER Baselines:");
    System.out.println("  Best Path:  " + calculateWordErrorRateLowerBound(nBestLists));
    System.out.println("  Worst Path: " + calculateWordErrorRateUpperBound(nBestLists));
    System.out.println("  Avg Path:   " + calculateWordErrorRateRandomChoice(nBestLists));

    final double modelWordErrorRate = calculateWordErrorRate(lm, nBestLists, printErrors);
    System.out.println("HUB Word Error Rate: " + modelWordErrorRate);

    // Finish with ten sentences generated by the model.
    System.out.println("Generated Sentences:");
    for (int sample = 0; sample < 10; sample++) {
      System.out.println(" " + lm.generateSentence());
    }
  }