Example No. 1
  /**
   * Entry point for the language-modeling assignment driver.
   *
   * <p>Parses command-line flags, loads the treebank/HUB assignment data from the base path,
   * builds the requested language model, then either grid-searches the trigram interpolation
   * weights ("tuning" mode, currently hard-wired on) or runs a single evaluation pass.
   *
   * <p>Flags: {@code -path <dir>} data root (default "."); {@code -model <name>} one of
   * baseline|bigram|trigram|KN (default "baseline"); {@code -verbose}/{@code -quiet} toggle
   * printing of individual speech errors.
   *
   * @param args raw command-line arguments
   * @throws IOException if the tuning results file cannot be written
   */
  public static void main(String[] args) throws IOException {
    // Parse command line flags and arguments
    Map<String, String> argMap = CommandLineUtils.simpleCommandLineParser(args);

    // Set up default parameters and settings
    String basePath = ".";
    String model = "baseline";
    boolean verbose = false;

    // The path to the assignment data
    if (argMap.containsKey("-path")) {
      basePath = argMap.get("-path");
    }
    System.out.println("Using base path: " + basePath);

    // A string descriptor of the model to use
    if (argMap.containsKey("-model")) {
      model = argMap.get("-model");
    }
    System.out.println("Using model: " + model);

    // Whether or not to print the individual speech errors.
    if (argMap.containsKey("-verbose")) {
      verbose = true;
    }
    if (argMap.containsKey("-quiet")) {
      verbose = false;
    }

    // Read in all the assignment data
    String trainingSentencesFile = "/treebank-sentences-spoken-train.txt";
    String validationSentencesFile = "/treebank-sentences-spoken-validate.txt";
    String testSentencesFile = "/treebank-sentences-spoken-test.txt";
    String speechNBestListsPath = "/wsj_n_bst";
    Collection<List<String>> trainingSentenceCollection =
        SentenceCollection.Reader.readSentenceCollection(basePath + trainingSentencesFile);
    Collection<List<String>> validationSentenceCollection =
        SentenceCollection.Reader.readSentenceCollection(basePath + validationSentencesFile);
    Collection<List<String>> testSentenceCollection =
        SentenceCollection.Reader.readSentenceCollection(basePath + testSentencesFile);
    Set trainingVocabulary = extractVocabulary(trainingSentenceCollection);
    List<SpeechNBestList> speechNBestLists =
        SpeechNBestList.Reader.readSpeechNBestLists(
            basePath + speechNBestListsPath, trainingVocabulary);

    // Build the language model
    LanguageModel languageModel = buildLanguageModel(model, trainingSentenceCollection);

    // FIX: flag was misspelled "tunning" and compared with "== true".
    // The mode itself is still hard-wired on, exactly as in the original.
    boolean tuning = true;
    if (tuning) {
      tuneTrigramWeights(languageModel, speechNBestLists, validationSentenceCollection, verbose);
    } else {
      evaluateOnce(languageModel, speechNBestLists, testSentenceCollection, verbose);
    }
  }

  /**
   * Maps a model descriptor (case-insensitive) to a language model trained on the given
   * sentence collection.
   *
   * @throws RuntimeException if the descriptor is not one of baseline|bigram|trigram|KN
   */
  private static LanguageModel buildLanguageModel(
      String model, Collection<List<String>> trainingSentenceCollection) {
    if (model.equalsIgnoreCase("baseline")) {
      return new EmpiricalUnigramLanguageModel(trainingSentenceCollection);
    }
    if (model.equalsIgnoreCase("bigram")) {
      return new EmpiricalBigramLanguageModel(trainingSentenceCollection);
    }
    if (model.equalsIgnoreCase("trigram")) {
      return new EmpiricalTrigramLanguageModel(trainingSentenceCollection);
    }
    if (model.equalsIgnoreCase("KN")) {
      return new KNBigramLanguageModel(trainingSentenceCollection);
    }
    throw new RuntimeException("Unknown model descriptor: " + model);
  }

  /**
   * Grid-searches the trigram interpolation weights r1..r5, logging WER and perplexities for
   * every setting to "3-gram result2.txt" and recording the weights with the lowest WSJ
   * perplexity and the lowest WER seen.
   *
   * <p>NOTE(review): this mutates EmpiricalTrigramLanguageModel's static weights even when a
   * non-trigram model was selected — same as the original; confirm that is intended.
   *
   * @throws IOException if the results file cannot be written
   */
  private static void tuneTrigramWeights(
      LanguageModel languageModel,
      List<SpeechNBestList> speechNBestLists,
      Collection<List<String>> validationSentenceCollection,
      boolean verbose)
      throws IOException {
    File f = new File("3-gram result2.txt");
    // FIX: try-with-resources — the original leaked the writer when an exception escaped the
    // sweep before output.close() was reached.
    try (BufferedWriter output = new BufferedWriter(new FileWriter(f))) {
      double min = 1;
      double minR1 = 1;
      double minR2 = 1;
      double minR3 = 1;
      double minR4 = 1;
      double minR5 = 1;
      double minPerp = 1000000;
      // r1 + r2 + r3 = 1 and r4 + r5 = 1; steps accumulate in floating point, so the
      // rounded BigDecimal values below are what gets logged.
      for (double r1 = 0; r1 < 1; r1 += 0.2) {
        for (double r2 = 0; (r1 + r2) < 1; r2 += 0.1) {
          double r3 = 1 - (r1 + r2);
          for (double r4 = 0; r4 < 1; r4 += 0.1) {
            double r5 = 1 - r4;
            EmpiricalTrigramLanguageModel.r1 = r1;
            EmpiricalTrigramLanguageModel.r2 = r2;
            EmpiricalTrigramLanguageModel.r3 = r3;
            EmpiricalTrigramLanguageModel.r4 = r4;
            EmpiricalTrigramLanguageModel.r5 = r5;

            double wordErrorRate = calculateWordErrorRate(languageModel, speechNBestLists, verbose);
            double wsjPerplexity = calculatePerplexity(languageModel, validationSentenceCollection);
            double hubPerplexity =
                calculatePerplexity(languageModel, extractCorrectSentenceList(speechNBestLists));
            if (minPerp > wsjPerplexity) {
              minPerp = wsjPerplexity;
              minR1 = r1;
              minR2 = r2;
              minR3 = r3;
              minR4 = r4;
              minR5 = r5;
            }
            if (min > wordErrorRate) min = wordErrorRate;
            System.out.println(
                "r1:" + r1 + "\t" + "r2:" + r2 + "\t" + "r3:" + r3 + "\t" + "r4:" + r4 + "\t"
                    + "r5:" + r5 + "\t");
            System.out.println("HUB Word Error Rate: " + wordErrorRate);
            System.out.println("Min Error Rate till now: " + min);

            System.out.println("WSJ Perplexity:  " + wsjPerplexity);
            System.out.println("HUB Perplexity:  " + hubPerplexity);
            System.out.println();

            // FIX: BigDecimal.valueOf over new BigDecimal(double) (avoids binary-double
            // expansion artifacts), and RoundingMode.HALF_UP over the deprecated
            // BigDecimal.ROUND_HALF_UP int constant. Rounded output is unchanged.
            BigDecimal bigR1 = BigDecimal.valueOf(r1).setScale(1, java.math.RoundingMode.HALF_UP);
            BigDecimal bigR2 = BigDecimal.valueOf(r2).setScale(1, java.math.RoundingMode.HALF_UP);
            BigDecimal bigR3 = BigDecimal.valueOf(r3).setScale(1, java.math.RoundingMode.HALF_UP);
            BigDecimal bigR4 = BigDecimal.valueOf(r4).setScale(1, java.math.RoundingMode.HALF_UP);
            BigDecimal bigR5 = BigDecimal.valueOf(r5).setScale(1, java.math.RoundingMode.HALF_UP);
            BigDecimal bigWsjPerplexity =
                BigDecimal.valueOf(wsjPerplexity).setScale(2, java.math.RoundingMode.HALF_UP);
            BigDecimal bigHubPerplexity =
                BigDecimal.valueOf(hubPerplexity).setScale(2, java.math.RoundingMode.HALF_UP);
            BigDecimal bigWordErrorRate =
                BigDecimal.valueOf(wordErrorRate).setScale(4, java.math.RoundingMode.HALF_UP);

            output.write(
                bigR1
                    + "\t\t\t"
                    + bigR2
                    + "\t\t\t"
                    + bigR3
                    + "\t\t\t"
                    + bigR4
                    + "\t\t\t"
                    + bigR5
                    + "\t\t\t"
                    + bigWsjPerplexity
                    + "\t\t\t"
                    + bigHubPerplexity
                    + "\t\t\t"
                    + bigWordErrorRate);
            output.write("\n");
          }
        }
      }
      output.write("\n");
      output.write("min WER:" + min + "\n");
      output.write("min Perp:" + minPerp + "\n");
      output.write(
          "minR1:" + "\t\t\t" + minR1 + "\t\t\t" + "minR2:" + "\t\t\t" + minR2 + "\t\t\t" + "minR3:"
              + "\t\t\t" + minR3 + "\t\t\t" + "minR4:" + "\t\t\t" + minR4 + "\t\t\t" + "minR5:"
              + "\t\t\t" + minR5 + "\n");
    }
  }

  /**
   * Runs one evaluation pass with fixed trigram interpolation weights, printing word error
   * rate, test-set (WSJ) perplexity, and perplexity on the correct HUB transcriptions.
   */
  private static void evaluateOnce(
      LanguageModel languageModel,
      List<SpeechNBestList> speechNBestLists,
      Collection<List<String>> testSentenceCollection,
      boolean verbose) {
    EmpiricalTrigramLanguageModel.k = 0.1;
    EmpiricalTrigramLanguageModel.r1 = 0.7;
    EmpiricalTrigramLanguageModel.r2 = 0.2;
    EmpiricalTrigramLanguageModel.r3 = 0.1;
    double wordErrorRate = calculateWordErrorRate(languageModel, speechNBestLists, verbose);
    double wsjPerplexity = calculatePerplexity(languageModel, testSentenceCollection);
    double hubPerplexity =
        calculatePerplexity(languageModel, extractCorrectSentenceList(speechNBestLists));
    System.out.println("HUB Word Error Rate: " + wordErrorRate);
    System.out.println("WSJ Perplexity:  " + wsjPerplexity);
    System.out.println("HUB Perplexity:  " + hubPerplexity);
  }