/**
 * Runs the parser over every line of a corpus file as a rough throughput check.
 *
 * <p>All lines are loaded into memory first; the stopwatch is started only afterwards, so the
 * reported time covers the parse and disambiguation calls but not the file read. Two lines are
 * written to standard output: the sum of {@code parse.size()} over all lines, then the elapsed
 * time in milliseconds.
 *
 * @param ntvmsnbcCorpus corpus file, read through {@code SimpleTextReader.trimmingUTF8Reader}
 *     (presumably one sentence per line -- confirm against the corpus format)
 * @throws IOException declared for the corpus read
 */
private void doParseSentencesInCorpus(File ntvmsnbcCorpus) throws IOException {
   List<String> corpusLines =
       SimpleTextReader.trimmingUTF8Reader(ntvmsnbcCorpus).asStringList();

   // Start timing only after the corpus is in memory.
   Stopwatch timer = new Stopwatch().start();
   int totalParsed = 0;
   for (String line : corpusLines) {
     SentenceMorphParse sentenceParse = parser.parse(line);
     totalParsed = totalParsed + sentenceParse.size();
     parser.disambiguate(sentenceParse);
   }

   System.out.println(totalParsed);
   System.out.println(timer.elapsed(TimeUnit.MILLISECONDS));
 }
Beispiel #2
0
 /**
  * Converts a parsed sentence into an array of {@code Ambiguous} words made of vocabulary
  * indexes.
  *
  * <p>The result starts with {@code startWord} twice and ends with {@code endWord}; in between
  * there is one {@code Ambiguous} per entry of the sentence. For each candidate parse of an
  * entry, the lemma of its dictionary item is looked up in the root model's vocabulary and each
  * inflectional group (formatted with {@code formatNoSurface()}) is looked up in the IG model's
  * vocabulary.
  *
  * @param sentence parsed sentence; the array is sized as {@code sentence.size() + 3}, which
  *     assumes iteration yields exactly {@code sentence.size()} entries
  * @return the padded sequence: two start markers, one element per entry, one end marker
  */
 public Ambiguous[] getAmbiguousSequence(SentenceMorphParse sentence) {
   // Two leading start markers plus one trailing end marker account for the "+ 3".
   Ambiguous[] awords = new Ambiguous[sentence.size() + 3];
   awords[0] = startWord;
   awords[1] = startWord;
   int i = 2;
   for (SentenceMorphParse.Entry entry : sentence) {
     int parseCount = entry.parses.size();
     int[] roots = new int[parseCount];
     int[][] igs = new int[parseCount][];
     int j = 0;
     for (MorphParse parse : entry.parses) {
       roots[j] = rootLm.getVocabulary().indexOf(parse.dictionaryItem.lemma);
       int igCount = parse.inflectionalGroups.size();
       igs[j] = new int[igCount];
       // Bound the loop by k (the group index). The previous condition compared j, which
       // either skipped the loop entirely or ran k past the end of igs[j].
       for (int k = 0; k < igCount; k++) {
         igs[j][k] =
             igLm.getVocabulary().indexOf(parse.inflectionalGroups.get(k).formatNoSurface());
       }
       j++;
     }
     awords[i] = new Ambiguous(roots, igs);
     i++;
   }
   awords[i] = endWord;
   return awords;
 }