/**
 * Parses and disambiguates every sentence read from the given corpus file, then prints the
 * accumulated parse size followed by the elapsed time in milliseconds.
 *
 * <p>The stopwatch is started only after the file content has been loaded, so the reported time
 * does not include reading the corpus.
 *
 * @param ntvmsnbcCorpus text file that is read as a list of strings, one sentence per entry
 * @throws IOException declared for failures while reading the corpus file
 */
private void doParseSentencesInCorpus(File ntvmsnbcCorpus) throws IOException {
   List<String> corpusLines = SimpleTextReader.trimmingUTF8Reader(ntvmsnbcCorpus).asStringList();
   Stopwatch timer = new Stopwatch().start();
   int totalSize = 0;
   for (String line : corpusLines) {
     SentenceMorphParse sentenceParse = parser.parse(line);
     totalSize += sentenceParse.size();
     parser.disambiguate(sentenceParse);
     // Debugging aid: print `line` and call sentenceParse.dump() here to inspect a single result.
   }
   System.out.println(totalSize);
   System.out.println(timer.elapsed(TimeUnit.MILLISECONDS));
 }
Ejemplo n.º 2
0
 /**
  * Converts a parsed sentence into the array of {@link Ambiguous} words used for scoring.
  *
  * <p>The returned array has {@code sentence.size() + 3} slots: the first two hold
  * {@code startWord}, then one slot is filled per sentence entry, and the slot after the last
  * entry holds {@code endWord}.
  *
  * <p>For each entry, every candidate parse contributes the vocabulary index of its dictionary
  * lemma (looked up in {@code rootLm}) and the vocabulary indexes of its inflectional groups
  * (formatted without surface form and looked up in {@code igLm}).
  *
  * @param sentence the sentence whose entries and candidate parses are encoded
  * @return the padded sequence of ambiguous words
  */
 public Ambiguous[] getAmbiguousSequence(SentenceMorphParse sentence) {
   Ambiguous[] awords = new Ambiguous[sentence.size() + 3];
   awords[0] = startWord;
   awords[1] = startWord;
   int i = 2;
   for (SentenceMorphParse.Entry entry : sentence) {
     int parseCount = entry.parses.size();
     int[] roots = new int[parseCount];
     int[][] igs = new int[parseCount][];
     int j = 0;
     for (MorphParse parse : entry.parses) {
       String rootPart = parse.dictionaryItem.lemma;
       roots[j] = rootLm.getVocabulary().indexOf(rootPart);
       int igCount = parse.inflectionalGroups.size();
       igs[j] = new int[igCount];
       // Bound the loop by k (the inflectional group index), not j (the parse index); testing j
       // either overruns igs[j] or skips the groups entirely.
       for (int k = 0; k < igCount; k++) {
         igs[j][k] =
             igLm.getVocabulary().indexOf(parse.inflectionalGroups.get(k).formatNoSurface());
       }
       j++;
     }
     awords[i] = new Ambiguous(roots, igs);
     i++;
   }
   awords[i] = endWord;
   return awords;
 }
Ejemplo n.º 3
0
 /**
  * Reorders the candidate parses of each word in place so that the parse selected by
  * {@code bestSequence} ends up at index 0.
  *
  * <p>The selected parse is swapped with the one currently at index 0; words whose parse list
  * has exactly one element are left untouched.
  *
  * @param sentenceParse the sentence whose per-word parse lists are reordered
  */
 @Override
 public void disambiguate(SentenceMorphParse sentenceParse) {
   int[] winners = bestSequence(getAmbiguousSequence(sentenceParse));
   for (int wordIndex = 0; wordIndex < winners.length; wordIndex++) {
     List<MorphParse> candidates = sentenceParse.getParses(wordIndex);
     if (candidates.size() != 1) {
       int winner = winners[wordIndex];
       // Swap the current first parse with the selected one.
       MorphParse previousFirst = candidates.get(0);
       MorphParse selected = candidates.get(winner);
       candidates.set(0, selected);
       candidates.set(winner, previousFirst);
     }
   }
 }