Example #1
0
 /**
  * Tries to transcribe the given word as it stands, by cutting off inflectional endings, or by
  * stripping one or more known prefixes.
  *
  * <p>Steps, in order: user dictionary lookup, lexicon lookup, ending analysis via {@code
  * processFlectionEnding} (only when {@code allowOtherLanguage} is true), and finally prefix
  * removal, trying the longest candidate prefix first. The part behind a recognised prefix is
  * analysed by a recursive call, so several prefixes may be stacked.
  *
  * @param word the word to transcribe
  * @param currentResult result object handed on to the ending analysis and to recursive calls
  * @param allowOtherLanguage if false, the ending analysis step is skipped; the value is passed
  *     on unchanged to recursive calls
  * @return the transcription of the word, or null if the word could not be transcribed
  */
 private String processFlection(Word word, Result currentResult, boolean allowOtherLanguage) {
   String toBePhonemised = word.getToBePhonemised();
   logger.debug("processFlection is starting with: " + toBePhonemised);
   // First of all, make sure there is no userdict/lexicon entry:
   String transcription = jphon.userdictLookup(toBePhonemised, null);
   if (transcription != null) {
     return transcription;
   }
   transcription = jphon.lexiconLookup(toBePhonemised, null);
   if (transcription != null) {
     return transcription;
   }
   // Try to process by cutting off endings only, without cutting off prefix:
   if (allowOtherLanguage) {
     transcription = processFlectionEnding(word, currentResult);
   }
   if (transcription != null) {
     return transcription;
   }
   // Try removing a prefix. Re-read the word here, after the ending analysis has seen it.
   String surface = word.getToBePhonemised();
   // Enforce at least 3 characters in the stem (the part of the word that comes after the prefix):
   int maxPrefLen = Math.min(this.maxPrefixLength, surface.length() - 3);
   for (int i = maxPrefLen; i > 0; i--) {
     // Locale.ROOT keeps the lookup key independent of the JVM default locale (e.g. Turkish
     // lowercases 'I' to a dotless 'ı', which would never match a lexicon key).
     String prefix = surface.substring(0, i).toLowerCase(java.util.Locale.ROOT);
     String prefixPhon = prefixLexiconLookup(prefix);
     if (prefixPhon == null) {
       continue;
     }
     logger.debug("Prefix found: " + prefix + " [" + prefixPhon + "]");
     Word partialWord = new Word(surface.substring(i));
     // recursively call this method, i.e. allow multiple prefixes:
     String restTranscription = processFlection(partialWord, currentResult, allowOtherLanguage);
     if (restTranscription == null) {
       continue; // no valid analysis with this prefix, try a shorter one
     }
     // If the prefix carries a stress mark, drop the stress marks from the rest.
     if (prefixPhon.contains("'")) {
       restTranscription = restTranscription.replace("'", "");
     }
     return prefixPhon + "-" + restTranscription;
   }
   return null;
 }
Example #2
0
 /**
  * Resets the syllable boundary at the end of a transcription: when the last allophone of the
  * given string is a consonant, a boundary marker "-" is placed in front of it; in every other
  * case the marker is simply appended.
  *
  * @param s input syllable
  * @return syllable with boundaries reset
  */
 private String rebuildTrans(String s) {
   AllophoneSet allophoneSet = jphon.getAllophoneSet();
   if (allophoneSet == null) {
     return s + "-";
   }
   Allophone[] segments = allophoneSet.splitIntoAllophones(s);
   if (segments == null || segments.length == 0) {
     return s + "-";
   }
   Allophone finalSegment = segments[segments.length - 1];
   if (!finalSegment.isConsonant()) {
     return s + "-";
   }
   // Final consonant: split it off and put the boundary in front of it.
   String finalName = finalSegment.name();
   int cut = s.length() - finalName.length();
   return s.substring(0, cut) + "-" + finalName;
 }
Example #3
0
 /**
  * Tells whether the English phonemiser knows the given base form.
  *
  * @param s english base form
  * @return true if {@code jphon.phonemiseEn(s)} yields a transcription, false if it yields null
  */
 private boolean knowEnBaseForm(String s) {
   return jphon.phonemiseEn(s) != null;
 }
Example #4
0
  /**
   * Builds the transcription and syllabification of an inflected form whose base form is
   * transcribed by the other-language (English) phonemiser.
   *
   * <p>The base form returned by {@code word.getOtherLanguageBaseForm()} is phonemised with {@code
   * jphon.phonemiseEn}. If that succeeds, {@code currentResult} is flagged as having used the
   * other language, and the base transcription is combined with the transcriptions of the flection
   * ending, the short participle ending, the gerund ending and the flection Fuge, depending on the
   * flags stored in {@code word}.
   *
   * @param word the word under analysis; its other-language base form, flection ending and
   *     participle/gerund flags are read, and its extraSyll flag may be set to true
   * @param currentResult result object; marked via {@code setUsedOtherLanguageToPhonemise(true)}
   *     when the base form could be phonemised
   * @return transcription of the complete input word, or null if the base form could not be
   *     phonemised or if the flection ending has no transcription
   */
  private String transcribeFlection(Word word, Result currentResult) {

    String result = null;
    String otherLanguageTranscription = null;
    String endingTranscription = null;
    String gerundEndingTrans = null;
    String participleBaseShortEndingTrans = null;
    String flectionFugeTrans = null;
    otherLanguageTranscription = jphon.phonemiseEn(word.getOtherLanguageBaseForm());
    if (otherLanguageTranscription != null) {
      currentResult.setUsedOtherLanguageToPhonemise(true);
      // A base transcription ending in one of the configured dental plosives needs an extra
      // syllable.
      for (int j = 0; j < this.dentalPlosives.length; j++) {
        if (otherLanguageTranscription.endsWith(this.dentalPlosives[j])) {
          word.setExtraSyll(true);
          logger.debug("extraSyll true");
        }
      }
      // for cases like 'scrollet' where 'et' is flection ending and NOT ending of
      // participleBaseForm; otherwise 'scrollet' would sound like 'scrollt'
      String[] participleBaseLongEndings = getEndingsAndAffixes("participle_base_long");
      for (int j = 0; j < participleBaseLongEndings.length; j++) {
        if (word.getFlectionEnding() != null
            && word.getFlectionEnding().equals(participleBaseLongEndings[j])
            && !(word.getCutOffCharacter())) { // 'et'
          word.setExtraSyll(true);
        }
      }
      // Gerund ending: the last configured ending that has a transcription wins.
      String[] gerundEndings = getEndingsAndAffixes("gerund_ending"); // should be 'end' -> bootend
      for (int j = 0; j < gerundEndings.length; j++) {
        if (endingTranscriptionLookup(gerundEndings[j]) != null) {
          gerundEndingTrans = endingTranscriptionLookup(gerundEndings[j]); // should be '@nt'
        }
      }
      String[] participleBaseShortEndings = getEndingsAndAffixes("participle_base_short");
      // Whether the participle ends with 'ed' or 'et' doesn't matter -> you get the same
      // transcription. Again the last ending that has a transcription wins.
      for (int j = 0; j < participleBaseShortEndings.length; j++) {
        if (endingTranscriptionLookup(participleBaseShortEndings[j]) != null) {
          participleBaseShortEndingTrans =
              endingTranscriptionLookup(participleBaseShortEndings[j]); // gives you 't'
        }
      }
      String[] flectionFuge = getEndingsAndAffixes("flection_fuge"); // gives you 'e'
      for (int j = 0; j < flectionFuge.length; j++) {
        if (endingTranscriptionLookup(flectionFuge[j]) != null) {
          flectionFugeTrans = endingTranscriptionLookup(flectionFuge[j]);
        }
      }

      // NOTE(review): gerundEndingTrans, participleBaseShortEndingTrans and flectionFugeTrans are
      // still null here if none of their configured endings has a transcription; they are then
      // passed to rebuildTrans or concatenated below as they are - confirm this cannot happen.
      endingTranscription = endingTranscriptionLookup(word.getFlectionEnding());
      String newEnTranscription = rebuildTrans(otherLanguageTranscription);
      String newGerundEndingTrans = rebuildTrans(gerundEndingTrans); // should then be '@n-t'
      String voicedNewGerundEndingTrans = voiceFinal(newGerundEndingTrans); // should be '@n-d'
      logger.debug("enTrans: " + otherLanguageTranscription);

      if (word.getFlectionEnding() != null) {
        // An ending without a transcription leaves result at null.
        if (endingTranscriptionLookup(word.getFlectionEnding()) != null) {
          // special rule in case of enBaseForm's last char equals valid flection ending i.e. 't'
          // in this case give us back the enBaseForm aka enInfinitive
          // testing for participle because of date>te< enBaseForm ends with found ending
          // NOTE(review): this compares the transcription, not the base form spelling, against
          // the flection ending - presumably intended; confirm.
          if (otherLanguageTranscription.endsWith(word.getFlectionEnding())
              && !(word.getIsVerbalGerund())
              && !(word.getCouldBeParticiple())) {
            result = otherLanguageTranscription;
            logger.debug("(0)");
          } else {
            if (word.getCouldBeParticiple()
                && isShortSuperlative(word.getFlectionEnding())
                && word.getExtraSyll()) { // e.g. downgeloadetsten
              result =
                  newEnTranscription
                      + flectionFugeTrans
                      + participleBaseShortEndingTrans
                      + endingTranscription;
              logger.debug("(1)");
            } else if (word.getCouldBeParticiple()
                && word.getCouldBeParticipleInBaseForm()
                && word.getExtraSyll()) { // e.g. scrollet or downloadet
              result = newEnTranscription + flectionFugeTrans + participleBaseShortEndingTrans;
              logger.debug("(2)");
            } else if (word.getCouldBeParticiple()
                && word.getExtraSyll()
                && word.getWordMinusFlectionEndsWithVowel()) {
              result = newEnTranscription + flectionFugeTrans + "-" + endingTranscription;
              logger.debug("(3)");
            } else if (word.getCouldBeParticiple() && word.getExtraSyll()) { // i.e. downgeloadetere
              result =
                  newEnTranscription
                      + flectionFugeTrans
                      + "-"
                      + participleBaseShortEndingTrans
                      + endingTranscription;
              logger.debug("(4)");
            } else if (word.getCouldBeParticiple()
                && isShortSuperlative(word.getFlectionEnding())) { // e.g. gescrolltstem
              result =
                  otherLanguageTranscription + participleBaseShortEndingTrans + endingTranscription;
              logger.debug("(5)");
            } else if (word.getCouldBeParticiple() && word.getCouldBeParticipleInBaseForm()) {
              result = otherLanguageTranscription + participleBaseShortEndingTrans;
              logger.debug("(6)");
            } else if (word.getCouldBeParticiple()) { // i.e. gescrolltestem
              result =
                  otherLanguageTranscription
                      + "-"
                      + participleBaseShortEndingTrans
                      + endingTranscription;
              logger.debug("(7)");
            } else {
              // Not a participle: verbal gerund or plain inflected form.
              if (word.getIsVerbalGerund()) {
                logger.debug("isVerbalGerund");
                if (isShortSuperlative(word.getFlectionEnding())) {
                  result = newEnTranscription + gerundEndingTrans + endingTranscription;
                } else {
                  result = newEnTranscription + voicedNewGerundEndingTrans + endingTranscription;
                }
              } else {
                if (isShortSuperlative(word.getFlectionEnding())) {
                  result = otherLanguageTranscription + endingTranscription;
                } else { // no Gerund, no superlative but maybe something like 'scannst'
                  if (word.getExtraSyll()) { // means: word ends on 't' or 'd'
                    logger.debug("extraSyll is true here...");
                    result = newEnTranscription + endingTranscription;
                  } else { // means: word ends on something else
                    // Ending with a vowel that is not its first character: boundary goes between
                    // base form and ending.
                    if (endingContainsVowel(word.getFlectionEnding())
                        && (!(endingBeginsWithVowel(word.getFlectionEnding())))) {
                      result = otherLanguageTranscription + "-" + endingTranscription;
                    } else {
                      // Ending that starts with a vowel: use the base form with its boundary reset
                      // by rebuildTrans. Ending without any vowel: attach it directly.
                      if (endingContainsVowel(word.getFlectionEnding())
                          && endingBeginsWithVowel(word.getFlectionEnding())) {
                        result = newEnTranscription + endingTranscription;
                      } else {
                        result = otherLanguageTranscription + endingTranscription;
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else { // flection ending is null: either an English word like 'boot'
        // or a German gerund like 'bootend'
        if (word.getIsVerbalGerund()) {
          result = newEnTranscription + gerundEndingTrans;
          logger.debug("(((1)))");
        } else { // scann, date
          result = otherLanguageTranscription;
          logger.debug("(((2)))");
        }
      }
    }
    return result;
  }
Example #5
0
  /**
   * Analyses the input word as a compound: a known first component, an optional linking element
   * (Fuge) and a remainder.
   *
   * <p>Split points are tried from the longest first component to the shortest; both the first
   * component and the remainder are at least 3 characters long. For each first component that can
   * be transcribed, the remainder is tried in this order:
   *
   * <ol>
   *   <li>as an entry of the prefix lexicon, the user dictionary, the lexicon or (if allowed) the
   *       other-language phonemiser,
   *   <li>as such an entry followed by a noun genitive/accusative/plural ending,
   *   <li>as a Fuge followed by such an entry or by a further compound,
   *   <li>as an inflected form ({@code processFlection}),
   *   <li>as a further compound (recursive call).
   * </ol>
   *
   * <p>Stress marks are removed from the transcription of the remainder.
   *
   * @param word the input word
   * @param currentResult result object; flagged via {@code setUsedOtherLanguageToPhonemise(true)}
   *     whenever a component is transcribed by the other-language phonemiser
   * @param allowOtherLanguage whether to allow component words from other language in compound
   *     analysis
   * @return If a transcription for the input can be found, then it is returned. Otherwise returns
   *     null.
   */
  private String compoundAnalysis(Word word, Result currentResult, boolean allowOtherLanguage) {
    String toBePhonemised = word.getToBePhonemised();
    logger.debug("compoundAnalysis is starting with: " + toBePhonemised);

    // The endings do not depend on the split point, so fetch them once instead of per iteration.
    // For German these should be 's' and 'n'.
    String[] nounEndings = getEndingsAndAffixes("noun_genitive_accusative_and_plural_endings");

    // Chop off longest possible prefixes and try to look them up in the lexicon.
    // Both parts must have a minimum length of 3 characters.
    for (int i = toBePhonemised.length() - 3; i >= 3; i--) {
      String prefix = toBePhonemised.substring(0, i);
      logger.debug("Pre: " + prefix);

      String firstPhon = lookupCompoundPart(prefix, currentResult, allowOtherLanguage);
      if (firstPhon == null) {
        continue; // not a valid first component, try a shorter one
      }

      // TODO: shouldn't this call processFlection()?
      String rest = toBePhonemised.substring(i);
      logger.debug("Rest is: " + rest);
      String fugePhon = null;

      // Is the rest a simple lexical entry?
      String restPhon = prefixLexiconLookup(rest);
      logger.debug("RestPhon: " + restPhon);
      if (restPhon == null) {
        restPhon = lookupCompoundPart(rest, currentResult, allowOtherLanguage);
      }

      // Is the rest a lexical entry plus a noun ending?
      if (restPhon == null) {
        for (String ending : nounEndings) {
          if (!rest.endsWith(ending)) {
            continue;
          }
          logger.debug("rest ends with: " + ending);
          String endingPhon = endingTranscriptionLookup(ending);
          if (endingPhon == null) {
            continue; // never splice the text "null" into a transcription
          }
          // Strip exactly the matched ending, whatever its length.
          String stem = rest.substring(0, rest.length() - ending.length());
          if (stem.isEmpty()) {
            continue;
          }
          String stemPhon = lookupCompoundPart(stem, currentResult, allowOtherLanguage);
          if (stemPhon != null) {
            restPhon = stemPhon + endingPhon;
            break;
          }
        }
      }

      // Or does it help if we cut off a Fuge?
      if (restPhon == null) {
        String[] helper = fugeSearch(rest);
        if (helper != null && helper.length == 2) {
          String rest2 = helper[1];
          restPhon = lookupCompoundPart(rest2, currentResult, allowOtherLanguage);
          if (restPhon == null) {
            restPhon = compoundAnalysis(new Word(rest2), currentResult, allowOtherLanguage);
          }
          if (restPhon != null) {
            // Keep the Fuge only if the part behind it was analysed. The fallbacks below work on
            // the complete rest, whose transcription already covers the Fuge characters.
            fugePhon = helper[0];
          }
        }
      }

      // Maybe rest is a flection
      if (restPhon == null) {
        restPhon = processFlection(new Word(rest), currentResult, allowOtherLanguage);
      }
      // Or can the rest be analysed as a compound?
      if (restPhon == null) {
        restPhon = compoundAnalysis(new Word(rest), currentResult, allowOtherLanguage);
      }

      if (restPhon != null) {
        // In restPhon, delete stress signs:
        restPhon = restPhon.replace("'", "");
        return firstPhon + (fugePhon != null ? fugePhon : "") + "-" + restPhon;
      }
    }
    return null;
  }

  /**
   * Looks up one compound component in the user dictionary, then in the lexicon and, if allowed,
   * with the other-language phonemiser.
   *
   * <p>NOTE(review): the other-language flag on {@code currentResult} is only ever set here, never
   * cleared, even if the surrounding analysis fails later - confirm callers expect that.
   *
   * @param part the component to look up
   * @param currentResult flagged when the other-language phonemiser supplied the transcription
   * @param allowOtherLanguage whether the other-language phonemiser may be consulted
   * @return the transcription of the component, or null if no source knows it
   */
  private String lookupCompoundPart(String part, Result currentResult, boolean allowOtherLanguage) {
    String phon = jphon.userdictLookup(part, null);
    if (phon == null) {
      phon = jphon.lexiconLookup(part, null);
    }
    if (phon == null && allowOtherLanguage) {
      phon = jphon.phonemiseEn(part);
      if (phon != null) {
        currentResult.setUsedOtherLanguageToPhonemise(true);
      }
    }
    return phon;
  }