/** * Try to process the input word, as it stands, or by cutting off prefixes or inflectional * suffixes. * * @param toBePhonemised the input word * @param allowOtherLanguage allowOtherLanguage * @return the transcription of the word, or null if the word could not be transcribed */ private String processFlection(Word word, Result currentResult, boolean allowOtherLanguage) { String toBePhonemised = word.getToBePhonemised(); logger.debug("processFlection is starting with: " + toBePhonemised); // First of all, make sure there is no userdict/lexicon entry: String transcription = jphon.userdictLookup(toBePhonemised, null); if (transcription != null) { return transcription; } transcription = jphon.lexiconLookup(toBePhonemised, null); if (transcription != null) { return transcription; } // Try to process by cutting off endings only, without cutting off prefix: if (allowOtherLanguage) { transcription = processFlectionEnding(word, currentResult); } if (transcription != null) { return transcription; } // try removing prefix: // Enforce at least 3 characters in the stem (the part of the word that comes after the prefix): int maxPrefLen = Math.min(this.maxPrefixLength, word.getToBePhonemised().length() - 3); for (int i = maxPrefLen; i > 0; i--) { String prefix = word.getToBePhonemised().substring(0, i).toLowerCase(); String prefixPhon = prefixLexiconLookup(prefix); if (prefixPhon != null) { logger.debug("Prefix found: " + prefix + " [" + prefixPhon + "]"); Word partialWord = new Word(word.getToBePhonemised().substring(i)); // recursively call this method, i.e. allow multiple prefixes: String restTranscription = processFlection(partialWord, currentResult, allowOtherLanguage); if (restTranscription != null) { // yes, found valid analysis if (prefixPhon.indexOf("'") != -1) { restTranscription = restTranscription.replaceAll("'", ""); } transcription = prefixPhon + "-" + restTranscription; return transcription; } } } return null; }
/** * If the given string ends with a consonant, insert a syllable boundary before that consonant. * Otherwise, append a syllable boundary. * * @param s input syllable * @return syllable with boundaries reset */ private String rebuildTrans(String s) { AllophoneSet set = jphon.getAllophoneSet(); if (set != null) { Allophone[] allophones = set.splitIntoAllophones(s); if (allophones != null && allophones.length > 0) { Allophone last = allophones[allophones.length - 1]; if (last.isConsonant()) { // insert a syllable boundary before final consonant String lastPh = last.name(); return s.substring(0, s.length() - lastPh.length()) + "-" + lastPh; } } } return s + "-"; }
/**
 * Checks whether the item is known to the English lexicon, i.e. whether
 * {@code jphon.phonemiseEn} yields a transcription for it.
 *
 * @param s english base form
 * @return true if a transcription was found, false if not
 */
private boolean knowEnBaseForm(String s) {
    return jphon.phonemiseEn(s) != null;
}
/** * Building the transcription and syllabification of a flection * * @param currentResult currentResult * @param word : the English infinitive as found in English lexicon * @return transcription of complete input word */ private String transcribeFlection(Word word, Result currentResult) { String result = null; String otherLanguageTranscription = null; String endingTranscription = null; String gerundEndingTrans = null; String participleBaseShortEndingTrans = null; String flectionFugeTrans = null; otherLanguageTranscription = jphon.phonemiseEn(word.getOtherLanguageBaseForm()); if (otherLanguageTranscription != null) { // System.out.println("var should be true"); currentResult.setUsedOtherLanguageToPhonemise(true); for (int j = 0; j < this.dentalPlosives.length; j++) { if (otherLanguageTranscription.endsWith(this.dentalPlosives[j])) { word.setExtraSyll(true); logger.debug("extraSyll true"); } } // System.out.println("var is in transcribeFlection: // "+currentResult.isUsedOtherLanguageToPhonemise()); // for cases like 'scrollet' where 'et' is flection ending and NOT ending of // participleBaseForm; otherwise 'scrollet' would sound like 'scrollt' String[] participleBaseLongEndings = getEndingsAndAffixes("participle_base_long"); for (int j = 0; j < participleBaseLongEndings.length; j++) { if (word.getFlectionEnding() != null && word.getFlectionEnding().equals(participleBaseLongEndings[j]) && !(word.getCutOffCharacter())) { // 'et' word.setExtraSyll(true); } } String[] gerundEndings = getEndingsAndAffixes("gerund_ending"); // should be 'end' -> bootend // String gerundEndingTrans = endingTranscriptionLookup(gerundEnding);//should be '@nt' for (int j = 0; j < gerundEndings.length; j++) { if (endingTranscriptionLookup(gerundEndings[j]) != null) { gerundEndingTrans = endingTranscriptionLookup(gerundEndings[j]); } } String[] participleBaseShortEndings = getEndingsAndAffixes("participle_base_short"); // If the participle ends with 'ed' or 'et' doesn't matter -> you get the 
same transcription // String participleBaseEndingTrans = endingTranscriptionLookup(participleBaseEnding);//gives // you 't' for (int j = 0; j < participleBaseShortEndings.length; j++) { if (endingTranscriptionLookup(participleBaseShortEndings[j]) != null) { participleBaseShortEndingTrans = endingTranscriptionLookup(participleBaseShortEndings[j]); // gives you 't' } } String[] flectionFuge = getEndingsAndAffixes("flection_fuge"); // gives you 'e' for (int j = 0; j < flectionFuge.length; j++) { if (endingTranscriptionLookup(flectionFuge[j]) != null) { flectionFugeTrans = endingTranscriptionLookup(flectionFuge[j]); } } endingTranscription = endingTranscriptionLookup(word.getFlectionEnding()); String newEnTranscription = rebuildTrans(otherLanguageTranscription); String newGerundEndingTrans = rebuildTrans(gerundEndingTrans); // should then be '@n-t' String voicedNewGerundEndingTrans = voiceFinal(newGerundEndingTrans); // should be '@n-d' // String voicedGerundEndingTrans = voiceFinal(gerundEndingTrans); //should be '@nd' logger.debug("enTrans: " + otherLanguageTranscription); if (word.getFlectionEnding() != null) { if (endingTranscriptionLookup(word.getFlectionEnding()) != null) { // special rule in case of enBaseForm's last char equals valid flection ending i.e. 't' // in this case give us back the enBaseForm aka enInfinitive // testing for participle because of date>te< enBaseForm ends with found ending if (otherLanguageTranscription.endsWith(word.getFlectionEnding()) && !(word.getIsVerbalGerund()) && !(word.getCouldBeParticiple())) { result = otherLanguageTranscription; logger.debug("(0)"); } else { if (word.getCouldBeParticiple() && isShortSuperlative(word.getFlectionEnding()) && word.getExtraSyll()) { // i.e. 
// downgeloadetsten result = newEnTranscription + flectionFugeTrans + participleBaseShortEndingTrans + endingTranscription; logger.debug("(1)"); } else if (word.getCouldBeParticiple() && word.getCouldBeParticipleInBaseForm() && word.getExtraSyll()) { // scrollet // or // downloadet result = newEnTranscription + flectionFugeTrans + participleBaseShortEndingTrans; logger.debug("(2)"); } else if (word.getCouldBeParticiple() && word.getExtraSyll() && word.getWordMinusFlectionEndsWithVowel()) { result = newEnTranscription + flectionFugeTrans + "-" + endingTranscription; logger.debug("(3)"); } else if (word.getCouldBeParticiple() && word.getExtraSyll()) { // i.e. downgeloadetere result = newEnTranscription + flectionFugeTrans + "-" + participleBaseShortEndingTrans + endingTranscription; logger.debug("(4)"); } else if (word.getCouldBeParticiple() && isShortSuperlative(word.getFlectionEnding())) { // i.e. // gescrolltstem result = otherLanguageTranscription + participleBaseShortEndingTrans + endingTranscription; logger.debug("(5)"); } else if (word.getCouldBeParticiple() && word.getCouldBeParticipleInBaseForm()) { result = otherLanguageTranscription + participleBaseShortEndingTrans; logger.debug("(6)"); } else if (word.getCouldBeParticiple()) { // i.e. 
gescrolltestem result = otherLanguageTranscription + "-" + participleBaseShortEndingTrans + endingTranscription; logger.debug("(7)"); } else { if (word.getIsVerbalGerund()) { logger.debug("isVerbalGerund"); if (isShortSuperlative(word.getFlectionEnding())) { result = newEnTranscription + gerundEndingTrans + endingTranscription; } else { result = newEnTranscription + voicedNewGerundEndingTrans + endingTranscription; } } else { if (isShortSuperlative(word.getFlectionEnding())) { result = otherLanguageTranscription + endingTranscription; } else { // no Gerund, no superlative but maybe something like 'scannst' if (word.getExtraSyll()) { // means: word ends on 't' or 'd' logger.debug("extraSyll is true here..."); result = newEnTranscription + endingTranscription; } else { // means: word ends on something else if (endingContainsVowel(word.getFlectionEnding()) && (!(endingBeginsWithVowel(word.getFlectionEnding())))) { result = otherLanguageTranscription + "-" + endingTranscription; } else { if (endingContainsVowel(word.getFlectionEnding()) && endingBeginsWithVowel(word.getFlectionEnding())) { result = newEnTranscription + endingTranscription; } else { result = otherLanguageTranscription + endingTranscription; } } } } } } } } } else { // flection ending is null: two possibilities: en-Word like boot or ger gerund like // bootend if (word.getIsVerbalGerund()) { result = newEnTranscription + gerundEndingTrans; logger.debug("(((1)))"); } else { // scann, date result = otherLanguageTranscription; logger.debug("(((2)))"); } } } return result; }
/** * Analyses parts of input word for affixes, compounds etc. * * @param word the input word * @param currentResult currentResult * @param allowOtherLanguage whether to allow component words from other language in compound * analysis * @return If a transcription for the input can be found, then it is returned. Otherwise returns * null. */ private String compoundAnalysis(Word word, Result currentResult, boolean allowOtherLanguage) { // Chop off longest possible prefixes and try to look them up // in the lexicon. Any part must have a minimum length of 3 -> 2!! characters. logger.debug("compoundAnalysis is starting with: " + word.getToBePhonemised()); for (int i = word.getToBePhonemised().length() - 3; i >= 3; i--) { // -3!!! >= 3!!! String firstPhon = null; String fugePhon = null; String restPhon = null; String[] genitiveAccusativeAndPluralEndings = getEndingsAndAffixes("noun_genitive_accusative_and_plural_endings"); // should // be // 's' // and // 'n' // for // german String prefix = word.getToBePhonemised().substring(0, i); logger.debug("Pre: " + prefix); firstPhon = jphon.userdictLookup(prefix, null); if (firstPhon == null) { firstPhon = jphon.lexiconLookup(prefix, null); } if (firstPhon == null && allowOtherLanguage) { firstPhon = jphon.phonemiseEn(prefix); if (firstPhon != null) { currentResult.setUsedOtherLanguageToPhonemise(true); } } if (firstPhon != null) { // found a valid prefix // TODO: shouldn't this call processFlection()? String rest = word.getToBePhonemised().substring(i); logger.debug("Rest is: " + rest); // Is the rest a simple lexical entry? 
// restPhon = germanLexiconLookup(rest); restPhon = prefixLexiconLookup(rest); logger.debug("RestPhon: " + restPhon); if (restPhon == null) { restPhon = jphon.userdictLookup(rest, null); } if (restPhon == null) { restPhon = jphon.lexiconLookup(rest, null); } if (restPhon == null && allowOtherLanguage) { restPhon = jphon.phonemiseEn(rest); if (restPhon != null) { currentResult.setUsedOtherLanguageToPhonemise(true); } } if (restPhon == null) { for (int j = 0; j < genitiveAccusativeAndPluralEndings.length; j++) { if (rest.endsWith(genitiveAccusativeAndPluralEndings[j])) { logger.debug("rest ends with: " + genitiveAccusativeAndPluralEndings[j]); String restWithoutLast = rest.substring(0, rest.length() - 1); String restPhonDe = jphon.userdictLookup(restWithoutLast, null); if (restPhonDe == null) restPhonDe = jphon.lexiconLookup(restWithoutLast, null); String genitiveAndPluralEndingTrans = endingTranscriptionLookup(genitiveAccusativeAndPluralEndings[j]); if (restPhonDe != null) { restPhon = restPhonDe + genitiveAndPluralEndingTrans; } else if (allowOtherLanguage) { String restPhonEn = jphon.phonemiseEn(rest.substring(0, rest.length() - 1)); if (restPhonEn != null) { currentResult.setUsedOtherLanguageToPhonemise(true); restPhon = restPhonEn + genitiveAndPluralEndingTrans; } } } if (restPhon != null) break; } } // Or does it help if we cut off a Fuge? 
if (restPhon == null) { String[] helper = fugeSearch(rest); if (helper != null && helper.length == 2) { fugePhon = helper[0]; String rest2 = helper[1]; restPhon = jphon.userdictLookup(rest2, null); if (restPhon == null) { restPhon = jphon.lexiconLookup(rest2, null); } if (restPhon == null && allowOtherLanguage) { restPhon = jphon.phonemiseEn(rest2); if (restPhon != null) { currentResult.setUsedOtherLanguageToPhonemise(true); } } if (restPhon == null) restPhon = compoundAnalysis(new Word(rest2), currentResult, allowOtherLanguage); } } // Maybe rest is a flection if (restPhon == null) { // System.out.println("1) new word is : "+rest+". processFlection is called from here. var // is : "+currentResult.isUsedOtherLanguageToPhonemise()); restPhon = processFlection(new Word(rest), currentResult, allowOtherLanguage); // System.out.println("2) new word was : "+rest+". processFlection is called from here. // var is : "+currentResult.isUsedOtherLanguageToPhonemise()); } // Or can the rest be analysed as a compound? if (restPhon == null) restPhon = compoundAnalysis(new Word(rest), currentResult, allowOtherLanguage); if (restPhon != null) { // In restPhon, delete stress signs: restPhon = restPhon.replaceAll("'", ""); return firstPhon + (fugePhon != null ? fugePhon : "") + "-" + restPhon; } } } return null; }