/** * Convert an item in the Token relation into XML, inserting it at the specified location in the * XML tree. * * @param deep whether to create a deep structure of <syllable> and <ph> elements or not. */ protected void insertToken(Item tokenItem, Element parent, boolean deep) { if (tokenItem == null || parent == null) { throw new NullPointerException("Null arguments to insertToken()"); } Document doc = parent.getOwnerDocument(); Voice maryVoice = null; if (tokenItem.getUtterance().getVoice() != null) { maryVoice = FreeTTSVoices.getMaryVoice(tokenItem.getUtterance().getVoice()); } AllophoneSet allophoneSet = (AllophoneSet) tokenItem.getUtterance().getObject("allophoneset"); if (allophoneSet == null) { throw new NullPointerException( "Utterance does not have an AllophoneSet -- should have been set in XML2UttBase.process()"); } Element insertHere = parent; boolean needMtu = false; boolean insertPhonesFromToken = tokenItem.getFeatures().isPresent("phones"); Item testWordItem = null; if (tokenItem.getFeatures().isPresent("precedingMarks")) { String marks = tokenItem.getFeatures().getString("precedingMarks"); StringTokenizer markTok = new StringTokenizer(marks, ","); while (markTok.hasMoreTokens()) { String markStr = markTok.nextToken(); Element markEl = MaryXML.createElement(doc, MaryXML.MARK); markEl.setAttribute("name", markStr); insertHere.appendChild(markEl); } } // Any boundary preceding the word? 
if (tokenItem.getFeatures().isPresent("precedingBoundaryTone") || tokenItem.getFeatures().isPresent("precedingBoundaryBreakindex") || tokenItem.getFeatures().isPresent("precedingBoundaryDuration")) { Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY); insertHere.appendChild(boundary); if (tokenItem.getFeatures().isPresent("precedingBoundaryTone")) boundary.setAttribute("tone", tokenItem.getFeatures().getString("precedingBoundaryTone")); if (tokenItem.getFeatures().isPresent("precedingBoundaryBreakindex")) boundary.setAttribute( "breakindex", tokenItem.getFeatures().getString("precedingBoundaryBreakindex")); if (tokenItem.getFeatures().isPresent("precedingBoundaryDuration")) boundary.setAttribute( "duration", tokenItem.getFeatures().getString("precedingBoundaryDuration")); } if (tokenItem.getNthDaughter(1) != null || (testWordItem = tokenItem.getDaughter()) != null && !testWordItem.toString().equals(tokenItem.toString().toLowerCase())) { // Token has more than one daughter, or the only daughter is an // expanded form -- need to create an <mtu> element needMtu = true; Element mtu = MaryXML.createElement(doc, MaryXML.MTU); parent.appendChild(mtu); mtu.setAttribute("orig", tokenItem.toString()); insertHere = mtu; } // Any words? 
FeatureSet tokenFeatureSet = tokenItem.getFeatures(); Item tokenDaughter = tokenItem.getDaughter(); if (tokenDaughter == null) { // no word relation present // Create a <t> element based on token information only Element t = MaryXML.createElement(doc, MaryXML.TOKEN); insertHere.appendChild(t); MaryDomUtils.setTokenText(t, tokenItem.toString()); if (insertPhonesFromToken) { String[] phones = (String[]) tokenItem.getFeatures().getObject("phones"); t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones)); insertPhonesFromToken = false; } if (tokenFeatureSet.isPresent("accent")) { t.setAttribute("accent", tokenFeatureSet.getString("accent")); } } while (tokenDaughter != null) { // Part of speech, if present, is associated with the word // relation. Item wordItem = tokenDaughter.getItemAs("Word"); Element t = null; StringBuilder sampa = new StringBuilder(); if (wordItem != null) { t = MaryXML.createElement(doc, MaryXML.TOKEN); insertHere.appendChild(t); String tokenText = null; // If there is only one, non-expanded word, use text from // tokenItem in order to retain capitalisation: if (needMtu) tokenText = wordItem.toString(); else tokenText = tokenItem.toString(); MaryDomUtils.setTokenText(t, tokenText); if (insertPhonesFromToken) { String[] phones = (String[]) tokenItem.getFeatures().getObject("phones"); t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones)); insertPhonesFromToken = false; } else if (wordItem.getFeatures().isPresent("phones")) { // the word item has phones, take them only if there are no Token phones String[] phones = (String[]) wordItem.getFeatures().getObject("phones"); t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones)); } if (tokenFeatureSet.isPresent("accent")) { t.setAttribute("accent", tokenFeatureSet.getString("accent")); } FeatureSet wordFeatureSet = wordItem.getFeatures(); if (wordFeatureSet.isPresent("pos")) t.setAttribute("pos", wordFeatureSet.getString("pos")); } // Any syllables? 
Item sylStruct = tokenDaughter.getItemAs("SylStructure"); if (sylStruct != null && sylStruct.hasDaughters()) { Item syllableItem = sylStruct.getDaughter(); while (syllableItem != null) { if (sampa.length() > 0) sampa.append(" - "); sampa.append(insertSyllable(syllableItem, t, deep)); syllableItem = syllableItem.getNext(); } } if (sampa.length() > 0) t.setAttribute("ph", sampa.toString()); tokenDaughter = tokenDaughter.getNext(); } // Any marks after the word but before the punctuation? if (tokenItem.getFeatures().isPresent("prePuncMarks")) { String marks = tokenItem.getFeatures().getString("prePuncMarks"); StringTokenizer markTok = new StringTokenizer(marks, ","); while (markTok.hasMoreTokens()) { String markStr = markTok.nextToken(); Element markEl = MaryXML.createElement(doc, MaryXML.MARK); markEl.setAttribute("name", markStr); insertHere.appendChild(markEl); } } // Any punctuation after the word? if (tokenItem.getFeatures().isPresent("punc")) { String puncString = tokenItem.getFeatures().getString("punc"); if (!puncString.equals("")) { Element punctuation = MaryXML.createElement(doc, MaryXML.TOKEN); MaryDomUtils.setTokenText(punctuation, puncString); String pos = null; if (puncString.equals(",")) pos = "$,"; else pos = "$PUNCT"; punctuation.setAttribute("pos", pos); parent.appendChild(punctuation); } } // Any marks after the word? if (tokenItem.getFeatures().isPresent("followingMarks")) { String marks = tokenItem.getFeatures().getString("followingMarks"); StringTokenizer markTok = new StringTokenizer(marks, ","); while (markTok.hasMoreTokens()) { String markStr = markTok.nextToken(); Element markEl = MaryXML.createElement(doc, MaryXML.MARK); markEl.setAttribute("name", markStr); insertHere.appendChild(markEl); } } // Any boundary after the word? 
if (tokenItemHasFollowingBoundary(tokenItem)) { Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY); insertHere.appendChild(boundary); if (tokenItem.getFeatures().isPresent("followingBoundaryTone")) boundary.setAttribute("tone", tokenItem.getFeatures().getString("followingBoundaryTone")); int breakindex = 0; if (tokenItem.getFeatures().isPresent("followingBoundaryBreakindex")) { String breakindexString = tokenItem.getFeatures().getString("followingBoundaryBreakindex"); boundary.setAttribute("breakindex", breakindexString); try { breakindex = Integer.parseInt(breakindexString); } catch (NumberFormatException nfe) { } } if (tokenItem.getFeatures().isPresent("followingBoundaryDuration")) boundary.setAttribute( "duration", tokenItem.getFeatures().getString("followingBoundaryDuration")); else { // estimate reasonable duration values based on the break index if (breakindex >= 4) { boundary.setAttribute("duration", "400"); } else if (breakindex == 3) { boundary.setAttribute("duration", "200"); } // and no duration for boundaries with bi < 3 } } }
/**
 * Inserts a new item named "ax" (schwa) directly before the given item, and also prepends it to
 * the given item's view in the {@code Relation.SYLLABLE_STRUCTURE} relation.
 *
 * @param item the item to prepend the schwa to.
 */
private static void prependSchwa(Item item) {
    // Create the new, empty item in front of the given one in its own relation.
    final Item newSchwa = item.prependItem(null);
    newSchwa.getFeatures().setString("name", "ax");

    // Link the same item in front of the given one within the syllable structure.
    final Item itemInSylStructure = item.getItemAs(Relation.SYLLABLE_STRUCTURE);
    itemInSylStructure.prependItem(newSchwa);
}