public MaryData process(MaryData d) throws Exception { Document doc = d.getDocument(); NodeIterator it = MaryDomUtils.createNodeIterator(doc, doc, MaryXML.TOKEN); Element t = null; while ((t = (Element) it.nextNode()) != null) { String text; // Do not touch tokens for which a transcription is already // given (exception: transcription contains a '*' character: if (t.hasAttribute("ph") && !t.getAttribute("ph").contains("*")) { continue; } if (t.hasAttribute("sounds_like")) text = t.getAttribute("sounds_like"); else text = MaryDomUtils.tokenText(t); String pos = null; // use part-of-speech if available if (t.hasAttribute("pos")) { pos = t.getAttribute("pos"); } if (text != null && !text.equals("") && (pos == null || !pos.startsWith("$") /*punctuation*/)) { // If text consists of several parts (e.g., because that was // inserted into the sounds_like attribute), each part // is transcribed separately. StringBuilder ph = new StringBuilder(); String g2pMethod = null; StringTokenizer st = new StringTokenizer(text, " -"); while (st.hasMoreTokens()) { String graph = st.nextToken(); StringBuilder helper = new StringBuilder(); String phon = phonemise(graph, pos, helper); if (ph.length() == 0) { // first part // The g2pMethod of the combined beast is // the g2pMethod of the first constituant. g2pMethod = helper.toString(); ph.append(phon); } else { // following parts ph.append(" - "); // Reduce primary to secondary stress: ph.append(phon.replace('\'', ',')); } } if (ph != null && ph.length() > 0) { setPh(t, ph.toString()); t.setAttribute("g2p_method", g2pMethod); } } } MaryData result = new MaryData(outputType(), d.getLocale()); result.setDocument(doc); return result; }
public MaryData process(MaryData d) throws Exception { Document doc = MaryXML.newDocument(); Element root = doc.getDocumentElement(); Locale locale = d.getLocale(); if (locale != null) { root.setAttribute("xml:lang", MaryUtils.locale2xmllang(locale)); } Element paragraph = MaryXML.appendChildElement(root, MaryXML.PARAGRAPH); List<Utterance> utterances = d.getUtterances(); Iterator<Utterance> it = utterances.iterator(); while (it.hasNext()) { Utterance utterance = it.next(); Element insertHere = paragraph; if (logger.getEffectiveLevel().equals(Level.DEBUG)) { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); utterance.dump(pw, 2, name(), true); // padding, justRelations logger.debug("Converting the following Utterance to XML:\n" + sw.toString()); } // Make sure we have the correct voice: Voice maryVoice = null; if (utterance.getVoice() != null) { maryVoice = FreeTTSVoices.getMaryVoice(utterance.getVoice()); } if (maryVoice != null) { if (insertHere.getTagName().equals(MaryXML.VOICE)) { // Are utterance voice and voiceElement voice the same? if (maryVoice.hasName(insertHere.getAttribute("name"))) { // then insertHere is set OK, leave it like it is } else { // get one higher up, create new voice element after this // one, and make insertHere point to the new voice element Element parent = (Element) insertHere.getParentNode(); Element newVoice = MaryXML.createElement(doc, MaryXML.VOICE); parent.appendChild(newVoice); newVoice.setAttribute("name", maryVoice.getName()); insertHere = newVoice; } } else { // Check if the last child of insertHere is a voice with the right name Element lastChild = MaryDomUtils.getLastChildElement(insertHere); if (lastChild != null && lastChild.getTagName().equals(MaryXML.VOICE) && maryVoice.hasName(lastChild.getAttribute("name"))) { insertHere = lastChild; } else { // create a new voice element, insert it as a child of this // node, and let insertHere point to it Element newVoice = MaryXML.createElement(doc, MaryXML.VOICE); insertHere.appendChild(newVoice); newVoice.setAttribute("name", maryVoice.getName()); insertHere = newVoice; } } // Now insertHere is the correct <voice> element. // Any prosodic settings to insert? Element prosody = insertProsodySettings(insertHere, utterance); if (prosody != null) insertHere = prosody; } // Create a sentence element <s> for this utterance: Element sentence = MaryXML.createElement(doc, MaryXML.SENTENCE); insertHere.appendChild(sentence); fillSentence(sentence, utterance); } if (logger.getEffectiveLevel().equals(Level.DEBUG)) { logger.debug("Constructed the following XML structure:"); MaryNormalisedWriter mnw = new MaryNormalisedWriter(); ByteArrayOutputStream debugOut = new ByteArrayOutputStream(); mnw.output(doc, debugOut); logger.debug(debugOut.toString()); } MaryData output = new MaryData(outputType(), d.getLocale()); output.setDocument(doc); return output; }
public MaryData process(MaryData d) throws Exception { // prevUnitIndex; // numberOfConsecutiveUnits; // basenameDuration; // phoneTier; // PraatIntervalTier unitTier; // PraatIntervalTier sourceTier; // sourceInterval; Document doc = d.getDocument(); // initialize various variables: Double duration = 0.0; String phone = null; // initialize some class variables: PraatIntervalTier phoneTier = new PraatIntervalTier("phones"); Double basenameDuration = 0.0; int prevUnitIndex = Integer.MIN_VALUE; int numberOfConsecutiveUnits = 0; // counter to track consecutive units PraatInterval sourceInterval = new PraatInterval(basenameDuration); // until we have a robust way of checking the voice type, just initialize unit and source tiers // anyway: PraatIntervalTier unitTier = new PraatIntervalTier("units"); PraatIntervalTier sourceTier = new PraatIntervalTier("sources"); // prepare to iterate only over the PHONE, SENTENCE, and BOUNDARY nodes in the MaryXML: NodeIterator ni = DomUtils.createNodeIterator(doc, PHONE, BOUNDARY); Element element; // now iterate over these nodes: while ((element = (Element) ni.nextNode()) != null) { switch (element.getTagName()) { // <s>, <ph>, or <boundary> as specified above case PHONE: phone = element.getAttribute("p"); duration = Integer.parseInt(element.getAttribute("d")) / 1000.0; // duration is always in ms break; case BOUNDARY: phone = "_"; // TODO: perhaps we should access TargetFeatureComputer.getPauseSymbol() instead if (element.hasAttribute("duration")) { duration = Double.parseDouble(element.getAttribute("duration")) / 1000.0; // duration is always in ms } else { duration = 0.0; // HMM voices can have duration-less <boundary/> tags } break; default: logger.error( "NodeIterator should not find an element of type " + element.getTagName() + " here!"); break; } PraatInterval phoneInterval = new PraatInterval(duration, phone); // TODO: crude way of checking for unit selection voice; also, name of attribute could change! if (element.hasAttribute("units")) { // unitselectionProcessing(element, unitTier, prevUnitIndex, numberOfConsecutiveUnits, // basenameDuration, // sourceInterval, sourceTier); String units = element.getAttribute("units"); String[] unitStrings = units.split("; "); // boundaries have only one unit string boolean differentSource = false; String basename = null; String unitRange = null; for (String unitString : unitStrings) { // TODO verify that unit string matches "UNITNAME BASENAME UNITINDEX UNITDURATION" String[] unitFields = unitString.split(" "); String unitName = unitFields[0]; basename = unitFields[1]; int unitIndex = Integer.parseInt(unitFields[2]); Double unitDuration = Double.parseDouble(unitFields[3]); // units are straightforward, just like phones: unitTier.appendInterval(new PraatInterval(unitDuration, unitString)); // unit source processing is a little more elaborate: /* * Note: the following assumes that consecutive selected units are ALWAYS from the same basename! That could * change if basename boundaries are no longer marked by null units in the timeline. */ differentSource = unitIndex != prevUnitIndex + 1; // is source unit from a different part of the timeline?; if (differentSource) { // reset primary variables: numberOfConsecutiveUnits = 0; basenameDuration = 0.0; } // increment/increase primary variables: numberOfConsecutiveUnits++; basenameDuration += unitDuration; // construct unit index range string: unitRange = Integer.toString(unitIndex - numberOfConsecutiveUnits + 1); if (numberOfConsecutiveUnits > 1) { unitRange = unitRange + "-" + unitIndex; } // append source intervals to source tier: if (differentSource) { sourceInterval = new PraatInterval(basenameDuration, basename + ": " + unitRange); sourceTier.appendInterval(sourceInterval); } else { sourceInterval.setDuration(basenameDuration); sourceInterval.setText(basename + ": " + unitRange); } prevUnitIndex = unitIndex; } // HACK: arbitrary threshold to detect end points in ms (in the case of diphone voice or // boundary segment) } else if (duration > 10) { // TODO: there is still a bug somewhere regarding boundary durations with mbrola... phoneInterval.setDuration(duration / 1000.0); } phoneTier.appendInterval(phoneInterval); } PraatTextGrid textGrid = new PraatTextGrid(); phoneTier.updateBoundaries(); // force full specification of timings textGrid.appendTier(phoneTier); // fragile way of checking whether this is a unit selection voice: if (unitTier.getNumberOfIntervals() > 0) { // complete and append unit and source tiers: unitTier.updateBoundaries(); textGrid.appendTier(unitTier); sourceTier.updateBoundaries(); textGrid.appendTier(sourceTier); } // return raw TextGrid as result: MaryData result = new MaryData(getOutputType(), d.getLocale()); result.setPlainText(textGrid.toString()); return result; }