예제 #1
0
 /**
  * Renders one item of the Syllable relation as XML underneath the given token element and
  * returns the phone string that was assembled for it.
  *
  * <p>The returned string starts with {@code '} for stress "1" or {@code ,} for stress "2" (if
  * any), followed by the space-separated results of {@code insertSegment} for each daughter
  * segment. An "accent" feature on the syllable is always copied onto the token. If the syllable
  * carries a non-empty "endtone" and the owning token has no following boundary of its own, a
  * boundary element (break index 4, duration 200) is appended to the token's parent node.
  *
  * @param syllableItem the syllable to convert; must not be null
  * @param token the {@code <t>} element the syllable belongs to; must not be null
  * @param deep whether to create a deep structure of {@code <syllable>} and {@code <ph>} elements
  *     or not. When false, no syllable element is created and segments receive a null parent.
  * @return the phone string built for this syllable
  * @throws NullPointerException if {@code syllableItem} or {@code token} is null
  * @throws IllegalArgumentException if {@code token} is not a {@code MaryXML.TOKEN} element
  */
 protected String insertSyllable(Item syllableItem, Element token, boolean deep) {
   if (syllableItem == null || token == null) {
     throw new NullPointerException("Null arguments to insertSyllable()");
   }
   String tagName = token.getTagName();
   if (!tagName.equals(MaryXML.TOKEN)) {
     throw new IllegalArgumentException("Syllables can only be inserted in <t> elements");
   }

   Document doc = token.getOwnerDocument();
   StringBuilder phones = new StringBuilder();

   // The <syllable> node only exists in deep mode; it stays null otherwise.
   Element syllable = null;
   if (deep) {
     syllable = MaryXML.createElement(doc, MaryXML.SYLLABLE);
     token.appendChild(syllable);
   }

   // Accent: mirrored on the syllable (deep mode) and always on the token.
   if (syllableItem.getFeatures().isPresent("accent")) {
     String accent = syllableItem.getFeatures().getString("accent");
     if (deep) {
       syllable.setAttribute("accent", accent);
     }
     token.setAttribute("accent", accent);
   }

   // Stress: "0" means unstressed and leaves no trace; "1"/"2" also prefix the phone string.
   if (syllableItem.getFeatures().isPresent("stress")) {
     String stress = syllableItem.getFeatures().getString("stress");
     boolean unstressed = stress.equals("0");
     if (!unstressed) {
       if (deep) {
         syllable.setAttribute("stress", stress);
       }
       if (stress.equals("1")) {
         phones.append("'");
       } else if (stress.equals("2")) {
         phones.append(",");
       }
     }
   }

   // Segments: each contributes one space-separated entry (also separated from a stress mark).
   for (Item segment = syllableItem.getDaughter(); segment != null; segment = segment.getNext()) {
     if (phones.length() != 0) {
       phones.append(" ");
     }
     phones.append(insertSegment(segment, syllable, deep));
   }

   String phoneString = phones.toString();
   if (deep) {
     syllable.setAttribute("ph", phoneString);
   }

   // Boundary derived from the syllable's end tone, unless the token supplies its own boundary.
   if (syllableItem.getFeatures().isPresent("endtone")) {
     if (!tokenItemHasFollowingBoundary(
         syllableItem.getParent().getItemAs(Relation.TOKEN).getParent())) {
       String endtone = syllableItem.getFeatures().getString("endtone");
       if (endtone.length() > 0) {
         Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY);
         boundary.setAttribute("tone", endtone);
         boundary.setAttribute("breakindex", "4");
         boundary.setAttribute("duration", "200");
         // Appended to the token's parent, i.e. placed after the current token.
         token.getParentNode().appendChild(boundary);
       }
     }
   }
   return phoneString;
 }
예제 #2
0
  /**
   * Changes the pronunciation of "the" from 'd ax' to 'd iy' if the following word starts with a
   * vowel. "The every" is a good example.
   *
   * <p>Walks every item of the Segment relation. A segment named "ax" whose word (as resolved by
   * {@code wordPath}) is "the" and for which {@code N_PH_VC} yields "+" gets its "name" feature
   * rewritten to "iy". All other segments are left untouched.
   *
   * @param utterance the utterance to process
   */
  private void fixTheIy(Utterance utterance) {
    for (Item item = utterance.getRelation(Relation.SEGMENT).getHead();
        item != null;
        item = item.getNext()) {

      if (!"ax".equals(item.toString())) {
        continue;
      }
      // Do not assume the path lookup produced a value; a missing word simply does not match.
      Object word = wordPath.findFeature(item);
      if (word != null
          && "the".equals(word.toString())
          && "+".equals(N_PH_VC.findFeature(item))) {
        item.getFeatures().setString("name", "iy");
      }
    }
  }
예제 #3
0
  /**
   * Fixes apostrophe s segments.
   *
   * <p>Walks every item of the Segment relation and looks at the word it belongs to (as resolved
   * by {@code wordPath}):
   *
   * <ul>
   *   <li>For "'s": if the previous phone's "ctype" is one of the characters in "fa" and its
   *       "cplace" is not one of the characters in "dbg", a schwa is prepended; otherwise, if the
   *       previous phone's "cvox" is "-", the segment is renamed to "s".
   *   <li>For "'ve", "'ll" and "'d": a schwa is prepended when {@code P_PH_VC} yields "-".
   * </ul>
   *
   * <p>An "'s" segment with no preceding segment is skipped, since there is no phone to inspect.
   *
   * @param utterance the utterance to fix
   */
  private void fixApostrophe(Utterance utterance) {
    Voice voice = utterance.getVoice();
    for (Item item = utterance.getRelation(Relation.SEGMENT).getHead();
        item != null;
        item = item.getNext()) {
      String word = wordPath.findFeature(item).toString();

      if (word.equals("'s")) {

        Item previous = item.getPrevious();
        if (previous == null) {
          // Nothing precedes this segment, so the phone-based rules below cannot be applied.
          continue;
        }
        String pname = previous.toString();

        if (("fa".indexOf(voice.getPhoneFeature(pname, "ctype")) != -1)
            && ("dbg".indexOf(voice.getPhoneFeature(pname, "cplace")) == -1)) {
          prependSchwa(item);
        } else if ("-".equals(voice.getPhoneFeature(pname, "cvox"))) {
          // Constant-first comparison: a phone without a "cvox" value just does not match.
          item.getFeatures().setString("name", "s");
        }
      } else if (word.equals("'ve") || word.equals("'ll") || word.equals("'d")) {
        if ("-".equals(P_PH_VC.findFeature(item))) {
          prependSchwa(item);
        }
      }
    }
  }
예제 #4
0
  /**
   * Convert an item in the Token relation into XML, inserting it at the specified location in the
   * XML tree.
   *
   * <p>Elements are emitted in this order: marks listed in "precedingMarks", an optional preceding
   * boundary, an optional {@code <mtu>} wrapper (when the token has more than one daughter or its
   * only daughter differs from the lower-cased token text), one {@code <t>} per word (or a single
   * {@code <t>} built from the token alone when it has no daughters), marks listed in
   * "prePuncMarks", a punctuation {@code <t>} (appended to {@code parent}, i.e. outside any
   * {@code <mtu>}), marks listed in "followingMarks", and an optional following boundary.
   *
   * @param tokenItem the token to convert; must not be null
   * @param parent the element that receives the generated nodes; must not be null
   * @param deep whether to create a deep structure of {@code <syllable>} and {@code <ph>} elements
   *     or not.
   * @throws NullPointerException if {@code tokenItem} or {@code parent} is null, or if the
   *     utterance has no "allophoneset" object
   */
  protected void insertToken(Item tokenItem, Element parent, boolean deep) {
    if (tokenItem == null || parent == null) {
      throw new NullPointerException("Null arguments to insertToken()");
    }
    Document doc = parent.getOwnerDocument();
    // NOTE(review): maryVoice is never read in this method; the lookup is kept as-is because
    // FreeTTSVoices.getMaryVoice() may have side effects that are not visible from here.
    Voice maryVoice = null;
    if (tokenItem.getUtterance().getVoice() != null) {
      maryVoice = FreeTTSVoices.getMaryVoice(tokenItem.getUtterance().getVoice());
    }
    AllophoneSet allophoneSet = (AllophoneSet) tokenItem.getUtterance().getObject("allophoneset");
    if (allophoneSet == null) {
      throw new NullPointerException(
          "Utterance does not have an AllophoneSet -- should have been set in XML2UttBase.process()");
    }
    FeatureSet tokenFeatureSet = tokenItem.getFeatures();
    Element insertHere = parent;
    // Token-level phones are written onto the first <t> only; the flag is cleared once used.
    boolean insertPhonesFromToken = tokenFeatureSet.isPresent("phones");

    // Any marks preceding the word?
    if (tokenFeatureSet.isPresent("precedingMarks")) {
      appendMarkElements(doc, insertHere, tokenFeatureSet.getString("precedingMarks"));
    }
    // Any boundary preceding the word?
    if (tokenFeatureSet.isPresent("precedingBoundaryTone")
        || tokenFeatureSet.isPresent("precedingBoundaryBreakindex")
        || tokenFeatureSet.isPresent("precedingBoundaryDuration")) {
      Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY);
      insertHere.appendChild(boundary);
      copyFeatureToAttribute(tokenFeatureSet, "precedingBoundaryTone", boundary, "tone");
      copyFeatureToAttribute(
          tokenFeatureSet, "precedingBoundaryBreakindex", boundary, "breakindex");
      copyFeatureToAttribute(tokenFeatureSet, "precedingBoundaryDuration", boundary, "duration");
    }

    // Token has more than one daughter, or the only daughter is an
    // expanded form -- need to create an <mtu> element
    boolean needMtu = tokenItem.getNthDaughter(1) != null;
    if (!needMtu) {
      Item onlyDaughter = tokenItem.getDaughter();
      needMtu =
          onlyDaughter != null
              && !onlyDaughter.toString().equals(tokenItem.toString().toLowerCase());
    }
    if (needMtu) {
      Element mtu = MaryXML.createElement(doc, MaryXML.MTU);
      parent.appendChild(mtu);
      mtu.setAttribute("orig", tokenItem.toString());
      insertHere = mtu;
    }

    // Any words?
    Item tokenDaughter = tokenItem.getDaughter();
    if (tokenDaughter == null) { // no word relation present
      // Create a <t> element based on token information only
      Element t = MaryXML.createElement(doc, MaryXML.TOKEN);
      insertHere.appendChild(t);
      MaryDomUtils.setTokenText(t, tokenItem.toString());
      if (insertPhonesFromToken) {
        String[] phones = (String[]) tokenFeatureSet.getObject("phones");
        t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
        insertPhonesFromToken = false;
      }
      copyFeatureToAttribute(tokenFeatureSet, "accent", t, "accent");
    }
    for (; tokenDaughter != null; tokenDaughter = tokenDaughter.getNext()) {
      // Part of speech, if present, is associated with the word
      // relation.
      Item wordItem = tokenDaughter.getItemAs("Word");
      Element t = null;
      StringBuilder sampa = new StringBuilder();
      if (wordItem != null) {
        t = MaryXML.createElement(doc, MaryXML.TOKEN);
        insertHere.appendChild(t);
        // If there is only one, non-expanded word, use text from
        // tokenItem in order to retain capitalisation:
        String tokenText = needMtu ? wordItem.toString() : tokenItem.toString();
        MaryDomUtils.setTokenText(t, tokenText);
        if (insertPhonesFromToken) {
          String[] phones = (String[]) tokenFeatureSet.getObject("phones");
          t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
          insertPhonesFromToken = false;
        } else if (wordItem.getFeatures().isPresent("phones")) {
          // the word item has phones, take them only if there are no Token phones
          String[] phones = (String[]) wordItem.getFeatures().getObject("phones");
          t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
        }
        copyFeatureToAttribute(tokenFeatureSet, "accent", t, "accent");
        FeatureSet wordFeatureSet = wordItem.getFeatures();
        copyFeatureToAttribute(wordFeatureSet, "pos", t, "pos");
      }
      // Any syllables?
      // NOTE(review): t is still null here when the daughter has no Word item; syllables under
      // such a daughter would be passed a null token -- confirm this cannot occur.
      Item sylStruct = tokenDaughter.getItemAs("SylStructure");
      if (sylStruct != null && sylStruct.hasDaughters()) {
        for (Item syllableItem = sylStruct.getDaughter();
            syllableItem != null;
            syllableItem = syllableItem.getNext()) {
          if (sampa.length() > 0) {
            sampa.append(" - ");
          }
          sampa.append(insertSyllable(syllableItem, t, deep));
        }
      }
      // Syllable-derived phones, when present, replace any "ph" value set above.
      if (sampa.length() > 0) {
        t.setAttribute("ph", sampa.toString());
      }
    }

    // Any marks after the word but before the punctuation?
    if (tokenFeatureSet.isPresent("prePuncMarks")) {
      appendMarkElements(doc, insertHere, tokenFeatureSet.getString("prePuncMarks"));
    }
    // Any punctuation after the word?
    if (tokenFeatureSet.isPresent("punc")) {
      String puncString = tokenFeatureSet.getString("punc");
      if (!puncString.equals("")) {
        Element punctuation = MaryXML.createElement(doc, MaryXML.TOKEN);
        MaryDomUtils.setTokenText(punctuation, puncString);
        String pos = puncString.equals(",") ? "$," : "$PUNCT";
        punctuation.setAttribute("pos", pos);
        // Punctuation goes to the original parent, not into an <mtu> wrapper.
        parent.appendChild(punctuation);
      }
    }
    // Any marks after the word?
    if (tokenFeatureSet.isPresent("followingMarks")) {
      appendMarkElements(doc, insertHere, tokenFeatureSet.getString("followingMarks"));
    }
    // Any boundary after the word?
    if (tokenItemHasFollowingBoundary(tokenItem)) {
      Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY);
      insertHere.appendChild(boundary);
      copyFeatureToAttribute(tokenFeatureSet, "followingBoundaryTone", boundary, "tone");

      int breakindex = 0;
      if (tokenFeatureSet.isPresent("followingBoundaryBreakindex")) {
        String breakindexString = tokenFeatureSet.getString("followingBoundaryBreakindex");
        boundary.setAttribute("breakindex", breakindexString);
        try {
          breakindex = Integer.parseInt(breakindexString);
        } catch (NumberFormatException ignored) {
          // Deliberately tolerated: the attribute is still written verbatim above, and the
          // duration estimate below falls back to break index 0 (no estimated duration).
        }
      }

      if (tokenFeatureSet.isPresent("followingBoundaryDuration")) {
        boundary.setAttribute(
            "duration", tokenFeatureSet.getString("followingBoundaryDuration"));
      } else if (breakindex >= 4) {
        // estimate reasonable duration values based on the break index
        boundary.setAttribute("duration", "400");
      } else if (breakindex == 3) {
        boundary.setAttribute("duration", "200");
      } // and no duration for boundaries with bi < 3
    }
  }

  /**
   * Appends one mark element to {@code target} for every comma-separated name in {@code marks},
   * storing the name in the element's "name" attribute.
   */
  private void appendMarkElements(Document doc, Element target, String marks) {
    StringTokenizer markTok = new StringTokenizer(marks, ",");
    while (markTok.hasMoreTokens()) {
      String markStr = markTok.nextToken();
      Element markEl = MaryXML.createElement(doc, MaryXML.MARK);
      markEl.setAttribute("name", markStr);
      target.appendChild(markEl);
    }
  }

  /**
   * Copies the string value of {@code featureName} onto {@code target} as attribute {@code
   * attributeName}, but only if the feature is present in {@code features}.
   */
  private void copyFeatureToAttribute(
      FeatureSet features, String featureName, Element target, String attributeName) {
    if (features.isPresent(featureName)) {
      target.setAttribute(attributeName, features.getString(featureName));
    }
  }
예제 #5
0
 /**
  * Depending on the data type, find the right information in the utterance and insert it into the
  * sentence.
  *
  * <p>If the utterance has no Token relation, nothing is inserted. If it has a Phrase relation,
  * one phrase element is created per phrase item and each token is inserted (with deep structure)
  * into the phrase it belongs to. Without a Phrase relation, the tokens are inserted directly
  * into the sentence using the two-argument {@code insertToken}.
  *
  * <p>In both cases a token carrying {@code XML2UttBase.PROSODY_START} opens a prosody element
  * (when {@code insertProsodySettings} returns one) which becomes the insertion point for that
  * token and the ones after it, until a token carrying {@code XML2UttBase.PROSODY_END} closes it
  * again.
  *
  * @param sentence the sentence element that receives the generated content
  * @param utterance the utterance to read tokens and phrases from
  */
 protected final void fillSentence(Element sentence, Utterance utterance) {
   Document doc = sentence.getOwnerDocument();
   Relation tokenRelation = utterance.getRelation(Relation.TOKEN);
   if (tokenRelation == null) return;
   Item tokenItem = tokenRelation.getHead();
   Relation phraseRelation = utterance.getRelation(Relation.PHRASE);
   if (phraseRelation != null) {
     // Challenge: Bring token and phrase relations together. They have
     // common children, which can be interpreted as Word or SylStructure
     // items. Algorithm: For a given phrase, look at tokens. If a token's
     // first child, interpreted in the phrase relation, has the phrase as
     // its parent, then insert the token and all its children, and move to
     // the next token. If not, move to the next phrase.
     for (Item phraseItem = phraseRelation.getHead();
         phraseItem != null;
         phraseItem = phraseItem.getNext()) {
       // The phrases:
       Element phrase = MaryXML.createElement(doc, MaryXML.PHRASE);
       sentence.appendChild(phrase);
       Element insertHere = phrase;
       // Is this token part of this phrase?
       // NOTE(review): a token without daughters, or whose first daughter has no phrase parent,
       // raises a NullPointerException in this condition -- confirm such tokens cannot occur
       // when a Phrase relation is present.
       while (tokenItem != null
           && tokenItem.getDaughter().findItem("R:Phrase.parent").equals(phraseItem)) {
         FeatureSet tokenFeatures = tokenItem.getFeatures();
         if (tokenFeatures.isPresent(XML2UttBase.PROSODY_START)) {
           Element prosody = insertProsodySettings(insertHere, tokenFeatures);
           if (prosody != null) {
             insertHere = prosody;
           }
         }
         // Insert at the current insertion point (the phrase, or an open prosody element
         // inside it) so that tokens actually end up inside their prosody element.
         insertToken(tokenItem, insertHere, true); // create deep structure
         if (tokenFeatures.isPresent(XML2UttBase.PROSODY_END)) {
           // Only step out if a prosody element is really open; otherwise (no prosody element
           // was created for the start tag) the insertion point must stay where it is.
           if (insertHere.getTagName().equals(MaryXML.PROSODY)) {
             insertHere = (Element) insertHere.getParentNode();
           }
         }
         tokenItem = tokenItem.getNext();
       }
     }
   } else {
     // No phrase relation, simply create tokens.
     Element insertHere = sentence;
     while (tokenItem != null) {
       FeatureSet tokenFeatures = tokenItem.getFeatures();
       if (tokenFeatures.isPresent(XML2UttBase.PROSODY_START)) {
         Element prosody = insertProsodySettings(insertHere, tokenFeatures);
         if (prosody != null) {
           insertHere = prosody;
         }
       }
       insertToken(tokenItem, insertHere);
       if (tokenFeatures.isPresent(XML2UttBase.PROSODY_END)) {
         if (insertHere.getTagName().equals(MaryXML.PROSODY)) {
           insertHere = (Element) insertHere.getParentNode();
         } // else, we are looking at an empty prosody tag with no arguments, which is being
         // deleted right now.
       }
       tokenItem = tokenItem.getNext();
     }
   }
 }