Пример #1
0
 /**
  * Converts an item of the Segment relation into XML and, in deep mode, appends it to the given
  * syllable element.
  *
  * <p>In deep mode a new {@code MaryXML.PHONE} element is appended to {@code syllable}. Its
  * {@code p} attribute holds the string form of the segment. If present on the segment, the
  * "end" feature (seconds) is written as the {@code end} attribute in whole milliseconds
  * (truncated), the "mbr_dur" feature as the {@code d} attribute, and a non-empty "mbr_targets"
  * feature as the {@code f0} attribute. In shallow mode the XML tree is not touched.
  *
  * @param segmentItem the segment item to convert; must not be null
  * @param syllable the element to append to; in deep mode it must be non-null and its tag name
  *     must equal {@code MaryXML.SYLLABLE}; in shallow mode it is not used and may be null
  * @param deep whether to create a deep structure of {@code <syllable>} and {@code <ph>}
  *     elements or not
  * @return the string form of {@code segmentItem}
  * @throws NullPointerException if {@code segmentItem} is null, or if {@code deep} is true and
  *     {@code syllable} is null
  * @throws IllegalArgumentException if {@code deep} is true and {@code syllable} is not a
  *     syllable element
  */
 protected String insertSegment(Item segmentItem, Element syllable, boolean deep) {
   // A null syllable is acceptable in shallow mode, where it is never dereferenced.
   if (segmentItem == null || (deep && syllable == null)) {
     throw new NullPointerException("Null arguments to insertSegment()");
   }
   if (deep && !syllable.getTagName().equals(MaryXML.SYLLABLE)) {
     throw new IllegalArgumentException("Segments can only be inserted in <syllable> elements");
   }
   String segmentString = segmentItem.toString();
   // An earlier revision looked up the Mary voice of the utterance at this point but never used
   // the result; that dead lookup has been dropped.
   if (!deep) {
     return segmentString;
   }

   Document doc = syllable.getOwnerDocument();
   Element segment = MaryXML.createElement(doc, MaryXML.PHONE);
   syllable.appendChild(segment);
   segment.setAttribute("p", segmentString);

   if (segmentItem.getFeatures().isPresent("end")) {
     // The feature is in seconds; the attribute is in integer milliseconds (fraction dropped).
     float endInSeconds = segmentItem.getFeatures().getFloat("end");
     int endInMillis = (int) (1000 * endInSeconds);
     segment.setAttribute("end", String.valueOf(endInMillis));
   }
   if (segmentItem.getFeatures().isPresent("mbr_dur")) {
     int mbrDur = segmentItem.getFeatures().getInt("mbr_dur");
     segment.setAttribute("d", String.valueOf(mbrDur));
   }
   if (segmentItem.getFeatures().isPresent("mbr_targets")) {
     String mbrTargets = segmentItem.getFeatures().getString("mbr_targets");
     // An empty target list carries no information, so no f0 attribute is written for it.
     if (!mbrTargets.equals("")) {
       segment.setAttribute("f0", mbrTargets);
     }
   }
   return segmentString;
 }
Пример #2
0
  /**
   * Changes the pronunciation of "the" from 'd ax' to 'd iy' if the following word starts with a
   * vowel. "The every" is a good example.
   *
   * <p>Walks the Segment relation of the utterance from its head. For every segment whose string
   * form is "ax", the word found through {@code wordPath} is checked; if it is "the" and the
   * feature found through {@code N_PH_VC} equals "+", the segment's "name" feature is set to
   * "iy".
   *
   * @param utterance the utterance to process
   */
  private void fixTheIy(Utterance utterance) {
    for (Item segment = utterance.getRelation(Relation.SEGMENT).getHead();
        segment != null;
        segment = segment.getNext()) {

      if (!"ax".equals(segment.toString())) {
        continue; // only the reduced vowel is a candidate for replacement
      }
      String word = wordPath.findFeature(segment).toString();
      // The N_PH_VC lookup is only performed for "the", as before.
      if ("the".equals(word) && "+".equals(N_PH_VC.findFeature(segment))) {
        segment.getFeatures().setString("name", "iy");
      }
    }
  }
Пример #3
0
  /**
   * Converts an item of the Token relation into XML, appending the result to {@code parent}.
   *
   * <p>The following is emitted, in this order, each part only if the corresponding data is
   * present on the token item:
   *
   * <ol>
   *   <li>one mark element per name in the "precedingMarks" feature;
   *   <li>a boundary element built from the "precedingBoundary*" features;
   *   <li>an mtu element (with the original token text in its {@code orig} attribute) if the
   *       token has more than one daughter, or if its only daughter differs from the lower-cased
   *       token text; subsequent word-level elements then go into the mtu element;
   *   <li>one token element per daughter that has a "Word" item, or a single token element built
   *       from the token text alone if there are no daughters;
   *   <li>one mark element per name in the "prePuncMarks" feature;
   *   <li>a token element for a non-empty "punc" feature (always appended to {@code parent});
   *   <li>one mark element per name in the "followingMarks" feature;
   *   <li>a boundary element if {@code tokenItemHasFollowingBoundary(tokenItem)} is true.
   * </ol>
   *
   * @param tokenItem the token item to convert; must not be null
   * @param parent the element to append to; must not be null
   * @param deep whether to create a deep structure of {@code <syllable>} and {@code <ph>}
   *     elements or not; passed through to {@code insertSyllable}
   * @throws NullPointerException if {@code tokenItem} or {@code parent} is null, or if the
   *     utterance of the token has no "allophoneset" object
   */
  protected void insertToken(Item tokenItem, Element parent, boolean deep) {
    if (tokenItem == null || parent == null) {
      throw new NullPointerException("Null arguments to insertToken()");
    }
    Document doc = parent.getOwnerDocument();
    // An earlier revision also looked up the Mary voice here but never used it; that dead
    // lookup has been dropped.
    AllophoneSet allophoneSet = (AllophoneSet) tokenItem.getUtterance().getObject("allophoneset");
    if (allophoneSet == null) {
      throw new NullPointerException(
          "Utterance does not have an AllophoneSet -- should have been set in XML2UttBase.process()");
    }
    FeatureSet tokenFeatureSet = tokenItem.getFeatures();
    // Token-level phones are written onto the first token element only; see the flag reset below.
    boolean insertPhonesFromToken = tokenFeatureSet.isPresent("phones");
    Element insertHere = parent;

    appendMarkElements(doc, insertHere, tokenFeatureSet, "precedingMarks");
    appendPrecedingBoundary(doc, insertHere, tokenFeatureSet);

    // Token has more than one daughter, or the only daughter is an
    // expanded form -- need to create an <mtu> element
    boolean needMtu = tokenItem.getNthDaughter(1) != null;
    if (!needMtu) {
      Item onlyWordItem = tokenItem.getDaughter();
      // NOTE(review): toLowerCase() uses the default locale here -- confirm that is intended.
      needMtu =
          onlyWordItem != null
              && !onlyWordItem.toString().equals(tokenItem.toString().toLowerCase());
    }
    if (needMtu) {
      Element mtu = MaryXML.createElement(doc, MaryXML.MTU);
      parent.appendChild(mtu);
      mtu.setAttribute("orig", tokenItem.toString());
      insertHere = mtu;
    }

    // Any words?
    Item tokenDaughter = tokenItem.getDaughter();
    if (tokenDaughter == null) { // no word relation present
      // Create a <t> element based on token information only
      Element t = MaryXML.createElement(doc, MaryXML.TOKEN);
      insertHere.appendChild(t);
      MaryDomUtils.setTokenText(t, tokenItem.toString());
      if (insertPhonesFromToken) {
        String[] phones = (String[]) tokenFeatureSet.getObject("phones");
        t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
      }
      if (tokenFeatureSet.isPresent("accent")) {
        t.setAttribute("accent", tokenFeatureSet.getString("accent"));
      }
    }
    for (; tokenDaughter != null; tokenDaughter = tokenDaughter.getNext()) {
      // Part of speech, if present, is associated with the word
      // relation.
      Item wordItem = tokenDaughter.getItemAs("Word");
      Element t = null;
      if (wordItem != null) {
        t = MaryXML.createElement(doc, MaryXML.TOKEN);
        insertHere.appendChild(t);
        // If there is only one, non-expanded word, use text from
        // tokenItem in order to retain capitalisation:
        String tokenText = needMtu ? wordItem.toString() : tokenItem.toString();
        MaryDomUtils.setTokenText(t, tokenText);

        FeatureSet wordFeatureSet = wordItem.getFeatures();
        if (insertPhonesFromToken) {
          String[] phones = (String[]) tokenFeatureSet.getObject("phones");
          t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
          insertPhonesFromToken = false;
        } else if (wordFeatureSet.isPresent("phones")) {
          // the word item has phones, take them only if there are no Token phones
          String[] phones = (String[]) wordFeatureSet.getObject("phones");
          t.setAttribute("ph", phoneArray2phoneString(allophoneSet, phones));
        }
        if (tokenFeatureSet.isPresent("accent")) {
          t.setAttribute("accent", tokenFeatureSet.getString("accent"));
        }
        if (wordFeatureSet.isPresent("pos")) {
          t.setAttribute("pos", wordFeatureSet.getString("pos"));
        }
      }

      // Any syllables?
      // NOTE(review): if this daughter has syllables but no "Word" item, t is still null below;
      // behaviour for that case is unchanged from the previous revision -- confirm it cannot occur.
      StringBuilder sampa = new StringBuilder();
      Item sylStruct = tokenDaughter.getItemAs("SylStructure");
      if (sylStruct != null && sylStruct.hasDaughters()) {
        for (Item syllableItem = sylStruct.getDaughter();
            syllableItem != null;
            syllableItem = syllableItem.getNext()) {
          if (sampa.length() > 0) {
            sampa.append(" - ");
          }
          sampa.append(insertSyllable(syllableItem, t, deep));
        }
      }
      // A transcription assembled from the syllables replaces any "ph" value set above.
      if (sampa.length() > 0) {
        t.setAttribute("ph", sampa.toString());
      }
    }

    // Any marks after the word but before the punctuation?
    appendMarkElements(doc, insertHere, tokenFeatureSet, "prePuncMarks");

    // Any punctuation after the word?
    if (tokenFeatureSet.isPresent("punc")) {
      String puncString = tokenFeatureSet.getString("punc");
      if (!puncString.equals("")) {
        Element punctuation = MaryXML.createElement(doc, MaryXML.TOKEN);
        MaryDomUtils.setTokenText(punctuation, puncString);
        punctuation.setAttribute("pos", puncString.equals(",") ? "$," : "$PUNCT");
        // Punctuation goes to the parent, i.e. outside of any mtu element created above.
        parent.appendChild(punctuation);
      }
    }

    // Any marks after the word?
    appendMarkElements(doc, insertHere, tokenFeatureSet, "followingMarks");

    // Any boundary after the word?
    if (tokenItemHasFollowingBoundary(tokenItem)) {
      appendFollowingBoundary(doc, insertHere, tokenFeatureSet);
    }
  }

  /**
   * Appends one mark element to {@code target} for every comma-separated name stored in the given
   * feature; does nothing if the feature is absent.
   */
  private void appendMarkElements(
      Document doc, Element target, FeatureSet features, String featureName) {
    if (!features.isPresent(featureName)) {
      return;
    }
    StringTokenizer markTok = new StringTokenizer(features.getString(featureName), ",");
    while (markTok.hasMoreTokens()) {
      String markStr = markTok.nextToken();
      Element markEl = MaryXML.createElement(doc, MaryXML.MARK);
      markEl.setAttribute("name", markStr);
      target.appendChild(markEl);
    }
  }

  /**
   * Appends a boundary element carrying the "precedingBoundary*" features to {@code target}; does
   * nothing if none of those features is present.
   */
  private void appendPrecedingBoundary(Document doc, Element target, FeatureSet features) {
    boolean hasTone = features.isPresent("precedingBoundaryTone");
    boolean hasBreakindex = features.isPresent("precedingBoundaryBreakindex");
    boolean hasDuration = features.isPresent("precedingBoundaryDuration");
    if (!hasTone && !hasBreakindex && !hasDuration) {
      return;
    }
    Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY);
    target.appendChild(boundary);
    if (hasTone) {
      boundary.setAttribute("tone", features.getString("precedingBoundaryTone"));
    }
    if (hasBreakindex) {
      boundary.setAttribute("breakindex", features.getString("precedingBoundaryBreakindex"));
    }
    if (hasDuration) {
      boundary.setAttribute("duration", features.getString("precedingBoundaryDuration"));
    }
  }

  /**
   * Appends a boundary element carrying the "followingBoundary*" features to {@code target}. If
   * no explicit duration is given, one is estimated from the break index.
   */
  private void appendFollowingBoundary(Document doc, Element target, FeatureSet features) {
    Element boundary = MaryXML.createElement(doc, MaryXML.BOUNDARY);
    target.appendChild(boundary);
    if (features.isPresent("followingBoundaryTone")) {
      boundary.setAttribute("tone", features.getString("followingBoundaryTone"));
    }

    int breakindex = 0;
    if (features.isPresent("followingBoundaryBreakindex")) {
      String breakindexString = features.getString("followingBoundaryBreakindex");
      boundary.setAttribute("breakindex", breakindexString);
      try {
        breakindex = Integer.parseInt(breakindexString);
      } catch (NumberFormatException ignored) {
        // Non-numeric break index: keep 0, so that no duration is estimated below.
      }
    }

    if (features.isPresent("followingBoundaryDuration")) {
      boundary.setAttribute("duration", features.getString("followingBoundaryDuration"));
    } else if (breakindex >= 4) {
      // estimate reasonable duration values based on the break index
      boundary.setAttribute("duration", "400");
    } else if (breakindex == 3) {
      boundary.setAttribute("duration", "200");
    } // and no duration for boundaries with bi < 3
  }