Java MaryData.getDocument示例，marytts.datatypes.MaryData.getDocument Java示例

示例#1

0

显示文件

文件： MaryData.java 项目： suhasp1j/marytts

 public void append(MaryData md) {
   if (md == null) throw new NullPointerException("Received null marydata");
   if (!md.getType().equals(this.getType()))
     throw new IllegalArgumentException(
         "Cannot append mary data of type `"
             + md.getType().name()
             + "' to mary data of type `"
             + this.getType().name()
             + "'");
   if (getType().isXMLType()) {
     NodeList kids = md.getDocument().getDocumentElement().getChildNodes();
     logger.debug("Appending " + kids.getLength() + " nodes to MaryXML structure");
     Element docEl = this.getDocument().getDocumentElement();
     for (int i = 0; i < kids.getLength(); i++) {
       docEl.appendChild(this.getDocument().importNode(kids.item(i), true));
     }
   } else if (getType().isTextType()) {
     // Attention: XML type is a text type!
     if (this.plainText == null) {
       this.plainText = md.getPlainText();
     } else {
       this.plainText = this.plainText + "\n\n" + md.getPlainText();
     }
   } else if (getType().equals(MaryDataType.get("AUDIO"))) {
     appendAudio(md.getAudio());
   } else {
     throw new UnsupportedOperationException(
         "Cannot append two mary data items of type `" + getType() + "'");
   }
 }

示例#2

0

显示文件

文件： JapanesePhonemiser.java 项目： nlphacker/marytts

  public MaryData process(MaryData d) throws Exception {
    Document doc = d.getDocument();
    NodeIterator it = MaryDomUtils.createNodeIterator(doc, doc, MaryXML.TOKEN);
    Element t = null;
    while ((t = (Element) it.nextNode()) != null) {
      String text;

      // Do not touch tokens for which a transcription is already
      // given (exception: transcription contains a '*' character:
      if (t.hasAttribute("ph") && !t.getAttribute("ph").contains("*")) {
        continue;
      }
      if (t.hasAttribute("sounds_like")) text = t.getAttribute("sounds_like");
      else text = MaryDomUtils.tokenText(t);

      String pos = null;
      // use part-of-speech if available
      if (t.hasAttribute("pos")) {
        pos = t.getAttribute("pos");
      }

      if (text != null
          && !text.equals("")
          && (pos == null || !pos.startsWith("$") /*punctuation*/)) {
        // If text consists of several parts (e.g., because that was
        // inserted into the sounds_like attribute), each part
        // is transcribed separately.
        StringBuilder ph = new StringBuilder();
        String g2pMethod = null;
        StringTokenizer st = new StringTokenizer(text, " -");
        while (st.hasMoreTokens()) {
          String graph = st.nextToken();
          StringBuilder helper = new StringBuilder();

          String phon = phonemise(graph, pos, helper);

          if (ph.length() == 0) { // first part
            // The g2pMethod of the combined beast is
            // the g2pMethod of the first constituant.
            g2pMethod = helper.toString();
            ph.append(phon);
          } else { // following parts
            ph.append(" - ");
            // Reduce primary to secondary stress:
            ph.append(phon.replace('\'', ','));
          }
        }

        if (ph != null && ph.length() > 0) {
          setPh(t, ph.toString());
          t.setAttribute("g2p_method", g2pMethod);
        }
      }
    }
    MaryData result = new MaryData(outputType(), d.getLocale());
    result.setDocument(doc);
    return result;
  }

示例#3

0

显示文件

文件： PraatTextGridGenerator.java 项目： alienisty/marytts

  public MaryData process(MaryData d) throws Exception {
    // prevUnitIndex;
    // numberOfConsecutiveUnits;
    // basenameDuration;
    // phoneTier;
    // PraatIntervalTier unitTier;
    // PraatIntervalTier sourceTier;
    // sourceInterval;

    Document doc = d.getDocument();

    // initialize various variables:
    Double duration = 0.0;
    String phone = null;

    // initialize some class variables:
    PraatIntervalTier phoneTier = new PraatIntervalTier("phones");
    Double basenameDuration = 0.0;
    int prevUnitIndex = Integer.MIN_VALUE;
    int numberOfConsecutiveUnits = 0; // counter to track consecutive units
    PraatInterval sourceInterval = new PraatInterval(basenameDuration);

    // until we have a robust way of checking the voice type, just initialize unit and source tiers
    // anyway:
    PraatIntervalTier unitTier = new PraatIntervalTier("units");
    PraatIntervalTier sourceTier = new PraatIntervalTier("sources");

    // prepare to iterate only over the PHONE, SENTENCE, and BOUNDARY nodes in the MaryXML:
    NodeIterator ni = DomUtils.createNodeIterator(doc, PHONE, BOUNDARY);
    Element element;

    // now iterate over these nodes:
    while ((element = (Element) ni.nextNode()) != null) {
      switch (element.getTagName()) { // <s>, <ph>, or <boundary> as specified above
        case PHONE:
          phone = element.getAttribute("p");
          duration =
              Integer.parseInt(element.getAttribute("d")) / 1000.0; // duration is always in ms
          break;
        case BOUNDARY:
          phone =
              "_"; // TODO: perhaps we should access TargetFeatureComputer.getPauseSymbol() instead
          if (element.hasAttribute("duration")) {
            duration =
                Double.parseDouble(element.getAttribute("duration"))
                    / 1000.0; // duration is always in ms
          } else {
            duration = 0.0; // HMM voices can have duration-less <boundary/> tags
          }
          break;
        default:
          logger.error(
              "NodeIterator should not find an element of type " + element.getTagName() + " here!");
          break;
      }

      PraatInterval phoneInterval = new PraatInterval(duration, phone);

      // TODO: crude way of checking for unit selection voice; also, name of attribute could change!
      if (element.hasAttribute("units")) {
        // unitselectionProcessing(element, unitTier, prevUnitIndex, numberOfConsecutiveUnits,
        // basenameDuration,
        // sourceInterval, sourceTier);
        String units = element.getAttribute("units");
        String[] unitStrings = units.split("; "); // boundaries have only one unit string
        boolean differentSource = false;
        String basename = null;
        String unitRange = null;
        for (String unitString : unitStrings) {
          // TODO verify that unit string matches "UNITNAME BASENAME UNITINDEX UNITDURATION"
          String[] unitFields = unitString.split(" ");
          String unitName = unitFields[0];
          basename = unitFields[1];
          int unitIndex = Integer.parseInt(unitFields[2]);
          Double unitDuration = Double.parseDouble(unitFields[3]);

          // units are straightforward, just like phones:
          unitTier.appendInterval(new PraatInterval(unitDuration, unitString));

          // unit source processing is a little more elaborate:

          /*
           * Note: the following assumes that consecutive selected units are ALWAYS from the same basename! That could
           * change if basename boundaries are no longer marked by null units in the timeline.
           */
          differentSource =
              unitIndex
                  != prevUnitIndex + 1; // is source unit from a different part of the timeline?;
          if (differentSource) {
            // reset primary variables:
            numberOfConsecutiveUnits = 0;
            basenameDuration = 0.0;
          }
          // increment/increase primary variables:
          numberOfConsecutiveUnits++;
          basenameDuration += unitDuration;

          // construct unit index range string:
          unitRange = Integer.toString(unitIndex - numberOfConsecutiveUnits + 1);
          if (numberOfConsecutiveUnits > 1) {
            unitRange = unitRange + "-" + unitIndex;
          }

          // append source intervals to source tier:
          if (differentSource) {
            sourceInterval = new PraatInterval(basenameDuration, basename + ": " + unitRange);
            sourceTier.appendInterval(sourceInterval);
          } else {
            sourceInterval.setDuration(basenameDuration);
            sourceInterval.setText(basename + ": " + unitRange);
          }

          prevUnitIndex = unitIndex;
        }
        // HACK: arbitrary threshold to detect end points in ms (in the case of diphone voice or
        // boundary segment)
      } else if (duration > 10) {
        // TODO: there is still a bug somewhere regarding boundary durations with mbrola...
        phoneInterval.setDuration(duration / 1000.0);
      }
      phoneTier.appendInterval(phoneInterval);
    }

    PraatTextGrid textGrid = new PraatTextGrid();
    phoneTier.updateBoundaries(); // force full specification of timings
    textGrid.appendTier(phoneTier);

    // fragile way of checking whether this is a unit selection voice:
    if (unitTier.getNumberOfIntervals() > 0) {
      // complete and append unit and source tiers:
      unitTier.updateBoundaries();
      textGrid.appendTier(unitTier);
      sourceTier.updateBoundaries();
      textGrid.appendTier(sourceTier);
    }

    // return raw TextGrid as result:
    MaryData result = new MaryData(getOutputType(), d.getLocale());
    result.setPlainText(textGrid.toString());
    return result;
  }

示例#4

0

显示文件

文件： UnknownWordsFrequencyComputer.java 项目： harveydsou/marytts

  public void computeFeaturesFor(String basename) throws IOException, Exception {
    String text;
    Locale localVoice;
    localVoice = MaryUtils.string2locale(locale);

    // First, test if there is a corresponding .rawmaryxml file in textdir:
    File rawmaryxmlFile =
        new File(db.getProp(db.MARYXMLDIR) + basename + db.getProp(db.MARYXMLEXT));
    if (rawmaryxmlFile.exists()) {
      text = FileUtils.getFileAsString(rawmaryxmlFile, "UTF-8");
    } else {
      text =
          getMaryXMLHeaderWithInitialBoundary(locale)
              + FileUtils.getFileAsString(
                  new File(db.getProp(db.TEXTDIR) + basename + db.getProp(db.TEXTEXT)), "UTF-8")
              + "</maryxml>";
    }
    File pfeatFile = new File(unitfeatureDir, basename + featsExt);
    OutputStream os = new BufferedOutputStream(new FileOutputStream(pfeatFile));
    MaryClient maryClient = getMaryClient();
    /*Vector voices = maryClient.getVoices(localVoice);
    MaryClient.Voice defaultVoice = (MaryClient.Voice) voices.firstElement();
    String voiceName = defaultVoice.name();*/
    // maryClient.process(text, maryInputType, maryOutputType, null, null, os);

    maryClient.process(text, maryInputType, maryOutputType, locale, null, "slt-arctic", os);
    // maryClient.process(text, maryInputType, maryOutputType, null, "slt-arctic", os, timeout);
    // maryClient.getOutputDataTypes().size()
    // MaryData result = new MaryData(os);

    os.flush();
    os.close();

    // System.out.println(" TO STRING: "+new FileReader(pfeatFile).toString());
    // BufferedReader bfr = new BufferedReader(new FileReader(pfeatFile));
    String line;
    MaryData d = new MaryData(MaryDataType.get("PHONEMISED_EN"), Locale.US);
    // d.readFrom(new ByteArrayInputStream(os.toByteArray()));
    d.readFrom(new FileReader(pfeatFile));

    // MaryData d = new MaryData(pfeatFile);
    Document doc = d.getDocument();
    // Document acoustparams = d.getDocument();

    // NodeIterator it = ((DocumentTraversal)acoustparams).createNodeIterator(acoustparams,
    // NodeFilter.SHOW_ELEMENT,new NameNodeFilter(new String[]{MaryXML.TOKEN,
    // MaryXML.BOUNDARY}),false);
    NodeIterator it =
        ((DocumentTraversal) doc)
            .createNodeIterator(
                doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(MaryXML.TOKEN), false);

    Element t = null;
    while ((t = (Element) it.nextNode()) != null) {
      if (t.hasAttribute("g2p_method")) {
        String g2p = t.getAttribute("g2p_method");
        String nodeText = t.getTextContent().trim();
        if (g2p.equals("rules")) { // && nodeText.equals("!")){
          System.out.print(basename + " ----> " + nodeText);
          if (bnl.contains(basename)) bnl.remove(basename);
          System.out.println(" SO removing basename: " + basename);
        }

        // System.out.println("G2P:"+t.getAttribute("g2p_method"));
        // System.out.println("Text:"+t.getTextContent());
      }
    }

    /*while((line =bfr.readLine()) != null){
        //boolean b = m.matches();
        if(Pattern.matches("rules", line))
                System.out.println(basename + " LINE ---> " + line);

    }*/
    // System.out.println(" TO STRING: "+line);

  }