/**
   * Scores each utterance after re-ranking its n-best list with a three-feature linear function.
   *
   * <p>For every {@code nbest_data} element of the XML file:
   *
   * <ol>
   *   <li>each {@code recognition} hypothesis from index 1 onwards is re-scored as {@code newRank
   *       = rank + 10 * underconstrained_query + 10 * inconsistent_tense};
   *   <li>the hypothesis with the smallest new rank is selected (the first one wins a tie);
   *   <li>the {@code dialogue_move} of the selected hypothesis is compared with the {@code
   *       dialogue_move} of the {@code nbest_data} element via {@code
   *       WordErrorRate.computeNumerator()}. According to the original documentation this counts
   *       the deletions/insertions needed to obtain the reference dialogue move.
   * </ol>
   *
   * <p>The selection starts afresh for every utterance. When an utterance offers no hypothesis to
   * choose from, an empty dialogue move is compared against the reference.
   *
   * @deprecated Use instead the linear re-ranking with more than 3 feat and SER manually defined
   *     (available in xml file)
   * @param xmlFileName input xml file containing both the n-best hypotheses and the reference
   *     transcription.
   * @return one value per {@code nbest_data} element, or {@code null} when the file could not be
   *     read or parsed (the stack trace is printed)
   * @throws Exception declared for callers; unchecked exceptions (for example a missing or
   *     non-numeric feature element) propagate unchanged
   */
  public float[] getER4LinearReranking3Feat(String xmlFileName) throws Exception {
    try {
      Document d = new org.jdom.input.SAXBuilder().build(new File(xmlFileName));
      java.util.List nbestList = d.getRootElement().getChildren("nbest_data");
      float[] serArray = new float[nbestList.size()];

      for (int i = 0; i < nbestList.size(); i++) {
        Element nbestElem = (Element) nbestList.get(i);

        // Reference dialogue move of the utterance; empty when the element is absent.
        String refDM = "";
        Element dmElem = nbestElem.getChild("dialogue_move");
        if (dmElem != null) {
          refDM = dmElem.getValue();
        }

        java.util.List recList = nbestElem.getChildren("recognition");

        // Linear re-ranking. The best candidate is tracked per utterance so that a result from a
        // previous utterance can never be reused. Index 0 is skipped exactly as before
        // (presumably it holds the reference transcription -- TODO confirm).
        int minNewRank = 0;
        int minNewRankID = -1;
        for (int j = 1; j < recList.size(); j++) {
          Element recElem = (Element) recList.get(j);
          int rank = Integer.parseInt(recElem.getChild("rank").getValue().trim());
          int uq = Integer.parseInt(recElem.getChild("underconstrained_query").getValue().trim());
          int it = Integer.parseInt(recElem.getChild("inconsistent_tense").getValue().trim());

          int newRank = rank + 10 * uq + 10 * it;
          if (minNewRankID < 0 || newRank < minNewRank) {
            minNewRank = newRank;
            minNewRankID = j;
          }
        }

        // Dialogue move of the selected hypothesis; stays empty when nothing could be selected.
        String dm4rec = "";
        if (minNewRankID >= 0) {
          Element dm4recElem = ((Element) recList.get(minNewRankID)).getChild("dialogue_move");
          if (dm4recElem != null) {
            dm4rec = dm4recElem.getValue();
          }
        }

        WordErrorRate wer = new WordErrorRate(refDM, dm4rec, this.wordDeliminator);
        serArray[i] = wer.computeNumerator();
      }

      return serArray;
    } catch (IOException eIO) {
      eIO.printStackTrace();
    } catch (JDOMException eJDOM) {
      eJDOM.printStackTrace();
    }
    return null;
  }
  /**
   * Computes the best semantic error rate that can be achieved starting from the n-best
   * hypotheses: an utterance scores 0 as soon as one of its {@code recognition} hypotheses has a
   * {@code semantically_correct} value of "good" (case-insensitive), and 1 otherwise.
   *
   * <p>An utterance without any hypothesis scores 1, the worst possible value, instead of an
   * out-of-range sentinel.
   *
   * @param xmlFileName input xml file containing the {@code nbest_data} elements
   * @return a list of error rates corresponding to each utterance, or {@code null} when the file
   *     could not be read or parsed (the stack trace is printed)
   * @throws Exception declared for callers; unchecked exceptions (for example a hypothesis
   *     without a {@code semantically_correct} element) propagate unchanged
   */
  public float[] getBestERCanBeAchieved(String xmlFileName) throws Exception {
    try {
      Document d = new org.jdom.input.SAXBuilder().build(new File(xmlFileName));
      java.util.List nbestList = d.getRootElement().getChildren("nbest_data");
      float[] serArray = new float[nbestList.size()];

      for (int i = 0; i < nbestList.size(); i++) {
        Element nbestElem = (Element) nbestList.get(i);
        java.util.List recList = nbestElem.getChildren("recognition");

        // Start from the worst valid error rate so that no sentinel can leak into the result.
        float bestSER = 1;
        // (start with k=1 since we skip first value in recList which corresponds to the correct
        // transcription)
        for (int k = 1; k < recList.size(); k++) {
          Element recElem = (Element) recList.get(k);
          String semCorrect = recElem.getChild("semantically_correct").getValue();
          if (semCorrect.equalsIgnoreCase("good")) {
            bestSER = 0;
          }
        }
        serArray[i] = bestSER;
      }
      return serArray;
    } catch (IOException eIO) {
      eIO.printStackTrace();
    } catch (JDOMException eJDOM) {
      eJDOM.printStackTrace();
    }
    return null;
  }
  /**
   * Gets the SER for a naive algorithm which does random re-ranking: for every utterance one
   * {@code recognition} hypothesis (index 1 or higher) is drawn uniformly at random and scored 0
   * when its {@code semantically_correct} value is "good" (case-insensitive), 1 otherwise.
   *
   * <p>An utterance that offers no hypothesis to draw from scores 1 and a warning is printed,
   * instead of failing inside the random number generator.
   *
   * @param xmlFileName input xml file containing the {@code nbest_data} elements
   * @return one error value per utterance, or {@code null} when the file could not be read or
   *     parsed, or when a drawn hypothesis has no {@code semantically_correct} element (the stack
   *     trace is printed in both cases)
   * @throws Exception declared for callers; not thrown explicitly by this method
   */
  public float[] getER4RandomHyp(String xmlFileName) throws Exception {
    try {
      Document d = new org.jdom.input.SAXBuilder().build(new File(xmlFileName));
      java.util.List nbestList = d.getRootElement().getChildren("nbest_data");
      float[] serArray = new float[nbestList.size()];
      java.util.Random rand = new java.util.Random();

      for (int i = 0; i < nbestList.size(); i++) {
        Element nbestElem = (Element) nbestList.get(i);
        java.util.List recList = nbestElem.getChildren("recognition");

        // Index 0 is never drawn, so only size - 1 candidates exist. Random.nextInt rejects a
        // bound that is not positive, hence the explicit guard.
        int candidateCount = recList.size() - 1;
        if (candidateCount < 1) {
          System.out.println("No recognition hypothesis for utterance " + (i + 1) + " !!! ");
          serArray[i] = 1;
          continue;
        }

        int bestRankIdx = rand.nextInt(candidateCount) + 1;
        Element recElem = (Element) recList.get(bestRankIdx);

        String semCorrect = recElem.getChild("semantically_correct").getValue();
        if (semCorrect.equalsIgnoreCase("good")) {
          serArray[i] = 0;
        } else {
          serArray[i] = 1;
        }
      }
      return serArray;
    } catch (IOException eIO) {
      eIO.printStackTrace();
    } catch (JDOMException eJDOM) {
      eJDOM.printStackTrace();
    } catch (NullPointerException nullE) {
      // Kept for backward compatibility: a missing "semantically_correct" element still results
      // in a printed stack trace and a null return value.
      nullE.printStackTrace();
    }
    return null;
  }
  // ------------------------------------------------------------------------------------------------
  // Description: registers one JFunctionStub in FunctionList for every entry returned by
  //              PackageStub.getContentVector("fmisReportFunction").
  // Design: Skyline(2001.12.29)
  // Implementation: Skyline
  // Modified:
  // ------------------------------------------------------------------------------------------------
  public void InitObject(Element PublicElement, Element PrivateElement, String OpFrom) {
    // Historical note: an earlier implementation (formerly kept here as commented-out code)
    // enumerated the children of the "FunctionManager" element through JActiveDComDM.XMLRegistry
    // and read each stub's "id" and "class" attributes. The stubs now come from PackageStub.

    // Missing configuration elements are only reported; processing continues regardless.
    if (PublicElement == null) {
      System.out.println("PublicElement=null");
    }
    if (PrivateElement == null) {
      System.out.println("PrivateElement=null");
    }

    // Without a "FunctionManager" element there is nothing to initialise.
    Element managerElement =
        JActiveDComDM.XMLRegistry.GetElementByName(PublicElement, "FunctionManager");
    if (managerElement == null) {
      return;
    }
    InitFunctionConfigXML(managerElement.getAttribute("functionconfig").getValue());

    // The class name of each stub is looked up under the key given by OpFrom.
    java.util.List stubEntries = PackageStub.getContentVector("fmisReportFunction");
    for (int idx = 0; idx < stubEntries.size(); idx++) {
      StubObject entry = (StubObject) stubEntries.get(idx);
      JFunctionStub functionStub = new JFunctionStub();
      functionStub.FunctionID = entry.getString("id", "");
      functionStub.ClassName = entry.getString(OpFrom, "");
      FunctionList.add(functionStub);
    }
  }
Beispiel #5
0
  /**
   * Returns the stage that precedes {@code currStage}.
   *
   * <p>A stage has the form {@code <version>:<item index>}. The previous stage is the preceding
   * item of the same version or, when the current item is the first one, the last item of the
   * next lower version key in {@code items}.
   *
   * @param currStage the current stage, may be {@code null}
   * @return the previous stage, or {@code null} when {@code currStage} is {@code null}, its
   *     version has no items, or there is no lower version
   */
  public String getPreviousStage(String currStage) {
    if (currStage == null) {
      return null;
    }

    // Split "<version>:<index>" at the first colon.
    int separator = currStage.indexOf(":");
    Version current = new Version(currStage.substring(0, separator));
    int itemIndex = Integer.parseInt(currStage.substring(separator + 1));

    if (this.items.get(current) == null) {
      return null;
    }

    // Still inside the same version: simply step one item back.
    int previousIndex = itemIndex - 1;
    if (previousIndex >= 0) {
      return current.getVersion() + ":" + previousIndex;
    }

    // Otherwise move to the last item of the next lower version, if there is one.
    Version lower = this.items.lowerKey(current);
    if (lower == null) {
      return null;
    }

    java.util.List<WhatsNewItem> lowerItems = this.items.get(lower);
    if (lowerItems == null) {
      return null;
    }
    return lower.getVersion() + ":" + (lowerItems.size() - 1);
  }
Beispiel #6
0
  /**
   * Returns the stage that follows {@code currStage}.
   *
   * <p>A stage has the form {@code <version>:<item index>}. The next stage is the following item
   * of the same version or, when the current item is the last one, the first item of the next
   * higher version key in {@code items}.
   *
   * @param currStage the current stage; {@code null} yields the start stage
   * @return the next stage, or {@code null} when the version of {@code currStage} has no items
   *     or there is no higher version
   */
  public String getNextStage(String currStage) {
    if (currStage == null) {
      return this.getStartStage();
    }

    // Split "<version>:<index>" at the first colon.
    int separator = currStage.indexOf(":");
    Version current = new Version(currStage.substring(0, separator));
    int itemIndex = Integer.parseInt(currStage.substring(separator + 1));

    java.util.List<WhatsNewItem> currentItems = this.items.get(current);
    if (currentItems == null) {
      return null;
    }

    // Still inside the same version: simply step one item forward.
    int followingIndex = itemIndex + 1;
    if (followingIndex < currentItems.size()) {
      return current.getVersion() + ":" + followingIndex;
    }

    // Otherwise move to the first item of the next higher version, if there is one.
    Version higher = this.items.higherKey(current);
    if (higher == null) {
      return null;
    }

    java.util.List<WhatsNewItem> higherItems = this.items.get(higher);
    return higherItems == null ? null : higher.getVersion() + ":0";
  }
  /**
   * Sets default values on {@code valueObject}.
   *
   * <p>The XML description of the entity is obtained from {@code DomService} for the given form.
   * Every {@code item} element that carries the attributes {@code datatype}, {@code default} and
   * {@code whendefault} is a candidate: its default is applied through {@code invokeMethord}
   * (presumably via reflection on the setter {@code set<Name>} -- see that method) when
   * {@code actionType} starts with {@code init_}, {@code create_} or {@code save_} and
   * {@code whendefault} contains the matching word {@code init}, {@code create} or {@code save}.
   * The comparison ignores case and does not depend on the default locale.
   *
   * <p>Any exception raised while processing the items is printed and stops the processing of
   * the remaining items; the value object obtained so far is returned.
   *
   * @param valueObject the value object to fill in
   * @param formName name of the form whose XML description is used
   * @param actionType the current action, e.g. {@code init_...}, {@code create_...} or
   *     {@code save_...}; when {@code null} no default is applied
   * @return the value object, updated with the applicable defaults
   */
  private S_FrameWork_Query_OperationValueObject setDefault(
      S_FrameWork_Query_OperationValueObject valueObject, String formName, String actionType) {

    String entityName = "S_FrameWork_Query_Operation"; // auto-generated code
    Document doc = DomService.getXMLDocFromEntity(entityName, formName);
    // 1. Walk over all item nodes and check whether a default is configured.
    // 2. Compare with actionType to see whether the default applies to the current operation.
    // 3. Set the default value of the property (through invokeMethord).
    try {
      if (doc == null || actionType == null) {
        return valueObject;
      }

      // Locale.ENGLISH keeps the case conversion independent of the platform locale (with a
      // Turkish default locale "INIT_".toLowerCase() would not yield "init_").
      String action = actionType.toLowerCase(java.util.Locale.ENGLISH);
      java.util.List elements = doc.getRootElement().getChildren("item");
      // Deliberately declared outside the loop: the last resolved type is kept, as before.
      String sType = "string";

      for (int i = 0; i < elements.size(); i++) {
        Element item = (Element) elements.get(i);
        String name = item.getAttribute("name").getValue();
        String methordName =
            "set" + name.substring(0, 1).toUpperCase(java.util.Locale.ENGLISH) + name.substring(1);

        boolean hasDataType = item.getAttribute("datatype") != null;
        if (hasDataType && item.getAttribute("datatype").getValue() != null) {
          // A "dbtype" attribute, when present, takes precedence over "datatype".
          if (item.getAttribute("dbtype") == null) {
            sType = item.getAttribute("datatype").getValue();
          } else {
            sType = item.getAttribute("dbtype").getValue();
          }
        }

        // Original note (wzp, 2005-03-23): defaults are set when a record is newly added, i.e.
        // on initialisation and load, not on modification. The code nevertheless also honours
        // "save" when "whendefault" asks for it. Open question from the original author: how
        // can a new record be recognised reliably?
        if (hasDataType
            && item.getAttribute("default") != null
            && item.getAttribute("whendefault") != null) {
          String whendefault =
              item.getAttribute("whendefault").getValue().toLowerCase(java.util.Locale.ENGLISH);
          String defaultValue = item.getAttribute("default").getValue();

          if (defaultAppliesToPhase(action, whendefault, "init")
              || defaultAppliesToPhase(action, whendefault, "create")
              || defaultAppliesToPhase(action, whendefault, "save")) {
            valueObject = this.invokeMethord(valueObject, methordName, sType, defaultValue);
          }
        }
      }

    } catch (Exception exe) {
      exe.printStackTrace();
    }

    return valueObject;
  }

  /** Tells whether a lower-cased action of the given phase is covered by a "whendefault" value. */
  private boolean defaultAppliesToPhase(String action, String whendefault, String phase) {
    return action.startsWith(phase + "_") && whendefault.indexOf(phase) > -1;
  }
  /**
   * Gets the SER for re-ranking predicted by the machine learning technique.
   *
   * <p>The predictions file is read line by line in step with the XML file: one line (a float
   * rank) is consumed for every {@code recognition} hypothesis with index 1 or higher. For each
   * utterance the hypothesis with the smallest predicted rank is selected (the first one wins a
   * tie) and scored 0 when its {@code semantically_correct} value is "good" (case-insensitive),
   * 1 otherwise. An utterance without any hypothesis scores 1 and a warning is printed.
   *
   * <p>The predictions file is always closed before the method returns.
   *
   * @param xmlFileName the XML file name containing the reference data (i.e. the human utterance
   *     transcription).
   * @param svmPredFileName file name of the hypothesised data (i.e. the re-estimated recognition
   *     ranks), one value per line
   * @return one error value per utterance, or {@code null} when a file could not be read or
   *     parsed, or when a selected hypothesis has no {@code semantically_correct} element (the
   *     stack trace is printed in those cases)
   * @throws Exception if the predictions file ends before the XML file has been fully processed;
   *     unchecked exceptions such as a non-numeric prediction line propagate unchanged
   */
  public float[] getER4Predicted(String xmlFileName, String svmPredFileName) throws Exception {
    BufferedReader br = null;
    try {
      br = getBufferedReader(svmPredFileName);
      Document d = new org.jdom.input.SAXBuilder().build(new File(xmlFileName));
      java.util.List nbestList = d.getRootElement().getChildren("nbest_data");
      float[] serArray = new float[nbestList.size()];

      for (int i = 0; i < nbestList.size(); i++) {
        Element nbestElem = (Element) nbestList.get(i);
        java.util.List recList = nbestElem.getChildren("recognition");

        // Find the hypothesis that the re-ranking procedure classifies as minimal in rank. The
        // first prediction always becomes the initial candidate, so no magic upper bound on the
        // predicted rank is needed.
        float bestRank = 0;
        int bestRankIdx = -1;
        for (int k = 1; k < recList.size(); k++) {
          String thisLine = br.readLine();
          if (thisLine == null) {
            throw new Exception(
                "End of file in predictions file before reading the entire xml file!");
          }
          float predictedRank = Float.parseFloat(thisLine);
          if (bestRankIdx < 0 || predictedRank < bestRank) {
            bestRankIdx = k;
            bestRank = predictedRank;
          }
        }

        // Nothing to select from: count the utterance as an error instead of indexing with -1.
        if (bestRankIdx < 0) {
          System.out.println("No recognition hypothesis for utterance " + (i + 1) + " !!! ");
          serArray[i] = 1;
          continue;
        }

        Element recElem = (Element) recList.get(bestRankIdx);
        String semCorrect = recElem.getChild("semantically_correct").getValue();
        if (semCorrect.equalsIgnoreCase("good")) {
          serArray[i] = 0;
        } else {
          serArray[i] = 1;
        }
      }

      // Left-over predictions indicate that the two files do not belong together.
      String thisLine = br.readLine();
      if (thisLine != null) {
        System.out.println(
            "There are still some " + "predictions while the xml file reached endOfFile!!! ");
        System.out.println(thisLine);
      }
      return serArray;
    } catch (IOException eIO) {
      eIO.printStackTrace();
    } catch (JDOMException eJDOM) {
      eJDOM.printStackTrace();
    } catch (NullPointerException nullE) {
      // Kept for backward compatibility: a missing "semantically_correct" element still results
      // in a printed stack trace and a null return value.
      nullE.printStackTrace();
    } finally {
      if (br != null) {
        try {
          br.close();
        } catch (IOException ignored) {
          // Nothing useful can be done when closing a reader that was only read from fails.
        }
      }
    }
    return null;
  }
  /**
   * Gets the SER for linear re-ranking. For every utterance:
   *
   * <ol>
   *   <li>each {@code recognition} hypothesis with index 1 or higher is re-scored with the linear
   *       function {@code newRank = rank + 50 * no_dialogue_move + 10 * underconstrained_query +
   *       10 * non_indefinite_existential + 50 * non_show_imperative + 2 *
   *       indefinite_meeting_and_meeting_referent};
   *   <li>the hypothesis which is smallest in (new) rank is taken as the best one (the first one
   *       wins a tie);
   *   <li>the semantic error is 0 when the {@code semantically_correct} tag of that hypothesis is
   *       "good" (case-insensitive), 1 otherwise.
   * </ol>
   *
   * <p>The selection starts afresh for every utterance and is not limited by an upper bound on
   * the new rank. An utterance without any hypothesis scores 1 and a warning is printed.
   *
   * @param xmlFileName input xml file containing the {@code nbest_data} elements
   * @return one error value per utterance, or {@code null} when the file could not be read or
   *     parsed, or when a required child element is missing (the stack trace is printed)
   * @throws Exception declared for callers; unchecked exceptions such as a non-numeric feature
   *     value propagate unchanged
   */
  public float[] getER4LinearReranking(String xmlFileName) throws Exception {
    try {
      Document d = new org.jdom.input.SAXBuilder().build(new File(xmlFileName));
      java.util.List nbestList = d.getRootElement().getChildren("nbest_data");
      float[] serArray = new float[nbestList.size()];

      for (int i = 0; i < nbestList.size(); i++) {
        Element nbestElem = (Element) nbestList.get(i);
        java.util.List recList = nbestElem.getChildren("recognition");

        // The best candidate is tracked per utterance; two 50-point penalties already reach 100,
        // so a fixed initial bound would silently reject valid candidates.
        int minNewRank = 0;
        int minNewRankID = -1;
        for (int j = 1; j < recList.size(); j++) {
          Element recElem = (Element) recList.get(j);

          // Feature weights (penalties are added, so a lower new rank is better):
          //   rank                                     x 1   place in the N-best list
          //   no_dialogue_move                         x 50  hypothesis produces no dialogue move
          //   underconstrained_query                   x 10  query without contentful constraints
          //   non_indefinite_existential               x 10  e.g. "is there the meeting next week"
          //   non_show_imperative                      x 50  imperative whose verb is not "show"
          //   indefinite_meeting_and_meeting_referent  x 2   indefinite mention of a meeting while
          //                                                  a meeting referent is available
          int newRank =
              linearRerankingFeature(recElem, "rank")
                  + 50 * linearRerankingFeature(recElem, "no_dialogue_move")
                  + 10 * linearRerankingFeature(recElem, "underconstrained_query")
                  + 10 * linearRerankingFeature(recElem, "non_indefinite_existential")
                  + 50 * linearRerankingFeature(recElem, "non_show_imperative")
                  + 2 * linearRerankingFeature(recElem, "indefinite_meeting_and_meeting_referent");

          if (minNewRankID < 0 || newRank < minNewRank) {
            minNewRank = newRank;
            minNewRankID = j;
          }
        }

        // Nothing to select from: count the utterance as an error.
        if (minNewRankID < 0) {
          System.out.println("No recognition hypothesis for utterance " + (i + 1) + " !!! ");
          serArray[i] = 1;
          continue;
        }

        Element bestElem = (Element) recList.get(minNewRankID);
        String semCorrect = bestElem.getChild("semantically_correct").getValue();
        if (semCorrect.equalsIgnoreCase("good")) {
          serArray[i] = 0;
        } else {
          serArray[i] = 1;
        }
      }
      return serArray;
    } catch (IOException eIO) {
      eIO.printStackTrace();
    } catch (JDOMException eJDOM) {
      eJDOM.printStackTrace();
    } catch (NullPointerException nullE) {
      // Kept for backward compatibility: a missing feature or "semantically_correct" element
      // still results in a printed stack trace and a null return value.
      nullE.printStackTrace();
    }
    return null;
  }

  /** Reads the named child of a recognition element as an integer feature value. */
  private int linearRerankingFeature(Element recElem, String featureName) {
    return Integer.parseInt(recElem.getChild(featureName).getValue().trim());
  }
Beispiel #10
0
  /**
   * Collects the "what's new" items that should be shown to the user.
   *
   * <p>The version last viewed is read from the environment property {@code
   * Constants.WHATS_NEW_VERSION_VIEWED_PROPERTY_NAME} ("0" when unset). Every version element of
   * the what's-new resource file is then considered; it is kept when {@code
   * lastWhatsNewVersion.isNewer(v)} holds (presumably: the version is newer than the one last
   * viewed) or when {@code onlyShowCurrentVersion} is set and the version equals the running
   * application version. Versions that end up without any usable item are not stored.
   *
   * <p>NOTE: filtering of beta versions is currently disabled; the user's beta opt-in is not
   * consulted.
   *
   * @param pv the project viewer, passed to the superclass and to every item
   * @param onlyShowCurrentVersion whether the running version is always included
   * @throws GeneralException if the what's-new data cannot be loaded or processed
   */
  public WhatsNew(AbstractProjectViewer pv, boolean onlyShowCurrentVersion)
      throws GeneralException {

    super(pv);

    String wn = Environment.getProperty(Constants.WHATS_NEW_VERSION_VIEWED_PROPERTY_NAME);

    if (wn == null) {

      wn = "0";
    }

    // Get the current whats new version (i.e. old).
    Version lastWhatsNewVersion = new Version(wn);

    try {

      String whatsNew = Environment.getResourceFileAsString(Constants.WHATS_NEW_FILE);

      // Load up all the whats new for greater versions.
      Element root = JDOMUtils.getStringAsElement(whatsNew);

      java.util.List verEls = JDOMUtils.getChildElements(root, XMLConstants.version, false);

      // Assume they are in the right order
      // TODO: Enforce the order and/or sort.
      for (int i = 0; i < verEls.size(); i++) {

        Element vEl = (Element) verEls.get(i);

        String id = JDOMUtils.getAttributeValue(vEl, XMLConstants.id, true);

        Version v = new Version(id);

        boolean wanted =
            lastWhatsNewVersion.isNewer(v)
                || (onlyShowCurrentVersion && v.isSame(Environment.getQuollWriterVersion()));

        if (!wanted) {

          continue;
        }

        // Optional, version specific provider of custom components.
        WhatsNewComponentProvider compProv = null;

        String cl = JDOMUtils.getAttributeValue(vEl, XMLConstants.clazz, false);

        if ((cl != null) && (!cl.equals(""))) {

          try {

            Class<?> clz = Class.forName(cl);

            if (WhatsNewComponentProvider.class.isAssignableFrom(clz)) {

              compProv = (WhatsNewComponentProvider) clz.newInstance();
            }

          } catch (Exception e) {

            // Best effort: the items are still created, just without a provider. The failure is
            // reported rather than silently dropped.
            Environment.logMessage(
                "Unable to create whats new component provider: "
                    + cl
                    + ", referenced by: "
                    + JDOMUtils.getPath(vEl)
                    + ", error: "
                    + e);
          }
        }

        // This is a version we are interested in.
        java.util.List itemEls =
            JDOMUtils.getChildElements(vEl, WhatsNewItem.XMLConstants.root, true);

        java.util.List<WhatsNewItem> its = new ArrayList<WhatsNewItem>();

        for (int j = 0; j < itemEls.size(); j++) {

          Element itEl = (Element) itemEls.get(j);

          WhatsNewItem it = new WhatsNewItem(itEl, compProv, pv);

          // Items flagged this way are only shown for the running version.
          if ((it.onlyIfCurrentVersion) && (!Environment.getQuollWriterVersion().isSame(v))) {

            continue;
          }

          if ((it.description == null) && (it.component == null)) {

            Environment.logMessage(
                "Whats new item has no description or component, referenced by: "
                    + JDOMUtils.getPath(itEl));

            continue;
          }

          its.add(it);
        }

        if (its.size() > 0) {

          this.items.put(v, its);
        }
      }

    } catch (Exception e) {

      throw new GeneralException("Unable to init whats new", e);
    }
  }