Пример #1
0
  /**
   * Writes the feature definition to {@code descFile} in the "all.desc" format which can be read
   * by wagon.
   *
   * <p>The output is one parenthesised list: first the fixed {@code (occurid cluster)} entry, then
   * one entry per feature. Features whose index is below the combined number of byte and short
   * features are written with all of their possible values as quoted strings; every other feature
   * is written as {@code float}. The features {@code unit_logf0} and {@code unit_duration} are
   * additionally tagged with {@code ignore}.
   *
   * @throws IOException if the file cannot be opened, or if an error occurred while writing to it
   */
  private void createDescFile() throws IOException {
    // Features that are tagged with "ignore" in the generated description.
    Set<String> featuresToIgnore = new HashSet<String>();
    featuresToIgnore.add("unit_logf0");
    featuresToIgnore.add("unit_duration");

    int numDiscreteFeatures =
        featureDefinition.getNumberOfByteFeatures() + featureDefinition.getNumberOfShortFeatures();

    PrintWriter out = new PrintWriter(new FileOutputStream(descFile));
    try {
      out.println("(");
      out.println("(occurid cluster)");
      for (int i = 0, n = featureDefinition.getNumberOfFeatures(); i < n; i++) {
        out.print("( ");
        String featureName = featureDefinition.getFeatureName(i);
        out.print(featureName);
        if (featuresToIgnore.contains(featureName)) {
          out.print(" ignore");
        }
        if (i < numDiscreteFeatures) { // list values
          for (int v = 0, vmax = featureDefinition.getNumberOfValues(i); v < vmax; v++) {
            out.print("  ");
            // Print values surrounded by double quotes, and make sure any
            // double quotes in the value are preceded by a backslash --
            // otherwise, we get problems e.g. for sentence_punc
            String val = featureDefinition.getFeatureValueAsString(i, v).replace("\"", "\\\"");
            out.print("\"" + val + "\"");
          }
          out.println(" )");
        } else { // float feature
          out.println(" float )");
        }
      }
      out.println(")");
      // PrintWriter never throws on write failures; it only records them. Surface them here so
      // that a truncated description file is not silently accepted.
      if (out.checkError()) {
        throw new IOException("Error while writing feature description file " + descFile);
      }
    } finally {
      // Always release the file handle, even if reading the feature definition fails.
      out.close();
    }
  }