/**
   * Writes one KeLP-formatted example line for a question/comment pair.
   *
   * <p>The line is assembled from, in order: the gold label, a pair of serialized trees (question
   * first, comment second), a sparse feature vector and an info section holding the question and
   * comment ids. The finished line is trimmed and handed to {@code fm.writeLn}.
   *
   * <p>Feature entries are written as {@code index:value} with 1-based indices. Entries that are
   * NaN, infinite or equal to {@code 0.0} are omitted; entries whose magnitude exceeds
   * {@code 1e100} are written with the value {@code 0}. Values are always formatted with
   * {@link java.util.Locale#ROOT} so the decimal separator is {@code '.'} and digits are ASCII
   * regardless of the JVM's default locale.
   *
   * @param questionCas CAS from which the question tree is built
   * @param commentCas CAS from which the comment tree is built
   * @param outputPath destination passed as the first argument of {@code fm.writeLn}
   * @param ts serializer used to turn both trees into strings
   * @param qid question identifier written in the info section
   * @param cid comment identifier written in the info section
   * @param cgold gold label written at the start of the line
   * @param cgold_yn not used by this method
   * @param features dense feature values; position {@code i} is emitted with index {@code i + 1}
   */
  protected void produceKelpExample(
      JCas questionCas,
      JCas commentCas,
      String outputPath,
      TreeSerializer ts,
      String qid,
      String cid,
      String cgold,
      String cgold_yn,
      List<Double> features) {
    TokenTree questionTree = RichTree.getPosChunkTree(questionCas);
    String questionTreeString = ts.serializeTree(questionTree, RichNode.OUTPUT_PAR_SEMANTIC_KERNEL);

    TokenTree commentTree = RichTree.getPosChunkTree(commentCas);
    String commentTreeString = ts.serializeTree(commentTree, RichNode.OUTPUT_PAR_SEMANTIC_KERNEL);

    StringBuilder output = new StringBuilder();
    output.append(cgold).append(' ');

    // '|' is replaced inside the serialized trees; presumably because it is the field delimiter
    // of the surrounding line format -- TODO confirm against the KeLP reader.
    output
        .append("|<||BT:tree| ")
        .append(questionTreeString.replace('|', '-'))
        .append(" |ET||,||BT:tree| ")
        .append(commentTreeString.replace('|', '-'))
        .append(" |ET| |>| ");

    output.append("|BV:features|");
    for (int i = 0; i < features.size(); i++) {
      double value = features.get(i);
      // Double.compare (rather than ==) keeps the original treatment of -0.0 as non-zero.
      boolean writable =
          !Double.isNaN(value) && !Double.isInfinite(value) && Double.compare(value, 0.0) != 0;
      if (!writable) {
        continue;
      }

      if (Math.abs(value) > 1e100) {
        value = 0.0;
      }

      // Locale.ROOT guarantees "1.500000" instead of e.g. "1,500000" on any default locale.
      output
          .append(i + 1)
          .append(':')
          .append(String.format(java.util.Locale.ROOT, "%f", value))
          .append(' ');
    }
    output.append("|EV|");

    output.append("|BS:info| #").append(qid).append('\t').append(cid).append("|ES|");

    fm.writeLn(outputPath, output.toString().trim());
  }
Exemplo n.º 2
0
  /**
   * Predicts a question class for the given CAS and records it as an annotation.
   *
   * <p>Nothing happens when the CAS holds no {@code Constituent}. Otherwise the constituency tree
   * is serialized, wrapped in {@code |BT| ... |ET|} markers and passed to the classifier, which is
   * set up through {@code init()} the first time it is needed.
   *
   * @param cas the CAS to classify and annotate
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas cas) throws AnalysisEngineProcessException {
    final boolean hasConstituents = JCasUtil.exists(cas, Constituent.class);

    if (hasConstituents) {
      // The classifier is only created once it is actually required.
      if (this.classifier == null) {
        init();
      }

      final String serializedTree =
          ts.serializeTree(RichTree.getConstituencyTree(cas), this.parameterList);
      final String predictedClass =
          this.classifier.getMostConfidentModel("|BT| " + serializedTree + " |ET|");

      addQuestionClassAnnotation(cas, predictedClass);
    }
  }
  /**
   * Writes SVMLight-TK example lines for a question/comment pair, one line per label file.
   *
   * <p>For every label in {@code a_labels} a line is appended to
   * {@code semeval2015-3/svmlighttk/a/<suffix>/<label>.svm}; the line is marked {@code +1} when
   * the label equals {@code cgold} and {@code -1} otherwise. The same is done for every label in
   * {@code b_labels} against {@code cgold_yn} under {@code .../b/...}, except that nothing is
   * written there when {@code cgold_yn} is {@code "Not Applicable"}. Spaces in a label are
   * replaced by underscores in the file name.
   *
   * <p>Feature entries are written as {@code index:value} with 1-based indices. Entries that are
   * NaN, infinite or equal to {@code 0.0} are omitted; entries whose magnitude exceeds
   * {@code 1e100} are written with the value {@code 0}. Values are formatted with
   * {@link java.util.Locale#ROOT} so the decimal separator is always {@code '.'}; formatting with
   * the default locale could produce {@code 1:0,500000} on decimal-comma systems.
   *
   * @param questionCas CAS from which the question tree is built
   * @param commentCas CAS from which the comment tree is built
   * @param suffix directory component inserted into both output paths
   * @param ts serializer used to turn both trees into strings
   * @param qid question identifier appended after {@code #}
   * @param cid comment identifier appended after the question id, separated by a tab
   * @param cgold gold label compared against each entry of {@code a_labels}
   * @param cgold_yn gold label compared against each entry of {@code b_labels}
   * @param features dense feature values; position {@code i} is emitted with index {@code i + 1}
   */
  protected void produceSVMLightTKExample(
      JCas questionCas,
      JCas commentCas,
      String suffix,
      TreeSerializer ts,
      String qid,
      String cid,
      String cgold,
      String cgold_yn,
      List<Double> features) {
    TokenTree questionTree = RichTree.getPosChunkTree(questionCas);
    String questionTreeString = ts.serializeTree(questionTree);

    TokenTree commentTree = RichTree.getPosChunkTree(commentCas);
    String commentTreeString = ts.serializeTree(commentTree);

    // The feature vector is identical for every label, so it is rendered a single time.
    StringBuilder featureBuilder = new StringBuilder();
    for (int i = 0; i < features.size(); i++) {
      double value = features.get(i);
      // Double.compare (rather than ==) keeps the original treatment of -0.0 as non-zero.
      boolean writable =
          !Double.isNaN(value) && !Double.isInfinite(value) && Double.compare(value, 0.0) != 0;
      if (!writable) {
        continue;
      }

      if (Math.abs(value) > 1e100) {
        value = 0.0;
      }

      featureBuilder
          .append(i + 1)
          .append(':')
          .append(String.format(java.util.Locale.ROOT, "%f", value))
          .append(' ');
    }

    // Everything that follows the label on a line: both trees, the features and the ids.
    String treesAndFeatures =
        " |BT| "
            + questionTreeString
            + " |BT| "
            + commentTreeString
            + " |ET| "
            + featureBuilder
            + "|EV|"
            + " #"
            + qid
            + "\t"
            + cid;

    // NOTE(review): task-A lines carry two spaces after the label and are trimmed, task-B lines
    // carry one space and are not trimmed. Both quirks are kept so existing files stay identical.
    for (String label : this.a_labels) {
      String svmLabel = label.equals(cgold) ? "+1" : "-1";
      String output = svmLabel + " " + treesAndFeatures;

      fm.writeLn(
          "semeval2015-3/svmlighttk/a/" + suffix + "/" + label.replace(' ', '_') + ".svm",
          output.trim());
    }

    for (String label : this.b_labels) {
      // Pairs without a yes/no gold label contribute nothing to the task-B files.
      if (cgold_yn.equals("Not Applicable")) {
        continue;
      }

      String svmLabel = label.equals(cgold_yn) ? "+1" : "-1";
      String output = svmLabel + treesAndFeatures;

      fm.writeLn(
          "semeval2015-3/svmlighttk/b/" + suffix + "/" + label.replace(' ', '_') + ".svm",
          output);
    }
  }