예제 #1
0
  /**
   * Builds a fresh CAS for a single comment and runs the configured analysis engines on it.
   *
   * <p>The document text is the normalised aggregation of the comment's {@code CSubject} and
   * {@code CBody} texts, and the document language is fixed to {@code "en"}. Only the first
   * {@code CSubject} and the first {@code CBody} child are read; {@code get(0)} is called
   * unguarded, so a comment lacking either child is expected to fail on that lookup.
   *
   * @param comment element holding the {@code CSubject} and {@code CBody} children
   * @return the CAS after {@code this.analysisEngineList} has been run over it
   * @throws UIMAException propagated from CAS creation or from running the pipeline
   */
  private JCas computeCommentCas(Element comment) throws UIMAException {
    JCas cCas = JCasFactory.createJCas();
    String csubject = comment.getElementsByTag("CSubject").get(0).text();
    String cbody = comment.getElementsByTag("CBody").get(0).text();

    /** Setup comment CAS */
    cCas.reset();
    cCas.setDocumentLanguage("en");
    String commentText =
        TextNormalizer.normalize(SubjectBodyAggregator.getCommentText(csubject, cbody));
    cCas.setDocumentText(commentText);

    /** Run the UIMA pipeline */
    SimplePipeline.runPipeline(cCas, this.analysisEngineList);

    return cCas;
  }
  /**
   * Converts a question element into a {@code CQAinstance}: the question attributes plus its
   * normalised subject and body, followed by one entry per nested {@code Comment} element.
   *
   * <p>TODO this method should be deprecated and moved into a class that just reads the XML file
   * and generates the object.
   *
   * @param qelement element carrying the question attributes and the {@code QSubject},
   *     {@code QBody} and {@code Comment} children
   * @return object instance with the question data and all of its comments
   */
  private CQAinstance qElementToObject(Element qelement) {
    final String questionId = qelement.attr("QID");
    final String questionCategory = qelement.attr("QCATEGORY");
    final String questionDate = qelement.attr("QDATE");
    final String questionUser = qelement.attr("QUSERID");
    final String questionType = qelement.attr("QTYPE");
    final String questionGoldYn = qelement.attr("QGOLD_YN");

    // The subject goes through recoverOriginalText before normalisation.
    final String rawQuestionSubject = qelement.getElementsByTag("QSubject").get(0).text();
    final String questionSubject =
        TextNormalizer.normalize(JsoupUtils.recoverOriginalText(rawQuestionSubject));

    // The body is only normalised.
    // FIXME it should additionally go through JsoupUtils.recoverOriginalText and
    // UserProfile.removeSignature (looked up in userProfiles by the question's user id).
    final String rawQuestionBody = qelement.getElementsByTag("QBody").get(0).text();
    final String questionBody = TextNormalizer.normalize(rawQuestionBody);

    final CQAinstance instance =
        new CQAinstance(
            new CQAquestion(
                questionId,
                questionDate,
                questionUser,
                questionType,
                questionGoldYn,
                questionSubject,
                questionBody),
            questionCategory);

    /** Parse comment nodes */
    for (Element commentNode : qelement.getElementsByTag("Comment")) {
      final String commentId = commentNode.attr("CID");
      final String commentUser = commentNode.attr("CUSERID");

      // Optionally collapse the gold label to two classes: GOOD for "good" (any case), else BAD.
      String goldLabel = commentNode.attr("CGOLD");
      if (ONLY_BAD_AND_GOOD_CLASSES) {
        if (goldLabel.equalsIgnoreCase("good")) {
          goldLabel = GOOD;
        } else {
          goldLabel = BAD;
        }
      }
      final String goldYn = commentNode.attr("CGOLD_YN");

      final String rawCommentSubject = commentNode.getElementsByTag("CSubject").get(0).text();
      final String commentSubject =
          TextNormalizer.normalize(JsoupUtils.recoverOriginalText(rawCommentSubject));

      // FIXME the comment body should also go through JsoupUtils.recoverOriginalText and
      // UserProfile.removeSignature (looked up in userProfiles by the comment's user id).
      final String rawCommentBody = commentNode.getElementsByTag("CBody").get(0).text();
      final String commentBody = TextNormalizer.normalize(rawCommentBody);

      instance.addComment(
          commentId, commentUser, goldLabel, goldYn, commentSubject, commentBody);
    }
    return instance;
  }