/**
 * Processes the XML file and outputs a CSV file with the results in the same directory.
 *
 * @param dataFile the XML file to process
 * @param suffix suffix for identifying the data file
 * @throws ResourceInitializationException
 * @throws UIMAException
 * @throws IOException
 * @throws AnalysisEngineProcessException
 * @throws SimilarityException
 */
private void processEnglishFile(String dataFile, String suffix)
    throws ResourceInitializationException, UIMAException, IOException,
        AnalysisEngineProcessException, SimilarityException {

  /** Parameters for matching tree structures */
  String parameterList =
      Joiner.on(",")
          .join(new String[] {RichNode.OUTPUT_PAR_LEMMA, RichNode.OUTPUT_PAR_TOKEN_LOWERCASE});

  /** Marker which adds relational information to a pair of trees */
  MarkTreesOnRepresentation marker = new MarkTreesOnRepresentation(new MarkTwoAncestors());

  /** Load stopwords for English */
  marker.useStopwords(Stopwords.STOPWORD_EN);

  /** Tree serializer for converting tree structures to string */
  TreeSerializer ts = new TreeSerializer().enableRelationalTags().useRoundBrackets();

  /** Instantiate CASes */
  JCas questionCas = JCasFactory.createJCas();
  JCas commentCas = JCasFactory.createJCas();

  WriteFile out = new WriteFile(dataFile + ".csv");

  Document doc = Jsoup.parse(new File(dataFile), "UTF-8");
  doc.select("QURAN").remove();
  doc.select("HADEETH").remove();

  boolean firstRow = true;

  /** Consume data */
  Elements questions = doc.getElementsByTag("Question");
  int numberOfQuestions = questions.size();
  int questionNumber = 1;

  Map<String, Boolean> commentIsDialogue = new HashMap<>();

  for (Element question : questions) {
    System.out.println("[INFO]: Processing " + questionNumber++ + " out of " + numberOfQuestions);

    /** Parse question node */
    String qid = question.attr("QID");
    String qcategory = question.attr("QCATEGORY");
    String qdate = question.attr("QDATE");
    String quserid = question.attr("QUSERID");
    String qtype = question.attr("QTYPE");
    String qgold_yn = question.attr("QGOLD_YN");
    String qsubject = question.getElementsByTag("QSubject").get(0).text();
    String qbody = question.getElementsByTag("QBody").get(0).text();

    /** Setup question CAS */
    questionCas.reset();
    questionCas.setDocumentLanguage("en");
    questionCas.setDocumentText(qsubject + ". " + qbody);

    /** Run the UIMA pipeline */
    SimplePipeline.runPipeline(questionCas, this.analysisEngineList);

    // this.analyzer.analyze(questionCas, new SimpleContent("q-" + qid, qsubject + ". " + qbody));

    /** Parse comment nodes */
    Elements comments = question.getElementsByTag("Comment");
    for (Element comment : comments) {
      String cid = comment.attr("CID");
      String cuserid = comment.attr("CUSERID");
      String cgold = comment.attr("CGOLD");
      String cgold_yn = comment.attr("CGOLD_YN");
      String csubject = comment.getElementsByTag("CSubject").get(0).text();
      String cbody = comment.getElementsByTag("CBody").get(0).text();

      /** Setup comment CAS */
      commentCas.reset();
      commentCas.setDocumentLanguage("en");
      commentCas.setDocumentText(csubject + ". " + cbody);

      /** Run the UIMA pipeline */
      SimplePipeline.runPipeline(commentCas, this.analysisEngineList);

      // this.analyzer.analyze(commentCas, new SimpleContent("c-" + cid, csubject + ". " + cbody));

      FeatureVector fv = pfEnglish.getPairFeatures(questionCas, commentCas, parameterList);

      /**
       * *************************************
       * PLUG YOUR FEATURES HERE
       * *************************************
       */

      /**
       * fv is actually an AugmentableFeatureVector from the Mallet library.
       *
       * <p>Internally the features are named, so you must specify a unique identifier.
       *
       * <p>An example:
       *
       * <p>((AugmentableFeatureVector) fv).add("your_super_feature_id", 42);
       *
       * <p>or:
       *
       * <p>AugmentableFeatureVector afv = (AugmentableFeatureVector) fv;
       * afv.add("your_super_feature_id", 42);
       */
      boolean quseridEqCuserid = quserid.equals(cuserid);
      if (quseridEqCuserid) {
        commentIsDialogue.put(cid, true);
      }
      // ((AugmentableFeatureVector) fv).add("quseridEqCuserid", quseridEqCuserid);

      /**
       * *************************************
       * THANKS!
       * *************************************
       */

      /** Produce output line */
      if (firstRow) {
        out.write("qid,cgold,cgold_yn");
        for (int i = 0; i < fv.numLocations(); i++) {
          int featureIndex = i + 1;
          out.write(",f" + featureIndex);
        }
        out.write("\n");
        firstRow = false;
      }

      List<Double> features = this.serializeFv(fv);

      out.writeLn(cid + "," + cgold + "," + cgold_yn + "," + Joiner.on(",").join(features));

      /** Produce also the file needed to train structural models */
      if (PRODUCE_SVMLIGHTTK_DATA) {
        produceSVMLightTKExample(
            questionCas, commentCas, suffix, ts, qid, cid, cgold, cgold_yn, features);
      }
    }
  }

  for (String commentId : commentIsDialogue.keySet()) {
    this.fm.writeLn(dataFile + ".dialogue.txt", commentId);
  }

  this.fm.closeFiles();
  out.close();
}
/**
 * Processes the XML file and outputs a CSV file with the results in the same directory.
 *
 * @param dataFile the XML file to process
 * @throws ResourceInitializationException
 * @throws UIMAException
 * @throws IOException
 * @throws AnalysisEngineProcessException
 * @throws SimilarityException
 */
private void processEnglishFile(String dataFile)
    throws ResourceInitializationException, UIMAException, IOException,
        AnalysisEngineProcessException, SimilarityException {

  /** Parameters for matching tree structures */
  String parameterList =
      Joiner.on(",")
          .join(new String[] {RichNode.OUTPUT_PAR_LEMMA, RichNode.OUTPUT_PAR_TOKEN_LOWERCASE});

  /** Marker which adds relational information to a pair of trees */
  MarkTreesOnRepresentation marker = new MarkTreesOnRepresentation(new MarkTwoAncestors());

  /** Load stopwords for English */
  marker.useStopwords(Stopwords.STOPWORD_EN);

  /** Tree serializer for converting tree structures to string */
  // TreeSerializer ts = new TreeSerializer().enableRelationalTags().useRoundBrackets();

  WriteFile out = new WriteFile(dataFile + SUFFIX);

  Document doc = Jsoup.parse(new File(dataFile), "UTF-8");
  doc.select("QURAN").remove();
  doc.select("HADEETH").remove();

  boolean firstRow = true;

  /** Consume data */
  Elements questions = doc.getElementsByTag("Question");
  int numberOfQuestions = questions.size();
  int questionNumber = 1;

  // Map<String, Boolean> commentIsDialogue = new HashMap<>();
  // HashSet<String> questionCategories = new HashSet<String>();

  double[] matches = new double[11];
  int[] totals = new int[11];
  int bin;

  for (Element question : questions) {
    Question q = new Question();
    System.out.println("[INFO]: Processing " + questionNumber++ + " out of " + numberOfQuestions);

    /** Parse question node */
    String qid = question.attr("QID");
    String qcategory = question.attr("QCATEGORY");
    String qdate = question.attr("QDATE");
    String quserid = question.attr("QUSERID");
    String qtype = question.attr("QTYPE");
    String qgold_yn = question.attr("QGOLD_YN");
    String qsubject = question.getElementsByTag("QSubject").get(0).text();
    String qbody = question.getElementsByTag("QBody").get(0).text();

    // questionCategories.add(qcategory);

    q.setQid(qid);
    q.setQcategory(qcategory);
    q.setQdate(qdate);
    q.setQuserId(quserid);
    q.setQtype(qtype);
    q.setQgoldYN(qgold_yn);
    q.setQsubject(qsubject);
    q.setQbody(qbody);

    // this.analyzer.analyze(questionCas, new SimpleContent("q-" + qid, qsubject + ". " + qbody));

    /** Parse comment nodes */
    Elements comments = question.getElementsByTag("Comment");
    if (LIMIT_COMMENTS_PER_Q && comments.size() >= LIMIT_COMMENTS) {
      continue;
    }

    for (Element comment : comments) {
      String cid = comment.attr("CID");
      String cuserid = comment.attr("CUSERID");
      String cgold = comment.attr("CGOLD");
      String cgold_yn = comment.attr("CGOLD_YN");
      String csubject = comment.getElementsByTag("CSubject").get(0).text();
      String cbody = comment.getElementsByTag("CBody").get(0).text();
      q.addComment(cid, cuserid, cgold, cgold_yn, csubject, cbody);
    }

    List<JCas> allCommentsCas = new ArrayList<JCas>();
    List<String> ids = new ArrayList<String>();
    List<String> labels = new ArrayList<String>();
    for (Element comment : comments) {
      allCommentsCas.add(computeCommentCas(comment));
      ids.add(comment.attr("CID"));
      labels.add(getgold(comment.attr("CGOLD")));
    }

    for (int i = 0; i < allCommentsCas.size() - 1; i++) {
      for (int j = i + 1; j <= allCommentsCas.size() - 1; j++) {
        AugmentableFeatureVector fv;
        // COMPUTE THE SIMILARITY HERE
        // TODO: where to assign this
        // Whether the CAS are exactly identical
        // how to store/display the output
        fv =
            (AugmentableFeatureVector)
                pfEnglish.getPairFeatures(
                    allCommentsCas.get(i), allCommentsCas.get(j), parameterList);

        // System.out.println(
        //     ids.get(i) + "," +
        //     labels.get(i) + "," +
        //     ids.get(j) + "," +
        //     labels.get(j) + "," +
        //     Joiner.on(",").join(this.serializeFv(fv)));

        bin = (int) Math.round(fv.getValues()[0] * 10);
        if (labels.get(i).equals(labels.get(j))) {
          matches[bin]++;
        }
        totals[bin]++;

        /** Produce output line */
        if (firstRow) {
          out.write("qid,cgold");
          for (int c = 0; c < fv.numLocations(); c++) {
            int featureIndex = c + 1;
            out.write(",f" + featureIndex);
          }
          out.write("\n");
          firstRow = false;
        }

        // System.out.println(bin);
        out.writeLn(
            ids.get(i) + "-" + ids.get(j) + ","
                + labels.get(i) + "-" + labels.get(j) + ","
                + Joiner.on(",").join(this.serializeFv(fv)));
      }
    }
  }

  for (int i = 0; i < 11; i++) {
    System.out.println("BIN: " + i + " pctge: " + matches[i] / totals[i]);
  }

  this.fm.closeFiles();
  out.close();
}
public void processArabicFile(Analyzer analyzer, String dataFile, String suffix)
    throws SimilarityException, UIMAException, IOException {

  /** We do not have a lemmatizer, so we work with tokens */
  String parameterList = Joiner.on(",").join(new String[] {RichNode.OUTPUT_PAR_TOKEN_LOWERCASE});

  /** Instantiate CASes */
  JCas questionCas = JCasFactory.createJCas();
  JCas commentCas = JCasFactory.createJCas();

  WriteFile out = new WriteFile(dataFile + ".csv");

  Document doc = Jsoup.parse(new File(dataFile), "UTF-8");

  boolean firstRow = true;

  /** Consume data */
  Elements questions = doc.getElementsByTag("Question");
  int numberOfQuestions = questions.size();
  int questionNumber = 1;

  for (Element question : questions) {
    System.out.println("[INFO]: Processing " + questionNumber++ + " out of " + numberOfQuestions);

    /** Parse question node */
    String qid = question.attr("QID");
    String qcategory = question.attr("QCATEGORY");
    String qdate = question.attr("QDATE");
    String qsubject =
        question
            .getElementsByTag("QSubject")
            .get(0)
            .text()
            .replaceAll("/", "")
            .replaceAll("~", "");
    String qbody =
        question.getElementsByTag("QBody").get(0).text().replaceAll("/", "").replaceAll("~", "");

    /** Get analyzed text for question */
    if (USE_QCRI_ALT_TOOLS) {
      questionCas = this.getPreliminarCas(analyzer, questionCas, qid, qsubject + ". " + qbody);
    } else {
      questionCas.reset();
      questionCas.setDocumentLanguage("ar");
      questionCas.setDocumentText(qsubject + ". " + qbody);
      SimplePipeline.runPipeline(questionCas, this.analysisEngineList);
    }

    /** Parse answer nodes */
    Elements comments = question.getElementsByTag("Answer");
    for (Element comment : comments) {
      String cid = comment.attr("CID");
      String cgold = comment.attr("CGOLD");
      String cbody = comment.text().replaceAll("/", "").replaceAll("~", "");

      /** Get analyzed text for comment */
      if (USE_QCRI_ALT_TOOLS) {
        commentCas = this.getPreliminarCas(analyzer, commentCas, cid, cbody);
      } else {
        commentCas.reset();
        commentCas.setDocumentLanguage("ar");
        commentCas.setDocumentText(cbody);
        SimplePipeline.runPipeline(commentCas, this.analysisEngineList);
      }

      /** Compute features between question and comment */
      FeatureVector fv = pfArabic.getPairFeatures(questionCas, commentCas, parameterList);

      /**
       * *************************************
       * PLUG YOUR FEATURES HERE
       * *************************************
       */

      /**
       * fv is actually an AugmentableFeatureVector from the Mallet library.
       *
       * <p>Internally the features are named, so you must specify a unique identifier.
       *
       * <p>An example:
       *
       * <p>((AugmentableFeatureVector) fv).add("your_super_feature_id", 42);
       *
       * <p>or:
       *
       * <p>AugmentableFeatureVector afv = (AugmentableFeatureVector) fv;
       * afv.add("your_super_feature_id", 42);
       */

      /**
       * *************************************
       * THANKS!
       * *************************************
       */

      /** Produce output header on the first row */
      if (firstRow) {
        out.write("cid,cgold");
        for (int i = 0; i < fv.numLocations(); i++) {
          int featureIndex = i + 1;
          out.write(",f" + featureIndex);
        }
        out.write("\n");
        firstRow = false;
      }

      List<Double> features = this.serializeFv(fv);

      /** Produce output line */
      out.writeLn(qid + "-" + cid + "," + cgold + "," + Joiner.on(",").join(features));
    }
  }

  this.fm.closeFiles();
  out.close();
}
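/*
 * Hedged sketch, not present in the original class: the Arabic branch strips "/" and "~"
 * inline from the question subject, question body, and each answer body. A helper like this
 * (hypothetical name) would keep that normalization in one place if the cleaning rules grow.
 */
private static String stripArabicMarkup(String text) {
  return text.replaceAll("/", "").replaceAll("~", "");
}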