/**
   * Records every number word listed in the {@code NUMBER} constant with POS tag {@code "b"}.
   *
   * <p>The constant is split on {@code |}. Each piece that does not match the {@code FORBIDDEN}
   * alternation in full is passed to {@code updateDataHolder(word, "b", "*", "wordpos", 0)}. The
   * literal token {@code "NUM"} is always recorded afterwards with the same arguments.
   *
   * @param dataholderHandler data holder that receives the word/POS updates
   */
  public void addNumbers(DataHolder dataholderHandler) {
    // NOTE(review): log4j is reconfigured on every call; presumably this could be done once at
    // start-up instead - confirm before moving it.
    PropertyConfigurator.configure("conf/log4j.properties");
    Logger myLogger = Logger.getLogger("learn.addNumbers");
    myLogger.trace("Add numbers");

    String[] numberWords = this.myLearnerUtility.getConstant().NUMBER.split("\\|");

    // The forbidden-word regex does not change inside the loop, so compile it once instead of
    // letting String.matches rebuild and recompile it for every word.
    java.util.regex.Pattern forbidden =
        java.util.regex.Pattern.compile(
            "\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b");

    for (String word : numberWords) {
      // matches() requires the whole word to match, exactly like String.matches did.
      if (forbidden.matcher(word).matches()) {
        continue;
      }
      dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
    }

    dataholderHandler.updateDataHolder("NUM", "b", "*", "wordpos", 0);
  }
  /**
   * Records every proper noun listed in {@code Constant.PROPERNOUN} with POS tag {@code "b"}.
   *
   * <p>The constant is split on {@code |}. Each piece that does not match the {@code FORBIDDEN}
   * alternation in full is passed to {@code updateDataHolder(word, "b", "*", "wordpos", 0)}.
   *
   * <p>NOTE(review): the legacy commented-out implementation that used to sit in this loop stored
   * proper nouns under POS {@code "z"}, whereas this method uses {@code "b"} - confirm which tag is
   * intended.
   *
   * @param dataholderHandler data holder that receives the word/POS updates
   */
  public void addProperNouns(DataHolder dataholderHandler) {
    PropertyConfigurator.configure("conf/log4j.properties");
    Logger myLogger = Logger.getLogger("learn.addProperNouns");
    myLogger.trace("Add proper nouns");

    String[] properNouns = Constant.PROPERNOUN.split("\\|");

    // Loop-invariant regex: compile once rather than once per word via String.matches.
    java.util.regex.Pattern forbidden =
        java.util.regex.Pattern.compile(
            "\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b");

    for (String word : properNouns) {
      // Whole-word match, same semantics as String.matches.
      if (forbidden.matcher(word).matches()) {
        continue;
      }
      dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
    }
  }
  /**
   * Records every entry of the {@code CLUSTERSTRING} constant with POS tag {@code "b"}.
   *
   * <p>The constant is split on {@code |}. Each piece that does not match the {@code FORBIDDEN}
   * alternation in full is passed to {@code updateDataHolder(word, "b", "*", "wordpos", 0)}.
   *
   * @param dataholderHandler data holder that receives the word/POS updates
   */
  public void addClusterStrings(DataHolder dataholderHandler) {
    PropertyConfigurator.configure("conf/log4j.properties");
    Logger myLogger = Logger.getLogger("learn.addClusterstrings");
    myLogger.trace("Add clusterstrings");

    String[] clusterStrings = this.myLearnerUtility.getConstant().CLUSTERSTRING.split("\\|");

    // Loop-invariant regex: compile once rather than once per word via String.matches.
    java.util.regex.Pattern forbidden =
        java.util.regex.Pattern.compile(
            "\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b");

    for (String word : clusterStrings) {
      // Whole-word match, same semantics as String.matches.
      if (forbidden.matcher(word).matches()) {
        continue;
      }
      dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
    }
  }
  /**
   * Records every stop word with POS tag {@code "b"}.
   *
   * <p>The stop list is the {@code STOP} constant split on {@code |}, followed by the fixed extras
   * {@code NUM ( [ { ) ] } d+}. Each entry that does not match the {@code FORBIDDEN} alternation in
   * full is passed to {@code updateDataHolder(word, "b", "*", "wordpos", 0)}; every addition is
   * reported at trace level.
   *
   * @param dataholderHandler data holder that receives the word/POS updates
   */
  public void addStopWords(DataHolder dataholderHandler) {
    PropertyConfigurator.configure("conf/log4j.properties");
    Logger myLogger = Logger.getLogger("learn.addStopWords");
    myLogger.trace("Add stop words");

    List<String> stops =
        new ArrayList<String>(
            Arrays.asList(this.myLearnerUtility.getConstant().STOP.split("\\|")));
    // NOTE(review): "d+" is a plain two-character string here; if a digit pattern was meant it
    // would presumably need to be "\\d+" - confirm against the consumers of these entries.
    stops.addAll(Arrays.asList("NUM", "(", "[", "{", ")", "]", "}", "d+"));

    myLogger.trace("Stop Words: " + stops);

    // Loop-invariant regex: compile once rather than once per word via String.matches.
    java.util.regex.Pattern forbidden =
        java.util.regex.Pattern.compile(
            "\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b");
    // Checked once so the per-word message is only formatted when it will actually be logged.
    boolean traceEnabled = myLogger.isTraceEnabled();

    for (String word : stops) {
      // Whole-word match, same semantics as String.matches.
      if (forbidden.matcher(word).matches()) {
        continue;
      }
      dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
      if (traceEnabled) {
        myLogger.trace(String.format("(\"%s\", \"b\", \"*\", \"wordpos\", 0) added\n", word));
      }
    }
    myLogger.trace("Quite\n");
  }
  /**
   * Tags sentences in which a comma is used in place of "and".
   *
   * <p>Seen in TreatiseH, e.g. "adductor , diductor scars clearly differentiated ;", which reads
   * the same as "adductor and diductor scars clearly differentiated ;". Each sentence is checked
   * against three shapes in order, and only the first shape that matches is acted on:
   *
   * <ol>
   *   <li>CA1 - the sentence starts with an M-tagged phrase plus N/O-tagged phrases, a
   *       {@code <B>,</B>} comma, and at least one more such phrase, blank or comma;
   *   <li>CA2 - the same list appears later in the sentence and is followed only by one B-tagged
   *       punctuation mark at the very end; it is used only when the text before the list
   *       contains neither a preposition nor an {@code <N>} tag;
   *   <li>CA3 - the sentence starts with M-tagged phrases separated by a comma and ending in
   *       N/O-tagged phrases; it is used only when the match contains no preposition. The last
   *       word becomes the tag and the preceding words become the modifier.
   * </ol>
   *
   * <p>In every case the matched text has its commas replaced by "and" and its markup removed,
   * and the sentence is tagged only if the result does not end in " and". Clauses dealt with
   * here do not contain "and/or"; andortag() deals with clauses that do.
   *
   * @param dataholderHandler data holder supplying the sentences and receiving the tags
   */
  public void commaAnd(DataHolder dataholderHandler) {
    // One or more consecutive elements whose tag name contains N or O.
    String nPhrasePattern = "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+";
    // A single element whose tag name contains M.
    String mPhrasePattern = "(?:<[A-Z]*M[A-Z]*>[^<]+?<\\/[A-Z]*M[A-Z]*>\\s*)";
    // A single punctuation character wrapped in an element whose tag name contains B.
    String bPattern = "(?:<[A-Z]*B[A-Z]*>[,:.;<]<\\/[A-Z]*B[A-Z]*>)";
    String commaPattern = "<B>,</B>";

    String phrasePattern = mPhrasePattern + "\\s*" + nPhrasePattern;
    String pattern =
        phrasePattern
            + "\\s+"
            + commaPattern
            + "\\s+(?:"
            + phrasePattern
            + "| |"
            + commaPattern
            + ")+";
    String pattern1 = "^(" + pattern + ")";
    // The trailing "$" is the end-of-input anchor. It was previously written as "\\$", which is a
    // literal dollar sign in a Java regex, so this shape could only match sentences that
    // actually ended in a '$' character.
    String pattern2 = "(.*?)(" + pattern + ")\\s*" + bPattern + "$";
    String pattern3 =
        "^((?:"
            + mPhrasePattern
            + "\\s+)+"
            + commaPattern
            + "\\s+(?:"
            + mPhrasePattern
            + "|\\s*|"
            + commaPattern
            + ")+"
            + mPhrasePattern
            + "+\\s*"
            + nPhrasePattern
            + ")";

    // Loop-invariant, so built once for all sentences.
    String prepositionPattern =
        "\\b(" + this.myLearnerUtility.getConstant().PREPOSITION + ")\\b";

    for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
      int sentenceID = sentenceItem.getID();
      String sentence = sentenceItem.getSentence();

      // Work on a copy in which directly adjacent tags are merged (e.g. "<M><B>" -> "<MB>"), so a
      // single tag name carries all of a word's letters. String.valueOf keeps the original
      // null-to-"null" conversion of the former string concatenation.
      String sentenceCopy = String.valueOf(sentence).replaceAll("></?", "");

      // case 1
      Matcher m1 = StringUtility.createMatcher(sentenceCopy, pattern1);
      if (m1.find()) {
        String tag = commaAndTagText(m1.group(1));
        // case 1.1
        if (!StringUtility.isMatchedNullSafe(tag, " and$")) {
          dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "", tag, "commaand[CA1]");
        }
        continue;
      }

      // case 2
      Matcher m2 = StringUtility.createMatcher(sentenceCopy, pattern2);
      if (m2.find()) {
        String leadingText = m2.group(1);
        boolean leadingTextIsClean =
            !StringUtility.isMatchedNullSafe(leadingText, prepositionPattern)
                && !StringUtility.isMatchedNullSafe(leadingText, "<N>");
        if (leadingTextIsClean) {
          String tag = commaAndTagText(m2.group(2));
          // case 2.1.1
          if (!StringUtility.isMatchedNullSafe(tag, " and$")) {
            dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "", tag, "commaand[CA2]");
          }
        }
        continue;
      }

      // case 3
      Matcher m3 = StringUtility.createMatcher(sentenceCopy, pattern3);
      if (m3.find()) {
        String matched = m3.group(1);
        // case 3.1
        if (!StringUtility.isMatchedNullSafe(matched, prepositionPattern)) {
          String text = commaAndTagText(matched);
          // case 3.1.1
          if (!StringUtility.isMatchedNullSafe(text, " and$")) {
            List<String> words = Arrays.asList(text.split("\\s+"));
            int lastIndex = words.size() - 1;
            // The final word is the tag; everything before it is the modifier.
            String modifier = StringUtils.join(words.subList(0, lastIndex), " ");
            dataholderHandler.tagSentenceWithMT(
                sentenceID, sentence, modifier, words.get(lastIndex), "commaand[CA3]");
          }
        }
      }
    }
  }

  /** Replaces each comma with "and", removes the markup tags and trims the result. */
  private String commaAndTagText(String taggedText) {
    String text = taggedText.replaceAll(",", "and");
    text = text.replaceAll("</?\\S+?>", "");
    return StringUtility.trimString(text);
  }