Ejemplo n.º 1
0
  /**
   * Resolves the value of a single feature for the node that {@code token} refers to.
   *
   * <p>The feature kind is selected from {@code token}: the named fields (simplified form,
   * lower-cased simplified form, POS tag, ambiguity class) are tested first, followed by the
   * boolean, feat, prefix and suffix patterns declared in {@code JointFtrXml}.
   *
   * @param token the feature token naming the field to extract and identifying the node
   * @return the feature value, or {@code null} when {@code getNode(token)} yields no node, the
   *     feature does not apply to the node, or {@code token} matches no supported field
   * @throws IllegalArgumentException if a boolean feature carries an index outside 0-9
   */
  @Override
  protected String getField(FtrToken token) {
    DEPNode node = getNode(token);
    if (node == null) return null;
    Matcher m;

    if (token.isField(JointFtrXml.F_SIMPLIFIED_FORM)) {
      // Form features are emitted only when the lower-cased form is a member of s_lsfs.
      return (s_lsfs.contains(node.lowerSimplifiedForm)) ? node.simplifiedForm : null;
    } else if (token.isField(JointFtrXml.F_LOWER_SIMPLIFIED_FORM)) {
      return (s_lsfs.contains(node.lowerSimplifiedForm)) ? node.lowerSimplifiedForm : null;
    } else if (token.isField(JointFtrXml.F_POS)) {
      return node.pos;
    } else if (token.isField(JointFtrXml.F_AMBIGUITY_CLASS)) {
      // Looked up by simplified form in m_ambi.
      return m_ambi.get(node.simplifiedForm);
    } else if ((m = JointFtrXml.P_BOOLEAN.matcher(token.field)).find()) {
      // Boolean features: the field name itself is the value when the test holds, else null.
      int field = Integer.parseInt(m.group(1));

      switch (field) {
        case 0: // form is entirely upper case
          return UTString.isAllUpperCase(node.simplifiedForm) ? token.field : null;
        case 1: // form is entirely lower case
          return UTString.isAllLowerCase(node.simplifiedForm) ? token.field : null;
        case 2: // form begins with an upper-case letter and the current input index is not 1
          // Short-circuit && replaces the original non-short-circuit &; both operands are plain
          // boolean tests evaluated in the same order, so the result is unchanged.
          return UTString.beginsWithUpperCase(node.simplifiedForm) && (i_input != 1)
              ? token.field
              : null;
        case 3: // exactly one capital letter after the first character
          return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) == 1
              ? token.field
              : null;
        case 4: // more than one capital letter after the first character
          return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) > 1
              ? token.field
              : null;
        case 5: // form contains a period
          return node.simplifiedForm.contains(".") ? token.field : null;
        case 6: // form contains a digit
          return UTString.containsDigit(node.simplifiedForm) ? token.field : null;
        case 7: // form contains a hyphen
          return node.simplifiedForm.contains("-") ? token.field : null;
        case 8: // current input index is the last one (t_size - 1)
          return (i_input == t_size - 1) ? token.field : null;
        case 9: // current input index is 1
          return (i_input == 1) ? token.field : null;
        default:
          throw new IllegalArgumentException("Unsupported feature: " + field);
      }
    } else if ((m = JointFtrXml.P_FEAT.matcher(token.field)).find()) {
      return node.getFeat(m.group(1));
    } else if ((m = JointFtrXml.P_PREFIX.matcher(token.field)).find()) {
      // First n characters of the lower-cased form; null when the form is shorter than n.
      int n = Integer.parseInt(m.group(1)), len = node.lowerSimplifiedForm.length();
      return (n <= len) ? node.lowerSimplifiedForm.substring(0, n) : null;
    } else if ((m = JointFtrXml.P_SUFFIX.matcher(token.field)).find()) {
      // Last n characters of the lower-cased form; null when the form is shorter than n.
      int n = Integer.parseInt(m.group(1)), len = node.lowerSimplifiedForm.length();
      return (n <= len) ? node.lowerSimplifiedForm.substring(len - n, len) : null;
    }

    // No supported field matched.
    return null;
  }
Ejemplo n.º 2
0
  /**
   * Called by {@link AbstractPOSTagger#processAux()}.
   *
   * <p>Advances {@code i_input} from 1 up to (but excluding) {@code t_size}. At every position
   * where {@code applyRules()} returns {@code false}, the node at that index of {@code d_tree}
   * has its {@code pos} set to the result of {@code getLabel(...)}.
   *
   * @return the list that was passed to each {@code getLabel(...)} call; this method never adds
   *     to it directly
   */
  protected List<Pair<String, StringFeatureVector>> tag() {
    List<Pair<String, StringFeatureVector>> instances =
        new ArrayList<Pair<String, StringFeatureVector>>();

    for (i_input = 1; i_input < t_size; i_input++) {
      // Positions already handled by the rules keep whatever applyRules() left in place.
      if (applyRules()) {
        continue;
      }

      // Fetch the node before asking for a label, matching the original evaluation order.
      DEPNode current = d_tree.get(i_input);
      current.pos = getLabel(instances);
    }

    return instances;
  }