/**
 * Appends the "simple" features of a predicate/argument token pair to {@code feats}.
 *
 * <p>For each of the two tokens, the word form is looked up in the current sentence and passed,
 * together with the token index and {@code cs.prm.language}, to {@code
 * sig.getSimpleUnkFeatures}. Everything that call returns is appended to {@code feats}: the
 * predicate's features first, then the argument's.
 *
 * @param pidx index of the predicate token in the sentence
 * @param aidx index of the argument token in the sentence
 * @param feats list that receives the features; modified in place
 */
public void addSimplePairFeatures(int pidx, int aidx, ArrayList<String> feats) {
  String predForm = sent.getWord(pidx);
  String argForm = sent.getWord(aidx);
  // Bulk-append each feature set instead of copying it element by element.
  feats.addAll(sig.getSimpleUnkFeatures(predForm, pidx, cs.prm.language));
  feats.addAll(sig.getSimpleUnkFeatures(argForm, aidx, cs.prm.language));
}
/**
 * Adds factors and variables to the given factor graph.
 *
 * <p>Convenience overload: it unpacks the pieces it needs from {@code sent} (words, lemmas,
 * known predicates) and from {@code cs} (role state names, predicate sense list map) and
 * forwards them, together with the remaining arguments, to the more detailed {@code build}
 * overload.
 *
 * @param sent sentence supplying the words, lemmas and known predicates
 * @param cs corpus statistics supplying {@code roleStateNames} and {@code predSenseListMap}
 * @param obsFe observation feature extractor, passed through unchanged
 * @param ofc observation feature conjoiner, passed through unchanged
 * @param fg factor graph, passed through unchanged to the detailed overload
 */
public void build(
    SimpleAnnoSentence sent,
    CorpusStatistics cs,
    ObsFeatureExtractor obsFe,
    ObsFeatureConjoiner ofc,
    FactorGraph fg) {
  build(
      sent.getWords(),
      sent.getLemmas(),
      sent.getKnownPreds(),
      cs.roleStateNames,
      cs.predSenseListMap,
      obsFe,
      ofc,
      fg);
}
// ---------- Meg's "Simple" features. ---------- public void addSimpleSoloFeatures(int idx, ArrayList<String> feats) { String wordForm = sent.getWord(idx); Set<String> a = sig.getSimpleUnkFeatures(wordForm, idx, cs.prm.language); for (String c : a) { feats.add(c); } }
// ---------- Naradowsky et al.'s 2011 ACL features. ---------- public void addNaradowskySenseFeatures(int idx, ArrayList<String> feats) { String word = sent.getWord(idx); String wordForm = decideForm(word, idx); String wordPos = sent.getPosTag(idx); String wordLemma = sent.getLemma(idx); feats.add("head_" + wordForm + "_word"); feats.add("head_" + wordLemma + "_lemma"); feats.add("head_" + wordPos + "_tag"); feats.add("head_" + wordForm + "_" + wordPos + "_wordtag"); String cap; if (FeaturizedToken.capitalized(wordForm)) { cap = "UC"; } else { cap = "LC"; } feats.add("head_" + cap + "_caps"); }
/**
 * Returns the size of the current sentence, as reported by {@code sent.size()}.
 *
 * @return the value of {@code sent.size()}
 */
public int getSentSize() {
  return sent.size();
}
/*
 * Disabled solo-feature method, "made up by Meg based on pair and sense features".
 * Kept for reference only; it is not compiled.
 *
 * public void addNaradowskySoloFeatures(int idx, ArrayList<String> feats) {
 *   addNaradowskySenseFeatures(idx, feats);
 *   String word = sent.getWord(idx);
 *   String wordForm = decideForm(word, idx);
 *   String wordPos = sent.getPosTag(idx);
 *
 *   feats.add("head_" + wordForm + "_dep_" + wordPos + "_wordpos");
 *   feats.add("slen_" + sent.size());
 *   feats.add("head_" + wordForm + "_word");
 *   feats.add("head_" + wordPos + "_tag");
 * }
 */

/**
 * Appends the pair features for a predicate/argument token pair to {@code feats}.
 *
 * <p>The features combine the two tokens' word forms (as chosen by {@code decideForm}) and POS
 * tags with the absolute distance between the indices, the direction of the argument relative
 * to the predicate ({@code RIGHT}, {@code LEFT} or {@code SAME}) and the sentence length. When
 * {@code prm.withSupervision} is set, one {@code _morph} feature is added for every combination
 * of a predicate morphological feature with an argument morphological feature; a token whose
 * feature list is {@code null} contributes the single placeholder {@code "_"}.
 *
 * @param pidx index of the predicate token in the sentence
 * @param aidx index of the argument token in the sentence
 * @param feats list that receives the features; modified in place
 */
public void addNaradowskyPairFeatures(int pidx, int aidx, ArrayList<String> feats) {
  final String pWord = sent.getWord(pidx);
  final String aWord = sent.getWord(aidx);
  final String pForm = decideForm(pWord, pidx);
  final String aForm = decideForm(aWord, aidx);
  final String pPos = sent.getPosTag(pidx);
  final String aPos = sent.getPosTag(aidx);

  final int dist = Math.abs(aidx - pidx);
  final String dir = aidx > pidx ? "RIGHT" : (aidx < pidx ? "LEFT" : "SAME");

  // NOTE(review): unlike every other template, this one has no underscore before "dep_".
  // Left untouched so that emitted feature names stay exactly as they were.
  feats.add("head_" + pForm + "dep_" + aForm + "_word");
  feats.add("head_" + pPos + "_dep_" + aPos + "_pos");
  feats.add("head_" + pForm + "_dep_" + aPos + "_wordpos");
  feats.add("head_" + pPos + "_dep_" + aForm + "_posword");
  feats.add(
      "head_" + pForm + "_dep_" + aForm + "_head_" + pPos + "_dep_" + aPos + "_wordwordpospos");
  feats.add("head_" + pPos + "_dep_" + aPos + "_dist_" + dist + "_posdist");
  feats.add("head_" + pPos + "_dep_" + aPos + "_dir_" + dir + "_posdir");
  // NOTE(review): the next two templates share the "head_" prefix and "_posdistdir" suffix,
  // so the predicate and argument variants are indistinguishable when both tags are equal.
  feats.add("head_" + pPos + "_dist_" + dist + "_dir_" + dir + "_posdistdir");
  feats.add("head_" + aPos + "_dist_" + dist + "_dir_" + dir + "_posdistdir");
  feats.add("slen_" + sent.size());
  feats.add("dir_" + dir);
  feats.add("dist_" + dist);
  feats.add("dir_dist_" + dir + dist);
  feats.add("head_" + pForm + "_word");
  feats.add("head_" + pPos + "_tag");
  feats.add("arg_" + aForm + "_word");
  feats.add("arg_" + aPos + "_tag");

  // Morphological conjunctions are only produced in the supervised setting.
  if (!prm.withSupervision) {
    return;
  }

  List<String> pMorph = sent.getFeats(pidx);
  List<String> aMorph = sent.getFeats(aidx);
  if (pMorph == null) {
    pMorph = new ArrayList<String>();
    pMorph.add("_");
  }
  if (aMorph == null) {
    aMorph = new ArrayList<String>();
    aMorph.add("_");
  }
  for (String pm : pMorph) {
    for (String am : aMorph) {
      feats.add(pm + "_" + am + "_morph");
    }
  }
}