@Override public String getLexiconURL(Predicate pred) { // dunno if this still works return "http://clic.ub.edu/mbertran/ancora/lexentry.php?file=" + pred.getLemma() + ".lex.xml&lexicon=AnCoraVerb_ES"; }
/**
 * Builds the core-argument label sequence for a predicate and its proposition.
 *
 * <p>The sentence of {@code pred} is scanned from index 1 (index 0 is skipped; presumably a root
 * placeholder — confirm against {@code Sentence}). At the predicate's own position the predicate
 * sense is appended, followed by {@code /P} or {@code /A} when {@code useVoice} is set. For every
 * word that is a key of {@code proposition} and whose label fully matches {@code CALSPattern},
 * the first capture group is appended. Every appended item is preceded by a single space.
 *
 * @param pred the predicate the sequence is built for
 * @param proposition argument words mapped to their labels
 * @return the space-separated sequence (with a leading space when non-empty)
 */
@Override
public String getCoreArgumentLabelSequence(Predicate pred, Map<Word, String> proposition) {
  final Sentence sentence = pred.getMySentence();
  final StringBuilder sequence = new StringBuilder();
  for (int idx = 1; idx < sentence.size(); idx++) {
    final Word token = sentence.get(idx);

    // Identity comparison: only the predicate object itself marks the predicate slot.
    if (token == pred) {
      sequence.append(" ").append(pred.getSense());
      if (useVoice) {
        if (isPassive(pred)) {
          sequence.append("/P");
        } else {
          sequence.append("/A");
        }
      }
    }

    if (!proposition.containsKey(token)) {
      continue;
    }
    final Matcher labelMatch = CALSPattern.matcher(proposition.get(token));
    if (labelMatch.matches()) {
      sequence.append(" ").append(labelMatch.group(1));
    }
  }
  return sequence.toString();
}
/**
 * Returns the fallback sense for a predicate: its lemma, a dot, and a suffix chosen from the
 * {@code postype=} value found in the predicate's feature string.
 *
 * <p>The rules are tried in order and the first one whose marker occurs in the features wins;
 * when none matches the suffix is {@code a2}.
 *
 * @param pred the predicate needing a default sense
 * @return {@code lemma + "." + suffix}
 */
@Override
public String getDefaultSense(Predicate pred) {
  final String feats = pred.getFeats();

  // {feature marker, sense suffix}; order is significant because the first match wins.
  final String[][] rules = {
    {"postype=auxiliary", "c2"},
    {"postype=common", "a2"},
    {"postype=main", "a2"},
    {"postype=qualificative", "b2"},
    {"postype=semiauxiliary", "c2"},
  };

  String suffix = "a2";
  for (String[] rule : rules) {
    if (feats.contains(rule[0])) {
      suffix = rule[1];
      break;
    }
  }
  return pred.getLemma() + "." + suffix;
}
public void write(Sentence s) { try { for (Predicate p : s.getPredicates()) { if (p.getSense().equals("Action") || p.getSense().equals("OPERATION")) { out.write( id(p) + "\t" + "Action" + " " + p.getBegin() + " " + p.getEnd() + "\t" + p.getForm() + "\n"); for (Word w : p.getArgMap().keySet()) { String label = p.getArgMap().get(w); if (label.equals("Theme")) label = "Object"; if (!word2id.containsKey(w)) out.write( id(w) + "\t" + label + " " + w.getBegin() + " " + w.getEnd() + "\t" + w.getForm() + "\n"); out.write( "R" + (rnum++) + "\t" + (label.equals("Actor") ? ("IsActorOf Arg1:" + id(w) + " Arg2:" + id(p)) : (label.equals("Property") ? ("HasProperty Arg1:" + id(p) + " Arg2:" + id(w)) : ("ActsOn Arg1:" + id(p) + " Arg2:" + id(w)))) + "\n"); } } if (p.getSense().equals("Object") || p.getSense().equals("CONCEPT") || p.getSense().equals("Property")) { if (!word2id.containsKey(p)) out.write( id(p) + "\t" + p.getSense() + " " + p.getBegin() + " " + p.getEnd() + "\t" + p.getForm() + "\n"); for (Word w : p.getArgMap().keySet()) { String label = p.getArgMap().get(w); if (label.equals("Theme")) label = "Object"; if (!word2id.containsKey(w)) out.write( id(w) + "\t" + label + " " + w.getBegin() + " " + w.getEnd() + "\t" + w.getForm() + "\n"); out.write( "R" + (rnum++) + "\t" + "HasProperty Arg1:" + id(p) + " Arg2:" + id(w) + "\n"); } } } // out.write(s.toString()+"\n\n"); } catch (Exception e) { e.printStackTrace(); System.out.println("Failed to write sentance."); System.exit(1); } }
public void write(Sentence s) { StringBuffer text = new StringBuffer(); /** hard-coded fix for test set starting with "Simply" * */ if (s.get(1).getForm().equals("Simply")) { // System.err.println("XXX"); text.append(" "); } for (int i = 1; i < s.size(); i++) { if (i > 1) text.append(' '); s.get(i).setBegin(text.length()); text.append(s.get(i).getForm()); s.get(i).setEnd(text.length()); } int annID = count * 100; try { out.write(" <sentence ID=\"" + count + "\">\n"); out.write(" <text>" + text + "</text>\n"); out.write(" <annotationSets>\n"); for (int i = 1; i < s.size(); i++) { if (s.get(i) instanceof Predicate) { Predicate p = (Predicate) s.get(i); int layerID = annID * 100 + 1; int labelID = layerID * 100 + 1; out.write( " <annotationSet ID=\"" + annID + "\" frameName=\"" + p.getSense() + "\">\n"); out.write(" <layers>\n"); out.write(" <layer ID=\"" + layerID + "\" name=\"Target\">\n"); out.write(" <labels>\n"); out.write( " <label ID=\"" + labelID + "\" end=\"" + (p.getEnd() - 1) + "\" name=\"Target\" start=\"" + p.getBegin() + "\"/>\n"); out.write(" </labels>\n"); out.write(" </layer>\n"); layerID++; labelID = layerID * 100 + 1; out.write(" <layer ID=\"" + layerID + "\" name=\"FE\">\n"); out.write(" <labels>\n"); if (!reconstructSpan) for (Word a : p.getArgMap().keySet()) { out.write( " <label ID=\"" + labelID + "\" end=\"" + (a.getEnd() - 1) + "\" name=\"" + p.getArgMap().get(a) + "\" start=\"" + a.getBegin() + "\"/>\n"); labelID++; } else { Map<String, List<Word[]>> label2spans = new HashMap<String, List<Word[]>>(); for (Word a : p.getArgMap().keySet()) { Word[] begin_end = DasFilter.pass(p, a); if (begin_end == null) continue; String label = p.getArgMap().get(a); if (!label2spans.containsKey(label)) label2spans.put(label, new LinkedList<Word[]>()); label2spans.get(label).add(begin_end); } Map<String, List<Word[]>> newlabel2spans = new HashMap<String, List<Word[]>>(); for (String label : label2spans.keySet()) { List<Word[]> begin_ends = 
DasFilter.merge(label2spans.get(label)); newlabel2spans.put(label, begin_ends); } DasFilter.resolveConflictingSpans(newlabel2spans); for (String label : newlabel2spans.keySet()) { List<Word[]> begin_ends = newlabel2spans.get(label); for (Word[] begin_end : begin_ends) { out.write( " <label ID=\"" + labelID + "\" end=\"" + (begin_end[1].getEnd() - 1) + "\" name=\"" + label + "\" start=\"" + (begin_end[0].getBegin()) + "\"/>\n"); labelID++; } } } out.write(" </labels>\n"); out.write(" </layer>\n"); out.write(" </layers>\n"); out.write(" </annotationSet>\n"); } } out.write(" </annotationSets>\n"); out.write(" </sentence>\n"); count++; } catch (IOException e) { e.printStackTrace(); System.out.println("Failed to write sentance."); System.exit(1); } }
/**
 * Heuristic voice detection for Spanish (and Catalan) predicates.
 *
 * <p>According to the original author this was written for CoNLL 2009 but not used in the final
 * submission, apparently because it never gave a real improvement, and its linguistic soundness
 * was never confirmed. It is kept for future reference.
 *
 * @param pred the predicate (verb) to inspect
 * @return {@code true} if some child of the predicate has the lemma {@code estar} or {@code ser}
 *     and a feature string containing {@code auxiliary}, i.e. the predicate is taken to be in
 *     the passive voice; {@code false} otherwise
 */
private boolean isPassive(Predicate pred) {
  for (Word child : pred.getChildren()) {
    final String lemma = child.getLemma();
    final boolean copula = lemma.equals("estar") || lemma.equals("ser");
    if (!copula) {
      continue;
    }
    // Features are only consulted for copula children.
    if (child.getFeats().contains("auxiliary")) {
      return true;
    }
  }
  return false;
}