private Label labelForTok(String tok, int lvl) { while (labelDicts.size() <= lvl) { labelDicts.add(new LabelAlphabet()); } LabelAlphabet dict = (LabelAlphabet) labelDicts.get(lvl); return dict.lookupLabel(tok); }
@Override public Instance pipe(Instance carrier) { Arg1RankInstance instance = (Arg1RankInstance) carrier; Document document = (Document) instance.getData(); List<Pair<Integer, Integer>> candidates = instance.getCandidates(); int connStart = instance.getConnStart(); int connEnd = instance.getConnEnd(); int arg2Line = instance.getArg2Line(); int arg2HeadPos = instance.getArg2HeadPos(); FeatureVector fvs[] = new FeatureVector[candidates.size()]; for (int i = 0; i < candidates.size(); i++) { Pair<Integer, Integer> candidate = candidates.get(i); PropertyList pl = null; pl = addBaselineFeatures(pl, document, candidate, arg2Line, arg2HeadPos, connStart, connEnd); pl = addConstituentFeatures( pl, document, candidate, arg2Line, arg2HeadPos, connStart, connEnd); pl = addDependencyFeatures(pl, document, candidate, arg2Line, arg2HeadPos, connStart, connEnd); // pl = addLexicoSyntacticFeatures(pl, document, candidate, arg2Line, arg2HeadPos, connStart, // connEnd); fvs[i] = new FeatureVector(getDataAlphabet(), pl, true, true); } // set target label LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet(); carrier.setTarget(ldict.lookupLabel(String.valueOf(instance.getTrueArg1Candidate()))); carrier.setData(new FeatureVectorSequence(fvs)); return carrier; }
/** * converts the sentence based instance list into a token based one This is needed for the * ME-version of JET (JetMeClassifier) * * @param METrainerDummyPipe * @param inst just the features for one sentence to be transformed * @return */ public static InstanceList convertFeatsforClassifier( final Pipe METrainerDummyPipe, final Instance inst) { final InstanceList iList = new InstanceList(METrainerDummyPipe); final FeatureVectorSequence fvs = (FeatureVectorSequence) inst.getData(); final LabelSequence ls = (LabelSequence) inst.getTarget(); final LabelAlphabet ldict = (LabelAlphabet) ls.getAlphabet(); final Object source = inst.getSource(); final Object name = inst.getName(); if (ls.size() != fvs.size()) { System.err.println( "failed making token instances: size of labelsequence != size of featue vector sequence: " + ls.size() + " - " + fvs.size()); System.exit(-1); } for (int j = 0; j < fvs.size(); j++) { final Instance I = new Instance(fvs.getFeatureVector(j), ldict.lookupLabel(ls.get(j)), name, source); iList.add(I); } return iList; }