/** * converts the sentence based instance list into a token based one This is needed for the * ME-version of JET (JetMeClassifier) * * @param METrainerDummyPipe * @param inst just the features for one sentence to be transformed * @return */ public static InstanceList convertFeatsforClassifier( final Pipe METrainerDummyPipe, final Instance inst) { final InstanceList iList = new InstanceList(METrainerDummyPipe); final FeatureVectorSequence fvs = (FeatureVectorSequence) inst.getData(); final LabelSequence ls = (LabelSequence) inst.getTarget(); final LabelAlphabet ldict = (LabelAlphabet) ls.getAlphabet(); final Object source = inst.getSource(); final Object name = inst.getName(); if (ls.size() != fvs.size()) { System.err.println( "failed making token instances: size of labelsequence != size of featue vector sequence: " + ls.size() + " - " + fvs.size()); System.exit(-1); } for (int j = 0; j < fvs.size(); j++) { final Instance I = new Instance(fvs.getFeatureVector(j), ldict.lookupLabel(ls.get(j)), name, source); iList.add(I); } return iList; }
public static InstanceList copy(InstanceList instances) { InstanceList ret = (InstanceList) instances.clone(); // LabelAlphabet labelDict = (LabelAlphabet) ret.getTargetAlphabet(); Alphabet featDict = ret.getDataAlphabet(); for (int i = 0; i < ret.size(); i++) { Instance instance = ret.get(i); Instance clone = (Instance) instance.clone(); FeatureVector fv = (FeatureVector) clone.getData(); int[] indices = fv.getIndices(); double[] values = fv.getValues(); int[] newIndices = new int[indices.length]; System.arraycopy(indices, 0, newIndices, 0, indices.length); double[] newValues = new double[indices.length]; System.arraycopy(values, 0, newValues, 0, indices.length); FeatureVector newFv = new FeatureVector(featDict, newIndices, newValues); Instance newInstance = new Instance(newFv, instance.getTarget(), instance.getName(), instance.getSource()); ret.set(i, newInstance); } return ret; }
public Instance pipe(Instance carrier) { TokenSequence ts = (TokenSequence) carrier.getData(); TokenSequence targets = carrier.getTarget() instanceof TokenSequence ? (TokenSequence) carrier.getTarget() : null; TokenSequence source = carrier.getSource() instanceof TokenSequence ? (TokenSequence) carrier.getSource() : null; StringBuffer sb = new StringBuffer(); if (prefix != null) sb.append(prefix); sb.append("name: " + carrier.getName() + "\n"); for (int i = 0; i < ts.size(); i++) { if (source != null) { sb.append(source.get(i).getText()); sb.append(' '); } if (carrier.getTarget() instanceof TokenSequence) { sb.append(((TokenSequence) carrier.getTarget()).get(i).getText()); sb.append(' '); } if (carrier.getTarget() instanceof FeatureSequence) { sb.append(((FeatureSequence) carrier.getTarget()).getObjectAtPosition(i).toString()); sb.append(' '); } PropertyList pl = ts.get(i).getFeatures(); if (pl != null) { PropertyList.Iterator iter = pl.iterator(); while (iter.hasNext()) { iter.next(); double v = iter.getNumericValue(); if (v == 1.0) sb.append(iter.getKey()); else sb.append(iter.getKey() + '=' + v); sb.append(' '); } } sb.append('\n'); } System.out.print(sb.toString()); return carrier; }
public Instance pipe(Instance carrier) { Object inputData = carrier.getData(); Alphabet features = getDataAlphabet(); LabelAlphabet labels; LabelSequence target = null; String[][] tokens; if (inputData instanceof String) tokens = parseSentence((String) inputData); else if (inputData instanceof String[][]) tokens = (String[][]) inputData; else throw new IllegalArgumentException("Not a String or String[][]; got " + inputData); FeatureVector[] fvs = new FeatureVector[tokens.length]; if (isTargetProcessing()) { labels = (LabelAlphabet) getTargetAlphabet(); target = new LabelSequence(labels, tokens.length); } for (int l = 0; l < tokens.length; l++) { int nFeatures; if (isTargetProcessing()) { if (tokens[l].length < 1) throw new IllegalStateException( "Missing label at line " + l + " instance " + carrier.getName()); nFeatures = tokens[l].length - 1; target.add(tokens[l][nFeatures]); } else nFeatures = tokens[l].length; int featureIndices[] = new int[nFeatures]; for (int f = 0; f < nFeatures; f++) featureIndices[f] = features.lookupIndex(tokens[l][f]); fvs[l] = featureInductionOption.value ? new AugmentableFeatureVector( features, featureIndices, null, featureIndices.length) : new FeatureVector(features, featureIndices); } carrier.setData(new FeatureVectorSequence(fvs)); if (isTargetProcessing()) carrier.setTarget(target); else carrier.setTarget(new LabelSequence(getTargetAlphabet())); return carrier; }