コード例 #1
0
  /**
   * converts the sentence based instance list into a token based one This is needed for the
   * ME-version of JET (JetMeClassifier)
   *
   * @param METrainerDummyPipe
   * @param inst just the features for one sentence to be transformed
   * @return
   */
  public static InstanceList convertFeatsforClassifier(
      final Pipe METrainerDummyPipe, final Instance inst) {

    final InstanceList iList = new InstanceList(METrainerDummyPipe);

    final FeatureVectorSequence fvs = (FeatureVectorSequence) inst.getData();
    final LabelSequence ls = (LabelSequence) inst.getTarget();
    final LabelAlphabet ldict = (LabelAlphabet) ls.getAlphabet();
    final Object source = inst.getSource();
    final Object name = inst.getName();

    if (ls.size() != fvs.size()) {
      System.err.println(
          "failed making token instances: size of labelsequence != size of featue vector sequence: "
              + ls.size()
              + " - "
              + fvs.size());
      System.exit(-1);
    }

    for (int j = 0; j < fvs.size(); j++) {
      final Instance I =
          new Instance(fvs.getFeatureVector(j), ldict.lookupLabel(ls.get(j)), name, source);
      iList.add(I);
    }

    return iList;
  }
コード例 #2
0
  public static InstanceList copy(InstanceList instances) {
    InstanceList ret = (InstanceList) instances.clone();
    // LabelAlphabet labelDict = (LabelAlphabet) ret.getTargetAlphabet();
    Alphabet featDict = ret.getDataAlphabet();

    for (int i = 0; i < ret.size(); i++) {
      Instance instance = ret.get(i);
      Instance clone = (Instance) instance.clone();
      FeatureVector fv = (FeatureVector) clone.getData();

      int[] indices = fv.getIndices();
      double[] values = fv.getValues();

      int[] newIndices = new int[indices.length];
      System.arraycopy(indices, 0, newIndices, 0, indices.length);

      double[] newValues = new double[indices.length];
      System.arraycopy(values, 0, newValues, 0, indices.length);

      FeatureVector newFv = new FeatureVector(featDict, newIndices, newValues);
      Instance newInstance =
          new Instance(newFv, instance.getTarget(), instance.getName(), instance.getSource());
      ret.set(i, newInstance);
    }

    return ret;
  }
コード例 #3
0
 public Instance pipe(Instance carrier) {
   TokenSequence ts = (TokenSequence) carrier.getData();
   TokenSequence targets =
       carrier.getTarget() instanceof TokenSequence ? (TokenSequence) carrier.getTarget() : null;
   TokenSequence source =
       carrier.getSource() instanceof TokenSequence ? (TokenSequence) carrier.getSource() : null;
   StringBuffer sb = new StringBuffer();
   if (prefix != null) sb.append(prefix);
   sb.append("name: " + carrier.getName() + "\n");
   for (int i = 0; i < ts.size(); i++) {
     if (source != null) {
       sb.append(source.get(i).getText());
       sb.append(' ');
     }
     if (carrier.getTarget() instanceof TokenSequence) {
       sb.append(((TokenSequence) carrier.getTarget()).get(i).getText());
       sb.append(' ');
     }
     if (carrier.getTarget() instanceof FeatureSequence) {
       sb.append(((FeatureSequence) carrier.getTarget()).getObjectAtPosition(i).toString());
       sb.append(' ');
     }
     PropertyList pl = ts.get(i).getFeatures();
     if (pl != null) {
       PropertyList.Iterator iter = pl.iterator();
       while (iter.hasNext()) {
         iter.next();
         double v = iter.getNumericValue();
         if (v == 1.0) sb.append(iter.getKey());
         else sb.append(iter.getKey() + '=' + v);
         sb.append(' ');
       }
     }
     sb.append('\n');
   }
   System.out.print(sb.toString());
   return carrier;
 }
コード例 #4
0
ファイル: SimpleTagger.java プロジェクト: zabak/digmap
 public Instance pipe(Instance carrier) {
   Object inputData = carrier.getData();
   Alphabet features = getDataAlphabet();
   LabelAlphabet labels;
   LabelSequence target = null;
   String[][] tokens;
   if (inputData instanceof String) tokens = parseSentence((String) inputData);
   else if (inputData instanceof String[][]) tokens = (String[][]) inputData;
   else throw new IllegalArgumentException("Not a String or String[][]; got " + inputData);
   FeatureVector[] fvs = new FeatureVector[tokens.length];
   if (isTargetProcessing()) {
     labels = (LabelAlphabet) getTargetAlphabet();
     target = new LabelSequence(labels, tokens.length);
   }
   for (int l = 0; l < tokens.length; l++) {
     int nFeatures;
     if (isTargetProcessing()) {
       if (tokens[l].length < 1)
         throw new IllegalStateException(
             "Missing label at line " + l + " instance " + carrier.getName());
       nFeatures = tokens[l].length - 1;
       target.add(tokens[l][nFeatures]);
     } else nFeatures = tokens[l].length;
     int featureIndices[] = new int[nFeatures];
     for (int f = 0; f < nFeatures; f++) featureIndices[f] = features.lookupIndex(tokens[l][f]);
     fvs[l] =
         featureInductionOption.value
             ? new AugmentableFeatureVector(
                 features, featureIndices, null, featureIndices.length)
             : new FeatureVector(features, featureIndices);
   }
   carrier.setData(new FeatureVectorSequence(fvs));
   if (isTargetProcessing()) carrier.setTarget(target);
   else carrier.setTarget(new LabelSequence(getTargetAlphabet()));
   return carrier;
 }