/**
  * Trains a naive Bayes classifier from a list of {@link RVFDatum}s.
  *
  * <p>The datums are assumed to contain the zeroes as well: the width of the data matrix is taken
  * from the number of features of the first datum, so every feature that occurs anywhere in
  * {@code examples} must fit within that many columns. Feature counts are truncated to
  * {@code int}. Calling this method replaces {@code labelIndex} and {@code featureIndex} with
  * freshly built indices.
  *
  * @param examples the training datums; must contain at least one element, since the first
  *     element is read unconditionally
  * @return the classifier produced by the array-based {@code trainClassifier} overload
  */
 @Override
 @Deprecated
 public NaiveBayesClassifier<L, F> trainClassifier(List<RVFDatum<L, F>> examples) {
   RVFDatum<L, F> d0 = examples.get(0);
   int numFeatures = d0.asFeatures().size();
   int[][] data = new int[examples.size()][numFeatures];
   int[] labels = new int[examples.size()];
   labelIndex = new HashIndex<L>();
   featureIndex = new HashIndex<F>();
   for (int d = 0; d < examples.size(); d++) {
     RVFDatum<L, F> datum = examples.get(d);
     Counter<F> c = datum.asFeaturesCounter();
     for (F feature : c.keySet()) {
       // Register the feature when it is new, then ALWAYS look up its column. The count must not
       // be gated on add()'s return value: if add() only reports "newly inserted" (presumably
       // the Index contract -- confirm), gating would drop the count of every feature that an
       // earlier datum had already introduced.
       featureIndex.add(feature);
       int fNo = featureIndex.indexOf(feature);
       if (fNo >= 0) {
         data[d][fNo] = (int) c.getCount(feature);
       }
     }
     labelIndex.add(datum.label());
     labels[d] = labelIndex.indexOf(datum.label());
   }
   int numClasses = labelIndex.size();
   return trainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
 }
 /**
  * Looks up {@code a} in {@code classes}, inserting it first when it is not yet contained.
  *
  * @param a the ambiguity class to look up or register
  * @return the result of {@code classes.indexOf(a)} once {@code a} has been added if missing
  */
 private int add(AmbiguityClass a) {
   // Only insert when absent; either way the answer is the element's current position.
   if (!classes.contains(a)) {
     classes.add(a);
   }
   return classes.indexOf(a);
 }
Ejemplo n.º 3
0
 /**
  * Builds an index holding one {@code IntPair(x, y)} for every position {@code x} of
  * {@code px} and every {@code y} from 0 (inclusive) up to {@code numY(x)} (exclusive).
  * Pairs are added in ascending {@code x}, then ascending {@code y}.
  *
  * @return a newly created index containing the pairs
  */
 public Index<IntPair> createIndex() {
   Index<IntPair> pairs = new HashIndex<>();
   int first = 0;
   while (first < px.length) {
     // numY is queried once per x, before enumerating that x's y values.
     int secondLimit = numY(first);
     int second = 0;
     while (second < secondLimit) {
       pairs.add(new IntPair(first, second));
       second++;
     }
     first++;
   }
   return pairs;
 }
Ejemplo n.º 4
0
  /**
   * Rebuilds {@code index} from a binary stream.
   *
   * <p>The stream is read as an {@code int} entry count followed by that many entries, each a
   * UTF string (the tag) and a boolean flag. Every tag is added to a fresh {@code index}; tags
   * whose flag is true are also added to {@code closed}. An {@link IOException} is not
   * propagated: its stack trace is printed and the method returns with whatever was read so far.
   *
   * @param file the stream to read from
   */
  protected void read(DataInputStream file) {
    try {
      final int entryCount = file.readInt();
      index = new HashIndex<String>();
      int entriesRead = 0;
      while (entriesRead < entryCount) {
        // Both fields of the entry are consumed before either collection is touched.
        final String tagName = file.readUTF();
        final boolean isClosedTag = file.readBoolean();
        index.add(tagName);

        if (isClosedTag) {
          closed.add(tagName);
        }
        entriesRead++;
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  /**
   * Updates the model from one tagged sentence.
   *
   * <p>The sentence is first handed to {@code lex.train} with the argument {@code 1.0}. Then,
   * walking the words in order, each tag is added to {@code tagIndex}; a tag with no preceding
   * tag increments {@code initial}, and every other tag increments the
   * (preceding tag, tag) cell of {@code ruleCounter}.
   *
   * @param sentence the tagged words of the sentence, in order
   */
  @Override
  public void train(List<TaggedWord> sentence) {
    lex.train(sentence, 1.0);

    String previousTag = null;
    for (TaggedWord word : sentence) {
      final String currentTag = word.tag();
      tagIndex.add(currentTag);
      if (previousTag != null) {
        // A transition from the preceding tag to this one.
        ruleCounter.incrementCount2D(previousTag, currentTag);
      } else {
        // No preceding tag recorded: count this tag as a starting tag.
        initial.incrementCount(currentTag);
      }
      previousTag = currentTag;
    }
  }
  /**
   * Trains a linear classifier on the given data.
   *
   * <p>The data is iterated once here to collect the label and feature indices, and is then
   * passed to the objective function, so {@code dataIterable} must support repeated iteration.
   * The objective is given a quadratic {@code LogPrior} and is minimized with the minimizer
   * from {@code getMinimizer()} to tolerance {@code TOL}, starting from
   * {@code objective.initial()}. A summary line is written to {@code System.err}.
   *
   * @param dataIterable the training datums
   * @return a {@code LinearClassifier} built from the optimized weights and the collected indices
   */
  public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) {
    Minimizer<DiffFunction> optimizer = getMinimizer();
    Index<F> features = Generics.newIndex();
    Index<L> labels = Generics.newIndex();
    for (Datum<L, F> datum : dataIterable) {
      labels.add(datum.label());
      // Features already present in the index are not added a second time.
      features.addAll(datum.asFeatures());
    }
    String summary =
        String.format(
            "Training linear classifier with %d features and %d labels",
            features.size(), labels.size());
    System.err.println(summary);

    LogConditionalObjectiveFunction<L, F> objective =
        new LogConditionalObjectiveFunction<L, F>(dataIterable, logPrior, features, labels);
    // The prior supplied to the constructor is replaced by a quadratic one before optimizing.
    objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC));

    double[] startingPoint = objective.initial();
    double[] optimized = optimizer.minimize(objective, TOL, startingPoint);

    return new LinearClassifier<L, F>(objective.to2D(optimized), features, labels);
  }
Ejemplo n.º 7
0
 static {
   // Build the shared two-label index: the positive label is inserted first, the negative
   // label second.
   binaryIndex = new HashIndex<>();
   binaryIndex.add(POS_LABEL);
   // Cache the positive label's position; inserting the negative label afterwards does not
   // move it.
   posIndex = binaryIndex.indexOf(POS_LABEL);
   binaryIndex.add(NEG_LABEL);
 }