/**
 * Trains a naive Bayes classifier from a list of {@link RVFDatum}s.
 *
 * <p>The datums are assumed to contain the zero-valued features as well, i.e. every datum is
 * expected to list the same feature set. The width of the data matrix is taken from the number
 * of features of the first datum.
 *
 * <p>Side effect: reassigns the {@code labelIndex} and {@code featureIndex} variables declared
 * outside this method to freshly built indices.
 *
 * @param examples the training datums; the first element is read to size the feature matrix,
 *     so the list must not be empty
 * @return the result of the array-based {@code trainClassifier} overload
 */
@Override
@Deprecated
public NaiveBayesClassifier<L, F> trainClassifier(List<RVFDatum<L, F>> examples) {
  RVFDatum<L, F> d0 = examples.get(0);
  int numFeatures = d0.asFeatures().size();
  int[][] data = new int[examples.size()][numFeatures];
  int[] labels = new int[examples.size()];
  labelIndex = new HashIndex<L>();
  featureIndex = new HashIndex<F>();
  for (int d = 0; d < examples.size(); d++) {
    RVFDatum<L, F> datum = examples.get(d);
    Counter<F> c = datum.asFeaturesCounter();
    for (F feature : c.keySet()) {
      // Register the feature, then ALWAYS record its count. The write must not be gated on
      // add()'s return value: if add() reports false for an already-indexed feature (the
      // java.util.Collection convention), every datum after the first would keep an
      // all-zero row. This mirrors how labels are handled below.
      featureIndex.add(feature);
      int fNo = featureIndex.indexOf(feature);
      data[d][fNo] = (int) c.getCount(feature); // counts are truncated to int
    }
    labelIndex.add(datum.label());
    labels[d] = labelIndex.indexOf(datum.label());
  }
  int numClasses = labelIndex.size();
  return trainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
}
/**
 * Returns the position of the given ambiguity class in {@code classes}, inserting it first
 * when it is not yet present.
 *
 * @param a the ambiguity class to look up or register
 * @return the value of {@code classes.indexOf(a)} after the optional insertion
 */
private int add(AmbiguityClass a) {
  boolean alreadyKnown = classes.contains(a);
  if (!alreadyKnown) {
    classes.add(a);
  }
  return classes.indexOf(a);
}
/**
 * Builds an index of {@link IntPair}s: for every {@code x} in {@code [0, px.length)} it adds
 * the pairs {@code (x, 0) .. (x, numY(x) - 1)}, in that order.
 *
 * @return a new index holding all generated pairs
 */
public Index<IntPair> createIndex() {
  Index<IntPair> pairs = new HashIndex<>();
  for (int first = 0; first < px.length; first++) {
    // numY is evaluated once per outer value, before the inner loop starts.
    for (int second = 0, limit = numY(first); second < limit; second++) {
      IntPair pair = new IntPair(first, second);
      pairs.add(pair);
    }
  }
  return pairs;
}
/**
 * Reads a tag set from the given stream.
 *
 * <p>The expected layout is an int count followed by that many records, each a UTF string
 * (the tag) and a boolean. Every tag is added to a newly created {@code index}; tags whose
 * boolean is true are additionally added to {@code closed}.
 *
 * <p>An {@link IOException} is caught and its stack trace printed; whatever was read before
 * the failure stays in place. {@code index} is only replaced once the count has been read.
 *
 * @param file the stream to read from
 */
protected void read(DataInputStream file) {
  try {
    final int tagCount = file.readInt();
    index = new HashIndex<String>();
    for (int n = 0; n < tagCount; n++) {
      // Read the complete record before touching any state.
      final String name = file.readUTF();
      final boolean isClosed = file.readBoolean();
      index.add(name);
      if (isClosed) {
        closed.add(name);
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Updates the model counts from one tagged sentence.
 *
 * <p>The sentence is first passed to {@code lex.train} with weight 1.0. Then every tag is
 * added to {@code tagIndex}; a tag with no non-null predecessor is counted in
 * {@code initial}, and every other tag is counted in {@code ruleCounter} as the pair
 * (previous tag, current tag).
 *
 * @param sentence the tagged words of one sentence, in order
 */
@Override
public void train(List<TaggedWord> sentence) {
  lex.train(sentence, 1.0);

  String previousTag = null;
  for (TaggedWord word : sentence) {
    final String currentTag = word.tag();
    tagIndex.add(currentTag);
    if (previousTag != null) {
      ruleCounter.incrementCount2D(previousTag, currentTag);
    } else {
      initial.incrementCount(currentTag);
    }
    previousTag = currentTag;
  }
}
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) { Minimizer<DiffFunction> minimizer = getMinimizer(); Index<F> featureIndex = Generics.newIndex(); Index<L> labelIndex = Generics.newIndex(); for (Datum<L, F> d : dataIterable) { labelIndex.add(d.label()); featureIndex.addAll(d.asFeatures()); // If there are duplicates, it doesn't add them again. } System.err.println( String.format( "Training linear classifier with %d features and %d labels", featureIndex.size(), labelIndex.size())); LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(dataIterable, logPrior, featureIndex, labelIndex); objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC)); double[] initial = objective.initial(); double[] weights = minimizer.minimize(objective, TOL, initial); LinearClassifier<L, F> classifier = new LinearClassifier<L, F>(objective.to2D(weights), featureIndex, labelIndex); return classifier; }
// Static setup of the two-label index: creates binaryIndex, adds POS_LABEL and then NEG_LABEL
// to it, and caches the position of POS_LABEL in posIndex.
// NOTE(review): POS_LABEL is added first, so posIndex is presumably 0 — confirm against the
// index implementation.
static { binaryIndex = new HashIndex<>(); binaryIndex.add(POS_LABEL); binaryIndex.add(NEG_LABEL); posIndex = binaryIndex.indexOf(POS_LABEL); }