/** * Process a classifier's prediction for an instance and update a set of plotting instances and * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data * points have automatic shape type assignment; classifier error data points have box shape type). * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize * stores the error (which is later converted to shape size values). * * @param toPredict the actual data point * @param classifier the classifier * @param eval the evaluation object to use for evaluating the classifier on the instance to * predict * @see #m_PlotShapes * @see #m_PlotSizes * @see #m_PlotInstances */ public void process(Instance toPredict, Classifier classifier, Evaluation eval) { double pred; double[] values; int i; try { pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict); if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { toPredict = ((weka.classifiers.misc.InputMappedClassifier) classifier) .constructMappedInstance(toPredict); } if (!m_SaveForVisualization) return; if (m_PlotInstances != null) { values = new double[m_PlotInstances.numAttributes()]; for (i = 0; i < m_PlotInstances.numAttributes(); i++) { if (i < toPredict.classIndex()) { values[i] = toPredict.value(i); } else if (i == toPredict.classIndex()) { values[i] = pred; values[i + 1] = toPredict.value(i); i++; } else { values[i] = toPredict.value(i - 1); } } m_PlotInstances.add(new DenseInstance(1.0, values)); if (toPredict.classAttribute().isNominal()) { if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) { m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } else if (pred != toPredict.classValue()) { // set to default error point shape m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE)); } else { // otherwise set to constant (automatically assigned) point shape m_PlotShapes.addElement(new 
Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE)); } else { // store the error (to be converted to a point size later) Double errd = null; if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) { errd = new Double(pred - toPredict.classValue()); m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } else { // missing shape if actual class not present or prediction is missing m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } m_PlotSizes.addElement(errd); } } } catch (Exception ex) { ex.printStackTrace(); } }
/** * Use <code> classifyInstance </code> from <code> OSDLCore </code> and assign probability one to * the chosen label. The implementation is heavily based on the same method in the <code> * Classifier </code> class. * * @param instance the instance to be classified * @return an array containing a single '1' on the index that <code> classifyInstance </code> * returns. */ public double[] distributionForInstance(Instance instance) { // based on the code from the Classifier class double[] dist = new double[instance.numClasses()]; int classification = 0; switch (instance.classAttribute().type()) { case Attribute.NOMINAL: try { classification = (int) Math.round(classifyInstance(instance)); } catch (Exception e) { System.out.println("There was a problem with classifyIntance"); System.out.println(e.getMessage()); e.printStackTrace(); } if (Utils.isMissingValue(classification)) { return dist; } dist[classification] = 1.0; return dist; case Attribute.NUMERIC: try { dist[0] = classifyInstance(instance); } catch (Exception e) { System.out.println("There was a problem with classifyIntance"); System.out.println(e.getMessage()); e.printStackTrace(); } return dist; default: return dist; } }
/**
 * Converts an input instance into the output format.
 *
 * <p>Every non-class attribute is either copied through unchanged (when it is listed in
 * {@code m_unchanged}) or expanded into one value per class label, obtained from the estimators
 * registered for that attribute; a missing input value expands into missing values. The class
 * value is written to the last output slot.
 *
 * @param current the input instance to convert
 * @return a transformed instance carrying the weight of the input instance
 * @throws Exception if a problem occurs
 */
protected Instance convertInstance(Instance current) throws Exception {
  final double[] converted = new double[getOutputFormat().numAttributes()];
  int next = 0;

  for (int att = 0; att < current.numAttributes(); att++) {
    if (att == current.classIndex()) {
      continue;
    }

    final String attName = current.attribute(att).name();
    if (m_unchanged != null && m_unchanged.attribute(attName) != null) {
      // Pass-through attribute: keep the raw value.
      converted[next++] = current.value(att);
      continue;
    }

    final Estimator[] perClass = m_estimatorLookup.get(attName);
    final int numLabels = current.classAttribute().numValues();
    for (int label = 0; label < numLabels; label++) {
      if (!current.isMissing(att)) {
        converted[next++] = perClass[label].getProbability(current.value(att));
      } else {
        converted[next++] = Utils.missingValue();
      }
    }
  }

  converted[converted.length - 1] = current.classValue();
  return new DenseInstance(current.weight(), converted);
}
/**
 * Classifies the given test instance by taking the most probable entry of
 * {@code distributionForInstance()}. The instance has to belong to a dataset when it's being
 * classified. Note that a classifier MUST implement either this or distributionForInstance().
 *
 * @param instance the instance to be classified
 * @return for a nominal class, the index of the first entry with the largest strictly positive
 *     probability, or Utils.missingValue() if no entry is positive; for a numeric or date
 *     class, the first element of the distribution; Utils.missingValue() for any other class
 *     type
 * @exception Exception if an error occurred during the prediction
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  final double[] probabilities = distributionForInstance(instance);
  if (probabilities == null) {
    throw new Exception("Null distribution predicted");
  }

  final int classType = instance.classAttribute().type();

  if (classType == Attribute.NOMINAL) {
    int bestIndex = 0;
    double bestProbability = 0;
    for (int candidate = 0; candidate < probabilities.length; candidate++) {
      if (probabilities[candidate] > bestProbability) {
        bestProbability = probabilities[candidate];
        bestIndex = candidate;
      }
    }
    if (bestProbability > 0) {
      return bestIndex;
    }
    return Utils.missingValue();
  }

  if (classType == Attribute.NUMERIC || classType == Attribute.DATE) {
    return probabilities[0];
  }

  return Utils.missingValue();
}
/**
 * Converts the instance to a segment, attaches the predicted label detail to it and hands the
 * segment on to {@code putInField}.
 *
 * <p>The class-attribute value at {@code predictedClassIndex} is parsed as an integer and used
 * as the lookup key into the label map schema.
 *
 * @param instance the classified instance
 * @param correctClass the actual class, forwarded unchanged to {@code putInField}
 * @param predictedClassIndex index of the predicted value within the class attribute
 */
private void updateInstanceAndPutInField(
    Instance instance, int correctClass, int predictedClassIndex) {
  final Segment target = toSegment(instance);
  final int labelId = Integer.parseInt(instance.classAttribute().value(predictedClassIndex));
  target.setPredictedLabelDetail(labelMapReader.getSchema().get(labelId));
  putInField(target, correctClass, predictedClassIndex);
}
// use the TriTrainer Classifier to classify Instance; public double classifyInstance(Instance instance) throws Exception { double result; double[] dist; int index; dist = distributionForInstance(instance); // 分类概率 if (instance.classAttribute().isNominal()) { index = Utils.maxIndex(dist); // 返回概率最大的 if (dist[index] == 0) result = Instance.missingValue(); else result = dist[index]; } else if (instance.classAttribute().isNumeric()) { result = dist[0]; } else { result = Instance.missingValue(); } return result; }
/**
 * Rewrites the wrapped classifier's distribution into either a one-hot or a uniform
 * distribution.
 *
 * <p>If the label of the first maximal entry is contained in {@code sureClasses}, that entry is
 * set to 1 and all others to 0, and a notice is written to standard error. Otherwise every
 * entry is set to {@code 1 / numClasses}. The array returned by the wrapped classifier is
 * modified in place and returned.
 *
 * @param instance the instance to be classified
 * @return the rewritten distribution
 * @throws Exception if the wrapped classifier fails
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  final double[] probs = getClassifier().distributionForInstance(instance);

  // Strict comparison keeps the first of several equal maxima.
  int best = 0;
  for (int idx = 1; idx < probs.length; idx++) {
    if (probs[idx] > probs[best]) {
      best = idx;
    }
  }

  final String bestLabel = instance.classAttribute().value(best);
  if (!sureClasses.contains(bestLabel)) {
    Arrays.fill(probs, 1.0d / instance.numClasses());
    return probs;
  }

  Arrays.fill(probs, 0.0);
  probs[best] = 1.0;
  System.err.println("INFO: Hacked confidence of '" + bestLabel + "'.");
  return probs;
}
/**
 * Calculates the class membership probabilities for the given test instance by summing the
 * distributions of all committee members and normalizing the total.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution; the un-normalized (all-zero) sums if they
 *     add up to zero
 * @exception Exception if the class is numeric or the distribution can't be computed
 *     successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  if (instance.classAttribute().isNumeric()) {
    throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
  }

  final double[] totals = new double[instance.numClasses()];
  for (int member = 0; member < m_Committee.size(); member++) {
    final Classifier voter = (Classifier) m_Committee.get(member);
    final double[] memberProbs = voter.distributionForInstance(instance);
    for (int cls = 0; cls < memberProbs.length; cls++) {
      totals[cls] += memberProbs[cls];
    }
  }

  // Normalizing is only attempted when there is a non-zero mass to spread.
  if (!Utils.eq(Utils.sum(totals), 0)) {
    Utils.normalize(totals);
  }
  return totals;
}
public double classifyInstance(Instance sample) throws Exception { // transform instance to sequence MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } Sequence seq = new Sequence(sequence); double minD = Double.MAX_VALUE; String classValue = null; for (ClassedSequence s : prototypes) { double tmpD = seq.distance(s.sequence); if (tmpD < minD) { minD = tmpD; classValue = s.classValue; } } // System.out.println(prototypes.size()); return sample.classAttribute().indexOfValue(classValue); }
/**
 * Predicts the class memberships for a given instance by delegating to
 * {@code classifyInstance()}. If an instance is unclassified, the returned array elements are
 * all zero. If the class is numeric or a date, the first element holds the predicted value.
 * Note that a classifier MUST implement either this or classifyInstance().
 *
 * @param instance the instance to be classified
 * @return an array with a single 1.0 at the predicted class index for a nominal class, the
 *     numeric prediction in element 0 for a numeric or date class, and all zeros otherwise
 * @exception Exception if distribution could not be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  final double[] result = new double[instance.numClasses()];
  final int classType = instance.classAttribute().type();

  if (classType == Attribute.NOMINAL) {
    final double predicted = classifyInstance(instance);
    if (!Utils.isMissingValue(predicted)) {
      result[(int) predicted] = 1.0;
    }
  } else if (classType == Attribute.NUMERIC || classType == Attribute.DATE) {
    result[0] = classifyInstance(instance);
  }

  return result;
}
// this method MajorityVoting to decide the probs of the Instance; // protected double[] distributionForInstanceMajorityVoting(Instance instance) throws Exception { double[] probs = new double[instance.classAttribute().numValues()]; double[] votes = new double[probs.length]; for (int i = 0; i < class_Array.length; i++) { probs = class_Array[i].distributionForInstance(instance); int maxIndex = 0; for (int j = 0; j < probs.length; j++) { if (probs[j] > probs[maxIndex]) maxIndex = j; } // Consider the cases when multiple classes happen to have the same probability for (int j = 0; j < probs.length; j++) { if (probs[j] == probs[maxIndex]) votes[j]++; } } int tmpMajorityIndex = 0; for (int k = 1; k < votes.length; k++) { if (votes[k] > votes[tmpMajorityIndex]) tmpMajorityIndex = k; } // Consider the cases when multiple classes receive the same amount of votes Vector<Integer> majorityIndexes = new Vector<Integer>(); for (int k = 0; k < votes.length; k++) { if (votes[k] == votes[tmpMajorityIndex]) majorityIndexes.add(k); } // System.out.println("forth"); // Resolve the ties according to a uniform random distribution int majorityIndex = majorityIndexes.get(m_Random.nextInt(majorityIndexes.size())); // set the probs of the classes which have not been voted to 0 for (int k = 0; k < probs.length; k++) probs[k] = 0; // the class that have been voted the most receives 1 probs[majorityIndex] = 1; return probs; }
/** * test on one sample * * @param sample * @return p(y|sample) forall y * @throws Exception */ public double classifyInstance(Instance sample) throws Exception { // transform instance to sequence MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } Sequence seq = new Sequence(sequence); // for each class String classValue = null; double maxProb = 0.0; double[] pr = new double[classedData.keySet().size()]; for (String clas : classedData.keySet()) { int c = trainingData.classAttribute().indexOfValue(clas); double prob = 0.0; for (int k = 0; k < centroidsPerClass[c].length; k++) { // compute P(Q|k_c) if (sigmasPerClass[c][k] == Double.NaN || sigmasPerClass[c][k] == 0) { System.err.println("sigma=NAN||sigma=0"); continue; } double dist = seq.distanceEuc(centroidsPerClass[c][k]); double p = computeProbaForQueryAndCluster(sigmasPerClass[c][k], dist); prob += p / centroidsPerClass[c].length; // prob += p*prior[c][k]; if (p > maxProb) { maxProb = p; classValue = clas; } } // if (prob > maxProb) { // maxProb = prob; // classValue = clas; // } } // System.out.println(Arrays.toString(pr)); // System.out.println(classValue); return sample.classAttribute().indexOfValue(classValue); }
/**
 * Updates this node with one training instance.
 *
 * <p>After updating the class distribution via the superclass, every non-class attribute's
 * sufficient statistics are updated with the attribute value, the instance's class label and
 * its weight. Statistics are created on first sight of an attribute, keyed by attribute name:
 * Gaussian for numeric attributes, nominal otherwise.
 *
 * @param inst the training instance
 * @throws Exception if the update fails
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int idx = 0; idx < inst.numAttributes(); idx++) {
    final Attribute att = inst.attribute(idx);
    if (idx == inst.classIndex()) {
      continue;
    }

    final String key = att.name();
    ConditionalSufficientStats attStats = m_nodeStats.get(key);
    if (attStats == null) {
      if (att.isNumeric()) {
        attStats = new GaussianConditionalSufficientStats();
      } else {
        attStats = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(key, attStats);
    }

    final double attValue = inst.value(att);
    final String classLabel = inst.classAttribute().value((int) inst.classValue());
    attStats.update(attValue, classLabel, inst.weight());
  }
}
public double crearMetodo(Instances pTrain, Instances pTest, Instances pEvaluar) { // Cargamos las instancias clasificadas Instances train = pTrain; Instances test = pTest; double bestAccuracy = 0; double accuracy = 0; KNN bestKNN = null; int bestL = 0; int indice = 0; // Movemos el valor de L para cambiar la cantidad de las muestras for (int i = 2; i < 16; i++) { ArrayList<ArrayList<Instance>> L = crearArrays(i); // Le metemos las instancias a cada ArrayList for (ArrayList<Instance> grupoInstancias : L) { grupoInstancias.addAll(rellenarConInstancias(train)); } // Creamos los clasificadores ArrayList<KNN> clasificadores = new ArrayList<KNN>(i); for (int j = 0; j < i; j++) { try { clasificadores.set(j, new KNN(L.get(j))); } catch (IndexOutOfBoundsException e) { clasificadores.add(new KNN(L.get(j))); } } // Array de resultados ArrayList<Instance> Resultados = new ArrayList<Instance>(); // Clasificamos las instancias for (int j = 0; j < test.numInstances(); j++) { // Creo un Array de Instancias para guardar los resultados ArrayList<Instance> clasificadas = new ArrayList<Instance>(); // Clasificamos la instancia con todo el grupo de KNN for (KNN knn : clasificadores) { clasificadas.add(knn.clasificarInstacia(2, 2, test.instance(j))); } // Escogemos la clase mayoritaria del Array de clasificadas. Integer[] clases = new Integer[test.classAttribute().numValues()]; // Inicio el array a cero for (int k = 0; k < clases.length; k++) { clases[k] = 0; } // Asigno el numero de instancias con esa clase. 
for (Instance instancia : clasificadas) { clases[ instancia .classAttribute() .indexOfValue(instancia.stringValue(instancia.classAttribute()))] += 1; } // Elegir la clase mayoritaria y introducir a resultado int numero = 0; // numero indice = 0; // indice for (int k = 0; k < clases.length; k++) { if (clases[k] > numero) { numero = clases[k]; indice = k; } } clasificadas.get(0).setClassValue(clasificadas.get(0).classAttribute().value(indice)); Resultados.add(clasificadas.get(0)); Evaluador ev = new Evaluador(pEvaluar); accuracy = ev.EvaluateModel(Resultados); if (accuracy > bestAccuracy) { bestAccuracy = accuracy; bestL = i; } } } System.out.println("Mejor L: " + bestL); System.out.println("Mejor accuracy: " + bestAccuracy); return accuracy; }
/**
 * Classifies an instance through the wrapped classifier and maps the answer back to a class
 * index.
 *
 * <p>The instance is converted with {@code convert}, classified together with the index of its
 * class attribute, and the string form of the result is looked up among the class attribute's
 * values.
 *
 * @param instance the instance to classify
 * @return the index of the predicted value within the instance's class attribute
 */
public double classifyInstance(Instance instance) {
  final int classAttrIndex = instance.classAttribute().index();
  final Object predicted = classifier.classifyInstance(convert(instance), classAttrIndex);
  final String predictedLabel = predicted.toString();
  return instance.classAttribute().indexOfValue(predictedLabel);
}