/**
 * Centers a single instance and appends the result to the end of the output queue.
 *
 * <p>For every attribute that is numeric, not missing and not the class attribute of the input
 * format, the matching entry of {@code m_Means} is subtracted from the value. All other values
 * are carried over unchanged. A {@code SparseInstance} input yields a {@code SparseInstance}
 * that stores only the non-zero results; any other input yields a plain {@code Instance}. The
 * result keeps the weight and the dataset reference of the input.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {
  final Instance converted;

  if (instance instanceof SparseInstance) {
    double[] keptValues = new double[instance.numAttributes()];
    int[] keptIndices = new int[instance.numAttributes()];
    final double[] raw = instance.toDoubleArray();
    int kept = 0;

    for (int att = 0; att < instance.numAttributes(); att++) {
      boolean center =
          instance.attribute(att).isNumeric()
              && !Instance.isMissingValue(raw[att])
              && getInputFormat().classIndex() != att;
      double current = center ? raw[att] - m_Means[att] : raw[att];

      // Sparse storage: only non-zero entries are recorded.
      if (current != 0.0) {
        keptValues[kept] = current;
        keptIndices[kept] = att;
        kept++;
      }
    }

    // Trim the scratch buffers down to the number of entries actually kept.
    double[] trimmedValues = new double[kept];
    int[] trimmedIndices = new int[kept];
    System.arraycopy(keptValues, 0, trimmedValues, 0, kept);
    System.arraycopy(keptIndices, 0, trimmedIndices, 0, kept);
    converted =
        new SparseInstance(
            instance.weight(), trimmedValues, trimmedIndices, instance.numAttributes());
  } else {
    final double[] raw = instance.toDoubleArray();
    for (int att = 0; att < getInputFormat().numAttributes(); att++) {
      boolean center =
          instance.attribute(att).isNumeric()
              && !Instance.isMissingValue(raw[att])
              && getInputFormat().classIndex() != att;
      if (center) {
        raw[att] = raw[att] - m_Means[att];
      }
    }
    converted = new Instance(instance.weight(), raw);
  }

  converted.setDataset(instance.dataset());
  push(converted);
}
/** * Adds the prediction intervals as additional attributes at the end. Since classifiers can * returns varying number of intervals per instance, the dataset is filled with missing values for * non-existing intervals. */ protected void addPredictionIntervals() { int maxNum; int num; int i; int n; FastVector preds; FastVector atts; Instances data; Instance inst; Instance newInst; double[] values; double[][] predInt; // determine the maximum number of intervals maxNum = 0; preds = m_Evaluation.predictions(); for (i = 0; i < preds.size(); i++) { num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length; if (num > maxNum) maxNum = num; } // create new header atts = new FastVector(); for (i = 0; i < m_PlotInstances.numAttributes(); i++) atts.addElement(m_PlotInstances.attribute(i)); for (i = 0; i < maxNum; i++) { atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary")); atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary")); atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width")); } data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances()); data.setClassIndex(m_PlotInstances.classIndex()); // update data for (i = 0; i < m_PlotInstances.numInstances(); i++) { inst = m_PlotInstances.instance(i); // copy old values values = new double[data.numAttributes()]; System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes()); // add interval data predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals(); for (n = 0; n < maxNum; n++) { if (n < predInt.length) { values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0]; values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1]; values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0]; } else { values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue(); values[m_PlotInstances.numAttributes() + n * 3 + 1] = 
Utils.missingValue(); values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue(); } } // create new Instance newInst = new DenseInstance(inst.weight(), values); data.add(newInst); } m_PlotInstances = data; }
/**
 * Builds a normalized copy of an instance by dividing every value by the total that
 * {@code Utils.sum} reports for the instance's values.
 *
 * <p>There is no guard for a zero total: in that case the division yields NaN or infinite
 * values. The returned instance keeps the weight of the input but has no dataset assigned.
 *
 * @param inst instance to be normalized
 * @return a new {@code DenseInstance} holding the divided values
 */
private Instance normalizeInstance(Instance inst) {
  final double[] scaled = inst.toDoubleArray();
  final double total = Utils.sum(scaled);

  int pos = 0;
  while (pos < scaled.length) {
    scaled[pos] = scaled[pos] / total;
    pos++;
  }

  return new DenseInstance(inst.weight(), scaled);
}
/**
 * Assigns an instance to a cluster with a naive minimum-distance search.
 *
 * <p>The instance is compared, via {@code distnorm2}, against every row of {@code v}; the
 * cluster recorded in {@code cluster[]} for the closest row is returned. Ties keep the earliest
 * row.
 *
 * @param instance the instance to classify
 * @return the cluster of the nearest row, or -1 if no row produced a distance below positive
 *     infinity (for example when {@code v} has no rows)
 * @throws java.lang.Exception declared for callers; not thrown directly by this method
 */
public int clusterInstance(Instance instance) throws java.lang.Exception {
  final DoubleMatrix1D point = DoubleFactory1D.dense.make(instance.toDoubleArray());

  int nearestCluster = -1;
  double smallest = Double.POSITIVE_INFINITY;

  for (int row = 0; row < v.rows(); row++) {
    final double candidate = distnorm2(point, v.viewRow(row));
    if (candidate < smallest) {
      smallest = candidate;
      nearestCluster = cluster[row];
    }
  }

  return nearestCluster;
}
/** * Processes the given data (may change the provided dataset) and returns the modified version. * This method is called in batchFinished(). * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected Instances process(Instances instances) throws Exception { Instances result; int i; int n; double[] values; String value; Instance inst; Instance newInst; // we need the complete input data! if (!isFirstBatchDone()) setOutputFormat(determineOutputFormat(getInputFormat())); result = new Instances(getOutputFormat()); for (i = 0; i < instances.numInstances(); i++) { inst = instances.instance(i); values = inst.toDoubleArray(); for (n = 0; n < values.length; n++) { if (!m_Cols.isInRange(n) || !instances.attribute(n).isNumeric() || inst.isMissing(n)) continue; // get index of value if (instances.attribute(n).type() == Attribute.DATE) value = inst.stringValue(n); else value = Utils.doubleToString(inst.value(n), MAX_DECIMALS); values[n] = result.attribute(n).indexOfValue(value); } // generate new instance if (inst instanceof SparseInstance) newInst = new SparseInstance(inst.weight(), values); else newInst = new DenseInstance(inst.weight(), values); // copy possible string, relational values newInst.setDataset(getOutputFormat()); copyValues(newInst, false, inst.dataset(), getOutputFormat()); result.add(newInst); } return result; }
/**
 * Computes the JS divergence between an instance and a cluster; used for test data.
 *
 * <p>The instance values are divided by their total to form a distribution, which is compared
 * against column {@code t} of {@code bestT.Py_t}. If either weight is zero or negative a
 * warning is printed to standard output and 0 is returned.
 *
 * @param inst instance to be clustered
 * @param t index of the cluster
 * @param pi1 weight applied to the instance distribution
 * @param pi2 weight applied to the cluster distribution
 * @return the JS divergence, or 0 when a weight is not positive
 */
private double JS(Instance inst, int t, double pi1, double pi2) {
  if (Math.min(pi1, pi2) <= 0) {
    System.out.format(
        "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
    return 0;
  }

  final double total = Utils.sum(inst.toDoubleArray());

  // Term contributed by the instance distribution (only stored sparse values are visited).
  double instanceTerm = 0.0;
  for (int s = 0; s < inst.numValues(); s++) {
    final double p = inst.valueSparse(s) / total;
    if (p == 0) {
      continue;
    }
    instanceTerm += p * Math.log(p / (p * pi1 + pi2 * bestT.Py_t.get(inst.index(s), t)));
  }

  // Term contributed by the cluster distribution (all attributes are visited).
  double clusterTerm = 0.0;
  for (int att = 0; att < m_numAttributes; att++) {
    final double q = bestT.Py_t.get(att, t);
    if (q != 0) {
      clusterTerm += q * Math.log(q / (inst.value(att) * pi1 / total + pi2 * q));
    }
  }

  return pi1 * instanceTerm + pi2 * clusterTerm;
}
private Instance getFVSFilteredInstance( Instances output, Instance old_inst, List<List<Value>> list, Double[] substitution) { double[] oldValues = old_inst.toDoubleArray(); Instance instance = new Instance(old_inst); // Change with value that is available for (int i = 0; i < oldValues.length - 1; i++) { // System.out.println(oldValues[i]); // System.out.println(list.get(i)); // System.out.println("############################"); // If list doesn't contain, then delete Value v = new Value(oldValues[i]); int idx = list.get(i).indexOf(v); // If not found in the index if (idx == -1) { // Change with substitution instance.setValue(i, substitution[i]); // Change into missing // instance.setMissing(i); } } return instance; }
/** * See interface <code>Cluster</code> * * @param point * @return */ @Override public double getInclusionProbability(Instance instance) { // trivial cluster if (N == 1) { double distance = 0.0; for (int i = 0; i < LS.length; i++) { double d = LS[i] - instance.value(i); distance += d * d; } distance = Math.sqrt(distance); if (distance < EPSILON) return 1.0; return 0.0; } else { double dist = calcNormalizedDistance(instance.toDoubleArray()); if (dist <= getRadius()) { return 1; } else { return 0; } // double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); // return res; } }
/** * Medoto que verifica se a instancia passada como parametro e igual a regra. Retorna resultados * que serao usados para a construcao da matriz de contigencia * * @param i Instancia de teste a ser comparada com a regra * @return Retorno da comparacao: 0 hb, 1 h'b, 2 hb', 3 h'b' */ public void compararRegraContigencia(Instance i) { double b[] = i.toDoubleArray(); double h = b[b.length - 1]; boolean compCorpo = compararCorpo(b); if (compCorpo) { if (cabeca == (int) h) { // Corpo e cabeca iguais - hb matrizContigencia.incH_B(); } else { // Corpo igual mas cabeca diferente - h'b matrizContigencia.incNotH_B(); } } else { if (cabeca == (int) h) { // Corpo diferente e cabeca igual - hb' matrizContigencia.incH_NotB(); } else { // Corpo e cabeca diferentes - h'b' matrizContigencia.incNotH_NotB(); } } }
/**
 * Tells whether the rule correctly covers an example: the body must match the example's values
 * and the example's class value must equal the rule head.
 *
 * @param exemplo the example to check
 * @return {@code true} if the rule covers the example and predicts its class, {@code false}
 *     otherwise
 */
public boolean cobreCorretamente(Instance exemplo) {
  // The class value is only consulted when the body already matches.
  return compararCorpo(exemplo.toDoubleArray()) && exemplo.classValue() == cabeca;
}
/**
 * Flags outliers and extreme values in the given data and returns the extended dataset. This
 * method is called in batchFinished().
 *
 * <p>On the first batch the thresholds are computed from the data. Every instance is then copied
 * into a wider instance (created with weight 1.0) whose extra columns, located through
 * {@code m_OutlierAttributePosition}, are set to 1 for outliers (offset 0) and extreme values
 * (offset 1). In per-attribute mode an offset multiplier can additionally be written at offset
 * 2. The returned dataset is the object obtained from {@code getOutputFormat()}, with the new
 * instances added to it.
 *
 * @param instances the data to process
 * @return the modified data
 * @throws Exception in case the processing goes wrong
 * @see #batchFinished()
 */
protected Instances process(Instances instances) throws Exception {
  if (!isFirstBatchDone()) {
    computeThresholds(instances);
  }

  final Instances output = getOutputFormat();
  final int oldWidth = instances.numAttributes();
  final int newWidth = output.numAttributes();

  for (int row = 0; row < instances.numInstances(); row++) {
    final Instance source = instances.instance(row);

    // Widen the value array; the extra flag columns start out as 0.
    final double[] cells = new double[newWidth];
    System.arraycopy(source.toDoubleArray(), 0, cells, 0, oldWidth);

    final Instance flagged = new Instance(1.0, cells);
    flagged.setDataset(output);

    if (getDetectionPerAttribute()) {
      for (int a = 0; a < m_AttributeIndices.length; a++) {
        final int attIndex = m_AttributeIndices[a];

        // Non-numeric attributes carry no flags.
        if (attIndex == NON_NUMERIC) {
          continue;
        }

        if (isOutlier(source, attIndex)) {
          flagged.setValue(m_OutlierAttributePosition[a], 1);
        }

        if (isExtremeValue(source, attIndex)) {
          flagged.setValue(m_OutlierAttributePosition[a] + 1, 1);
          // Optionally tag extreme values as outliers as well.
          if (getExtremeValuesAsOutliers()) {
            flagged.setValue(m_OutlierAttributePosition[a], 1);
          }
        }

        if (getOutputOffsetMultiplier()) {
          flagged.setValue(
              m_OutlierAttributePosition[a] + 2, calculateMultiplier(source, attIndex));
        }
      }
    } else {
      // One pair of flags for the whole instance.
      if (isOutlier(source)) {
        flagged.setValue(m_OutlierAttributePosition[0], 1);
      }

      if (isExtremeValue(source)) {
        flagged.setValue(m_OutlierAttributePosition[0] + 1, 1);
        // Optionally tag extreme values as outliers as well.
        if (getExtremeValuesAsOutliers()) {
          flagged.setValue(m_OutlierAttributePosition[0], 1);
        }
      }
    }

    // Copy possible string and relational values.
    copyValues(flagged, false, source.dataset(), getOutputFormat());

    output.add(flagged);
  }

  return output;
}
/**
 * Rescales a single instance and appends the result to the end of the output queue.
 *
 * <p>Only attributes that are numeric, not missing and not the class attribute of the input
 * format are rescaled; every other value is carried over unchanged. A rescaled value becomes 0
 * when the recorded minimum is NaN or equals the recorded maximum; otherwise it is
 * {@code (value - min) / (max - min) * m_Scale + m_Translation}. Sparse input yields a
 * {@code SparseInstance} holding only the non-zero results, any other input a
 * {@code DenseInstance}. The result keeps the weight and dataset reference of the input.
 *
 * @param instance the instance to convert
 * @throws Exception if the rescaling produces a NaN value
 */
protected void convertInstance(Instance instance) throws Exception {
  final Instance converted;

  if (instance instanceof SparseInstance) {
    double[] keptValues = new double[instance.numAttributes()];
    int[] keptIndices = new int[instance.numAttributes()];
    final double[] raw = instance.toDoubleArray();
    int kept = 0;

    for (int att = 0; att < instance.numAttributes(); att++) {
      boolean rescale =
          instance.attribute(att).isNumeric()
              && !Utils.isMissingValue(raw[att])
              && getInputFormat().classIndex() != att;

      double current;
      if (!rescale) {
        current = raw[att];
      } else if (Double.isNaN(m_MinArray[att]) || (m_MaxArray[att] == m_MinArray[att])) {
        // No usable range for this attribute.
        current = 0;
      } else {
        current =
            (raw[att] - m_MinArray[att]) / (m_MaxArray[att] - m_MinArray[att]) * m_Scale
                + m_Translation;
        if (Double.isNaN(current)) {
          throw new Exception(
              "A NaN value was generated "
                  + "while normalizing "
                  + instance.attribute(att).name());
        }
      }

      // Sparse storage: only non-zero entries are recorded.
      if (current != 0.0) {
        keptValues[kept] = current;
        keptIndices[kept] = att;
        kept++;
      }
    }

    // Trim the scratch buffers down to the number of entries actually kept.
    double[] trimmedValues = new double[kept];
    int[] trimmedIndices = new int[kept];
    System.arraycopy(keptValues, 0, trimmedValues, 0, kept);
    System.arraycopy(keptIndices, 0, trimmedIndices, 0, kept);
    converted =
        new SparseInstance(
            instance.weight(), trimmedValues, trimmedIndices, instance.numAttributes());
  } else {
    final double[] raw = instance.toDoubleArray();

    for (int att = 0; att < getInputFormat().numAttributes(); att++) {
      boolean rescale =
          instance.attribute(att).isNumeric()
              && !Utils.isMissingValue(raw[att])
              && getInputFormat().classIndex() != att;
      if (!rescale) {
        continue;
      }

      if (Double.isNaN(m_MinArray[att]) || (m_MaxArray[att] == m_MinArray[att])) {
        // No usable range for this attribute.
        raw[att] = 0;
      } else {
        raw[att] =
            (raw[att] - m_MinArray[att]) / (m_MaxArray[att] - m_MinArray[att]) * m_Scale
                + m_Translation;
        if (Double.isNaN(raw[att])) {
          throw new Exception(
              "A NaN value was generated "
                  + "while normalizing "
                  + instance.attribute(att).name());
        }
      }
    }

    converted = new DenseInstance(instance.weight(), raw);
  }

  converted.setDataset(instance.dataset());
  push(converted);
}
/**
 * Convert a single instance over. The converted instance is added to the end of the output queue.
 *
 * <p>A value is transformed only when its column is selected by {@code m_SelectCols} and the
 * attribute is numeric, not missing and not the class attribute of the input format. The
 * transformed value is whatever {@code eval} returns for a symbol table holding the current
 * value ({@code A}) and the attribute statistics; a NaN or infinite result is replaced by a
 * missing value and a warning is printed to standard error. Every other value is carried over
 * unchanged, in both the sparse and the dense representation.
 *
 * <p>Sparse input yields a {@code SparseInstance} holding only the non-zero results, any other
 * input a {@code DenseInstance}. The result keeps the weight and dataset reference of the input.
 *
 * @param instance the instance to convert
 * @throws Exception if instance cannot be converted
 */
private void convertInstance(Instance instance) throws Exception {
  Instance inst;
  HashMap symbols = new HashMap(5);
  double[] vals = instance.toDoubleArray();

  if (instance instanceof SparseInstance) {
    double[] newVals = new double[instance.numAttributes()];
    int[] newIndices = new int[instance.numAttributes()];
    int ind = 0;

    for (int j = 0; j < instance.numAttributes(); j++) {
      // Default: carry the value over untouched. This also covers selected columns that are
      // non-numeric, missing or the class attribute, which must not be dropped from the
      // sparse output.
      double value = vals[j];

      if (m_SelectCols.isInRange(j)
          && instance.attribute(j).isNumeric()
          && (!Utils.isMissingValue(vals[j]))
          && (getInputFormat().classIndex() != j)) {
        fillSymbols(symbols, j, vals[j]);
        value = eval(symbols);
        if (Double.isNaN(value) || Double.isInfinite(value)) {
          System.err.println("WARNING:Error in evaluating the expression: missing value set");
          value = Utils.missingValue();
        }
      }

      // Sparse storage: only non-zero entries are recorded.
      if (value != 0.0) {
        newVals[ind] = value;
        newIndices[ind] = j;
        ind++;
      }
    }

    // Trim the scratch buffers down to the number of entries actually kept.
    double[] tempVals = new double[ind];
    int[] tempInd = new int[ind];
    System.arraycopy(newVals, 0, tempVals, 0, ind);
    System.arraycopy(newIndices, 0, tempInd, 0, ind);
    inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
  } else {
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      if (m_SelectCols.isInRange(j)
          && instance.attribute(j).isNumeric()
          && (!Utils.isMissingValue(vals[j]))
          && (getInputFormat().classIndex() != j)) {
        fillSymbols(symbols, j, vals[j]);
        vals[j] = eval(symbols);
        if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
          System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
          vals[j] = Utils.missingValue();
        }
      }
    }
    inst = new DenseInstance(instance.weight(), vals);
  }

  inst.setDataset(instance.dataset());
  push(inst);
}

/**
 * Loads the symbol table with the current value and the numeric statistics of one attribute.
 *
 * @param symbols the table to fill; existing entries with the same keys are overwritten
 * @param attIndex index of the attribute whose statistics are used
 * @param value the current attribute value, stored under {@code A}
 */
private void fillSymbols(HashMap symbols, int attIndex, double value) {
  symbols.put("A", Double.valueOf(value));
  symbols.put("MAX", Double.valueOf(m_attStats[attIndex].numericStats.max));
  symbols.put("MIN", Double.valueOf(m_attStats[attIndex].numericStats.min));
  symbols.put("MEAN", Double.valueOf(m_attStats[attIndex].numericStats.mean));
  symbols.put("SD", Double.valueOf(m_attStats[attIndex].numericStats.stdDev));
  symbols.put("COUNT", Double.valueOf(m_attStats[attIndex].numericStats.count));
  symbols.put("SUM", Double.valueOf(m_attStats[attIndex].numericStats.sum));
  symbols.put("SUMSQUARED", Double.valueOf(m_attStats[attIndex].numericStats.sumSq));
}
/** * Métod que percorre todos os dados pertencentes à Instances dados. Imprimindo as informações da * base. */ public void percorrerDados() { if (dados != null) { /*Cada exemplo contido nos dados é identificado no Weka através da * classe Instance. Assim, o objeto dados, do tipo Instances, é uma coleçao de * Instance. Voce vai ter metodos que possibilitam acessar todos os exemplos * presentes na base. * */ // Percorre todos os exemples presentes na base for (int i = 0; i < dados.numInstances(); i++) { // Método para obter a instance de número 1. // Voce pode pegar a primeira e a ultima instance tb. // Além de poder deletar entre outras coisas. Instance exemplo = dados.instance(i); /*Uma Intance é formada por vários atributos, que são os atributos * da base. Voce pode percorrer todos os atributos Instace, ou pode * "setar" (set) ou pegar (get) um atributo especifico. * */ // É possível transforma todos os atributos em um array de double double[] arrayAtributos = exemplo.toDoubleArray(); System.out.println("Valores para o exemplo " + i); System.out.print("Array de atributos: "); for (int j = 0; j < arrayAtributos.length; j++) { System.out.print(arrayAtributos[j] + " "); } System.out.println(); // Percorrendo todos os atributos para se obter informacoes sobre eles for (int j = 0; j < exemplo.numAttributes(); j++) { Attribute att = exemplo.attribute(j); double valor = exemplo.value(att); System.out.println( "Valor do atributo " + att.name() + ":" + valor + " - " + att.value((int) valor)); } System.out.println(); // Mudando o valor do atributo 0, para um valor possível do atributos // Obtendo as informacoes do atributo 0; Attribute att = exemplo.attribute(0); // Obtendo o valor do atributo 0. 
double valorDoAtributo0 = exemplo.value(att); System.out.println("Valor antigo, em double: " + valorDoAtributo0); System.out.println("Valor antigo, em nome: " + att.value((int) valorDoAtributo0)); int novoValor = 1; exemplo.setValue(att, novoValor); valorDoAtributo0 = exemplo.value(att); System.out.println("Valor novo, em nome: " + att.value((int) valorDoAtributo0)); System.out.println(); System.out.println(); } } }