Ejemplo n.º 1
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   * @param instance the instance to convert
  private void convertInstance(Instance instance) {
    // Builds a copy of the instance in which m_Means[j] is subtracted from every
    // non-missing numeric attribute other than the class attribute.
    // NOTE(review): this excerpt has no closing braces, never increments ind, and
    // never hands inst to an output queue; lines appear to be missing — confirm
    // against the complete source before relying on it.
    Instance inst = null;

    if (instance instanceof SparseInstance) {
      // Sparse case: collect only the non-zero results together with their indices.
      double[] newVals = new double[instance.numAttributes()];
      int[] newIndices = new int[instance.numAttributes()];
      double[] vals = instance.toDoubleArray();
      // ind is the number of entries stored so far in newVals/newIndices.
      int ind = 0;
      for (int j = 0; j < instance.numAttributes(); j++) {
        double value;
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {

          // Centered value; a result of exactly 0.0 is not stored.
          value = vals[j] - m_Means[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
        } else {
          // Attribute is not centered: carry the original value over unchanged.
          value = vals[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
      // Trim the work arrays down to the ind entries actually used.
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
    } else {
      // Dense case: modify the double array returned by toDoubleArray() in place.
      double[] vals = instance.toDoubleArray();
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {
          vals[j] = (vals[j] - m_Means[j]);
      inst = new Instance(instance.weight(), vals);


   * Adds the prediction intervals as additional attributes at the end. Since classifiers can
   * return a varying number of intervals per instance, the dataset is filled with missing values
   * for non-existing intervals.
  protected void addPredictionIntervals() {
    // Replaces m_PlotInstances with a dataset that has three extra numeric
    // attributes (lower boundary, upper boundary, width) per prediction interval.
    // NOTE(review): closing braces are missing throughout this excerpt, and newInst
    // is never added to data here; lines appear to have been lost — confirm against
    // the complete source.
    int maxNum;
    int num;
    int i;
    int n;
    FastVector preds;
    FastVector atts;
    Instances data;
    Instance inst;
    Instance newInst;
    double[] values;
    double[][] predInt;

    // determine the maximum number of intervals
    maxNum = 0;
    preds = m_Evaluation.predictions();
    for (i = 0; i < preds.size(); i++) {
      num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
      if (num > maxNum) maxNum = num;

    // create new header
    atts = new FastVector();
    // NOTE(review): the body of the following loop is absent in this excerpt;
    // presumably it copied the existing attributes into atts — verify.
    for (i = 0; i < m_PlotInstances.numAttributes(); i++)
    for (i = 0; i < maxNum; i++) {
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width"));
    data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());

    // update data
    for (i = 0; i < m_PlotInstances.numInstances(); i++) {
      inst = m_PlotInstances.instance(i);
      // copy old values
      values = new double[data.numAttributes()];
      System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
      // add interval data
      // preds is indexed with the same i as m_PlotInstances, i.e. prediction i is
      // taken to belong to plot instance i.
      predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
      for (n = 0; n < maxNum; n++) {
        // Each interval n occupies three consecutive slots after the original attributes.
        if (n < predInt.length) {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0];
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1];
          // width = second interval entry minus first interval entry
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0];
        } else {
          // This prediction has fewer than maxNum intervals: pad with missing values.
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue();
      // create new Instance
      newInst = new DenseInstance(inst.weight(), values);

    m_PlotInstances = data;
Ejemplo n.º 3
  * Normalize the instance
  * @param inst instance to be normalized
  * @return a new Instance with normalized values
 private Instance normalizeInstance(Instance inst) {
   // Divides every attribute value by the sum of all attribute values and wraps
   // the result in a new DenseInstance that keeps the original weight.
   double[] vals = inst.toDoubleArray();
   double sum = Utils.sum(vals);
   // NOTE(review): sum is not checked; a sum of 0 makes the division produce
   // NaN or infinite values. The loop's closing brace is missing in this excerpt.
   for (int i = 0; i < vals.length; i++) {
     vals[i] /= sum;
   return new DenseInstance(inst.weight(), vals);
  * Classifies an instance w.r.t. the partitions found. It applies a naive min-distance algorithm.
  * @param instance the instance to classify
  * @return the cluster that contains the nearest point to the instance
 public int clusterInstance(Instance instance) throws java.lang.Exception {
   // Scans every row of v, tracks the row with the smallest distnorm2 distance to
   // the instance, and returns the cluster[] entry stored for that row.
   // v, cluster and distnorm2 are defined outside this excerpt.
   DoubleMatrix1D u = DoubleFactory1D.dense.make(instance.toDoubleArray());
   double min_dist = Double.POSITIVE_INFINITY;
   // c stays -1 when no row yields a distance below positive infinity
   // (for example when v has no rows).
   int c = -1;
   for (int i = 0; i < v.rows(); i++) {
     double dist = distnorm2(u, v.viewRow(i));
     // Strict comparison: on ties the earliest row wins.
     if (dist < min_dist) {
       c = cluster[i];
       min_dist = dist;
   return c;
Ejemplo n.º 5
   * Processes the given data (may change the provided dataset) and returns the modified version.
   * This method is called in batchFinished().
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
  protected Instances process(Instances instances) throws Exception {
    // For each instance, replaces the value of every selected, non-missing numeric
    // attribute with the index of its string form in the matching output attribute.
    // NOTE(review): closing braces are missing in this excerpt and newInst is never
    // added to result here; lines appear to have been lost — confirm against the
    // complete source.
    Instances result;
    int i;
    int n;
    double[] values;
    String value;
    Instance inst;
    Instance newInst;

    // we need the complete input data!
    if (!isFirstBatchDone()) setOutputFormat(determineOutputFormat(getInputFormat()));

    result = new Instances(getOutputFormat());

    for (i = 0; i < instances.numInstances(); i++) {
      inst = instances.instance(i);
      values = inst.toDoubleArray();

      for (n = 0; n < values.length; n++) {
        // NOTE(review): the statement controlled by this condition is absent in the
        // excerpt; presumably it skipped attributes that are out of range,
        // non-numeric or missing — verify.
        if (!m_Cols.isInRange(n) || !instances.attribute(n).isNumeric() || inst.isMissing(n))

        // get index of value
        // Date attributes use their string value; other numeric attributes are
        // formatted with MAX_DECIMALS decimals.
        if (instances.attribute(n).type() == Attribute.DATE) value = inst.stringValue(n);
        else value = Utils.doubleToString(inst.value(n), MAX_DECIMALS);

        values[n] = result.attribute(n).indexOfValue(value);

      // generate new instance
      // Keep the sparse/dense representation of the input instance.
      if (inst instanceof SparseInstance) newInst = new SparseInstance(inst.weight(), values);
      else newInst = new DenseInstance(inst.weight(), values);

      // copy possible string, relational values
      copyValues(newInst, false, inst.dataset(), getOutputFormat());


    return result;
Ejemplo n.º 6
  * Compute the JS divergence between an instance and a cluster, used for test data
  * @param inst instance to be clustered
  * @param t index of the cluster
  * @param pi1
  * @param pi2
  * @return the JS divergence
 private double JS(Instance inst, int t, double pi1, double pi2) {
   // Returns pi1 * kl1 + pi2 * kl2, where kl1 and kl2 are the two log-ratio sums
   // computed below against the mixture (pi1 * instance + pi2 * cluster column t).
   // Returns 0 when either weight is zero or negative.
   if (Math.min(pi1, pi2) <= 0) {
     // NOTE(review): the call that owns the following format string is missing in
     // this excerpt (presumably a formatted print of the warning) — verify.
         "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
     return 0;
   // sum is used to scale the instance values; it is not checked for zero here.
   double sum = Utils.sum(inst.toDoubleArray());
   double kl1 = 0.0, kl2 = 0.0, tmp = 0.0;
   // First term: iterate over the instance's stored (sparse) values only.
   for (int i = 0; i < inst.numValues(); i++) {
     tmp = inst.valueSparse(i) / sum;
     // Zero entries are skipped, so no log of 0 is taken.
     if (tmp != 0) {
       kl1 += tmp * Math.log(tmp / (tmp * pi1 + pi2 * bestT.Py_t.get(inst.index(i), t)));
   // Second term: iterate over all m_numAttributes entries of column t of bestT.Py_t.
   for (int i = 0; i < m_numAttributes; i++) {
     if ((tmp = bestT.Py_t.get(i, t)) != 0) {
       kl2 += tmp * Math.log(tmp / (inst.value(i) * pi1 / sum + pi2 * tmp));
   return pi1 * kl1 + pi2 * kl2;
Ejemplo n.º 7
 /**
  * Copies {@code old_inst} and replaces every value that is not found in the
  * per-attribute list of allowed values with the corresponding substitution.
  *
  * @param output not read anywhere in the visible code
  * @param old_inst the instance to copy and filter
  * @param list for each attribute index, the values that are kept as they are
  * @param substitution for each attribute index, the replacement for a value not in the list
  * @return the filtered copy of {@code old_inst}
  */
 private Instance getFVSFilteredInstance(
     Instances output, Instance old_inst, List<List<Value>> list, Double[] substitution) {
   double[] oldValues = old_inst.toDoubleArray();
   Instance instance = new Instance(old_inst);
   // Change with value that is available
   // The last value is excluded from the loop (presumably the class attribute — TODO confirm).
   for (int i = 0; i < oldValues.length - 1; i++) {
     // System.out.println(oldValues[i]);
     // System.out.println(list.get(i));
     // System.out.println("############################");
     // If list doesn't contain, then delete
     Value v = new Value(oldValues[i]);
     int idx = list.get(i).indexOf(v);
     // If not found in the index
     if (idx == -1) {
       // Change with substitution
       instance.setValue(i, substitution[i]);
       // Change into missing
       // instance.setMissing(i);
   return instance;
Ejemplo n.º 8
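  * See interface <code>Cluster</code>
  * @param instance the instance to test for membership in this cluster
  * @return 1 if the instance is considered to be inside the cluster, 0 otherwise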
 public double getInclusionProbability(Instance instance) {
   // Hard membership test: the result is always exactly 1 or 0.
   // trivial cluster
   if (N == 1) {
     // With N == 1, compute the Euclidean distance between LS and the instance
     // and accept only when it is below EPSILON.
     double distance = 0.0;
     for (int i = 0; i < LS.length; i++) {
       double d = LS[i] - instance.value(i);
       distance += d * d;
     distance = Math.sqrt(distance);
     if (distance < EPSILON) return 1.0;
     return 0.0;
   } else {
     // Otherwise the instance is inside when its normalized distance does not
     // exceed the radius.
     double dist = calcNormalizedDistance(instance.toDoubleArray());
     if (dist <= getRadius()) {
       return 1;
     } else {
       return 0;
     //            double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
     //            return res;
Ejemplo n.º 9
   * Metodo que verifica se a instancia passada como parametro e igual a regra. Retorna resultados
   * que serao usados para a construcao da matriz de contingencia
   * @param i Instancia de teste a ser comparada com a regra
   * @return Retorno da comparacao: 0 hb, 1 h'b, 2 hb', 3 h'b'
  public void compararRegraContigencia(Instance i) {
    // Compares the instance against the rule: the body via compararCorpo and the
    // head via the last value of the instance.
    // NOTE(review): all four branches are empty in this excerpt and the method is
    // void, so nothing is recorded here; the branch statements appear to be
    // missing — confirm against the complete source.
    double b[] = i.toDoubleArray();
    // The last array element is compared with the rule head below.
    double h = b[b.length - 1];
    boolean compCorpo = compararCorpo(b);
    if (compCorpo) {
      if (cabeca == (int) h) {
        // Body and head both match - hb

      } else {
        // Body matches but head differs - h'b
    } else {

      if (cabeca == (int) h) {
        // Body differs and head matches - hb'
      } else {
        // Body and head both differ - h'b'
Ejemplo n.º 10
  * Método que define se a regra cobre corretamente o exemplo
  * @param exemplo Exemplo que será verificado se a regra o cobre ou não.
  * @return
 public boolean cobreCorretamente(Instance exemplo) {
   // True only when the rule body matches the example (compararCorpo) and the
   // example's class value equals the rule head (cabeca); false otherwise.
   if (compararCorpo(exemplo.toDoubleArray()))
     if (exemplo.classValue() == cabeca) return true;
     else return false;
   else return false;
Ejemplo n.º 11
   * Processes the given data (may change the provided dataset) and returns the modified version.
   * This method is called in batchFinished(). This implementation only calls process(Instance) for
   * each instance in the given dataset.
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
  protected Instances process(Instances instances) throws Exception {
    // Copies each input instance into a wider value array and sets flag
    // attributes for outliers and extreme values, either once per instance or
    // once per attribute depending on getDetectionPerAttribute().
    // NOTE(review): closing braces are missing in this excerpt, the statement after
    // "add to output" is absent, and the call that owns the arguments after
    // getOutputOffsetMultiplier() is missing — confirm against the complete source.
    Instances result;
    Instance instOld;
    Instance instNew;
    int i;
    int n;
    double[] values;
    int numAttNew;
    int numAttOld;

    // Thresholds are computed from the first batch only.
    if (!isFirstBatchDone()) computeThresholds(instances);

    result = getOutputFormat();
    numAttOld = instances.numAttributes();
    numAttNew = result.numAttributes();

    for (n = 0; n < instances.numInstances(); n++) {
      instOld = instances.instance(n);
      // New array has room for the added attributes; they start out as 0.0.
      values = new double[numAttNew];
      System.arraycopy(instOld.toDoubleArray(), 0, values, 0, numAttOld);

      // generate new instance
      // The new instance is created with weight 1.0, not with instOld's weight.
      instNew = new Instance(1.0, values);

      // per attribute?
      if (!getDetectionPerAttribute()) {
        // Whole-instance mode: position [0] is the outlier flag and [0] + 1 the
        // extreme-value flag.
        // outlier?
        if (isOutlier(instOld)) instNew.setValue(m_OutlierAttributePosition[0], 1);
        // extreme value?
        if (isExtremeValue(instOld)) {
          instNew.setValue(m_OutlierAttributePosition[0] + 1, 1);
          // tag extreme values also as outliers?
          if (getExtremeValuesAsOutliers()) instNew.setValue(m_OutlierAttributePosition[0], 1);
      } else {
        // Per-attribute mode: position [i] is the outlier flag, [i] + 1 the
        // extreme-value flag and [i] + 2 the multiplier slot for attribute i.
        for (i = 0; i < m_AttributeIndices.length; i++) {
          // non-numeric attribute?
          if (m_AttributeIndices[i] == NON_NUMERIC) continue;

          // outlier?
          if (isOutlier(instOld, m_AttributeIndices[i]))
            instNew.setValue(m_OutlierAttributePosition[i], 1);
          // extreme value?
          if (isExtremeValue(instOld, m_AttributeIndices[i])) {
            instNew.setValue(m_OutlierAttributePosition[i] + 1, 1);
            // tag extreme values also as outliers?
            if (getExtremeValuesAsOutliers()) instNew.setValue(m_OutlierAttributePosition[i], 1);
          // add multiplier?
          if (getOutputOffsetMultiplier())
                m_OutlierAttributePosition[i] + 2,
                calculateMultiplier(instOld, m_AttributeIndices[i]));

      // copy possible strings, relational values...
      copyValues(instNew, false, instOld.dataset(), getOutputFormat());

      // add to output

    return result;
Ejemplo n.º 12
  * Convert a single instance over. The converted instance is added to the end of the output queue.
  * @param instance the instance to convert
  * @throws Exception if conversion fails
 protected void convertInstance(Instance instance) throws Exception {
   // Rescales every non-missing numeric attribute other than the class attribute to
   // (value - min) / (max - min) * m_Scale + m_Translation, using m_MinArray and
   // m_MaxArray, and builds a sparse or dense copy of the instance.
   // NOTE(review): this excerpt has no closing braces, never increments ind, and
   // never hands inst to an output queue; lines appear to be missing — confirm
   // against the complete source.
   Instance inst = null;
   if (instance instanceof SparseInstance) {
     // Sparse case: collect only the non-zero results together with their indices.
     double[] newVals = new double[instance.numAttributes()];
     int[] newIndices = new int[instance.numAttributes()];
     double[] vals = instance.toDoubleArray();
     int ind = 0;
     for (int j = 0; j < instance.numAttributes(); j++) {
       double value;
       if (instance.attribute(j).isNumeric()
           && (!Utils.isMissingValue(vals[j]))
           && (getInputFormat().classIndex() != j)) {
         // An unknown minimum (NaN) or a zero-width range maps the value to 0,
         // which also avoids dividing by zero.
         if (Double.isNaN(m_MinArray[j]) || (m_MaxArray[j] == m_MinArray[j])) {
           value = 0;
         } else {
           value =
               (vals[j] - m_MinArray[j]) / (m_MaxArray[j] - m_MinArray[j]) * m_Scale
                   + m_Translation;
           if (Double.isNaN(value)) {
             throw new Exception(
                 "A NaN value was generated "
                     + "while normalizing "
                     + instance.attribute(j).name());
         if (value != 0.0) {
           newVals[ind] = value;
           newIndices[ind] = j;
       } else {
         // Attribute is not rescaled: carry the original value over unchanged.
         value = vals[j];
         if (value != 0.0) {
           newVals[ind] = value;
           newIndices[ind] = j;
     // Trim the work arrays down to the ind entries actually used.
     double[] tempVals = new double[ind];
     int[] tempInd = new int[ind];
     System.arraycopy(newVals, 0, tempVals, 0, ind);
     System.arraycopy(newIndices, 0, tempInd, 0, ind);
     inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
   } else {
     // Dense case: same rescaling, applied in place to the array from toDoubleArray().
     double[] vals = instance.toDoubleArray();
     for (int j = 0; j < getInputFormat().numAttributes(); j++) {
       if (instance.attribute(j).isNumeric()
           && (!Utils.isMissingValue(vals[j]))
           && (getInputFormat().classIndex() != j)) {
         if (Double.isNaN(m_MinArray[j]) || (m_MaxArray[j] == m_MinArray[j])) {
           vals[j] = 0;
         } else {
           vals[j] =
               (vals[j] - m_MinArray[j]) / (m_MaxArray[j] - m_MinArray[j]) * m_Scale
                   + m_Translation;
           if (Double.isNaN(vals[j])) {
             throw new Exception(
                 "A NaN value was generated "
                     + "while normalizing "
                     + instance.attribute(j).name());
     inst = new DenseInstance(instance.weight(), vals);
Ejemplo n.º 13
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   * @param instance the instance to convert
   * @throws Exception if instance cannot be converted
  private void convertInstance(Instance instance) throws Exception {
    // Replaces each selected, non-missing numeric attribute other than the class
    // attribute with the result of eval(symbols), where symbols holds the current
    // value ("A") and the attribute's numeric statistics from m_attStats.
    // NOTE(review): this excerpt has no closing braces, never increments ind, and
    // never hands inst to an output queue; lines appear to be missing — confirm
    // against the complete source.

    Instance inst = null;
    // One map is reused for every attribute; each put overwrites the previous entry.
    HashMap symbols = new HashMap(5);
    if (instance instanceof SparseInstance) {
      // Sparse case: collect only the non-zero results together with their indices.
      double[] newVals = new double[instance.numAttributes()];
      int[] newIndices = new int[instance.numAttributes()];
      double[] vals = instance.toDoubleArray();
      int ind = 0;
      double value;
      for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectCols.isInRange(j)) {
          if (instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(vals[j]))
              && (getInputFormat().classIndex() != j)) {
            symbols.put("A", new Double(vals[j]));
            symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
            symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
            symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
            symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
            symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
            symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
            symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
            value = eval(symbols);
            // A NaN or infinite result is reported on stderr and stored as a missing value.
            if (Double.isNaN(value) || Double.isInfinite(value)) {
              System.err.println("WARNING:Error in evaluating the expression: missing value set");
              value = Utils.missingValue();
            if (value != 0.0) {
              newVals[ind] = value;
              newIndices[ind] = j;
        } else {
          // Column is not selected: carry the original value over unchanged.
          value = vals[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
      // Trim the work arrays down to the ind entries actually used.
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
    } else {
      // Dense case: same evaluation, applied in place to the array from toDoubleArray().
      double[] vals = instance.toDoubleArray();
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (m_SelectCols.isInRange(j)) {
          if (instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(vals[j]))
              && (getInputFormat().classIndex() != j)) {
            symbols.put("A", new Double(vals[j]));
            symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
            symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
            symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
            symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
            symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
            symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
            symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
            vals[j] = eval(symbols);
            // A NaN or infinite result is reported on stderr and stored as a missing value.
            if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
              System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
              vals[j] = Utils.missingValue();
      inst = new DenseInstance(instance.weight(), vals);
   * Método que percorre todos os dados pertencentes à Instances dados. Imprimindo as informações da
   * base.
  public void percorrerDados() {
    // Walks every instance in dados, prints its attribute values, then sets
    // attribute 0 of each instance to 1 and prints the old and new values.
    // NOTE(review): closing braces are missing in this excerpt and the method may
    // continue past the visible lines — confirm against the complete source.

    if (dados != null) {
      /* Each example in the data is represented in Weka by the Instance class.
       * The dados object, of type Instances, is therefore a collection of
       * Instance objects, with methods that give access to every example in
       * the dataset.
       * */
      // Iterate over all examples in the dataset
      for (int i = 0; i < dados.numInstances(); i++) {
        // Fetch the instance at index i.
        // The first and the last instance can be fetched as well,
        // and instances can also be deleted, among other things.
        Instance exemplo = dados.instance(i);

        /* An Instance is made up of several attributes, which are the attributes
         * of the dataset. You can iterate over all attributes of an Instance, or
         * set or get one specific attribute.
         * */

        // All attributes can be converted into an array of double

        double[] arrayAtributos = exemplo.toDoubleArray();

        System.out.println("Valores para o exemplo " + i);
        System.out.print("Array de atributos: ");
        for (int j = 0; j < arrayAtributos.length; j++) {
          System.out.print(arrayAtributos[j] + " ");

        // Iterate over all attributes to obtain information about each of them
        for (int j = 0; j < exemplo.numAttributes(); j++) {
          Attribute att = exemplo.attribute(j);
          double valor = exemplo.value(att);
          // NOTE(review): the call that owns the following string expression is
          // missing in this excerpt (presumably a println) — verify.
              "Valor do atributo " + att.name() + ":" + valor + " - " + att.value((int) valor));

        // Change the value of attribute 0 to one of the attribute's possible values
        // Get the information for attribute 0
        Attribute att = exemplo.attribute(0);
        // Get the value of attribute 0.
        double valorDoAtributo0 = exemplo.value(att);

        System.out.println("Valor antigo, em double: " + valorDoAtributo0);
        System.out.println("Valor antigo, em nome: " + att.value((int) valorDoAtributo0));

        // NOTE(review): this modifies the instance obtained from dados; whether 1 is a
        // valid value index for attribute 0 is not checked here.
        int novoValor = 1;
        exemplo.setValue(att, novoValor);

        valorDoAtributo0 = exemplo.value(att);

        System.out.println("Valor novo, em nome: " + att.value((int) valorDoAtributo0));
