Пример #1
0
  /**
   * Centers a single instance and appends the result to the output queue.
   *
   * <p>Every attribute that is numeric, not missing and not the class attribute has the matching
   * entry of {@code m_Means} subtracted from it; all other values are copied unchanged. A sparse
   * input yields a sparse output that stores only the non-zero results. The new instance is
   * attached to the dataset of the input instance before it is pushed.
   *
   * @param instance the instance to convert
   */
  private void convertInstance(Instance instance) {
    final double[] raw = instance.toDoubleArray();
    final Instance converted;

    if (instance instanceof SparseInstance) {
      final int total = instance.numAttributes();
      final double[] keptValues = new double[total];
      final int[] keptIndices = new int[total];
      int kept = 0;

      for (int att = 0; att < total; att++) {
        final boolean centre =
            instance.attribute(att).isNumeric()
                && !Instance.isMissingValue(raw[att])
                && getInputFormat().classIndex() != att;
        final double shifted = centre ? raw[att] - m_Means[att] : raw[att];

        // Only non-zero entries are stored explicitly in the sparse result.
        if (shifted != 0.0) {
          keptValues[kept] = shifted;
          keptIndices[kept] = att;
          kept++;
        }
      }

      // Trim the scratch arrays down to the number of entries actually kept.
      final double[] packedValues = new double[kept];
      final int[] packedIndices = new int[kept];
      System.arraycopy(keptValues, 0, packedValues, 0, kept);
      System.arraycopy(keptIndices, 0, packedIndices, 0, kept);
      converted = new SparseInstance(instance.weight(), packedValues, packedIndices, total);
    } else {
      final int total = getInputFormat().numAttributes();
      for (int att = 0; att < total; att++) {
        if (instance.attribute(att).isNumeric()
            && !Instance.isMissingValue(raw[att])
            && getInputFormat().classIndex() != att) {
          raw[att] = raw[att] - m_Means[att];
        }
      }
      converted = new Instance(instance.weight(), raw);
    }

    converted.setDataset(instance.dataset());
    push(converted);
  }
  /**
   * Appends prediction-interval columns to {@code m_PlotInstances}.
   *
   * <p>Three numeric attributes are added per interval slot: lower boundary, upper boundary and
   * width (upper minus lower). The number of slots is the largest interval count found among the
   * predictions of {@code m_Evaluation}. Instances whose prediction has fewer intervals receive
   * missing values in the unused slots. On completion {@code m_PlotInstances} refers to the
   * extended dataset.
   */
  protected void addPredictionIntervals() {
    FastVector predictions = m_Evaluation.predictions();

    // The prediction with the most intervals decides how many slots the header needs.
    int slots = 0;
    for (int p = 0; p < predictions.size(); p++) {
      int count = ((NumericPrediction) predictions.elementAt(p)).predictionIntervals().length;
      slots = Math.max(slots, count);
    }

    // Header: the existing attributes followed by three attributes per slot.
    int baseCount = m_PlotInstances.numAttributes();
    FastVector header = new FastVector();
    for (int a = 0; a < baseCount; a++) {
      header.addElement(m_PlotInstances.attribute(a));
    }
    for (int s = 1; s <= slots; s++) {
      String prefix = "predictionInterval_" + s + "-";
      header.addElement(new Attribute(prefix + "lowerBoundary"));
      header.addElement(new Attribute(prefix + "upperBoundary"));
      header.addElement(new Attribute(prefix + "width"));
    }
    Instances extended =
        new Instances(m_PlotInstances.relationName(), header, m_PlotInstances.numInstances());
    extended.setClassIndex(m_PlotInstances.classIndex());

    // Rows: old values first, then the interval data of the prediction at the same index.
    int rowCount = m_PlotInstances.numInstances();
    for (int row = 0; row < rowCount; row++) {
      Instance source = m_PlotInstances.instance(row);
      double[] rowValues = new double[extended.numAttributes()];
      System.arraycopy(source.toDoubleArray(), 0, rowValues, 0, source.numAttributes());

      double[][] intervals =
          ((NumericPrediction) predictions.elementAt(row)).predictionIntervals();
      int offset = baseCount;
      for (int s = 0; s < slots; s++, offset += 3) {
        if (s < intervals.length) {
          double lower = intervals[s][0];
          double upper = intervals[s][1];
          rowValues[offset] = lower;
          rowValues[offset + 1] = upper;
          rowValues[offset + 2] = upper - lower;
        } else {
          // This prediction has no interval for the slot.
          rowValues[offset] = Utils.missingValue();
          rowValues[offset + 1] = Utils.missingValue();
          rowValues[offset + 2] = Utils.missingValue();
        }
      }

      extended.add(new DenseInstance(source.weight(), rowValues));
    }

    m_PlotInstances = extended;
  }
Пример #3
0
 /**
  * Divides every attribute value of an instance by the sum of all its values.
  *
  * <p>The sum is not checked for zero. The result is returned as a new {@code DenseInstance}
  * that carries the weight of the input instance.
  *
  * @param inst instance to be normalized
  * @return a new Instance with normalized values
  */
 private Instance normalizeInstance(Instance inst) {
   final double[] scaled = inst.toDoubleArray();
   final double total = Utils.sum(scaled);
   int pos = 0;
   while (pos < scaled.length) {
     scaled[pos] = scaled[pos] / total;
     pos++;
   }
   return new DenseInstance(inst.weight(), scaled);
 }
 /**
  * Assigns an instance to a cluster with a naive nearest-row search.
  *
  * <p>The instance is compared with every row of {@code v} using {@code distnorm2}; the entry of
  * {@code cluster} belonging to the closest row is returned. On ties the earliest row wins.
  *
  * @param instance the instance to classify
  * @return the cluster stored for the nearest row, or -1 if no row yields a distance below
  *     positive infinity
  * @throws java.lang.Exception declared by the signature; this method does not throw it
  *     explicitly
  */
 public int clusterInstance(Instance instance) throws java.lang.Exception {
   final DoubleMatrix1D query = DoubleFactory1D.dense.make(instance.toDoubleArray());
   int nearest = -1;
   double best = Double.POSITIVE_INFINITY;
   for (int row = 0; row < v.rows(); row++) {
     final double candidate = distnorm2(query, v.viewRow(row));
     // Written as a negated "<" so that a NaN distance is skipped, never selected.
     if (!(candidate < best)) {
       continue;
     }
     best = candidate;
     nearest = cluster[row];
   }
   return nearest;
 }
Пример #5
0
  /**
   * Converts the selected numeric values of a dataset into label indices of the output format.
   * This method is called in batchFinished().
   *
   * <p>For every value that lies in {@code m_Cols}, belongs to a numeric attribute and is not
   * missing, a label is built (the string value for DATE attributes, otherwise the number
   * formatted with {@code MAX_DECIMALS}) and the value is replaced by the index of that label in
   * the corresponding output attribute. Other values are copied unchanged. Sparse instances stay
   * sparse.
   *
   * @param instances the data to process
   * @return a new dataset in the output format holding the converted instances
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    // The output format can only be determined once the complete input data is available.
    if (!isFirstBatchDone()) {
      setOutputFormat(determineOutputFormat(getInputFormat()));
    }

    Instances converted = new Instances(getOutputFormat());

    final int rows = instances.numInstances();
    for (int row = 0; row < rows; row++) {
      Instance source = instances.instance(row);
      double[] cells = source.toDoubleArray();

      for (int col = 0; col < cells.length; col++) {
        boolean convert =
            m_Cols.isInRange(col)
                && instances.attribute(col).isNumeric()
                && !source.isMissing(col);
        if (convert) {
          // Look up the label that represents this value in the output attribute.
          String label;
          if (instances.attribute(col).type() == Attribute.DATE) {
            label = source.stringValue(col);
          } else {
            label = Utils.doubleToString(source.value(col), MAX_DECIMALS);
          }
          cells[col] = converted.attribute(col).indexOfValue(label);
        }
      }

      // Keep the storage type of the source instance.
      Instance target;
      if (source instanceof SparseInstance) {
        target = new SparseInstance(source.weight(), cells);
      } else {
        target = new DenseInstance(source.weight(), cells);
      }

      // Copy possible string and relational values.
      target.setDataset(getOutputFormat());
      copyValues(target, false, source.dataset(), getOutputFormat());

      converted.add(target);
    }

    return converted;
  }
Пример #6
0
 /**
  * Computes the JS divergence between an instance and a cluster, used for test data.
  *
  * <p>The instance values are normalized by their sum. Two logarithmic terms are accumulated,
  * one over the stored values of the instance and one over the column {@code t} of
  * {@code bestT.Py_t}, and combined as {@code pi1 * kl1 + pi2 * kl2}. If either weight is zero
  * or negative, a warning is printed to standard output and 0 is returned.
  *
  * @param inst instance to be clustered
  * @param t index of the cluster
  * @param pi1 weight applied to the instance term
  * @param pi2 weight applied to the cluster term
  * @return the JS divergence
  */
 private double JS(Instance inst, int t, double pi1, double pi2) {
   if (Math.min(pi1, pi2) <= 0) {
     System.out.format(
         "Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
     return 0;
   }

   final double total = Utils.sum(inst.toDoubleArray());

   // Term over the values stored in the instance.
   double instanceTerm = 0.0;
   for (int s = 0; s < inst.numValues(); s++) {
     final double p = inst.valueSparse(s) / total;
     if (p != 0) {
       instanceTerm += p * Math.log(p / (p * pi1 + pi2 * bestT.Py_t.get(inst.index(s), t)));
     }
   }

   // Term over all attributes of the cluster column.
   double clusterTerm = 0.0;
   for (int a = 0; a < m_numAttributes; a++) {
     final double q = bestT.Py_t.get(a, t);
     if (q != 0) {
       clusterTerm += q * Math.log(q / (inst.value(a) * pi1 / total + pi2 * q));
     }
   }

   return pi1 * instanceTerm + pi2 * clusterTerm;
 }
Пример #7
0
 /**
  * Returns a copy of an instance in which values absent from the allowed lists are substituted.
  *
  * <p>For every attribute except the last one, the value is wrapped in a {@code Value} and
  * searched in the list at the same position of {@code list}. When it is not found, the copy
  * receives {@code substitution[i]} for that attribute. The last attribute is never touched.
  *
  * @param output not used by this method
  * @param old_inst the instance to copy and filter
  * @param list per-attribute lists of accepted values
  * @param substitution per-attribute replacement values
  * @return the filtered copy of {@code old_inst}
  */
 private Instance getFVSFilteredInstance(
     Instances output, Instance old_inst, List<List<Value>> list, Double[] substitution) {
   final double[] original = old_inst.toDoubleArray();
   final Instance filtered = new Instance(old_inst);
   final int last = original.length - 1;

   for (int att = 0; att < last; att++) {
     final Value probe = new Value(original[att]);
     final boolean unknown = list.get(att).indexOf(probe) == -1;
     if (unknown) {
       // Value is not among the accepted ones: use the substitution instead.
       filtered.setValue(att, substitution[att]);
     }
   }
   return filtered;
 }
Пример #8
0
 /**
  * Returns a hard 0/1 inclusion probability for an instance. See interface <code>Cluster</code>.
  *
  * <p>For the trivial cluster ({@code N == 1}) the instance is included only if the Euclidean
  * distance between its values and {@code LS} is below {@code EPSILON}. Otherwise it is included
  * when its normalized distance does not exceed the cluster radius.
  *
  * @param instance the instance to test
  * @return 1.0 if the instance is considered part of the cluster, 0.0 otherwise
  */
 @Override
 public double getInclusionProbability(Instance instance) {
   if (N != 1) {
     final double normalized = calcNormalizedDistance(instance.toDoubleArray());
     return normalized <= getRadius() ? 1 : 0;
   }

   // trivial cluster
   double squared = 0.0;
   for (int dim = 0; dim < LS.length; dim++) {
     final double delta = LS[dim] - instance.value(dim);
     squared += delta * delta;
   }
   return Math.sqrt(squared) < EPSILON ? 1.0 : 0.0;
 }
Пример #9
0
  /**
   * Compares a test instance with this rule and updates the contingency matrix accordingly.
   *
   * <p>The last value of the instance is truncated to an int and compared with the rule head
   * {@code cabeca}; the body is checked with {@code compararCorpo}. Exactly one counter of
   * {@code matrizContigencia} is incremented: hb (body and head match), h'b (body matches, head
   * differs), hb' (body differs, head matches) or h'b' (both differ).
   *
   * @param i test instance to be compared with the rule
   */
  public void compararRegraContigencia(Instance i) {
    final double[] values = i.toDoubleArray();
    final double classValue = values[values.length - 1];
    final boolean bodyMatches = compararCorpo(values);
    final boolean headMatches = cabeca == (int) classValue;

    if (bodyMatches && headMatches) {
      // body and head equal - hb
      matrizContigencia.incH_B();
    } else if (bodyMatches) {
      // body equal but head different - h'b
      matrizContigencia.incNotH_B();
    } else if (headMatches) {
      // body different and head equal - hb'
      matrizContigencia.incH_NotB();
    } else {
      // body and head different - h'b'
      matrizContigencia.incNotH_NotB();
    }
  }
Пример #10
0
 /**
  * Tells whether the rule correctly covers an example.
  *
  * <p>The example is correctly covered when {@code compararCorpo} accepts its values and its
  * class value equals the rule head {@code cabeca}. The class value is only inspected when the
  * body matches.
  *
  * @param exemplo the example to check against the rule
  * @return {@code true} if the body matches and the class equals the head, {@code false}
  *     otherwise
  */
 public boolean cobreCorretamente(Instance exemplo) {
   return compararCorpo(exemplo.toDoubleArray()) && exemplo.classValue() == cabeca;
 }
Пример #11
0
  /**
   * Copies the given data into the output format and fills in the outlier / extreme value
   * indicator attributes. This method is called in batchFinished().
   *
   * <p>On the first batch the thresholds are computed from the data. Each instance is then copied
   * (with weight 1.0) into a wider value array whose additional slots start at 0. Without
   * per-attribute detection, the slots at {@code m_OutlierAttributePosition[0]} (outlier) and the
   * following position (extreme value) are set to 1 where applicable. With per-attribute
   * detection the same is done for every numeric attribute at its own position, optionally
   * followed by the offset multiplier.
   *
   * @param instances the data to process
   * @return the output-format dataset with one new instance per input instance
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    if (!isFirstBatchDone()) {
      computeThresholds(instances);
    }

    final Instances output = getOutputFormat();
    final int oldWidth = instances.numAttributes();
    final int newWidth = output.numAttributes();

    for (int row = 0; row < instances.numInstances(); row++) {
      final Instance source = instances.instance(row);

      // Original values first; the appended indicator slots keep their initial 0.
      final double[] cells = new double[newWidth];
      System.arraycopy(source.toDoubleArray(), 0, cells, 0, oldWidth);
      final Instance flagged = new Instance(1.0, cells);
      flagged.setDataset(output);

      if (getDetectionPerAttribute()) {
        for (int k = 0; k < m_AttributeIndices.length; k++) {
          final int attIndex = m_AttributeIndices[k];
          if (attIndex != NON_NUMERIC) {
            // outlier?
            if (isOutlier(source, attIndex)) {
              flagged.setValue(m_OutlierAttributePosition[k], 1);
            }
            // extreme value?
            if (isExtremeValue(source, attIndex)) {
              flagged.setValue(m_OutlierAttributePosition[k] + 1, 1);
              // tag extreme values also as outliers?
              if (getExtremeValuesAsOutliers()) {
                flagged.setValue(m_OutlierAttributePosition[k], 1);
              }
            }
            // add multiplier?
            if (getOutputOffsetMultiplier()) {
              flagged.setValue(
                  m_OutlierAttributePosition[k] + 2, calculateMultiplier(source, attIndex));
            }
          }
        }
      } else {
        // One pair of indicators for the whole instance.
        if (isOutlier(source)) {
          flagged.setValue(m_OutlierAttributePosition[0], 1);
        }
        if (isExtremeValue(source)) {
          flagged.setValue(m_OutlierAttributePosition[0] + 1, 1);
          // tag extreme values also as outliers?
          if (getExtremeValuesAsOutliers()) {
            flagged.setValue(m_OutlierAttributePosition[0], 1);
          }
        }
      }

      // copy possible strings, relational values...
      copyValues(flagged, false, source.dataset(), getOutputFormat());

      output.add(flagged);
    }

    return output;
  }
Пример #12
0
 /**
  * Normalizes a single instance and appends the result to the output queue.
  *
  * <p>Every attribute that is numeric, not missing and not the class attribute is mapped to
  * {@code (value - min) / (max - min) * m_Scale + m_Translation}, using the entries of
  * {@code m_MinArray} and {@code m_MaxArray}. If the recorded minimum is NaN or equals the
  * maximum, the value becomes 0. All other values are copied unchanged. A sparse input yields a
  * sparse output that stores only the non-zero results.
  *
  * @param instance the instance to convert
  * @throws Exception if the normalization of a value produces NaN
  */
 protected void convertInstance(Instance instance) throws Exception {
   final double[] raw = instance.toDoubleArray();
   final Instance converted;

   if (instance instanceof SparseInstance) {
     final int total = instance.numAttributes();
     final double[] keptValues = new double[total];
     final int[] keptIndices = new int[total];
     int kept = 0;

     for (int att = 0; att < total; att++) {
       double out;
       if (instance.attribute(att).isNumeric()
           && !Utils.isMissingValue(raw[att])
           && getInputFormat().classIndex() != att) {
         final double min = m_MinArray[att];
         if (Double.isNaN(min) || m_MaxArray[att] == min) {
           out = 0;
         } else {
           out = (raw[att] - min) / (m_MaxArray[att] - min) * m_Scale + m_Translation;
           if (Double.isNaN(out)) {
             throw new Exception(
                 "A NaN value was generated while normalizing " + instance.attribute(att).name());
           }
         }
       } else {
         out = raw[att];
       }

       // Only non-zero entries are stored explicitly in the sparse result.
       if (out != 0.0) {
         keptValues[kept] = out;
         keptIndices[kept] = att;
         kept++;
       }
     }

     // Trim the scratch arrays down to the number of entries actually kept.
     final double[] packedValues = new double[kept];
     final int[] packedIndices = new int[kept];
     System.arraycopy(keptValues, 0, packedValues, 0, kept);
     System.arraycopy(keptIndices, 0, packedIndices, 0, kept);
     converted = new SparseInstance(instance.weight(), packedValues, packedIndices, total);
   } else {
     for (int att = 0; att < getInputFormat().numAttributes(); att++) {
       final boolean normalize =
           instance.attribute(att).isNumeric()
               && !Utils.isMissingValue(raw[att])
               && getInputFormat().classIndex() != att;
       if (!normalize) {
         continue;
       }
       final double min = m_MinArray[att];
       if (Double.isNaN(min) || m_MaxArray[att] == min) {
         raw[att] = 0;
       } else {
         raw[att] = (raw[att] - min) / (m_MaxArray[att] - min) * m_Scale + m_Translation;
         if (Double.isNaN(raw[att])) {
           throw new Exception(
               "A NaN value was generated while normalizing " + instance.attribute(att).name());
         }
       }
     }
     converted = new DenseInstance(instance.weight(), raw);
   }

   converted.setDataset(instance.dataset());
   push(converted);
 }
Пример #13
0
  /**
   * Converts a single instance by applying the expression to its selected values. The converted
   * instance is added to the end of the output queue.
   *
   * <p>A value is transformed when its attribute lies in {@code m_SelectCols}, is numeric, is not
   * the class attribute and the value is not missing. The transformed value is the result of
   * {@code eval} with the symbols A, MAX, MIN, MEAN, SD, COUNT, SUM and SUMSQUARED bound to the
   * current value and the numeric statistics of the attribute. A NaN or infinite result is
   * replaced by a missing value and a warning is printed to standard error.
   *
   * <p>Every value that is not transformed is carried over unchanged, for sparse and dense
   * instances alike. A sparse input yields a sparse output that stores only non-zero results.
   *
   * @param instance the instance to convert
   * @throws Exception if instance cannot be converted
   */
  private void convertInstance(Instance instance) throws Exception {
    Instance inst;
    // Kept as a raw map: the parameter type expected by eval(...) is declared outside this
    // method.
    HashMap symbols = new HashMap(5);
    double[] vals = instance.toDoubleArray();

    if (instance instanceof SparseInstance) {
      int numAtts = instance.numAttributes();
      double[] newVals = new double[numAtts];
      int[] newIndices = new int[numAtts];
      int ind = 0;
      for (int j = 0; j < numAtts; j++) {
        // Start from the original value. Selected columns that cannot be transformed
        // (non-numeric, missing, class attribute) must be kept as they are; not storing them
        // would silently turn them into 0 in the sparse result.
        double value = vals[j];
        if (m_SelectCols.isInRange(j)
            && instance.attribute(j).isNumeric()
            && !Utils.isMissingValue(vals[j])
            && getInputFormat().classIndex() != j) {
          putAttributeSymbols(symbols, j, vals[j]);
          value = eval(symbols);
          if (Double.isNaN(value) || Double.isInfinite(value)) {
            System.err.println("WARNING:Error in evaluating the expression: missing value set");
            value = Utils.missingValue();
          }
        }
        // Only non-zero entries are stored explicitly in the sparse result.
        if (value != 0.0) {
          newVals[ind] = value;
          newIndices[ind] = j;
          ind++;
        }
      }
      // Trim the scratch arrays down to the number of entries actually kept.
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, numAtts);
    } else {
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (m_SelectCols.isInRange(j)
            && instance.attribute(j).isNumeric()
            && !Utils.isMissingValue(vals[j])
            && getInputFormat().classIndex() != j) {
          putAttributeSymbols(symbols, j, vals[j]);
          vals[j] = eval(symbols);
          if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
            System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
            vals[j] = Utils.missingValue();
          }
        }
      }
      inst = new DenseInstance(instance.weight(), vals);
    }

    inst.setDataset(instance.dataset());
    push(inst);
  }

  /**
   * Binds the expression symbols for one attribute value: A is the value itself, the remaining
   * symbols come from the numeric statistics stored in {@code m_attStats} for the attribute.
   *
   * @param symbols the symbol table to fill; existing bindings with the same names are replaced
   * @param attIndex index of the attribute whose statistics are used
   * @param current the value of the attribute in the instance being converted
   */
  private void putAttributeSymbols(HashMap symbols, int attIndex, double current) {
    symbols.put("A", Double.valueOf(current));
    symbols.put("MAX", Double.valueOf(m_attStats[attIndex].numericStats.max));
    symbols.put("MIN", Double.valueOf(m_attStats[attIndex].numericStats.min));
    symbols.put("MEAN", Double.valueOf(m_attStats[attIndex].numericStats.mean));
    symbols.put("SD", Double.valueOf(m_attStats[attIndex].numericStats.stdDev));
    symbols.put("COUNT", Double.valueOf(m_attStats[attIndex].numericStats.count));
    symbols.put("SUM", Double.valueOf(m_attStats[attIndex].numericStats.sum));
    symbols.put("SUMSQUARED", Double.valueOf(m_attStats[attIndex].numericStats.sumSq));
  }
  /**
   * Walks through every instance of {@code dados} and prints information about it.
   *
   * <p>For each instance the method prints its values as a double array, then each attribute
   * name with its numeric value and the label at that index, and finally the old value of
   * attribute 0. It then sets attribute 0 of the instance to 1 and prints the label of the new
   * value. Nothing happens when {@code dados} is {@code null}.
   */
  public void percorrerDados() {
    if (dados == null) {
      return;
    }

    // Each example in the data is an Instance; the Instances object is a collection of them and
    // offers methods to reach every example of the dataset.
    final int total = dados.numInstances();
    for (int i = 0; i < total; i++) {
      final Instance exemplo = dados.instance(i);

      // An Instance is made of the attributes of the dataset; all of them can be obtained at
      // once as an array of doubles.
      System.out.println("Valores para o exemplo " + i);
      System.out.print("Array de atributos: ");
      for (double atributo : exemplo.toDoubleArray()) {
        System.out.print(atributo + " ");
      }
      System.out.println();

      // Visit every attribute to print its name, numeric value and the label at that index.
      for (int j = 0; j < exemplo.numAttributes(); j++) {
        final Attribute atual = exemplo.attribute(j);
        final double valor = exemplo.value(atual);
        System.out.println(
            "Valor do atributo " + atual.name() + ":" + valor + " - " + atual.value((int) valor));
      }
      System.out.println();

      // Change attribute 0 to another value, printing it before and after.
      final Attribute primeiro = exemplo.attribute(0);
      final double antigo = exemplo.value(primeiro);
      System.out.println("Valor antigo, em double: " + antigo);
      System.out.println("Valor antigo, em nome: " + primeiro.value((int) antigo));

      final int novoValor = 1;
      exemplo.setValue(primeiro, novoValor);

      final double atualizado = exemplo.value(primeiro);
      System.out.println("Valor novo, em nome: " + primeiro.value((int) atualizado));

      System.out.println();
      System.out.println();
    }
  }