/**
   * Convert an input instance
   *
   * @param current the input instance to convert
   * @return a transformed instance
   * @throws Exception if a problem occurs
   */
  protected Instance convertInstance(Instance current) throws Exception {
    double[] vals = new double[getOutputFormat().numAttributes()];
    int index = 0;
    for (int j = 0; j < current.numAttributes(); j++) {
      if (j != current.classIndex()) {
        if (m_unchanged != null && m_unchanged.attribute(current.attribute(j).name()) != null) {
          vals[index++] = current.value(j);
        } else {
          Estimator[] estForAtt = m_estimatorLookup.get(current.attribute(j).name());
          for (int k = 0; k < current.classAttribute().numValues(); k++) {
            if (current.isMissing(j)) {
              vals[index++] = Utils.missingValue();
            } else {
              double e = estForAtt[k].getProbability(current.value(j));
              vals[index++] = e;
            }
          }
        }
      }
    }

    vals[vals.length - 1] = current.classValue();
    DenseInstance instNew = new DenseInstance(current.weight(), vals);

    return instNew;
  }
Beispiel #2
0
  public double updateWeights(Instance inst, double learningRatio) {
    // Normalize Instance
    double[] normalizedInstance = normalizedInstance(inst);
    // Compute the Normalized Prediction of Perceptron
    double normalizedPredict = prediction(normalizedInstance);
    double normalizedY = normalizeActualClassValue(inst);
    double sumWeights = 0.0;
    double delta = normalizedY - normalizedPredict;

    for (int j = 0; j < inst.numAttributes() - 1; j++) {
      int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
      if (inst.attribute(instAttIndex).isNumeric()) {
        this.weightAttribute[j] += learningRatio * delta * normalizedInstance[j];
        sumWeights += Math.abs(this.weightAttribute[j]);
      }
    }
    this.weightAttribute[inst.numAttributes() - 1] += learningRatio * delta;
    sumWeights += Math.abs(this.weightAttribute[inst.numAttributes() - 1]);
    if (sumWeights > inst.numAttributes()) { // Lasso regression
      for (int j = 0; j < inst.numAttributes() - 1; j++) {
        int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
        if (inst.attribute(instAttIndex).isNumeric()) {
          this.weightAttribute[j] = this.weightAttribute[j] / sumWeights;
        }
      }
      this.weightAttribute[inst.numAttributes() - 1] =
          this.weightAttribute[inst.numAttributes() - 1] / sumWeights;
    }

    return denormalizedPrediction(normalizedPredict);
  }
Beispiel #3
0
  /**
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   *
   * @param instance the instance to convert
   */
  private void convertInstance(Instance instance) {
    Instance inst = null;

    if (instance instanceof SparseInstance) {
      double[] newVals = new double[instance.numAttributes()];
      int[] newIndices = new int[instance.numAttributes()];
      double[] vals = instance.toDoubleArray();
      int ind = 0;
      for (int j = 0; j < instance.numAttributes(); j++) {
        double value;
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {

          value = vals[j] - m_Means[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
            ind++;
          }
        } else {
          value = vals[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
            ind++;
          }
        }
      }
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
    } else {
      double[] vals = instance.toDoubleArray();
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {
          vals[j] = (vals[j] - m_Means[j]);
        }
      }
      inst = new Instance(instance.weight(), vals);
    }

    inst.setDataset(instance.dataset());

    push(inst);
  }
Beispiel #4
0
  /**
   * Metoda zwracaj??ca list?? warto??ci dla danej instancji.
   *
   * @param inst Analizowana instancja
   * @param attrX Nazwa atrybutu dla osi X.
   * @param attrY Nazwa atrybutu dla osi Y
   * @return Lista dwuelementowa z warto??ciami kolejno dla osi X i Y.
   */
  public List<Number> getValueForInstance(Instance inst, String attrX, String attrY) {
    List<Number> value = new ArrayList<Number>();
    Attribute atX = inst.attribute(getAttributeNames().indexOf(attrX));
    Attribute atY = inst.attribute(getAttributeNames().indexOf(attrY));

    value.add(inst.value(atX));
    value.add(inst.value(atY));
    return value;
  }
Beispiel #5
0
  /**
   * Convert an <code>Instance</code> to an array of values that matches the format of the mining
   * schema. First maps raw attribute values and then applies rules for missing values, outliers
   * etc.
   *
   * @param inst the <code>Instance</code> to convert
   * @param miningSchema the mining schema incoming instance attributes
   * @return an array of doubles that are values from the incoming Instances, correspond to the
   *     format of the mining schema and have had missing values, outliers etc. dealt with.
   * @throws Exception if something goes wrong
   */
  public double[] instanceToSchema(Instance inst, MiningSchema miningSchema) throws Exception {
    Instances miningSchemaI = miningSchema.getMiningSchemaAsInstances();

    // allocate enough space for both mining schema fields and any derived fields
    double[] result = new double[miningSchema.getFieldsAsInstances().numAttributes()];

    // Copy over the values
    for (int i = 0; i < miningSchemaI.numAttributes(); i++) {
      // if (miningSchemaI.attribute(i).isNumeric()) {
      result[i] = inst.value(m_fieldsMap[i]);
      if (miningSchemaI.attribute(i).isNominal() || miningSchemaI.attribute(i).isString()) {
        // If not missing, look up the index of this incoming categorical value in
        // the mining schema
        if (!Utils.isMissingValue(inst.value(m_fieldsMap[i]))) {
          int[] valueMap = m_nominalValueMaps[i];
          int index = valueMap[(int) inst.value(m_fieldsMap[i])];
          String incomingAttValue =
              inst.attribute(m_fieldsMap[i]).value((int) inst.value(m_fieldsMap[i]));
          /*int index = miningSchemaI.attribute(i).indexOfValue(incomingAttValue); */
          if (index >= 0) {
            result[i] = index;
          } else {
            // set this to "unknown" (-1) for nominal valued attributes
            result[i] = UNKNOWN_NOMINAL_VALUE;
            String warningString =
                "[MappingInfo] WARNING: Can't match nominal value " + incomingAttValue;
            if (m_log != null) {
              m_log.logMessage(warningString);
            } else {
              System.err.println(warningString);
            }
          }
        }
      }
    }

    // Now deal with missing values and outliers...
    miningSchema.applyMissingAndOutlierTreatments(result);
    //    printInst(result);

    // now fill in any derived values
    ArrayList<DerivedFieldMetaInfo> derivedFields = miningSchema.getDerivedFields();
    for (int i = 0; i < derivedFields.size(); i++) {
      DerivedFieldMetaInfo temp = derivedFields.get(i);
      //      System.err.println("Applying : " + temp);
      double r = temp.getDerivedValue(result);
      result[i + miningSchemaI.numAttributes()] = r;
    }

    /*System.err.print("==> ");
    for (int i = 0; i < result.length; i++) {
      System.err.print(" " + result[i]);
    }
    System.err.println();*/

    return result;
  }
Beispiel #6
0
  /**
   * Returns index of subset instance is assigned to. Returns -1 if instance is assigned to more
   * than one subset.
   *
   * @exception Exception if something goes wrong
   */
  public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex)) return -1;
    else {
      if (instance.attribute(m_attIndex).isNominal()) {
        if ((int) m_splitPoint == (int) instance.value(m_attIndex)) return 0;
        else return 1;
      } else if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint)) return 0;
      else return 1;
    }
  }
 private List<Object> convert(Instance instance) {
   List<Object> data = new LinkedList<Object>();
   for (int i = 0; i < isNumeric.length; i++) {
     if (isNumeric[i]) {
       data.add(instance.value(i));
     } else {
       data.add(instance.attribute(i).value((int) instance.value(i)));
     }
   }
   return data;
 }
  /**
   * Input an instance for filtering. Ordinarily the instance is processed and made available for
   * output immediately. Some filters require all instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input structure has been defined.
   */
  @Override
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (getOutputFormat().numAttributes() == 0) {
      return false;
    }

    if (m_selectedAttributes.length == 0) {
      push(instance);
    } else {
      double vals[] = new double[getOutputFormat().numAttributes()];
      for (int i = 0; i < instance.numAttributes(); i++) {
        double currentV = instance.value(i);

        if (!m_selectedCols.isInRange(i)) {
          vals[i] = currentV;
        } else {
          if (currentV == Utils.missingValue()) {
            vals[i] = currentV;
          } else {
            String currentS = instance.attribute(i).value((int) currentV);
            String replace =
                m_ignoreCase ? m_renameMap.get(currentS.toLowerCase()) : m_renameMap.get(currentS);
            if (replace == null) {
              vals[i] = currentV;
            } else {
              vals[i] = getOutputFormat().attribute(i).indexOfValue(replace);
            }
          }
        }
      }

      Instance inst = null;
      if (instance instanceof SparseInstance) {
        inst = new SparseInstance(instance.weight(), vals);
      } else {
        inst = new DenseInstance(instance.weight(), vals);
      }
      inst.setDataset(getOutputFormat());
      copyValues(inst, false, instance.dataset(), getOutputFormat());
      inst.setDataset(getOutputFormat());
      push(inst);
    }

    return true;
  }
  /**
   * Input an instance for filtering. The instance is processed and made available for output
   * immediately.
   *
   * @param instance the input instance.
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input structure has been defined.
   */
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isOutputFormatDefined()) {
      Instance newInstance = (Instance) instance.copy();

      // make sure that we get the right indexes set for the converted
      // string attributes when operating on a second batch of instances
      for (int i = 0; i < newInstance.numAttributes(); i++) {
        if (newInstance.attribute(i).isString()
            && !newInstance.isMissing(i)
            && m_AttIndices.isInRange(i)) {
          Attribute outAtt = getOutputFormat().attribute(newInstance.attribute(i).name());
          String inVal = newInstance.stringValue(i);
          int outIndex = outAtt.indexOfValue(inVal);
          if (outIndex < 0) {
            newInstance.setMissing(i);
          } else {
            newInstance.setValue(i, outIndex);
          }
        }
      }
      push(newInstance);
      return true;
    }

    bufferInput(instance);
    return false;
  }
Beispiel #10
0
  /**
   * Determines whether an instance passes the test.
   *
   * @param inst the instance
   * @return true if the instance satisfies the test, false otherwise
   * @exception Exception if something goes wrong
   */
  public boolean passesTest(Instance inst) throws Exception {
    if (inst.isMissing(m_AttIndex)) return false; // missing values fail

    boolean isNominal = inst.attribute(m_AttIndex).isNominal();
    double attribVal = inst.value(m_AttIndex);
    if (!m_Not) {
      if (isNominal) {
        if (((int) attribVal) != ((int) m_Split)) return false;
      } else if (attribVal >= m_Split) return false;
    } else {
      if (isNominal) {
        if (((int) attribVal) == ((int) m_Split)) return false;
      } else if (attribVal < m_Split) return false;
    }
    return true;
  }
Beispiel #11
0
  /**
   * processes the given instance (may change the provided instance) and returns the modified
   * version.
   *
   * @param instance the instance to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   */
  protected Instance process(Instance instance) throws Exception {
    Instance result;
    Attribute att;
    double[] values;
    int i;

    // adjust indices
    values = new double[instance.numAttributes()];
    for (i = 0; i < instance.numAttributes(); i++) {
      att = instance.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
        values[i] = instance.value(i);
      else values[i] = m_NewOrder[i][(int) instance.value(i)];
    }

    // create new instance
    result = new DenseInstance(instance.weight(), values);

    return result;
  }
Beispiel #12
0
  @Override
  public void updateNode(Instance inst) throws Exception {
    super.updateDistribution(inst);

    for (int i = 0; i < inst.numAttributes(); i++) {
      Attribute a = inst.attribute(i);
      if (i != inst.classIndex()) {
        ConditionalSufficientStats stats = m_nodeStats.get(a.name());
        if (stats == null) {
          if (a.isNumeric()) {
            stats = new GaussianConditionalSufficientStats();
          } else {
            stats = new NominalConditionalSufficientStats();
          }
          m_nodeStats.put(a.name(), stats);
        }

        stats.update(
            inst.value(a), inst.classAttribute().value((int) inst.classValue()), inst.weight());
      }
    }
  }
  /**
   * processes the given instance (may change the provided instance) and returns the modified
   * version.
   *
   * @param instance the instance to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   */
  @Override
  protected Instance process(Instance instance) throws Exception {
    Instance result;
    int i;
    double val;
    double factor;

    result = (Instance) instance.copy();

    if (m_Decimals > -1) {
      factor = StrictMath.pow(10, m_Decimals);
    } else {
      factor = 1;
    }

    for (i = 0; i < result.numAttributes(); i++) {
      // only numeric attributes
      if (!result.attribute(i).isNumeric()) {
        continue;
      }

      // out of range?
      if (!m_Cols.isInRange(i)) {
        continue;
      }

      // skip class?
      if ((result.classIndex() == i) && (!m_IncludeClass)) {
        continue;
      }

      // too small?
      if (result.value(i) < m_MinThreshold) {
        if (getDebug()) {
          System.out.println("Too small: " + result.value(i) + " -> " + m_MinDefault);
        }
        result.setValue(i, m_MinDefault);
      }
      // too big?
      else if (result.value(i) > m_MaxThreshold) {
        if (getDebug()) {
          System.out.println("Too big: " + result.value(i) + " -> " + m_MaxDefault);
        }
        result.setValue(i, m_MaxDefault);
      }
      // too close?
      else if ((result.value(i) - m_CloseTo < m_CloseToTolerance)
          && (m_CloseTo - result.value(i) < m_CloseToTolerance)
          && (result.value(i) != m_CloseTo)) {
        if (getDebug()) {
          System.out.println("Too close: " + result.value(i) + " -> " + m_CloseToDefault);
        }
        result.setValue(i, m_CloseToDefault);
      }

      // decimals?
      if (m_Decimals > -1 && !result.isMissing(i)) {
        val = result.value(i);
        val = StrictMath.round(val * factor) / factor;
        result.setValue(i, val);
      }
    }

    return result;
  }
Beispiel #14
0
  /**
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   *
   * @param instance the instance to convert
   * @throws Exception if instance cannot be converted
   */
  private void convertInstance(Instance instance) throws Exception {

    Instance inst = null;
    HashMap symbols = new HashMap(5);
    if (instance instanceof SparseInstance) {
      double[] newVals = new double[instance.numAttributes()];
      int[] newIndices = new int[instance.numAttributes()];
      double[] vals = instance.toDoubleArray();
      int ind = 0;
      double value;
      for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectCols.isInRange(j)) {
          if (instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(vals[j]))
              && (getInputFormat().classIndex() != j)) {
            symbols.put("A", new Double(vals[j]));
            symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
            symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
            symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
            symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
            symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
            symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
            symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
            value = eval(symbols);
            if (Double.isNaN(value) || Double.isInfinite(value)) {
              System.err.println("WARNING:Error in evaluating the expression: missing value set");
              value = Utils.missingValue();
            }
            if (value != 0.0) {
              newVals[ind] = value;
              newIndices[ind] = j;
              ind++;
            }
          }
        } else {
          value = vals[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
            ind++;
          }
        }
      }
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
    } else {
      double[] vals = instance.toDoubleArray();
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (m_SelectCols.isInRange(j)) {
          if (instance.attribute(j).isNumeric()
              && (!Utils.isMissingValue(vals[j]))
              && (getInputFormat().classIndex() != j)) {
            symbols.put("A", new Double(vals[j]));
            symbols.put("MAX", new Double(m_attStats[j].numericStats.max));
            symbols.put("MIN", new Double(m_attStats[j].numericStats.min));
            symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));
            symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));
            symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));
            symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));
            symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));
            vals[j] = eval(symbols);
            if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {
              System.err.println("WARNING:Error in Evaluation the Expression: missing value set");
              vals[j] = Utils.missingValue();
            }
          }
        }
      }
      inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(instance.dataset());
    push(inst);
  }
Beispiel #15
0
  /**
   * Classifies the given test instance.
   *
   * @param instance the instance to be classified
   * @return the predicted class for the instance
   * @throws Exception if the instance can't be classified
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    double[] dist = new double[m_NumClasses];
    double[] temp = new double[m_NumClasses];
    double weight = 1.0;

    for (int i = 0; i < instance.numAttributes(); i++) {
      if (i != m_ClassIndex && !instance.isMissing(i)) {
        double val = instance.value(i);
        boolean ok = false;
        if (instance.attribute(i).isNumeric()) {
          int k;
          for (k = m_intervalBounds[i].length - 1; k >= 0; k--) {
            if (val > m_intervalBounds[i][k]) {
              for (int j = 0; j < m_NumClasses; j++) {
                if (m_globalCounts[j] > 0) {
                  temp[j] = ((m_counts[i][k][j] + TINY) / (m_globalCounts[j] + TINY));
                }
              }
              ok = true;
              break;
            } else if (val == m_intervalBounds[i][k]) {
              for (int j = 0; j < m_NumClasses; j++) {
                if (m_globalCounts[j] > 0) {
                  temp[j] = ((m_counts[i][k][j] + m_counts[i][k - 1][j]) / 2.0) + TINY;
                  temp[j] /= (m_globalCounts[j] + TINY);
                }
              }
              ok = true;
              break;
            }
          }
          if (!ok) {
            throw new Exception("This shouldn't happen");
          }
        } else { // nominal attribute
          ok = true;
          for (int j = 0; j < m_NumClasses; j++) {
            if (m_globalCounts[j] > 0) {
              temp[j] = ((m_counts[i][(int) val][j] + TINY) / (m_globalCounts[j] + TINY));
            }
          }
        }

        double sum = Utils.sum(temp);
        if (sum <= 0) {
          for (int j = 0; j < temp.length; j++) {
            temp[j] = 1.0 / (double) temp.length;
          }
        } else {
          Utils.normalize(temp, sum);
        }

        if (m_weightByConfidence) {
          weight = weka.core.ContingencyTables.entropy(temp);
          weight = Math.pow(weight, m_bias);
          if (weight < 1.0) {
            weight = 1.0;
          }
        }

        for (int j = 0; j < m_NumClasses; j++) {
          dist[j] += (temp[j] * weight);
        }
      }
    }

    double sum = Utils.sum(dist);
    if (sum <= 0) {
      for (int j = 0; j < dist.length; j++) {
        dist[j] = 1.0 / (double) dist.length;
      }
      return dist;
    } else {
      Utils.normalize(dist, sum);
      return dist;
    }
  }
  /**
   * Accepts and processes a classifier encapsulated in an incremental classifier event
   *
   * @param ce an <code>IncrementalClassifierEvent</code> value
   */
  @Override
  public void acceptClassifier(final IncrementalClassifierEvent ce) {
    try {
      if (ce.getStatus() == IncrementalClassifierEvent.NEW_BATCH) {
        m_throughput = new StreamThroughput(statusMessagePrefix());
        m_throughput.setSamplePeriod(m_statusFrequency);

        // m_eval = new Evaluation(ce.getCurrentInstance().dataset());
        m_eval = new Evaluation(ce.getStructure());
        m_eval.useNoPriors();

        m_dataLegend = new Vector();
        m_reset = true;
        m_dataPoint = new double[0];
        Instances inst = ce.getStructure();
        System.err.println("NEW BATCH");
        m_instanceCount = 0;

        if (m_windowSize > 0) {
          m_window = new LinkedList<Instance>();
          m_windowEval = new Evaluation(ce.getStructure());
          m_windowEval.useNoPriors();
          m_windowedPreds = new LinkedList<double[]>();

          if (m_logger != null) {
            m_logger.logMessage(
                statusMessagePrefix()
                    + "[IncrementalClassifierEvaluator] Chart output using windowed "
                    + "evaluation over "
                    + m_windowSize
                    + " instances");
          }
        }

        /*
         * if (m_logger != null) { m_logger.statusMessage(statusMessagePrefix()
         * + "IncrementalClassifierEvaluator: started processing...");
         * m_logger.logMessage(statusMessagePrefix() +
         * " [IncrementalClassifierEvaluator]" + statusMessagePrefix() +
         * " started processing..."); }
         */
      } else {
        Instance inst = ce.getCurrentInstance();
        if (inst != null) {
          m_throughput.updateStart();
          m_instanceCount++;
          // if (inst.attribute(inst.classIndex()).isNominal()) {
          double[] dist = ce.getClassifier().distributionForInstance(inst);
          double pred = 0;
          if (!inst.isMissing(inst.classIndex())) {
            if (m_outputInfoRetrievalStats) {
              // store predictions so AUC etc can be output.
              m_eval.evaluateModelOnceAndRecordPrediction(dist, inst);
            } else {
              m_eval.evaluateModelOnce(dist, inst);
            }

            if (m_windowSize > 0) {

              m_windowEval.evaluateModelOnce(dist, inst);
              m_window.addFirst(inst);
              m_windowedPreds.addFirst(dist);

              if (m_instanceCount > m_windowSize) {
                // "forget" the oldest prediction
                Instance oldest = m_window.removeLast();

                double[] oldDist = m_windowedPreds.removeLast();
                oldest.setWeight(-oldest.weight());
                m_windowEval.evaluateModelOnce(oldDist, oldest);
                oldest.setWeight(-oldest.weight());
              }
            }
          } else {
            pred = ce.getClassifier().classifyInstance(inst);
          }
          if (inst.classIndex() >= 0) {
            // need to check that the class is not missing
            if (inst.attribute(inst.classIndex()).isNominal()) {
              if (!inst.isMissing(inst.classIndex())) {
                if (m_dataPoint.length < 2) {
                  m_dataPoint = new double[3];
                  m_dataLegend.addElement("Accuracy");
                  m_dataLegend.addElement("RMSE (prob)");
                  m_dataLegend.addElement("Kappa");
                }
                // int classV = (int) inst.value(inst.classIndex());

                if (m_windowSize > 0) {
                  m_dataPoint[1] = m_windowEval.rootMeanSquaredError();
                  m_dataPoint[2] = m_windowEval.kappa();
                } else {
                  m_dataPoint[1] = m_eval.rootMeanSquaredError();
                  m_dataPoint[2] = m_eval.kappa();
                }
                // int maxO = Utils.maxIndex(dist);
                // if (maxO == classV) {
                // dist[classV] = -1;
                // maxO = Utils.maxIndex(dist);
                // }
                // m_dataPoint[1] -= dist[maxO];
              } else {
                if (m_dataPoint.length < 1) {
                  m_dataPoint = new double[1];
                  m_dataLegend.addElement("Confidence");
                }
              }
              double primaryMeasure = 0;
              if (!inst.isMissing(inst.classIndex())) {
                if (m_windowSize > 0) {
                  primaryMeasure = 1.0 - m_windowEval.errorRate();
                } else {
                  primaryMeasure = 1.0 - m_eval.errorRate();
                }
              } else {
                // record confidence as the primary measure
                // (another possibility would be entropy of
                // the distribution, or perhaps average
                // confidence)
                primaryMeasure = dist[Utils.maxIndex(dist)];
              }
              // double [] dataPoint = new double[1];
              m_dataPoint[0] = primaryMeasure;
              // double min = 0; double max = 100;
              /*
               * ChartEvent e = new
               * ChartEvent(IncrementalClassifierEvaluator.this, m_dataLegend,
               * min, max, dataPoint);
               */

              m_ce.setLegendText(m_dataLegend);
              m_ce.setMin(0);
              m_ce.setMax(1);
              m_ce.setDataPoint(m_dataPoint);
              m_ce.setReset(m_reset);
              m_reset = false;
            } else {
              // numeric class
              if (m_dataPoint.length < 1) {
                m_dataPoint = new double[1];
                if (inst.isMissing(inst.classIndex())) {
                  m_dataLegend.addElement("Prediction");
                } else {
                  m_dataLegend.addElement("RMSE");
                }
              }
              if (!inst.isMissing(inst.classIndex())) {
                double update;
                if (!inst.isMissing(inst.classIndex())) {
                  if (m_windowSize > 0) {
                    update = m_windowEval.rootMeanSquaredError();
                  } else {
                    update = m_eval.rootMeanSquaredError();
                  }
                } else {
                  update = pred;
                }
                m_dataPoint[0] = update;
                if (update > m_max) {
                  m_max = update;
                }
                if (update < m_min) {
                  m_min = update;
                }
              }

              m_ce.setLegendText(m_dataLegend);
              m_ce.setMin((inst.isMissing(inst.classIndex()) ? m_min : 0));
              m_ce.setMax(m_max);
              m_ce.setDataPoint(m_dataPoint);
              m_ce.setReset(m_reset);
              m_reset = false;
            }
            notifyChartListeners(m_ce);
          }
          m_throughput.updateEnd(m_logger);
        }

        if (ce.getStatus() == IncrementalClassifierEvent.BATCH_FINISHED || inst == null) {
          if (m_logger != null) {
            m_logger.logMessage(
                "[IncrementalClassifierEvaluator]"
                    + statusMessagePrefix()
                    + " Finished processing.");
          }
          m_throughput.finished(m_logger);

          // save memory if using windowed evaluation for charting
          m_windowEval = null;
          m_window = null;
          m_windowedPreds = null;

          if (m_textListeners.size() > 0) {
            String textTitle = ce.getClassifier().getClass().getName();
            textTitle = textTitle.substring(textTitle.lastIndexOf('.') + 1, textTitle.length());
            String results =
                "=== Performance information ===\n\n"
                    + "Scheme:   "
                    + textTitle
                    + "\n"
                    + "Relation: "
                    + m_eval.getHeader().relationName()
                    + "\n\n"
                    + m_eval.toSummaryString();
            if (m_eval.getHeader().classIndex() >= 0
                && m_eval.getHeader().classAttribute().isNominal()
                && (m_outputInfoRetrievalStats)) {
              results += "\n" + m_eval.toClassDetailsString();
            }

            if (m_eval.getHeader().classIndex() >= 0
                && m_eval.getHeader().classAttribute().isNominal()) {
              results += "\n" + m_eval.toMatrixString();
            }
            textTitle = "Results: " + textTitle;
            TextEvent te = new TextEvent(this, results, textTitle);
            notifyTextListeners(te);
          }
        }
      }
    } catch (Exception ex) {
      if (m_logger != null) {
        m_logger.logMessage(
            "[IncrementalClassifierEvaluator]"
                + statusMessagePrefix()
                + " Error processing prediction "
                + ex.getMessage());
        m_logger.statusMessage(
            statusMessagePrefix() + "ERROR: problem processing prediction (see log for details)");
      }
      ex.printStackTrace();
      stop();
    }
  }
  /**
   * Métod que percorre todos os dados pertencentes à Instances dados. Imprimindo as informações da
   * base.
   */
  public void percorrerDados() {

    if (dados != null) {
      /*Cada exemplo contido nos dados é identificado no Weka através da
       * classe Instance. Assim, o objeto dados, do tipo Instances, é uma coleçao de
       * Instance. Voce vai ter metodos que possibilitam acessar todos os exemplos
       * presentes na base.
       * */
      // Percorre todos os exemples presentes na base
      for (int i = 0; i < dados.numInstances(); i++) {
        // Método para obter a instance de número 1.
        // Voce pode pegar a primeira e a ultima instance tb.
        // Além de poder deletar entre outras coisas.
        Instance exemplo = dados.instance(i);

        /*Uma Intance é formada por vários atributos, que são os atributos
         * da base. Voce pode percorrer todos os atributos Instace, ou pode
         * "setar" (set) ou pegar (get) um atributo especifico.
         * */

        // É possível transforma todos os atributos em um array de double

        double[] arrayAtributos = exemplo.toDoubleArray();

        System.out.println("Valores para o exemplo " + i);
        System.out.print("Array de atributos: ");
        for (int j = 0; j < arrayAtributos.length; j++) {
          System.out.print(arrayAtributos[j] + " ");
        }

        System.out.println();
        // Percorrendo todos os atributos para se obter informacoes sobre eles
        for (int j = 0; j < exemplo.numAttributes(); j++) {
          Attribute att = exemplo.attribute(j);
          double valor = exemplo.value(att);
          System.out.println(
              "Valor do atributo " + att.name() + ":" + valor + " - " + att.value((int) valor));
        }

        System.out.println();
        // Mudando o valor do atributo 0, para um valor possível do atributos
        // Obtendo as informacoes do atributo 0;
        Attribute att = exemplo.attribute(0);
        // Obtendo o valor do atributo 0.
        double valorDoAtributo0 = exemplo.value(att);

        System.out.println("Valor antigo, em double: " + valorDoAtributo0);
        System.out.println("Valor antigo, em nome: " + att.value((int) valorDoAtributo0));

        int novoValor = 1;
        exemplo.setValue(att, novoValor);

        valorDoAtributo0 = exemplo.value(att);

        System.out.println("Valor novo, em nome: " + att.value((int) valorDoAtributo0));

        System.out.println();
        System.out.println();
      }
    }
  }
  protected void tokenizeInstance(Instance instance, boolean updateDictionary) {
    if (m_inputVector == null) {
      m_inputVector = new LinkedHashMap<String, Count>();
    } else {
      m_inputVector.clear();
    }

    if (m_useStopList && m_stopwords == null) {
      m_stopwords = new Stopwords();
      try {
        if (getStopwords().exists() && !getStopwords().isDirectory()) {
          m_stopwords.read(getStopwords());
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }

    for (int i = 0; i < instance.numAttributes(); i++) {
      if (instance.attribute(i).isString() && !instance.isMissing(i)) {
        m_tokenizer.tokenize(instance.stringValue(i));

        while (m_tokenizer.hasMoreElements()) {
          String word = m_tokenizer.nextElement();
          if (m_lowercaseTokens) {
            word = word.toLowerCase();
          }

          word = m_stemmer.stem(word);

          if (m_useStopList) {
            if (m_stopwords.is(word)) {
              continue;
            }
          }

          Count docCount = m_inputVector.get(word);
          if (docCount == null) {
            m_inputVector.put(word, new Count(instance.weight()));
          } else {
            docCount.m_count += instance.weight();
          }
        }
      }
    }

    if (updateDictionary) {
      int classValue = (int) instance.classValue();
      LinkedHashMap<String, Count> dictForClass = m_probOfWordGivenClass.get(classValue);

      // document normalization
      double iNorm = 0;
      double fv = 0;

      if (m_normalize) {
        for (Count c : m_inputVector.values()) {
          // word counts or bag-of-words?
          fv = (m_wordFrequencies) ? c.m_count : 1.0;
          iNorm += Math.pow(Math.abs(fv), m_lnorm);
        }
        iNorm = Math.pow(iNorm, 1.0 / m_lnorm);
      }

      for (Map.Entry<String, Count> feature : m_inputVector.entrySet()) {
        String word = feature.getKey();
        double freq = (m_wordFrequencies) ? feature.getValue().m_count : 1.0;
        // double freq = (feature.getValue().m_count / iNorm * m_norm);

        if (m_normalize) {
          freq /= (iNorm * m_norm);
        }

        // check all classes
        for (int i = 0; i < m_data.numClasses(); i++) {
          LinkedHashMap<String, Count> dict = m_probOfWordGivenClass.get(i);
          if (dict.get(word) == null) {
            dict.put(word, new Count(m_leplace));
            m_wordsPerClass[i] += m_leplace;
          }
        }

        Count dictCount = dictForClass.get(word);
        /*
         * if (dictCount == null) { dictForClass.put(word, new Count(m_leplace +
         * freq)); m_wordsPerClass[classValue] += (m_leplace + freq); } else {
         */
        dictCount.m_count += freq;
        m_wordsPerClass[classValue] += freq;
        // }
      }

      pruneDictionary();
    }
  }
Beispiel #19
0
 /**
  * Convert a single instance over. The converted instance is added to the end of the output queue.
  *
  * @param instance the instance to convert
  * @throws Exception if conversion fails
  */
 protected void convertInstance(Instance instance) throws Exception {
   Instance inst = null;
   if (instance instanceof SparseInstance) {
     double[] newVals = new double[instance.numAttributes()];
     int[] newIndices = new int[instance.numAttributes()];
     double[] vals = instance.toDoubleArray();
     int ind = 0;
     for (int j = 0; j < instance.numAttributes(); j++) {
       double value;
       if (instance.attribute(j).isNumeric()
           && (!Utils.isMissingValue(vals[j]))
           && (getInputFormat().classIndex() != j)) {
         if (Double.isNaN(m_MinArray[j]) || (m_MaxArray[j] == m_MinArray[j])) {
           value = 0;
         } else {
           value =
               (vals[j] - m_MinArray[j]) / (m_MaxArray[j] - m_MinArray[j]) * m_Scale
                   + m_Translation;
           if (Double.isNaN(value)) {
             throw new Exception(
                 "A NaN value was generated "
                     + "while normalizing "
                     + instance.attribute(j).name());
           }
         }
         if (value != 0.0) {
           newVals[ind] = value;
           newIndices[ind] = j;
           ind++;
         }
       } else {
         value = vals[j];
         if (value != 0.0) {
           newVals[ind] = value;
           newIndices[ind] = j;
           ind++;
         }
       }
     }
     double[] tempVals = new double[ind];
     int[] tempInd = new int[ind];
     System.arraycopy(newVals, 0, tempVals, 0, ind);
     System.arraycopy(newIndices, 0, tempInd, 0, ind);
     inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
   } else {
     double[] vals = instance.toDoubleArray();
     for (int j = 0; j < getInputFormat().numAttributes(); j++) {
       if (instance.attribute(j).isNumeric()
           && (!Utils.isMissingValue(vals[j]))
           && (getInputFormat().classIndex() != j)) {
         if (Double.isNaN(m_MinArray[j]) || (m_MaxArray[j] == m_MinArray[j])) {
           vals[j] = 0;
         } else {
           vals[j] =
               (vals[j] - m_MinArray[j]) / (m_MaxArray[j] - m_MinArray[j]) * m_Scale
                   + m_Translation;
           if (Double.isNaN(vals[j])) {
             throw new Exception(
                 "A NaN value was generated "
                     + "while normalizing "
                     + instance.attribute(j).name());
           }
         }
       }
     }
     inst = new DenseInstance(instance.weight(), vals);
   }
   inst.setDataset(instance.dataset());
   push(inst);
 }