/**
   * Process a classifier's prediction for an instance and update a set of plotting instances and
   * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data
   * points have automatic shape type assignment; classifier error data points have box shape type).
   * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize
   * stores the error (which is later converted to shape size values).
   *
   * @param toPredict the actual data point
   * @param classifier the classifier
   * @param eval the evaluation object to use for evaluating the classifier on the instance to
   *     predict
   * @see #m_PlotShapes
   * @see #m_PlotSizes
   * @see #m_PlotInstances
   */
  public void process(Instance toPredict, Classifier classifier, Evaluation eval) {
    double pred;
    double[] values;
    int i;

    try {
      pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict);

      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
        toPredict =
            ((weka.classifiers.misc.InputMappedClassifier) classifier)
                .constructMappedInstance(toPredict);
      }

      if (!m_SaveForVisualization) return;

      if (m_PlotInstances != null) {
        values = new double[m_PlotInstances.numAttributes()];
        for (i = 0; i < m_PlotInstances.numAttributes(); i++) {
          if (i < toPredict.classIndex()) {
            values[i] = toPredict.value(i);
          } else if (i == toPredict.classIndex()) {
            values[i] = pred;
            values[i + 1] = toPredict.value(i);
            i++;
          } else {
            values[i] = toPredict.value(i - 1);
          }
        }

        m_PlotInstances.add(new DenseInstance(1.0, values));

        if (toPredict.classAttribute().isNominal()) {
          if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) {
            m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
          } else if (pred != toPredict.classValue()) {
            // set to default error point shape
            m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE));
          } else {
            // otherwise set to constant (automatically assigned) point shape
            m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
          }
          m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE));
        } else {
          // store the error (to be converted to a point size later)
          Double errd = null;
          if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) {
            errd = new Double(pred - toPredict.classValue());
            m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
          } else {
            // missing shape if actual class not present or prediction is missing
            m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
          }
          m_PlotSizes.addElement(errd);
        }
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
Пример #2
0
  private double calcNodeScorePlain(int nNode) {
    Instances instances = m_BayesNet.m_Instances;
    ParentSet oParentSet = m_BayesNet.getParentSet(nNode);

    // determine cardinality of parent set & reserve space for frequency counts
    int nCardinality = oParentSet.getCardinalityOfParents();
    int numValues = instances.attribute(nNode).numValues();
    int[] nCounts = new int[nCardinality * numValues];

    // initialize (don't need this?)
    for (int iParent = 0; iParent < nCardinality * numValues; iParent++) {
      nCounts[iParent] = 0;
    }

    // estimate distributions
    Enumeration enumInsts = instances.enumerateInstances();

    while (enumInsts.hasMoreElements()) {
      Instance instance = (Instance) enumInsts.nextElement();

      // updateClassifier;
      double iCPT = 0;

      for (int iParent = 0; iParent < oParentSet.getNrOfParents(); iParent++) {
        int nParent = oParentSet.getParent(iParent);

        iCPT = iCPT * instances.attribute(nParent).numValues() + instance.value(nParent);
      }

      nCounts[numValues * ((int) iCPT) + (int) instance.value(nNode)]++;
    }

    return calcScoreOfCounts(nCounts, nCardinality, numValues, instances);
  } // CalcNodeScore
Пример #3
0
        @Override
        boolean evaluate(
            Instance inst,
            int lhsAttIndex,
            String rhsOperand,
            double numericOperand,
            Pattern regexPattern,
            boolean rhsIsAttribute,
            int rhsAttIndex) {

          if (rhsIsAttribute) {
            if (inst.isMissing(lhsAttIndex) && inst.isMissing(rhsAttIndex)) {
              return true;
            }
            if (inst.isMissing(lhsAttIndex) || inst.isMissing(rhsAttIndex)) {
              return false;
            }
            return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
          }

          if (inst.isMissing(lhsAttIndex)) {
            return false;
          }
          return (Utils.eq(inst.value(lhsAttIndex), numericOperand));
        }
Пример #4
0
  /** Computes average class values for each attribute and value */
  private void computeAverageClassValues() {

    double totalCounts, sum;
    Instance instance;
    double[] counts;

    double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
    m_Indices = new int[getInputFormat().numAttributes()][0];
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (att.isNominal()) {
        avgClassValues[j] = new double[att.numValues()];
        counts = new double[att.numValues()];
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
          instance = getInputFormat().instance(i);
          if (!instance.classIsMissing() && (!instance.isMissing(j))) {
            counts[(int) instance.value(j)] += instance.weight();
            avgClassValues[j][(int) instance.value(j)] += instance.weight() * instance.classValue();
          }
        }
        sum = Utils.sum(avgClassValues[j]);
        totalCounts = Utils.sum(counts);
        if (Utils.gr(totalCounts, 0)) {
          for (int k = 0; k < att.numValues(); k++) {
            if (Utils.gr(counts[k], 0)) {
              avgClassValues[j][k] /= counts[k];
            } else {
              avgClassValues[j][k] = sum / totalCounts;
            }
          }
        }
        m_Indices[j] = Utils.sort(avgClassValues[j]);
      }
    }
  }
  /**
   * Compare two datasets to see if they differ.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2) throws Exception {

    if (m_CheckHeader) {
      if (!data2.equalHeaders(data1)) {
        throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
      }
    }
    if (!(data2.numInstances() == data1.numInstances())) {
      throw new Exception("number of instances has changed");
    }
    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);
      for (int j = 0; j < orig.numAttributes(); j++) {
        if (orig.isMissing(j)) {
          if (!copy.isMissing(j)) {
            throw new Exception("instances have changed");
          }
        } else {
          if (m_CompareValuesAsString) {
            if (!orig.toString(j).equals(copy.toString(j))) {
              throw new Exception("instances have changed");
            }
          } else {
            if (Math.abs(orig.value(j) - copy.value(j)) > m_MaxDiffValues) {
              throw new Exception("instances have changed");
            }
          }
        }
        if (Math.abs(orig.weight() - copy.weight()) > m_MaxDiffWeights) {
          throw new Exception("instance weights have changed");
        }
      }
    }
  }
  protected void searchMedian(Instances instances) {
    medians = new double[instances.numAttributes()];
    imputations = new int[instances.numAttributes()];

    for (int j = 0; j < instances.numAttributes(); ++j) {
      int numPresentValues = 0;
      if (instances.attribute(j).isNumeric()) {
        double[] values = new double[instances.numInstances()];
        for (int i = 0; i < instances.numInstances(); ++i) {
          Instance current = instances.get(i);
          if (Utils.isMissingValue(current.value(j)) == false) {
            values[numPresentValues] = current.value(j);
            numPresentValues += 1;
          }
        }
        if (numPresentValues > 0) {
          double[] goodValues = Arrays.copyOf(values, numPresentValues);
          Median median = new Median();
          medians[j] = median.evaluate(goodValues);
        }
      }
    }

    for (int j = 0; j < instances.numAttributes(); ++j) {
      if (instances.attribute(j).isNumeric()) {
        Conversion.log(
            "OK",
            "Impute Numeric",
            "Attribute " + instances.attribute(j) + " - Median: " + medians[j]);
      }
    }
  }
  /**
   * Convert an input instance
   *
   * @param current the input instance to convert
   * @return a transformed instance
   * @throws Exception if a problem occurs
   */
  protected Instance convertInstance(Instance current) throws Exception {
    double[] vals = new double[getOutputFormat().numAttributes()];
    int index = 0;
    for (int j = 0; j < current.numAttributes(); j++) {
      if (j != current.classIndex()) {
        if (m_unchanged != null && m_unchanged.attribute(current.attribute(j).name()) != null) {
          vals[index++] = current.value(j);
        } else {
          Estimator[] estForAtt = m_estimatorLookup.get(current.attribute(j).name());
          for (int k = 0; k < current.classAttribute().numValues(); k++) {
            if (current.isMissing(j)) {
              vals[index++] = Utils.missingValue();
            } else {
              double e = estForAtt[k].getProbability(current.value(j));
              vals[index++] = e;
            }
          }
        }
      }
    }

    vals[vals.length - 1] = current.classValue();
    DenseInstance instNew = new DenseInstance(current.weight(), vals);

    return instNew;
  }
Пример #8
0
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] probs = new double[instance.numClasses()];
    int attIndex;

    for (int j = 0; j < instance.numClasses(); j++) {
      probs[j] = 1;
      Enumeration<Attribute> enumAtts = instance.enumerateAttributes();
      attIndex = 0;
      while (enumAtts.hasMoreElements()) {
        Attribute attribute = enumAtts.nextElement();
        if (!instance.isMissing(attribute)) {
          if (attribute.isNominal()) {
            probs[j] *= m_Counts[j][attIndex][(int) instance.value(attribute)];
          } else {
            probs[j] *=
                normalDens(instance.value(attribute), m_Means[j][attIndex], m_Devs[j][attIndex]);
          }
        }
        attIndex++;
      }
      probs[j] *= m_Priors[j];
    }

    // Normalize probabilities
    Utils.normalize(probs);

    return probs;
  }
Пример #9
0
  /**
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   *
   * @param instance the instance to convert
   */
  protected void convertInstance(Instance instance) {

    int index = 0;
    double[] vals = new double[outputFormatPeek().numAttributes()];
    // Copy and convert the values
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
      if (m_DiscretizeCols.isInRange(i) && getInputFormat().attribute(i).isNumeric()) {
        int j;
        double currentVal = instance.value(i);
        if (m_CutPoints[i] == null) {
          if (instance.isMissing(i)) {
            vals[index] = Utils.missingValue();
          } else {
            vals[index] = 0;
          }
          index++;
        } else {
          if (!m_MakeBinary) {
            if (instance.isMissing(i)) {
              vals[index] = Utils.missingValue();
            } else {
              for (j = 0; j < m_CutPoints[i].length; j++) {
                if (currentVal <= m_CutPoints[i][j]) {
                  break;
                }
              }
              vals[index] = j;
            }
            index++;
          } else {
            for (j = 0; j < m_CutPoints[i].length; j++) {
              if (instance.isMissing(i)) {
                vals[index] = Utils.missingValue();
              } else if (currentVal <= m_CutPoints[i][j]) {
                vals[index] = 0;
              } else {
                vals[index] = 1;
              }
              index++;
            }
          }
        }
      } else {
        vals[index] = instance.value(i);
        index++;
      }
    }

    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
  }
Пример #10
0
  /**
   * Convert an <code>Instance</code> to an array of values that matches the format of the mining
   * schema. First maps raw attribute values and then applies rules for missing values, outliers
   * etc.
   *
   * @param inst the <code>Instance</code> to convert
   * @param miningSchema the mining schema incoming instance attributes
   * @return an array of doubles that are values from the incoming Instances, correspond to the
   *     format of the mining schema and have had missing values, outliers etc. dealt with.
   * @throws Exception if something goes wrong
   */
  public double[] instanceToSchema(Instance inst, MiningSchema miningSchema) throws Exception {
    Instances miningSchemaI = miningSchema.getMiningSchemaAsInstances();

    // allocate enough space for both mining schema fields and any derived fields
    double[] result = new double[miningSchema.getFieldsAsInstances().numAttributes()];

    // Copy over the values
    for (int i = 0; i < miningSchemaI.numAttributes(); i++) {
      // if (miningSchemaI.attribute(i).isNumeric()) {
      result[i] = inst.value(m_fieldsMap[i]);
      if (miningSchemaI.attribute(i).isNominal() || miningSchemaI.attribute(i).isString()) {
        // If not missing, look up the index of this incoming categorical value in
        // the mining schema
        if (!Utils.isMissingValue(inst.value(m_fieldsMap[i]))) {
          int[] valueMap = m_nominalValueMaps[i];
          int index = valueMap[(int) inst.value(m_fieldsMap[i])];
          String incomingAttValue =
              inst.attribute(m_fieldsMap[i]).value((int) inst.value(m_fieldsMap[i]));
          /*int index = miningSchemaI.attribute(i).indexOfValue(incomingAttValue); */
          if (index >= 0) {
            result[i] = index;
          } else {
            // set this to "unknown" (-1) for nominal valued attributes
            result[i] = UNKNOWN_NOMINAL_VALUE;
            String warningString =
                "[MappingInfo] WARNING: Can't match nominal value " + incomingAttValue;
            if (m_log != null) {
              m_log.logMessage(warningString);
            } else {
              System.err.println(warningString);
            }
          }
        }
      }
    }

    // Now deal with missing values and outliers...
    miningSchema.applyMissingAndOutlierTreatments(result);
    //    printInst(result);

    // now fill in any derived values
    ArrayList<DerivedFieldMetaInfo> derivedFields = miningSchema.getDerivedFields();
    for (int i = 0; i < derivedFields.size(); i++) {
      DerivedFieldMetaInfo temp = derivedFields.get(i);
      //      System.err.println("Applying : " + temp);
      double r = temp.getDerivedValue(result);
      result[i + miningSchemaI.numAttributes()] = r;
    }

    /*System.err.print("==> ");
    for (int i = 0; i < result.length; i++) {
      System.err.print(" " + result[i]);
    }
    System.err.println();*/

    return result;
  }
Пример #11
0
  public void insert(Instance instance, long timestamp) {
    N++;
    LST += timestamp;
    SST += timestamp * timestamp;

    for (int i = 0; i < instance.numValues(); i++) {
      LS[i] += instance.value(i);
      SS[i] += instance.value(i) * instance.value(i);
    }
  }
Пример #12
0
  private RunTrace traceToXML(int file_id, int task_id, int run_id) throws Exception {
    RunTrace trace = new RunTrace(run_id);
    URL traceURL = apiconnector.getOpenmlFileUrl(file_id, "Task_" + task_id + "_trace.arff");
    Instances traceDataset = new Instances(new BufferedReader(Input.getURL(traceURL)));
    List<Integer> parameterIndexes = new ArrayList<Integer>();

    if (traceDataset.attribute("repeat") == null
        || traceDataset.attribute("fold") == null
        || traceDataset.attribute("iteration") == null
        || traceDataset.attribute("evaluation") == null
        || traceDataset.attribute("selected") == null) {
      throw new Exception("trace file missing mandatory attributes. ");
    }

    for (int i = 0; i < traceDataset.numAttributes(); ++i) {
      if (traceDataset.attribute(i).name().startsWith("parameter_")) {
        parameterIndexes.add(i);
      }
    }
    if (parameterIndexes.size() == 0) {
      throw new Exception(
          "trace file contains no fields with prefix 'parameter_' (i.e., parameters are not registered). ");
    }
    if (traceDataset.numAttributes() > 6 + parameterIndexes.size()) {
      throw new Exception(
          "trace file contains illegal attributes (only allow for repeat, fold, iteration, evaluation, selected, setup_string and parameter_*). ");
    }

    for (int i = 0; i < traceDataset.numInstances(); ++i) {
      Instance current = traceDataset.get(i);
      Integer repeat = (int) current.value(traceDataset.attribute("repeat").index());
      Integer fold = (int) current.value(traceDataset.attribute("fold").index());
      Integer iteration = (int) current.value(traceDataset.attribute("iteration").index());
      Double evaluation = current.value(traceDataset.attribute("evaluation").index());
      Boolean selected =
          current.stringValue(traceDataset.attribute("selected").index()).equals("true");

      Map<String, String> parameters = new HashMap<String, String>();
      for (int j = 0; j < parameterIndexes.size(); ++j) {
        int attIdx = parameterIndexes.get(j);
        if (traceDataset.attribute(attIdx).isNumeric()) {
          parameters.put(traceDataset.attribute(attIdx).name(), current.value(attIdx) + "");
        } else {
          parameters.put(traceDataset.attribute(attIdx).name(), current.stringValue(attIdx));
        }
      }
      String setup_string = new JSONObject(parameters).toString();

      trace.addIteration(
          new RunTrace.Trace_iteration(
              repeat, fold, iteration, setup_string, evaluation, selected));
    }

    return trace;
  }
Пример #13
0
  /**
   * Returns index of subset instance is assigned to. Returns -1 if instance is assigned to more
   * than one subset.
   *
   * @exception Exception if something goes wrong
   */
  public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex)) return -1;
    else {
      if (instance.attribute(m_attIndex).isNominal()) {
        if ((int) m_splitPoint == (int) instance.value(m_attIndex)) return 0;
        else return 1;
      } else if (Utils.smOrEq(instance.value(m_attIndex), m_splitPoint)) return 0;
      else return 1;
    }
  }
Пример #14
0
 private List<Object> convert(Instance instance) {
   List<Object> data = new LinkedList<Object>();
   for (int i = 0; i < isNumeric.length; i++) {
     if (isNumeric[i]) {
       data.add(instance.value(i));
     } else {
       data.add(instance.attribute(i).value((int) instance.value(i)));
     }
   }
   return data;
 }
Пример #15
0
  public int calculateAllWrong() {
    if (run_ids.size() < 2) {
      throw new RuntimeException("Too few runs to compare. Should be at least 2. ");
    }

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("repeat"));
    attributes.add(new Attribute("fold"));
    attributes.add(new Attribute("rowid"));
    resultSet = new Instances("all-wrong", attributes, task_splits.numInstances());

    for (int i = 0; i < task_splits.numInstances(); ++i) {
      Instance current = task_splits.get(i);
      boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST");
      if (!test) {
        continue;
      }

      Integer row_id = (int) current.value(task_splits.attribute("rowid"));
      Integer repeat = (int) current.value(task_splits.attribute("repeat"));
      Integer fold = (int) current.value(task_splits.attribute("fold"));
      Integer sample = 0;
      try {
        sample = (int) current.value(task_splits.attribute("sample"));
      } catch (Exception e) {
      }

      String correctLabel = correct.get(row_id);
      Integer correctPredictions = 0;

      for (Integer run_id : run_ids) {

        // System.out.println(predictions.get(run_id));
        // System.out.println(repeat + "," + fold + "," + sample + "," + row_id);

        if (predictions
            .get(run_id)
            .get(repeat)
            .get(fold)
            .get(sample)
            .get(row_id)
            .equals(correctLabel)) {
          correctPredictions += 1;
        }
      }

      if (correctPredictions == 0) {
        double[] instance = {repeat, fold, row_id};
        resultSet.add(new DenseInstance(1.0, instance));
      }
    }
    return resultSet.size();
  }
Пример #16
0
 public void testTypical() {
   m_Filter = getFilter("6,3");
   Instances result = useFilter();
   assertEquals(m_Instances.numAttributes() - 1, result.numAttributes());
   for (int i = 0; i < result.numInstances(); i++) {
     Instance orig = m_Instances.instance(i);
     if (orig.isMissing(5) || orig.isMissing(2)) {
       assertTrue("Instance " + (i + 1) + " should have been ?", result.instance(i).isMissing(4));
     } else {
       assertEquals(orig.value(5) - orig.value(2), result.instance(i).value(4), EXPR_DELTA);
     }
   }
 }
Пример #17
0
  /**
   * Checks if an instance contains an item set.
   *
   * @param instance the instance to be tested
   * @return true if the given instance contains this item set
   */
  public boolean containedByTreatZeroAsMissing(Instance instance) {

    if (instance instanceof weka.core.SparseInstance) {
      int numInstVals = instance.numValues();
      int numItemSetVals = m_items.length;

      for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals; ) {
        int instIndex = Integer.MAX_VALUE;
        if (p1 < numInstVals) {
          instIndex = instance.index(p1);
        }
        int itemIndex = p2;

        if (m_items[itemIndex] > -1) {
          if (itemIndex != instIndex) {
            return false;
          } else {
            if (instance.isMissingSparse(p1)) {
              return false;
            }
            if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
              return false;
            }
          }

          p1++;
          p2++;
        } else {
          if (itemIndex < instIndex) {
            p2++;
          } else if (itemIndex == instIndex) {
            p2++;
            p1++;
          }
        }
      }
    } else {
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (m_items[i] > -1) {
          if (instance.isMissing(i) || (int) instance.value(i) == 0) {
            return false;
          }
          if (m_items[i] != (int) instance.value(i)) {
            return false;
          }
        }
      }
    }

    return true;
  }
Пример #18
0
  /**
   * Convert a single instance over if the class is nominal. The converted instance is added to the
   * end of the output queue.
   *
   * @param instance the instance to convert
   */
  private void convertInstanceNominal(Instance instance) {

    if (!m_needToTransform) {
      push(instance);
      return;
    }

    double[] vals = new double[outputFormatPeek().numAttributes()];
    int attSoFar = 0;

    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) || (j == getInputFormat().classIndex())) {
        vals[attSoFar] = instance.value(j);
        attSoFar++;
      } else {
        if ((att.numValues() <= 2) && (!m_TransformAll)) {
          vals[attSoFar] = instance.value(j);
          attSoFar++;
        } else {
          if (instance.isMissing(j)) {
            for (int k = 0; k < att.numValues(); k++) {
              vals[attSoFar + k] = instance.value(j);
            }
          } else {
            for (int k = 0; k < att.numValues(); k++) {
              if (k == (int) instance.value(j)) {
                vals[attSoFar + k] = 1;
              } else {
                vals[attSoFar + k] = 0;
              }
            }
          }
          attSoFar += att.numValues();
        }
      }
    }
    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
  }
  /**
   * Computes class distribution of an instance using the FastRandomTree.
   *
   * <p>In Weka's RandomTree, the distributions were normalized so that all probabilities sum to 1;
   * this would abolish the effect of instance weights on voting. In FastRandomForest 0.97 onwards,
   * the distributions are normalized by dividing with the number of instances going into a leaf.
   *
   * <p>
   *
   * @param instance the instance to compute the distribution for
   * @return the computed class distribution
   * @throws Exception if computation fails
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] returnedDist = null;

    if (m_Attribute > -1) { // ============================ node is not a leaf

      if (instance.isMissing(m_Attribute)) { // ---------------- missing value

        returnedDist = new double[m_MotherForest.getM_Info().numClasses()];
        // split instance up
        for (int i = 0; i < m_Successors.length; i++) {
          double[] help = m_Successors[i].distributionForInstance(instance);
          if (help != null) {
            for (int j = 0; j < help.length; j++) {
              returnedDist[j] += m_Prop[i] * help[j];
            }
          }
        }

      } else if (m_MotherForest.getM_Info().attribute(m_Attribute).isNominal()) { // ------ nominal

        // returnedDist = m_Successors[(int) instance.value(m_Attribute)]
        //        .distributionForInstance(instance);

        // 0.99: new - binary splits (also) for nominal attributes
        if (instance.value(m_Attribute) == m_SplitPoint) {
          returnedDist = m_Successors[0].distributionForInstance(instance);
        } else {
          returnedDist = m_Successors[1].distributionForInstance(instance);
        }

      } else { // ------------------------------------------ numeric attributes

        if (instance.value(m_Attribute) < m_SplitPoint) {
          returnedDist = m_Successors[0].distributionForInstance(instance);
        } else {
          returnedDist = m_Successors[1].distributionForInstance(instance);
        }
      }

      return returnedDist;

    } else { // =============================================== node is a leaf

      return m_ClassProbs;
    }
  }
Пример #20
0
 private Instance instanceFromStream(StreamElement data) {
   try {
     Instance i = new Instance(data.getFieldNames().length);
     for (int j = 0; j < data.getFieldNames().length; j++) {
       i.setValue(j, ((Double) data.getData()[j]));
     }
     // scaling specific to opensense data!! should be put in the parameters?
     i.setValue(0, i.value(0) / 1400.0);
     i.setValue(2, i.value(2) / 50);
     i.setValue(3, i.value(3) / 100.0);
     i.setValue(4, i.value(4) / 100.0 - 4);
     return i;
   } catch (Exception e) {
     return null;
   }
 }
Пример #21
0
 public double[] normalizedInstance(Instance inst) {
   // Normalize Instance
   double[] normalizedInstance = new double[inst.numAttributes()];
   for (int j = 0; j < inst.numAttributes() - 1; j++) {
     int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
     double mean = perceptronattributeStatistics.getValue(j) / perceptronYSeen;
     double sd =
         computeSD(
             squaredperceptronattributeStatistics.getValue(j),
             perceptronattributeStatistics.getValue(j),
             perceptronYSeen);
     if (sd > SD_THRESHOLD) normalizedInstance[j] = (inst.value(instAttIndex) - mean) / sd;
     else normalizedInstance[j] = inst.value(instAttIndex) - mean;
   }
   return normalizedInstance;
 }
Пример #22
0
  /**
   * Input an instance for filtering. Ordinarily the instance is processed and made available for
   * output immediately. Some filters require all instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @exception IllegalStateException if no input format has been defined.
   * @exception Exception if there was a problem during the filtering.
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    double[] vals = new double[instance.numAttributes() + 1];
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (instance.isMissing(i)) {
        vals[i] = Instance.missingValue();
      } else {
        vals[i] = instance.value(i);
      }
    }

    evaluateExpression(vals);

    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new Instance(instance.weight(), vals);
    }
    copyStringValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
    return true;
  }
Пример #23
0
 /**
  * Calculate average of every columns
  *
  * @param inst
  * @return
  */
 public Double[] calculateAverage(Instances inst) {
   Double[] average = new Double[inst.numAttributes() - 1];
   for (int i = 0; i < inst.numAttributes() - 1; i++) {
     average[i] = 0.0;
   }
   for (int i = 0; i < inst.numInstances(); i++) {
     for (int x = 0; x < inst.instance(i).numAttributes() - 1; x++) {
       Instance ins = inst.instance(i);
       if (ins != null && !Double.isNaN(ins.value(x))) average[x] += ins.value(x);
     }
   }
   for (int i = 0; i < inst.numAttributes() - 1; i++) {
     average[i] /= inst.numInstances();
   }
   return average;
 }
Пример #24
0
  /**
   * Input an instance for filtering.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws Exception if the input format was not set or the date format cannot be parsed
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }
    Instance newInstance = (Instance) instance.copy();
    int index = m_AttIndex.getIndex();
    if (!newInstance.isMissing(index)) {
      double value = instance.value(index);
      try {
        // Format and parse under the new format to force any required
        // loss in precision.
        value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value));
      } catch (ParseException pe) {
        throw new RuntimeException("Output date format couldn't parse its own output!!");
      }
      newInstance.setValue(index, value);
    }
    push(newInstance);
    return true;
  }
  public double ExpectedClassificationError(Instances pool, int attr_i) {

    // initialize alpha's to one
    int alpha[][][];
    int NumberOfFeatures = pool.numAttributes() - 1;
    int NumberOfLabels = pool.numClasses();

    alpha = new int[NumberOfFeatures][NumberOfLabels][];
    for (int i = 0; i < NumberOfFeatures; i++)
      for (int j = 0; j < NumberOfLabels; j++) alpha[i][j] = new int[pool.attribute(i).numValues()];

    for (int i = 0; i < NumberOfFeatures; i++)
      for (int j = 0; j < NumberOfLabels; j++)
        for (int k = 0; k < alpha[i][j].length; k++) alpha[i][j][k] = 1;

    // construct alpha's
    for (int i = 0; i < NumberOfFeatures; i++) // for each attribute
    {
      if (i == pool.classIndex()) // skip the class attribute
      i++;
      for (Enumeration<Instance> e = pool.enumerateInstances();
          e.hasMoreElements(); ) // for each instance
      {
        Instance inst = e.nextElement();
        if (!inst.isMissing(i)) // if attribute i is not missing (i.e. its been bought)
        {
          int j = (int) inst.classValue();
          int k = (int) inst.value(i);
          alpha[i][j][k]++;
        }
      }
    }
    return ExpectedClassificationError(alpha, attr_i);
  }
Пример #26
0
  @Override
  public void buildClassifier(Instances data) throws Exception {
    trainingData = data;
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
      classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>());
      indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
      Instance sample = data.instance(i);
      MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
      int shift = (sample.classIndex() == 0) ? 1 : 0;
      for (int t = 0; t < sequence.length; t++) {
        sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
      }
      sequences[i] = new Sequence(sequence);
      String clas = sample.stringValue(classAttribute);
      classMap[i] = clas;
      classedData.get(clas).add(sequences[i]);
      indexClassedDataInFullData.get(clas).add(i);
      //			System.out.println("Element "+i+" of train is classed "+clas+" and went to element
      // "+(indexClassedDataInFullData.get(clas).size()-1));
    }
    buildSpecificClassifier(data);
  }
Пример #27
0
  /**
   * turns the instance into a libsvm row
   *
   * @param inst the instance to transform
   * @return the generated libsvm row
   */
  protected String instanceToLibsvm(Instance inst) {
    StringBuffer result;
    int i;

    // class
    result = new StringBuffer("" + inst.classValue());

    // attributes
    for (i = 0; i < inst.numAttributes(); i++) {
      if (i == inst.classIndex()) continue;
      if (inst.value(i) == 0) continue;
      result.append(" " + (i + 1) + ":" + inst.value(i));
    }

    return result.toString();
  }
Пример #28
0
  /**
   * Updates the minimum and maximum values for all the attributes based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax(Instance instance) {

    for (int j = 0; j < instance.numAttributes(); j++) {
      if (Double.isNaN(m_Min[j])) {
        m_Min[j] = instance.value(j);
        m_Max[j] = instance.value(j);
      } else {
        if (instance.value(j) < m_Min[j]) {
          m_Min[j] = instance.value(j);
        } else {
          if (instance.value(j) > m_Max[j]) {
            m_Max[j] = instance.value(j);
          }
        }
      }
    }
  }
Пример #29
0
  public void calculateDifference() {
    if (run_ids.size() != 2) {
      throw new RuntimeException("Too many runs to compare. Should be 2. ");
    }

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("repeat"));
    attributes.add(new Attribute("fold"));
    attributes.add(new Attribute("rowid"));
    resultSet = new Instances("difference", attributes, task_splits.numInstances());

    for (int i = 0; i < task_splits.numInstances(); ++i) {
      Instance current = task_splits.get(i);
      boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST");
      if (!test) {
        continue;
      }

      Integer row_id = (int) current.value(task_splits.attribute("rowid"));
      Integer repeat = (int) current.value(task_splits.attribute("repeat"));
      Integer fold = (int) current.value(task_splits.attribute("fold"));
      Integer sample = 0;
      try {
        sample = (int) current.value(task_splits.attribute("sample"));
      } catch (Exception e) {
      }

      String label = null;
      boolean difference = false;

      for (Integer run_id : run_ids) {
        String currentLabel = predictions.get(run_id).get(repeat).get(fold).get(sample).get(row_id);
        if (label == null) {
          label = currentLabel;
        } else if (label.equals(currentLabel) == false) {
          difference = true;
        }
      }

      if (difference) {
        double[] instance = {repeat, fold, row_id};
        resultSet.add(new DenseInstance(1.0, instance));
      }
    }
  }
  /**
   * @param outFilePath full path to the output file
   * @param data the instances object containing the data on which the quantizer is learner
   * @param numClusters the number of clusters in k-means
   * @param maxIterations the maximum number of k-means iterations
   * @param seed the seed given to k-means
   * @param numSlots the number of execution slots to use (>1 = parallel execution)
   * @param kMeansPlusPlus whether to use kmeans++ for the initialization of the centroids
   *     (true/false)
   * @throws Exception
   */
  public static void learnAndWriteQuantizer(
      String outFilePath,
      Instances data,
      int numClusters,
      int maxIterations,
      int seed,
      int numSlots,
      boolean kMeansPlusPlus)
      throws Exception {
    System.out.println("--" + data.numInstances() + " vectors loaded--");
    System.out.println("Vector dimensionality: " + data.numAttributes());
    System.out.println("Clustering settings:");
    System.out.println("Num clusters: " + numClusters);
    System.out.println("Max iterations: " + maxIterations);
    System.out.println("Seed: " + seed);

    System.out.println("Clustering started");
    long start = System.currentTimeMillis();
    // create a new instance for the Clusterer
    SimpleKMeansWithOutput clusterer = new SimpleKMeansWithOutput();
    clusterer.setInitializeUsingKMeansPlusPlusMethod(kMeansPlusPlus);
    clusterer.setSeed(seed);
    clusterer.setNumClusters(numClusters);
    clusterer.setMaxIterations(maxIterations);
    clusterer.setNumExecutionSlots(numSlots);
    clusterer.setFastDistanceCalc(false);
    // build the clusterer
    clusterer.buildClusterer(data);
    long end = System.currentTimeMillis();
    System.out.println("Clustering completed in " + (end - start) + " ms");

    System.out.println("Writing quantizer in file");
    // create a new file to store the codebook
    BufferedWriter out = new BufferedWriter(new FileWriter(new File(outFilePath)));
    // write the results of the clustering to the new file (csv formated)
    Instances clusterCentroids = clusterer.getClusterCentroids();
    for (int j = 0; j < clusterCentroids.numInstances(); j++) {
      Instance centroid = clusterCentroids.instance(j);
      for (int k = 0; k < centroid.numAttributes() - 1; k++) {
        out.write(centroid.value(k) + ",");
      }
      out.write(centroid.value(centroid.numAttributes() - 1) + "\n");
    }
    out.close();
  }