// 将一个文本数据转换成weka可以识别的Instance
 private Instance makeInstance(String text, Instances data) {
   Instance instance = new Instance(2);
   Attribute messageAtt = data.attribute("Message");
   instance.setValue(messageAtt, messageAtt.addStringValue(text));
   instance.setDataset(data);
   return instance;
 }
Example #2
0
  public void restart() {
    IMDb = 0L;
    Oracle = 0L;

    try {
      clearRecords();

      recordsSelect = "SELECT " + userColumn + ", " + objectColumn + ", ";
      for (Attribute attr : attributes) {
        if (attr.isRelationValued() || attr.name().isEmpty()) continue;
        recordsSelect += attr.name() + " ,";
      }
      recordsSelect = recordsSelect.substring(0, recordsSelect.length() - 1);

      recordsSelect += " FROM " + recordsTable;
      if (betweenCondition != null) {
        recordsSelect += " WHERE " + betweenCondition;
      }
      if (userID != null) {
        // We add where clause, if it is not present
        if (betweenCondition == null || !recordsSelect.endsWith(betweenCondition))
          recordsSelect += " WHERE ";
        // AND if where is already there
        else recordsSelect += " AND ";

        recordsSelect += userColumn + " = " + userID;
      }
      recordsStatement = provider.getConn().prepareStatement(recordsSelect);
      records = recordsStatement.executeQuery();
      records.next();
    } catch (Exception e) {
      e.printStackTrace();
    }
    getAttributes();
  }
  /**
   * Método que inicializa uma regra com os valores obtidos atraves do Apriori.
   *
   * @param b Corpo da regre
   * @param h Cabecao da regra
   * @param conf Confianca da regra
   * @param e Lista com os atributos da regra
   * @param c Atributo da classe da regra
   */
  public Regra(ItemSet b, ItemSet h, double conf, Enumeration<Attribute> e, Attribute c)
      throws Exception {

    cabeca = h.itemAt(0);
    confianca = conf;
    int corpoTemp[] = b.items();
    atributosNaoVazios = 0;
    corpo = new Atributo[corpoTemp.length];
    int i = 0;
    while (e.hasMoreElements()) {
      Attribute att = (Attribute) e.nextElement();
      // attributes.add(att);
      if (att.isNominal()) {
        if (corpoTemp[i] == -1) {
          AtributoNominal vazio = new AtributoNominal(true, att, i);
          corpo[i++] = vazio;
        } else {
          AtributoNominal nominal =
              new AtributoNominal(corpoTemp[i], AtributoNominal.igual, att, i);
          corpo[i++] = nominal;
        }

      } else {
        throw new Exception("Atributo não nominal!");
      }
    }

    classe = c;
    matrizContigencia = new MatrizContingencia();
    getNumAtributosNaoVazios();
  }
Example #4
0
  @Override
  public String classify(User user, Sample sample) {

    Instances trainingSet =
        new TrainingSetBuilder()
            .setAttributes(user.getBssids())
            .setClassAttribute(
                "Location",
                user.getLocations().stream().map(Location::getName).collect(Collectors.toList()))
            .build("TrainingSet", 1);

    // Create instance
    Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample);

    Instance instance = new Instance(trainingSet.numAttributes());

    for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) {
      Attribute attribute = (Attribute) e.nextElement();
      String bssid = attribute.name();
      int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0;
      instance.setValue(attribute, level);
    }

    if (sample.getLocation() != null)
      instance.setValue(trainingSet.classAttribute(), sample.getLocation());

    instance.setDataset(trainingSet);
    trainingSet.add(instance);

    int predictedClass = classify(fromBase64(user.getClassifiers()), instance);

    return trainingSet.classAttribute().value(predictedClass);
  }
Example #5
0
  /**
   * Input an instance for filtering.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws Exception if the input format was not set or the date format cannot be parsed
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }
    Instance newInstance = (Instance) instance.copy();
    int index = m_AttIndex.getIndex();
    if (!newInstance.isMissing(index)) {
      double value = instance.value(index);
      try {
        // Format and parse under the new format to force any required
        // loss in precision.
        value = m_OutputAttribute.parseDate(m_OutputAttribute.formatDate(value));
      } catch (ParseException pe) {
        throw new RuntimeException("Output date format couldn't parse its own output!!");
      }
      newInstance.setValue(index, value);
    }
    push(newInstance);
    return true;
  }
Example #6
0
  @Override
  public List<Classifier> buildClassifiers(User user, List<Sample> validSamples) {

    Instances trainingSet =
        new TrainingSetBuilder()
            .setAttributes(user.getBssids())
            .setClassAttribute(
                "Location",
                user.getLocations().stream().map(Location::getName).collect(Collectors.toList()))
            .build("TrainingSet", validSamples.size());

    // Create instances
    validSamples.forEach(
        sample -> {
          Map<String, Integer> BSSIDLevelMap = getBSSIDLevelMap(sample);

          Instance instance = new Instance(trainingSet.numAttributes());

          for (Enumeration e = trainingSet.enumerateAttributes(); e.hasMoreElements(); ) {
            Attribute attribute = (Attribute) e.nextElement();
            String bssid = attribute.name();
            int level = (BSSIDLevelMap.containsKey(bssid)) ? BSSIDLevelMap.get(bssid) : 0;
            instance.setValue(attribute, level);
          }

          instance.setValue(trainingSet.classAttribute(), sample.getLocation());

          instance.setDataset(trainingSet);
          trainingSet.add(instance);
        });

    // Build classifiers
    List<Classifier> classifiers = buildClassifiers(trainingSet);
    return classifiers;
  }
Example #7
0
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if distribution can't be computed
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] probs = new double[instance.numClasses()];
    int attIndex;

    for (int j = 0; j < instance.numClasses(); j++) {
      probs[j] = 1;
      Enumeration<Attribute> enumAtts = instance.enumerateAttributes();
      attIndex = 0;
      while (enumAtts.hasMoreElements()) {
        Attribute attribute = enumAtts.nextElement();
        if (!instance.isMissing(attribute)) {
          if (attribute.isNominal()) {
            probs[j] *= m_Counts[j][attIndex][(int) instance.value(attribute)];
          } else {
            probs[j] *=
                normalDens(instance.value(attribute), m_Means[j][attIndex], m_Devs[j][attIndex]);
          }
        }
        attIndex++;
      }
      probs[j] *= m_Priors[j];
    }

    // Normalize probabilities
    Utils.normalize(probs);

    return probs;
  }
Example #8
0
  /** Computes average class values for each attribute and value */
  private void computeAverageClassValues() {

    double totalCounts, sum;
    Instance instance;
    double[] counts;

    double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
    m_Indices = new int[getInputFormat().numAttributes()][0];
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (att.isNominal()) {
        avgClassValues[j] = new double[att.numValues()];
        counts = new double[att.numValues()];
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
          instance = getInputFormat().instance(i);
          if (!instance.classIsMissing() && (!instance.isMissing(j))) {
            counts[(int) instance.value(j)] += instance.weight();
            avgClassValues[j][(int) instance.value(j)] += instance.weight() * instance.classValue();
          }
        }
        sum = Utils.sum(avgClassValues[j]);
        totalCounts = Utils.sum(counts);
        if (Utils.gr(totalCounts, 0)) {
          for (int k = 0; k < att.numValues(); k++) {
            if (Utils.gr(counts[k], 0)) {
              avgClassValues[j][k] /= counts[k];
            } else {
              avgClassValues[j][k] = sum / totalCounts;
            }
          }
        }
        m_Indices[j] = Utils.sort(avgClassValues[j]);
      }
    }
  }
Example #9
0
 /**
  * Compute the number of all possible conditions that could appear in a rule of a given data. For
  * nominal attributes, it's the number of values that could appear; for numeric attributes, it's
  * the number of values * 2, i.e. <= and >= are counted as different possible conditions.
  *
  * @param data the given data
  * @return number of all conditions of the data
  */
 public static double numAllConditions(Instances data) {
   double total = 0;
   Enumeration attEnum = data.enumerateAttributes();
   while (attEnum.hasMoreElements()) {
     Attribute att = (Attribute) attEnum.nextElement();
     if (att.isNominal()) total += (double) att.numValues();
     else total += 2.0 * (double) data.numDistinctValues(att);
   }
   return total;
 }
Example #10
0
 /** Find the fold attribute within a dataset. */
 private Attribute getAttribute(Instances data) {
   SingleIndex index = new SingleIndex(super.getAttributeIndex());
   index.setUpper(data.numAttributes() - 1);
   Attribute att = data.attribute(index.getIndex());
   if (att == null)
     throw new NoSuchElementException(
         "attribute #" + super.getAttributeIndex() + " does not exist");
   if (!att.isNominal() && !att.isString())
     throw new IllegalArgumentException("Attribute '" + att + "' is not nominal");
   return att;
 }
Example #11
0
 /** Helper method to represent an Attribute as a single-element vector. */
 private int[] toValueIndexArray(Attribute attribute) {
   TIntArrayList out = new TIntArrayList();
   for (String value : values) {
     int valueIndex = attribute.indexOfValue(value);
     if (valueIndex < 0)
       throw new NoSuchElementException(
           "no such value: '" + value + "' in attribute '" + attribute.toString() + "'");
     out.add(valueIndex);
   }
   return out.toNativeArray();
 }
  public double classifyInstance(Instance inst) throws Exception {

    if (m_attribute == null) {
      return m_intercept;
    } else {
      if (inst.isMissing(m_attribute.index())) {
        throw new Exception("UnivariateLinearRegression: No missing values!");
      }
      return m_intercept + m_slope * inst.value(m_attribute.index());
    }
  }
Example #13
0
  /**
   * Adds this tree recursively to the buffer.
   *
   * @param id the unqiue id for the method
   * @param buffer the buffer to add the source code to
   * @return the last ID being used
   * @throws Exception if something goes wrong
   */
  protected int toSource(int id, StringBuffer buffer) throws Exception {
    int result;
    int i;
    int newID;
    StringBuffer[] subBuffers;

    buffer.append("\n");
    buffer.append("  protected static double node" + id + "(Object[] i) {\n");

    // leaf?
    if (m_Attribute == null) {
      result = id;
      if (Double.isNaN(m_ClassValue)) buffer.append("    return Double.NaN;");
      else buffer.append("    return " + m_ClassValue + ";");
      if (m_ClassAttribute != null)
        buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue));
      buffer.append("\n");
      buffer.append("  }\n");
    } else {
      buffer.append("    // " + m_Attribute.name() + "\n");

      // subtree calls
      subBuffers = new StringBuffer[m_Attribute.numValues()];
      newID = id;
      for (i = 0; i < m_Attribute.numValues(); i++) {
        newID++;

        buffer.append("    ");
        if (i > 0) buffer.append("else ");
        buffer.append(
            "if (((String) i["
                + m_Attribute.index()
                + "]).equals(\""
                + m_Attribute.value(i)
                + "\"))\n");
        buffer.append("      return node" + newID + "(i);\n");

        subBuffers[i] = new StringBuffer();
        newID = m_Successors[i].toSource(newID, subBuffers[i]);
      }
      buffer.append("    else\n");
      buffer.append(
          "      throw new IllegalArgumentException(\"Value '\" + i["
              + m_Attribute.index()
              + "] + \"' is not allowed!\");\n");
      buffer.append("  }\n");

      // output subtree code
      for (i = 0; i < m_Attribute.numValues(); i++) {
        buffer.append(subBuffers[i].toString());
      }
      subBuffers = null;

      result = newID;
    }

    return result;
  }
Example #14
0
  /**
   * Splits a dataset according to the values of a nominal attribute.
   *
   * @param data the data which is to be split
   * @param att the attribute to be used for splitting
   * @return the sets of instances produced by the split
   */
  private Instances[] splitData(Instances data, Attribute att) {

    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
      splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
      splitData[i].compactify();
    }
    return splitData;
  }
Example #15
0
    @Override
    public int compare(InstanceHolder o1, InstanceHolder o2) {

      // both missing is equal
      if (o1.m_instance.isMissing(m_attribute) && o2.m_instance.isMissing(m_attribute)) {
        return 0;
      }

      // one missing - missing instances should all be at the end
      // regardless of whether order is ascending or descending
      if (o1.m_instance.isMissing(m_attribute)) {
        return 1;
      }

      if (o2.m_instance.isMissing(m_attribute)) {
        return -1;
      }

      int cmp = 0;

      if (!m_attribute.isString() && !m_attribute.isRelationValued()) {
        double val1 = o1.m_instance.value(m_attribute);
        double val2 = o2.m_instance.value(m_attribute);

        cmp = Double.compare(val1, val2);
      } else if (m_attribute.isString()) {
        String val1 = o1.m_stringVals.get(m_attribute.name());
        String val2 = o2.m_stringVals.get(m_attribute.name());

        /*
         * String val1 = o1.stringValue(m_attribute); String val2 =
         * o2.stringValue(m_attribute);
         */

        // TODO case insensitive?
        cmp = val1.compareTo(val2);
      } else {
        throw new IllegalArgumentException(
            "Can't sort according to " + "relation-valued attribute values!");
      }

      if (m_descending) {
        return -cmp;
      }

      return cmp;
    }
  /**
   * Cálculo da precisão de Laplace
   *
   * @return Precisão de Laplace
   */
  public double getLaplace() {
    double temp = matrizContigencia.getB();
    int numClasses = classe.numValues();
    if (temp != 0) laplace = (matrizContigencia.getH_B() + 1) / (temp + numClasses);
    else laplace = 0;

    return laplace;
  }
Example #17
0
  /**
   * Convert a single instance over if the class is numeric. The converted instance is added to the
   * end of the output queue.
   *
   * @param instance the instance to convert
   */
  private void convertInstanceNumeric(Instance instance) {

    if (!m_needToTransform) {
      push(instance);
      return;
    }

    double[] vals = new double[outputFormatPeek().numAttributes()];
    int attSoFar = 0;

    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) || (j == getInputFormat().classIndex())) {
        vals[attSoFar] = instance.value(j);
        attSoFar++;
      } else {
        if (instance.isMissing(j)) {
          for (int k = 0; k < att.numValues() - 1; k++) {
            vals[attSoFar + k] = instance.value(j);
          }
        } else {
          int k = 0;
          while ((int) instance.value(j) != m_Indices[j][k]) {
            vals[attSoFar + k] = 1;
            k++;
          }
          while (k < att.numValues() - 1) {
            vals[attSoFar + k] = 0;
            k++;
          }
        }
        attSoFar += att.numValues() - 1;
      }
    }
    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
  }
Example #18
0
  /**
   * Set the output format. Takes the current average class values and m_InputFormat and calls
   * setOutputFormat(Instances) appropriately.
   */
  private void setOutputFormat() {
    Instances newData;
    FastVector newAtts;

    // Compute new attributes
    newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);

      if (!att.isNominal() || !m_AttIndex.isInRange(j)) newAtts.addElement(att);
      else newAtts.addElement(new Attribute(att.name(), (FastVector) null));
    }

    // Construct new header
    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());

    setOutputFormat(newData);
  }
Example #19
0
  /**
   * Calculate metric value
   *
   * @param mlData Multi-label dataset to which calculate the metric
   * @return Value of the metric
   */
  public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();
    int nInstances = mlData.getNumInstances();

    double avg;
    double var2;
    double var4;
    double val;
    int nNumeric = 0;
    double mean = 0;

    Set<Attribute> attributesSet = mlData.getFeatureAttributes();

    for (Attribute att : attributesSet) {
      if (att.isNumeric()) {
        nNumeric++;
        avg = instances.meanOrMode(att);
        var2 = 0;
        var4 = 0;

        for (Instance inst : instances) {
          val = inst.value(att);
          var2 += Math.pow(val - avg, 2);
          var4 += Math.pow(val - avg, 4);
        }

        double kurtosis = (nInstances * var4 / Math.pow(var2, 2)) - 3;
        double sampleKurtosis =
            (kurtosis * (nInstances + 1) + 6)
                * (nInstances - 1)
                / ((nInstances - 2) * (nInstances - 3));
        mean += sampleKurtosis;
      }
    }
    if (nNumeric > 0) {
      mean = mean / nNumeric;
    } else {
      mean = Double.NaN;
    }

    this.value = mean;
    return value;
  }
Example #20
0
  /** Set the output format. Changes the format of the specified date attribute. */
  private void setOutputFormat() {

    // Create new attributes
    FastVector newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (j == m_AttIndex.getIndex()) {
        newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern()));
      } else {
        newAtts.addElement(att.copy());
      }
    }

    // Create new header
    Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());
    m_OutputAttribute = newData.attribute(m_AttIndex.getIndex());
    setOutputFormat(newData);
  }
Example #21
0
  /**
   * Returns the test represented by a string in Prolog notation.
   *
   * @return a string representing the test in Prolog notation
   */
  public String toPrologString() {
    Attribute att = m_Dataset.attribute(m_AttIndex);
    StringBuffer str = new StringBuffer();
    String attName = m_Dataset.attribute(m_AttIndex).name();
    if (att.isNumeric()) {
      str = str.append(attName + " ");
      if (m_Not) str = str.append(">= " + Utils.doubleToString(m_Split, 3));
      else str = str.append("< " + Utils.doubleToString(m_Split, 3));
    } else {
      String value = att.value((int) m_Split);

      if (value == "false") {
        str = str.append("not(" + attName + ")");
      } else {
        str = str.append(attName);
      }
    }
    return str.toString();
  }
Example #22
0
  /**
   * Constructs an instance suitable for passing to the model for scoring
   *
   * @param incoming the incoming instance
   * @return an instance with values mapped to be consistent with what the model is expecting
   */
  protected Instance mapIncomingFieldsToModelFields(Instance incoming) {
    Instances modelHeader = m_model.getHeader();
    double[] vals = new double[modelHeader.numAttributes()];

    for (int i = 0; i < modelHeader.numAttributes(); i++) {

      if (m_attributeMap[i] < 0) {
        // missing or type mismatch
        vals[i] = Utils.missingValue();
        continue;
      }

      Attribute modelAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]);

      if (incoming.isMissing(incomingAtt.index())) {
        vals[i] = Utils.missingValue();
        continue;
      }

      if (modelAtt.isNumeric()) {
        vals[i] = incoming.value(m_attributeMap[i]);
      } else if (modelAtt.isNominal()) {
        String incomingVal = incoming.stringValue(m_attributeMap[i]);
        int modelIndex = modelAtt.indexOfValue(incomingVal);

        if (modelIndex < 0) {
          vals[i] = Utils.missingValue();
        } else {
          vals[i] = modelIndex;
        }
      } else if (modelAtt.isString()) {
        vals[i] = 0;
        modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i]));
      }
    }

    if (modelHeader.classIndex() >= 0) {
      // set class to missing value
      vals[modelHeader.classIndex()] = Utils.missingValue();
    }

    Instance newInst = null;
    if (incoming instanceof SparseInstance) {
      newInst = new SparseInstance(incoming.weight(), vals);
    } else {
      newInst = new DenseInstance(incoming.weight(), vals);
    }

    newInst.setDataset(modelHeader);
    return newInst;
  }
Example #23
0
  /**
   * processes the given instance (may change the provided instance) and returns the modified
   * version.
   *
   * @param instance the instance to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   */
  protected Instance process(Instance instance) throws Exception {
    Instance result;
    Attribute att;
    double[] values;
    int i;

    // adjust indices
    values = new double[instance.numAttributes()];
    for (i = 0; i < instance.numAttributes(); i++) {
      att = instance.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
        values[i] = instance.value(i);
      else values[i] = m_NewOrder[i][(int) instance.value(i)];
    }

    // create new instance
    result = new DenseInstance(instance.weight(), values);

    return result;
  }
  public String toString() {

    if (m_attribute == null) {
      return "No model built yet.";
    }
    StringBuffer text = new StringBuffer();
    if (m_attribute == null) {
      text.append("Predicting constant " + m_intercept);
    } else {
      text.append("Linear regression on " + m_attribute.name() + "\n\n");
      text.append(Utils.doubleToString(m_slope, 2) + " * " + m_attribute.name());
      if (m_intercept > 0) {
        text.append(" + " + Utils.doubleToString(m_intercept, 2));
      } else {
        text.append(" - " + Utils.doubleToString((-m_intercept), 2));
      }
    }
    text.append("\n");
    return text.toString();
  }
Example #25
0
  /**
   * Method for building an Id3 tree.
   *
   * @param data the training data
   * @exception Exception if decision tree can't be built successfully
   */
  private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
      m_Attribute = null;
      m_ClassValue = Utils.missingValue();
      m_Distribution = new double[data.numClasses()];
      return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
      Attribute att = (Attribute) attEnum.nextElement();
      infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
      m_Attribute = null;
      m_Distribution = new double[data.numClasses()];
      Enumeration instEnum = data.enumerateInstances();
      while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
      }
      Utils.normalize(m_Distribution);
      m_ClassValue = Utils.maxIndex(m_Distribution);
      m_ClassAttribute = data.classAttribute();
    } else {
      Instances[] splitData = splitData(data, m_Attribute);
      m_Successors = new Id3[m_Attribute.numValues()];
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        m_Successors[j] = new Id3();
        m_Successors[j].makeTree(splitData[j]);
      }
    }
  }
  /**
   * Set the output format. Takes the current average class values and m_InputFormat and calls
   * setOutputFormat(Instances) appropriately.
   */
  private void setOutputFormat() {

    Instances newData;
    FastVector newAtts, newVals;

    // Compute new attributes

    newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (!m_AttIndices.isInRange(j) || !att.isString()) {

        // We don't have to copy the attribute because the
        // attribute index remains unchanged.
        newAtts.addElement(att);
      } else {

        // Compute list of attribute values
        newVals = new FastVector(att.numValues());
        for (int i = 0; i < att.numValues(); i++) {
          newVals.addElement(att.value(i));
        }
        newAtts.addElement(new Attribute(att.name(), newVals));
      }
    }

    // Construct new header
    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());
    setOutputFormat(newData);
  }
  /**
   * Sets up the structure for the plot instances. Sets m_PlotInstances to null if instances are not
   * saved for visualization.
   *
   * @see #getSaveForVisualization()
   */
  protected void determineFormat() {
    FastVector hv;
    Attribute predictedClass;
    Attribute classAt;
    FastVector attVals;
    int i;

    if (!m_SaveForVisualization) {
      m_PlotInstances = null;
      return;
    }

    hv = new FastVector();

    classAt = m_Instances.attribute(m_ClassIndex);
    if (classAt.isNominal()) {
      attVals = new FastVector();
      for (i = 0; i < classAt.numValues(); i++) attVals.addElement(classAt.value(i));
      predictedClass = new Attribute("predicted" + classAt.name(), attVals);
    } else {
      predictedClass = new Attribute("predicted" + classAt.name());
    }

    for (i = 0; i < m_Instances.numAttributes(); i++) {
      if (i == m_Instances.classIndex()) hv.addElement(predictedClass);
      hv.addElement(m_Instances.attribute(i).copy());
    }

    m_PlotInstances =
        new Instances(m_Instances.relationName() + "_predicted", hv, m_Instances.numInstances());
    m_PlotInstances.setClassIndex(m_ClassIndex + 1);
  }
Example #28
0
  /**
   * Outputs a tree at a certain level.
   *
   * @param level the level at which the tree is to be printed
   * @return the tree as string at the given level
   */
  private String toString(int level) {

    StringBuffer text = new StringBuffer();

    if (m_Attribute == null) {
      if (Utils.isMissingValue(m_ClassValue)) {
        text.append(": null");
      } else {
        text.append(": " + m_ClassAttribute.value((int) m_ClassValue));
      }
    } else {
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        text.append("\n");
        for (int i = 0; i < level; i++) {
          text.append("|  ");
        }
        text.append(m_Attribute.name() + " = " + m_Attribute.value(j));
        text.append(m_Successors[j].toString(level + 1));
      }
    }
    return text.toString();
  }
Example #29
0
  /**
   * Builds a mapping between the header for the incoming data to be scored and the header used to
   * train the model. Uses attribute names to match between the two. Also constructs a list of
   * missing attributes and a list of type mismatches.
   *
   * @param modelHeader the header of the data used to train the model
   * @param incomingHeader the header of the incoming data
   * @throws DistributedWekaException if more than 50% of the attributes expected by the model are
   *     missing or have a type mismatch with the incoming data
   */
  protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader)
      throws DistributedWekaException {
    m_attributeMap = new int[modelHeader.numAttributes()];

    int problemCount = 0;
    for (int i = 0; i < modelHeader.numAttributes(); i++) {
      Attribute modAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incomingHeader.attribute(modAtt.name());

      if (incomingAtt == null) {
        // missing model attribute
        m_attributeMap[i] = -1;
        m_missingMismatch.put(modAtt.name(), "missing from incoming data");
        problemCount++;
      } else if (modAtt.type() != incomingAtt.type()) {
        // type mismatch
        m_attributeMap[i] = -1;
        m_missingMismatch.put(
            modAtt.name(),
            "type mismatch - "
                + "model: "
                + Attribute.typeToString(modAtt)
                + " != incoming: "
                + Attribute.typeToString(incomingAtt));
        problemCount++;
      } else {
        m_attributeMap[i] = incomingAtt.index();
      }
    }

    // -1 for the class (if set)
    int adjustForClass = modelHeader.classIndex() >= 0 ? 1 : 0;
    if (problemCount > (modelHeader.numAttributes() - adjustForClass) / 2) {
      throw new DistributedWekaException(
          "More than 50% of the attributes that the model "
              + "is expecting to see are either missing or have a type mismatch in the "
              + "incoming data.");
    }
  }
Example #30
0
  @Override
  public void updateNode(Instance inst) throws Exception {
    super.updateDistribution(inst);

    for (int i = 0; i < inst.numAttributes(); i++) {
      Attribute a = inst.attribute(i);
      if (i != inst.classIndex()) {
        ConditionalSufficientStats stats = m_nodeStats.get(a.name());
        if (stats == null) {
          if (a.isNumeric()) {
            stats = new GaussianConditionalSufficientStats();
          } else {
            stats = new NominalConditionalSufficientStats();
          }
          m_nodeStats.put(a.name(), stats);
        }

        stats.update(
            inst.value(a), inst.classAttribute().value((int) inst.classValue()), inst.weight());
      }
    }
  }