/**
 * Resets the counters, rebuilds the record query and re-opens the record cursor.
 *
 * <p>The query selects the user column, the object column and every attribute that is neither
 * relation-valued nor unnamed. It is restricted by {@code betweenCondition} and/or {@code userID}
 * when those are set. After execution the cursor is advanced to the first row. Any exception
 * raised while building or running the query is printed and otherwise ignored, after which
 * {@code getAttributes()} is still called.
 */
public void restart() {
    IMDb = 0L;
    Oracle = 0L;

    try {
      clearRecords();

      // Emit the separator BEFORE each extra column. The previous implementation appended a
      // trailing " ," per attribute and chopped the last character, which left a dangling comma
      // in front of FROM whenever no attribute qualified.
      StringBuilder select = new StringBuilder("SELECT ");
      select.append(userColumn).append(", ").append(objectColumn);
      for (Attribute attr : attributes) {
        if (attr.isRelationValued() || attr.name().isEmpty()) {
          continue;
        }
        select.append(", ").append(attr.name());
      }
      select.append(" FROM ").append(recordsTable);

      // A WHERE clause exists exactly when a between-condition was supplied.
      boolean hasWhere = betweenCondition != null;
      if (hasWhere) {
        select.append(" WHERE ").append(betweenCondition);
      }
      if (userID != null) {
        // NOTE(review): userID is concatenated into the SQL text; if it can ever originate from
        // untrusted input it should be bound as a statement parameter instead.
        select.append(hasWhere ? " AND " : " WHERE ");
        select.append(userColumn).append(" = ").append(userID);
      }
      recordsSelect = select.toString();

      recordsStatement = provider.getConn().prepareStatement(recordsSelect);
      records = recordsStatement.executeQuery();
      // Position the cursor on the first row (the boolean result is intentionally not inspected).
      records.next();
    } catch (Exception e) {
      e.printStackTrace();
    }
    getAttributes();
  }
  /**
   * Builds the header of the plot instances: a copy of the training header with an extra
   * "predicted" attribute inserted directly in front of the class attribute. When instances are
   * not saved for visualization, m_PlotInstances is set to null instead.
   *
   * @see #getSaveForVisualization()
   */
  protected void determineFormat() {
    if (!m_SaveForVisualization) {
      m_PlotInstances = null;
      return;
    }

    // The predicted attribute mirrors the class attribute: nominal with the same labels, or
    // numeric otherwise.
    Attribute classAttribute = m_Instances.attribute(m_ClassIndex);
    Attribute predictedAttribute;
    if (!classAttribute.isNominal()) {
      predictedAttribute = new Attribute("predicted" + classAttribute.name());
    } else {
      FastVector labels = new FastVector();
      for (int v = 0; v < classAttribute.numValues(); v++) {
        labels.addElement(classAttribute.value(v));
      }
      predictedAttribute = new Attribute("predicted" + classAttribute.name(), labels);
    }

    // Copy every attribute, slipping the predicted one in just before the class.
    FastVector header = new FastVector();
    for (int a = 0; a < m_Instances.numAttributes(); a++) {
      if (a == m_Instances.classIndex()) {
        header.addElement(predictedAttribute);
      }
      header.addElement(m_Instances.attribute(a).copy());
    }

    String relation = m_Instances.relationName() + "_predicted";
    m_PlotInstances = new Instances(relation, header, m_Instances.numInstances());
    // The inserted attribute shifts the class one position to the right.
    m_PlotInstances.setClassIndex(m_ClassIndex + 1);
  }
  /**
   * Derives the output header from the input header and registers it through
   * setOutputFormat(Instances). String attributes inside the selected range are replaced by
   * nominal attributes carrying the same values; all other attributes are reused unchanged.
   */
  private void setOutputFormat() {
    Instances input = getInputFormat();
    int numAtts = input.numAttributes();

    FastVector header = new FastVector(numAtts);
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = input.attribute(pos);

      if (m_AttIndices.isInRange(pos) && current.isString()) {
        // Selected string attribute: rebuild it as a nominal attribute with the same values.
        int numValues = current.numValues();
        FastVector labels = new FastVector(numValues);
        for (int v = 0; v < numValues; v++) {
          labels.addElement(current.value(v));
        }
        header.addElement(new Attribute(current.name(), labels));
      } else {
        // Reusing the attribute object is fine because its index does not change.
        header.addElement(current);
      }
    }

    // Empty dataset with the new header, same relation name and class index.
    Instances output = new Instances(input.relationName(), header, 0);
    output.setClassIndex(input.classIndex());
    setOutputFormat(output);
  }
Beispiel #4
0
  /**
   * Classifies a single sample for the given user and returns the predicted class label.
   *
   * <p>A one-instance dataset is built from the user's BSSIDs and location names, the sample is
   * converted into an instance of that dataset, and the user's stored classifiers are applied.
   *
   * @param user the user whose BSSIDs, locations and classifiers are used
   * @param sample the sample to classify
   * @return the label of the predicted class
   */
  @Override
  public String classify(User user, Sample sample) {

    Instances dataset =
        new TrainingSetBuilder()
            .setAttributes(user.getBssids())
            .setClassAttribute(
                "Location",
                user.getLocations().stream().map(Location::getName).collect(Collectors.toList()))
            .build("TrainingSet", 1);

    // Signal levels of the sample, keyed by BSSID
    Map<String, Integer> levelsByBssid = getBSSIDLevelMap(sample);

    Instance candidate = new Instance(dataset.numAttributes());

    // Every non-class attribute is named after a BSSID; BSSIDs absent from the sample get level 0.
    Enumeration attributes = dataset.enumerateAttributes();
    while (attributes.hasMoreElements()) {
      Attribute current = (Attribute) attributes.nextElement();
      int level = levelsByBssid.getOrDefault(current.name(), 0);
      candidate.setValue(current, level);
    }

    // The class value is only filled in when the sample already carries a location.
    if (sample.getLocation() != null) {
      candidate.setValue(dataset.classAttribute(), sample.getLocation());
    }

    candidate.setDataset(dataset);
    dataset.add(candidate);

    int predictedIndex = classify(fromBase64(user.getClassifiers()), candidate);
    return dataset.classAttribute().value(predictedIndex);
  }
Beispiel #5
0
  /**
   * Builds the classifiers for a user from a list of valid samples.
   *
   * <p>Each sample becomes one instance of a dataset whose attributes are the user's BSSIDs and
   * whose class attribute lists the user's location names.
   *
   * @param user the user whose BSSIDs and locations define the dataset header
   * @param validSamples the samples to turn into training instances
   * @return the classifiers built from the resulting training set
   */
  @Override
  public List<Classifier> buildClassifiers(User user, List<Sample> validSamples) {

    Instances dataset =
        new TrainingSetBuilder()
            .setAttributes(user.getBssids())
            .setClassAttribute(
                "Location",
                user.getLocations().stream().map(Location::getName).collect(Collectors.toList()))
            .build("TrainingSet", validSamples.size());

    // One training instance per sample
    for (Sample sample : validSamples) {
      Map<String, Integer> levelsByBssid = getBSSIDLevelMap(sample);

      Instance row = new Instance(dataset.numAttributes());

      // BSSIDs that the sample did not observe are recorded with level 0.
      Enumeration attributes = dataset.enumerateAttributes();
      while (attributes.hasMoreElements()) {
        Attribute current = (Attribute) attributes.nextElement();
        int level = levelsByBssid.getOrDefault(current.name(), 0);
        row.setValue(current, level);
      }

      row.setValue(dataset.classAttribute(), sample.getLocation());

      row.setDataset(dataset);
      dataset.add(row);
    }

    // Delegate the actual training to the dataset-based overload
    return buildClassifiers(dataset);
  }
Beispiel #6
0
  /**
   * Appends the source code for this node, followed by the code of all its subtrees, to the
   * buffer. Each node becomes a static method named {@code node<id>}.
   *
   * @param id the unique id to use for this node's method
   * @param buffer the buffer to append the generated source code to
   * @return the last id that was handed out in this subtree
   * @throws Exception if something goes wrong
   */
  protected int toSource(int id, StringBuffer buffer) throws Exception {
    buffer.append("\n");
    buffer.append("  protected static double node" + id + "(Object[] i) {\n");

    // Leaf: emit the class value (plus its label as a trailing comment when available).
    if (m_Attribute == null) {
      String returned = Double.isNaN(m_ClassValue) ? "Double.NaN" : String.valueOf(m_ClassValue);
      buffer.append("    return " + returned + ";");
      if (m_ClassAttribute != null) {
        buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue));
      }
      buffer.append("\n");
      buffer.append("  }\n");
      return id;
    }

    buffer.append("    // " + m_Attribute.name() + "\n");

    // The subtree methods must follow this method's closing brace, so they are collected
    // separately (in branch order) while the if/else dispatch chain is written.
    StringBuffer subtrees = new StringBuffer();
    int lastID = id;
    int numBranches = m_Attribute.numValues();
    for (int branch = 0; branch < numBranches; branch++) {
      int childID = lastID + 1;

      String keyword = (branch > 0) ? "else if" : "if";
      buffer.append(
          "    "
              + keyword
              + " (((String) i["
              + m_Attribute.index()
              + "]).equals(\""
              + m_Attribute.value(branch)
              + "\"))\n");
      buffer.append("      return node" + childID + "(i);\n");

      // The child reports the highest id used below it; the next branch continues from there.
      lastID = m_Successors[branch].toSource(childID, subtrees);
    }

    buffer.append("    else\n");
    buffer.append(
        "      throw new IllegalArgumentException(\"Value '\" + i["
            + m_Attribute.index()
            + "] + \"' is not allowed!\");\n");
    buffer.append("  }\n");

    // Output the collected subtree code
    buffer.append(subtrees.toString());

    return lastID;
  }
Beispiel #7
0
  /**
   * Renders a two-column text table that maps each mining schema field to the incoming field it
   * is matched with (via m_fieldsMap) and stores the result in m_fieldsMappingText.
   *
   * @param miningSchemaI the mining schema header
   * @param incomingI the header of the incoming data
   */
  private void fieldsMappingString(Instances miningSchemaI, Instances incomingI) {
    int numFields = miningSchemaI.numAttributes();

    // Column width: longest schema attribute name plus room for the type prefix, but never
    // narrower than the "Mining schema" heading (13 characters).
    int columnWidth = 0;
    for (int f = 0; f < numFields; f++) {
      columnWidth = Math.max(columnWidth, miningSchemaI.attribute(f).name().length());
    }
    columnWidth += 12;
    columnWidth = Math.max(columnWidth, 13);

    StringBuilder table = new StringBuilder();
    table.append(PMMLUtils.pad("Mining schema", " ", columnWidth, false));
    table.append("\t    Incoming fields\n");
    table.append(PMMLUtils.pad("-------------", "-", columnWidth, false));
    table.append("\t    ----------------\n");

    for (int f = 0; f < numFields; f++) {
      Attribute schemaAtt = miningSchemaI.attribute(f);
      String left = (schemaAtt.isNumeric() ? "(numeric)" : "(nominal)") + " " + schemaAtt.name();
      table.append(PMMLUtils.pad(left, " ", columnWidth, false));
      table.append("\t--> ");

      // Incoming fields are shown with a 1-based position
      int incomingIndex = m_fieldsMap[f];
      Attribute incomingAtt = incomingI.attribute(incomingIndex);
      table.append(incomingIndex + 1);
      table.append(incomingAtt.isNumeric() ? " (numeric)" : " (nominal)");
      table.append(" " + incomingAtt.name());
      table.append("\n");
    }

    m_fieldsMappingText = table.toString();
  }
  /**
   * Returns a textual description of the fitted model.
   *
   * @return "No model built yet." when no attribute has been selected; otherwise the regression
   *     equation in the form {@code slope * attribute + intercept} (or {@code - |intercept|} when
   *     the intercept is not positive), followed by a newline
   */
  @Override
  public String toString() {
    if (m_attribute == null) {
      return "No model built yet.";
    }

    // m_attribute is known to be non-null from here on; the former "Predicting constant" branch
    // repeated the null check above and could never execute, so it has been removed.
    StringBuilder text = new StringBuilder();
    text.append("Linear regression on " + m_attribute.name() + "\n\n");
    text.append(Utils.doubleToString(m_slope, 2) + " * " + m_attribute.name());
    if (m_intercept > 0) {
      text.append(" + " + Utils.doubleToString(m_intercept, 2));
    } else {
      // Print the magnitude after an explicit minus sign
      text.append(" - " + Utils.doubleToString((-m_intercept), 2));
    }
    text.append("\n");
    return text.toString();
  }
Beispiel #9
0
  /**
   * Updates this node with a training instance: first the class distribution (via the
   * superclass), then the per-attribute conditional statistics, which are created lazily on
   * first use.
   *
   * @param inst the instance to update the node with
   * @throws Exception if something goes wrong
   */
  @Override
  public void updateNode(Instance inst) throws Exception {
    super.updateDistribution(inst);

    for (int pos = 0; pos < inst.numAttributes(); pos++) {
      Attribute current = inst.attribute(pos);
      if (pos == inst.classIndex()) {
        // The class attribute has no conditional statistics of its own
        continue;
      }

      String key = current.name();
      ConditionalSufficientStats estimator = m_nodeStats.get(key);
      if (estimator == null) {
        // First time this attribute is seen: Gaussian for numeric, nominal otherwise
        estimator =
            current.isNumeric()
                ? new GaussianConditionalSufficientStats()
                : new NominalConditionalSufficientStats();
        m_nodeStats.put(key, estimator);
      }

      String classLabel = inst.classAttribute().value((int) inst.classValue());
      estimator.update(inst.value(current), classLabel, inst.weight());
    }
  }
Beispiel #10
0
  /**
   * Matches, by attribute name, the header the model was trained on against the header of the
   * incoming data to be scored. For each model attribute m_attributeMap receives the index of the
   * matching incoming attribute, or -1 when it is missing or has a different type; such problems
   * are additionally described in m_missingMismatch.
   *
   * @param modelHeader the header of the data used to train the model
   * @param incomingHeader the header of the incoming data
   * @throws DistributedWekaException if more than 50% of the attributes expected by the model are
   *     missing or have a type mismatch with the incoming data
   */
  protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader)
      throws DistributedWekaException {
    int numModelAtts = modelHeader.numAttributes();
    m_attributeMap = new int[numModelAtts];

    int unusable = 0;
    for (int pos = 0; pos < numModelAtts; pos++) {
      Attribute expected = modelHeader.attribute(pos);
      Attribute actual = incomingHeader.attribute(expected.name());

      // Work out what (if anything) is wrong with this attribute
      String problem = null;
      if (actual == null) {
        problem = "missing from incoming data";
      } else if (expected.type() != actual.type()) {
        problem =
            "type mismatch - model: "
                + Attribute.typeToString(expected)
                + " != incoming: "
                + Attribute.typeToString(actual);
      }

      if (problem == null) {
        m_attributeMap[pos] = actual.index();
      } else {
        m_attributeMap[pos] = -1;
        m_missingMismatch.put(expected.name(), problem);
        unusable++;
      }
    }

    // The class attribute (if set) is not counted among the attributes the model expects
    int predictors = numModelAtts - (modelHeader.classIndex() >= 0 ? 1 : 0);
    if (unusable > predictors / 2) {
      throw new DistributedWekaException(
          "More than 50% of the attributes that the model "
              + "is expecting to see are either missing or have a type mismatch in the "
              + "incoming data.");
    }
  }
Beispiel #11
0
  /**
   * Determines the output format from the input format: attributes inside the configured range
   * are copied under a new name obtained by applying the find/replace expressions to the old
   * name, all other attributes are copied as they are.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    int numAtts = inputFormat.numAttributes();
    m_AttributeIndices.setUpper(numAtts - 1);

    // Build the new header attribute by attribute
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute source = inputFormat.attribute(pos);

      if (!m_AttributeIndices.isInRange(pos)) {
        header.add((Attribute) source.copy());
        continue;
      }

      // Either every match or only the first match in the name is replaced
      String oldName = source.name();
      String newName =
          m_ReplaceAll
              ? oldName.replaceAll(m_Find, m_Replace)
              : oldName.replaceFirst(m_Find, m_Replace);
      header.add(source.copy(newName));
    }

    Instances output = new Instances(inputFormat.relationName(), header, 0);
    output.setClassIndex(inputFormat.classIndex());
    return output;
  }
Beispiel #12
0
    /**
     * Orders two instance holders by their value for m_attribute. Missing values always sort to
     * the end, regardless of sort direction; otherwise numeric comparison is used for
     * non-string attributes and string comparison for string attributes, negated when sorting
     * in descending order.
     *
     * @throws IllegalArgumentException if the attribute is relation-valued
     */
    @Override
    public int compare(InstanceHolder o1, InstanceHolder o2) {
      boolean firstMissing = o1.m_instance.isMissing(m_attribute);
      boolean secondMissing = o2.m_instance.isMissing(m_attribute);

      // Missing values: two missing values are equal, a single one goes last. This is decided
      // before the direction is applied so that it holds for ascending and descending order.
      if (firstMissing || secondMissing) {
        if (firstMissing && secondMissing) {
          return 0;
        }
        return firstMissing ? 1 : -1;
      }

      int order;
      if (m_attribute.isString()) {
        // String values are looked up by attribute name in the holders
        String left = o1.m_stringVals.get(m_attribute.name());
        String right = o2.m_stringVals.get(m_attribute.name());

        // TODO case insensitive?
        order = left.compareTo(right);
      } else if (m_attribute.isRelationValued()) {
        throw new IllegalArgumentException(
            "Can't sort according to " + "relation-valued attribute values!");
      } else {
        double left = o1.m_instance.value(m_attribute);
        double right = o2.m_instance.value(m_attribute);
        order = Double.compare(left, right);
      }

      return m_descending ? -order : order;
    }
Beispiel #13
0
  /**
   * Determines the output format from the input format: the labels of every nominal attribute
   * inside the configured range are sorted with m_Comparator. For each attribute, m_NewOrder
   * records the new position of every old label (an empty array for untouched attributes).
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    int numAtts = inputFormat.numAttributes();
    m_AttributeIndices.setUpper(numAtts - 1);

    FastVector header = new FastVector();
    m_NewOrder = new int[numAtts][];
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = inputFormat.attribute(pos);

      if (current.isNominal() && m_AttributeIndices.isInRange(pos)) {
        int numLabels = current.numValues();

        // Sort a copy of the labels
        Vector<String> ordered = new Vector<String>();
        for (int n = 0; n < numLabels; n++) {
          ordered.add(current.value(n));
        }
        Collections.sort(ordered, m_Comparator);

        // Old label index -> position in the sorted list; the sorted labels become the values
        int[] mapping = new int[numLabels];
        FastVector labels = new FastVector();
        for (int n = 0; n < numLabels; n++) {
          mapping[n] = ordered.indexOf(current.value(n));
          labels.addElement(ordered.get(n));
        }
        m_NewOrder[pos] = mapping;

        Attribute resorted = new Attribute(current.name(), labels);
        resorted.setWeight(current.weight());
        header.addElement(resorted);
      } else {
        // Not a selected nominal attribute: plain copy, nothing to remap
        m_NewOrder[pos] = new int[0];
        header.addElement(current.copy());
      }
    }

    Instances output = new Instances(inputFormat.relationName(), header, 0);
    output.setClassIndex(inputFormat.classIndex());
    return output;
  }
Beispiel #14
0
  /**
   * Describes the classifier: for every class its prior, and for every attribute either the
   * per-value counts (nominal) or the mean and standard deviation (otherwise).
   *
   * @return a description of the classifier as a string, or a fixed message when no model has
   *     been built or the description cannot be produced
   */
  @Override
  public String toString() {
    if (m_Instances == null) {
      return "Naive Bayes (simple): No model built yet.";
    }

    try {
      StringBuilder out = new StringBuilder("Naive Bayes (simple)");

      for (int cls = 0; cls < m_Instances.numClasses(); cls++) {
        out.append(
            "\n\nClass "
                + m_Instances.classAttribute().value(cls)
                + ": P(C) = "
                + Utils.doubleToString(m_Priors[cls], 10, 8)
                + "\n\n");

        // slot counts the enumerated attributes and indexes the model arrays
        int slot = 0;
        Enumeration<Attribute> attributes = m_Instances.enumerateAttributes();
        for (; attributes.hasMoreElements(); slot++) {
          Attribute current = attributes.nextElement();
          out.append("Attribute " + current.name() + "\n");

          if (!current.isNominal()) {
            out.append("Mean: " + Utils.doubleToString(m_Means[cls][slot], 10, 8) + "\t");
            out.append("Standard Deviation: " + Utils.doubleToString(m_Devs[cls][slot], 10, 8));
          } else {
            // One line with the labels, one line with the matching counts
            int numLabels = current.numValues();
            for (int v = 0; v < numLabels; v++) {
              out.append(current.value(v) + "\t");
            }
            out.append("\n");
            for (int v = 0; v < numLabels; v++) {
              out.append(Utils.doubleToString(m_Counts[cls][slot][v], 10, 8) + "\t");
            }
          }
          out.append("\n\n");
        }
      }

      return out.toString();
    } catch (Exception e) {
      return "Can't print Naive Bayes classifier!";
    }
  }
Beispiel #15
0
  /**
   * Derives the output header from the input header and registers it through
   * setOutputFormat(Instances). Nominal attributes inside the selected range are replaced by
   * attributes of the same name constructed with a null value list; all others are reused.
   */
  private void setOutputFormat() {
    Instances input = getInputFormat();
    int numAtts = input.numAttributes();

    FastVector header = new FastVector(numAtts);
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = input.attribute(pos);

      if (current.isNominal() && m_AttIndex.isInRange(pos)) {
        header.addElement(new Attribute(current.name(), (FastVector) null));
      } else {
        header.addElement(current);
      }
    }

    // Empty dataset with the new header, same relation name and class index
    Instances output = new Instances(input.relationName(), header, 0);
    output.setClassIndex(input.classIndex());

    setOutputFormat(output);
  }
Beispiel #16
0
  /**
   * Sets the output format: the attribute at the selected index is recreated with the pattern of
   * the configured date format, every other attribute is copied. The recreated attribute is also
   * remembered in m_OutputAttribute.
   */
  private void setOutputFormat() {
    Instances input = getInputFormat();
    int numAtts = input.numAttributes();

    FastVector header = new FastVector(numAtts);
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = input.attribute(pos);
      if (pos != m_AttIndex.getIndex()) {
        header.addElement(current.copy());
      } else {
        header.addElement(new Attribute(current.name(), getDateFormat().toPattern()));
      }
    }

    // Empty dataset with the new header, same relation name and class index
    Instances output = new Instances(input.relationName(), header, 0);
    output.setClassIndex(input.classIndex());
    m_OutputAttribute = output.attribute(m_AttIndex.getIndex());
    setOutputFormat(output);
  }
Beispiel #17
0
  /**
   * Sets the output format. Swaps the two selected values of the selected nominal attribute in
   * the header and calls setOutputFormat(Instances) with the result.
   */
  private void setOutputFormat() {
    Instances input = getInputFormat();
    int numAtts = input.numAttributes();

    ArrayList<Attribute> header = new ArrayList<Attribute>(numAtts);
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = input.attribute(pos);

      if (pos != m_AttIndex.getIndex()) {
        header.add((Attribute) current.copy());
        continue;
      }

      // Same labels as before, except that the two selected positions trade places
      int numLabels = current.numValues();
      ArrayList<String> labels = new ArrayList<String>(numLabels);
      for (int v = 0; v < numLabels; v++) {
        int source = v;
        if (v == m_FirstIndex.getIndex()) {
          source = m_SecondIndex.getIndex();
        } else if (v == m_SecondIndex.getIndex()) {
          source = m_FirstIndex.getIndex();
        }
        labels.add(current.value(source));
      }

      Attribute swapped = new Attribute(current.name(), labels);
      swapped.setWeight(current.weight());
      header.add(swapped);
    }

    // Empty dataset with the new header, same relation name and class index
    Instances output = new Instances(input.relationName(), header, 0);
    output.setClassIndex(input.classIndex());
    setOutputFormat(output);
  }
Beispiel #18
0
  /**
   * Renders the subtree rooted at this node, indented for the given depth.
   *
   * @param level the level at which the tree is to be printed
   * @return the tree as string at the given level
   */
  private String toString(int level) {
    // Leaf: just the class label (or "null" when the class value is missing)
    if (m_Attribute == null) {
      if (Utils.isMissingValue(m_ClassValue)) {
        return ": null";
      }
      return ": " + m_ClassAttribute.value((int) m_ClassValue);
    }

    // Inner node: one indented "attribute = value" line per branch, followed by its subtree
    StringBuilder out = new StringBuilder();
    int numBranches = m_Attribute.numValues();
    for (int branch = 0; branch < numBranches; branch++) {
      out.append("\n");
      for (int depth = 0; depth < level; depth++) {
        out.append("|  ");
      }
      out.append(m_Attribute.name() + " = " + m_Attribute.value(branch));
      out.append(m_Successors[branch].toString(level + 1));
    }
    return out.toString();
  }
  /**
   * Returns a verbose, bracketed description of this rule: the body elements, the head as
   * {@code [class = value]}, the confidence, the objective values (when present) and a list of
   * quality measures, each in parentheses.
   *
   * @return the formatted rule
   */
  public String toString1() {

    StringBuffer str = new StringBuffer();
    str.append("< ");
    for (int i = 0; i < corpo.length; i++) {
      str.append(corpo[i] + ", ");
    }

    String nome = classe.value(cabeca);

    str.append("[" + classe.name() + " = " + nome + "] " + "(" + confianca + ")");
    str.append(">");

    if (valoresObjetivos != null) {
      str.append("\t[");
      // Write the separator before every element but the first. The previous implementation
      // appended ", " after each element and then always deleted two characters, which removed
      // the "\t[" opener itself when the array was empty.
      for (int i = 0; i < valoresObjetivos.length; i++) {
        if (i > 0) {
          str.append(", ");
        }
        str.append(valoresObjetivos[i]);
      }
      str.append(']');
    }

    str.append("\t(" + getACC() + ")");
    str.append("\t(" + getAcerto() + ")");
    str.append("\t(" + getConfidence() + ")");
    str.append("\t(" + getCov() + ")");
    str.append("\t(" + getERR() + ")");
    str.append("\t(" + getErro() + ")");
    str.append("\t(" + getNegRel() + ")");
    str.append("\t\t(" + getSens() + ")");
    str.append("\t(" + getSpec() + ")");

    return str.toString();
  }
  /**
   * Converts one input line of the form {@code label firstName lastName} (whitespace separated)
   * into an instance of the given dataset. Each of the first nine characters of the lower-cased
   * first and last name yields a positional feature; every feature attribute listed in
   * {@code features} is set to "1" when its name is among those features and to "0" otherwise.
   *
   * @param instances the dataset the new instance belongs to
   * @param inputLine the line to convert
   * @return the instance, with its class value set to the label
   */
  private static Instance makeInstance(Instances instances, String inputLine) {
    // Tokens: label, first name, last name
    String[] tokens = inputLine.trim().split("\\s+");
    String label = tokens[0];
    String firstName = tokens[1].toLowerCase();
    String lastName = tokens[2].toLowerCase();

    Instance instance = new Instance(features.length + 1);
    instance.setDataset(instances);

    // Positional character features for up to nine leading characters of each name
    Set<String> active = new HashSet<String>();
    int firstLimit = Math.min(9, firstName.length());
    for (int pos = 0; pos < firstLimit; pos++) {
      active.add("firstName" + pos + "=" + firstName.charAt(pos));
    }
    int lastLimit = Math.min(9, lastName.length());
    for (int pos = 0; pos < lastLimit; pos++) {
      active.add("lastName" + pos + "=" + lastName.charAt(pos));
    }

    // Binary encoding: "1" if the attribute's name is an active feature, "0" otherwise
    for (int featureId = 0; featureId < features.length; featureId++) {
      Attribute att = instances.attribute(features[featureId]);
      instance.setValue(att, active.contains(att.name()) ? "1" : "0");
    }

    instance.setClassValue(label);

    return instance;
  }
  /**
   * Returns a tab-separated description of this rule: the body elements, the head as
   * {@code [class = value]}, the objective values (when present) and a series of quality
   * measures. Numbers are written with a decimal comma instead of a decimal point.
   *
   * @return the formatted rule
   */
  public String toString() {
    StringBuilder out = new StringBuilder();

    for (int i = 0; i < corpo.length; i++) {
      out.append(corpo[i] + "\t");
    }

    String headValue = classe.value(cabeca);
    out.append("[" + classe.name() + " = " + headValue + "]");

    if (valoresObjetivos != null) {
      out.append("\t");
      for (int i = 0; i < valoresObjetivos.length; i++) {
        out.append(Double.valueOf(valoresObjetivos[i]).toString().replace('.', ',') + "\t");
      }
    }

    // Quality measures, each preceded by a tab
    out.append("\t" + Double.valueOf(getACC()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getERR()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getNegRel()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getConfidence()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getSup()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getCov()).toString().replace('.', ','));
    out.append("\t" + Double.valueOf(getNovelty()).toString().replace('.', ','));

    return out.toString();
  }
Beispiel #22
0
  /**
   * Sets the output format if the class is numeric. Without value indices the output format is
   * set to null; when no nominal attribute requires a transformation the input format is passed
   * through; otherwise every nominal non-class attribute with n values is replaced by n - 1 new
   * attributes, each named after the attribute and a trailing subset of its ordered values.
   */
  private void setOutputFormatNumeric() {

    if (m_Indices == null) {
      setOutputFormat(null);
      return;
    }

    Instances input = getInputFormat();
    int numAtts = input.numAttributes();

    // Is there any nominal attribute that has to be transformed?
    m_needToTransform = false;
    for (int pos = 0; pos < numAtts && !m_needToTransform; pos++) {
      Attribute candidate = input.attribute(pos);
      if (candidate.isNominal()
          && (candidate.numValues() > 2 || m_Numeric || m_TransformAll)) {
        m_needToTransform = true;
      }
    }

    if (!m_needToTransform) {
      setOutputFormat(getInputFormat());
      return;
    }

    int classIndex = input.classIndex();
    int shiftedClassIndex = classIndex;
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    for (int pos = 0; pos < numAtts; pos++) {
      Attribute current = input.attribute(pos);

      if (!current.isNominal() || pos == classIndex) {
        header.add((Attribute) current.copy());
        continue;
      }

      int numValues = current.numValues();
      // One attribute becomes numValues - 1 attributes, so a class located behind it moves by
      // numValues - 2 positions.
      if (pos < classIndex) {
        shiftedClassIndex += numValues - 2;
      }

      // New attribute k is named "<name>=<v_k>,<v_k+1>,...", values taken in m_Indices order
      for (int start = 1; start < numValues; start++) {
        StringBuilder newName = new StringBuilder(current.name() + "=");
        for (int v = start; v < numValues; v++) {
          if (v > start) {
            newName.append(',');
          }
          newName.append(current.value(m_Indices[pos][v]));
        }

        if (m_Numeric) {
          header.add(new Attribute(newName.toString()));
        } else {
          // Binary nominal indicator with the labels "f" and "t"
          ArrayList<String> labels = new ArrayList<String>(2);
          labels.add("f");
          labels.add("t");
          header.add(new Attribute(newName.toString(), labels));
        }
      }
    }

    Instances output = new Instances(input.relationName(), header, 0);
    output.setClassIndex(shiftedClassIndex);
    setOutputFormat(output);
  }
Beispiel #23
0
  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @exception Exception if the classifier has not been generated successfully
   */
  @Override
  public void buildClassifier(Instances instances) throws Exception {

    int attIndex = 0;
    double sum;

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_Instances = new Instances(instances, 0);

    // Reserve space
    m_Counts = new double[instances.numClasses()][instances.numAttributes() - 1][0];
    m_Means = new double[instances.numClasses()][instances.numAttributes() - 1];
    m_Devs = new double[instances.numClasses()][instances.numAttributes() - 1];
    m_Priors = new double[instances.numClasses()];
    Enumeration<Attribute> enu = instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
      Attribute attribute = enu.nextElement();
      if (attribute.isNominal()) {
        for (int j = 0; j < instances.numClasses(); j++) {
          m_Counts[j][attIndex] = new double[attribute.numValues()];
        }
      } else {
        for (int j = 0; j < instances.numClasses(); j++) {
          m_Counts[j][attIndex] = new double[1];
        }
      }
      attIndex++;
    }

    // Compute counts and sums
    Enumeration<Instance> enumInsts = instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
      Instance instance = enumInsts.nextElement();
      if (!instance.classIsMissing()) {
        Enumeration<Attribute> enumAtts = instances.enumerateAttributes();
        attIndex = 0;
        while (enumAtts.hasMoreElements()) {
          Attribute attribute = enumAtts.nextElement();
          if (!instance.isMissing(attribute)) {
            if (attribute.isNominal()) {
              m_Counts[(int) instance.classValue()][attIndex][(int) instance.value(attribute)]++;
            } else {
              m_Means[(int) instance.classValue()][attIndex] += instance.value(attribute);
              m_Counts[(int) instance.classValue()][attIndex][0]++;
            }
          }
          attIndex++;
        }
        m_Priors[(int) instance.classValue()]++;
      }
    }

    // Compute means
    Enumeration<Attribute> enumAtts = instances.enumerateAttributes();
    attIndex = 0;
    while (enumAtts.hasMoreElements()) {
      Attribute attribute = enumAtts.nextElement();
      if (attribute.isNumeric()) {
        for (int j = 0; j < instances.numClasses(); j++) {
          if (m_Counts[j][attIndex][0] < 2) {
            throw new Exception(
                "attribute "
                    + attribute.name()
                    + ": less than two values for class "
                    + instances.classAttribute().value(j));
          }
          m_Means[j][attIndex] /= m_Counts[j][attIndex][0];
        }
      }
      attIndex++;
    }

    // Compute standard deviations
    enumInsts = instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
      Instance instance = enumInsts.nextElement();
      if (!instance.classIsMissing()) {
        enumAtts = instances.enumerateAttributes();
        attIndex = 0;
        while (enumAtts.hasMoreElements()) {
          Attribute attribute = enumAtts.nextElement();
          if (!instance.isMissing(attribute)) {
            if (attribute.isNumeric()) {
              m_Devs[(int) instance.classValue()][attIndex] +=
                  (m_Means[(int) instance.classValue()][attIndex] - instance.value(attribute))
                      * (m_Means[(int) instance.classValue()][attIndex]
                          - instance.value(attribute));
            }
          }
          attIndex++;
        }
      }
    }
    enumAtts = instances.enumerateAttributes();
    attIndex = 0;
    while (enumAtts.hasMoreElements()) {
      Attribute attribute = enumAtts.nextElement();
      if (attribute.isNumeric()) {
        for (int j = 0; j < instances.numClasses(); j++) {
          if (m_Devs[j][attIndex] <= 0) {
            throw new Exception(
                "attribute "
                    + attribute.name()
                    + ": standard deviation is 0 for class "
                    + instances.classAttribute().value(j));
          } else {
            m_Devs[j][attIndex] /= m_Counts[j][attIndex][0] - 1;
            m_Devs[j][attIndex] = Math.sqrt(m_Devs[j][attIndex]);
          }
        }
      }
      attIndex++;
    }

    // Normalize counts
    enumAtts = instances.enumerateAttributes();
    attIndex = 0;
    while (enumAtts.hasMoreElements()) {
      Attribute attribute = enumAtts.nextElement();
      if (attribute.isNominal()) {
        for (int j = 0; j < instances.numClasses(); j++) {
          sum = Utils.sum(m_Counts[j][attIndex]);
          for (int i = 0; i < attribute.numValues(); i++) {
            m_Counts[j][attIndex][i] =
                (m_Counts[j][attIndex][i] + 1) / (sum + attribute.numValues());
          }
        }
      }
      attIndex++;
    }

    // Normalize priors
    sum = Utils.sum(m_Priors);
    for (int j = 0; j < instances.numClasses(); j++) {
      m_Priors[j] = (m_Priors[j] + 1) / (sum + instances.numClasses());
    }
  }
  /**
   * Derives the output header from the input format and hands it to
   * {@code setOutputFormat(Instances)}. Every attribute is copied as it is, except the one at
   * m_AttIndex: there the labels at m_FirstIndex and m_SecondIndex are replaced by one merged
   * label of the form {@code first_second}, which takes the position of the first label, while
   * the second label is left out.
   */
  private void setOutputFormat() {

    Instances inputFormat = getInputFormat();
    int attCount = inputFormat.numAttributes();
    FastVector headerAtts = new FastVector(attCount);

    for (int pos = 0; pos < attCount; pos++) {
      Attribute source = inputFormat.attribute(pos);

      // Every attribute but the selected one is carried over untouched
      if (pos != m_AttIndex.getIndex()) {
        headerAtts.addElement(source.copy());
        continue;
      }

      int firstPos = m_FirstIndex.getIndex();
      int secondPos = m_SecondIndex.getIndex();
      String firstLabel = source.value(firstPos);
      String secondLabel = source.value(secondPos);

      // A label that ends with a prime loses its first and last character, and the merged
      // label as a whole is wrapped in primes instead
      boolean firstPrimed = firstLabel.endsWith("'");
      boolean secondPrimed = secondLabel.endsWith("'");
      boolean wrapInPrimes = firstPrimed || secondPrimed;

      StringBuilder merged = new StringBuilder();
      if (wrapInPrimes) {
        merged.append("'");
      }
      merged.append(
          firstPrimed ? firstLabel.substring(1, firstLabel.length() - 1) : firstLabel);
      merged.append('_');
      merged.append(
          secondPrimed ? secondLabel.substring(1, secondLabel.length() - 1) : secondLabel);
      if (wrapInPrimes) {
        merged.append("'");
      }

      // The merged label goes where the first label was; the second label is skipped
      int labelCount = source.numValues();
      FastVector labels = new FastVector(labelCount - 1);
      for (int v = 0; v < labelCount; v++) {
        if (v == firstPos) {
          labels.addElement(merged.toString());
        } else if (v != secondPos) {
          labels.addElement(source.value(v));
        }
      }

      Attribute mergedAtt = new Attribute(source.name(), labels);
      mergedAtt.setWeight(source.weight());
      headerAtts.addElement(mergedAtt);
    }

    // Empty data set with the new header; relation name and class index follow the input
    Instances header = new Instances(inputFormat.relationName(), headerAtts, 0);
    header.setClassIndex(inputFormat.classIndex());
    setOutputFormat(header);
  }
Beispiel #25
0
    /**
     * Prepares this clause for evaluation against the given header: applies environment variable
     * substitution to the left-hand attribute name and the right-hand operand, locates the
     * attribute(s) involved and pre-computes the numeric operand or the regex pattern where the
     * operator needs one.
     *
     * <p>An attribute may be given as the special label {@code /first} or {@code /last} (case
     * insensitive), as a 1-based index, or by name. A number that is not a valid index for
     * {@code structure} is looked up as an attribute name rather than failing with an
     * index-out-of-bounds error.
     *
     * @param structure the header of the data the clause will be applied to
     * @param env the environment variables, handed on to the superclass
     * @throws IllegalArgumentException if an attribute cannot be found in {@code structure}, if
     *     the operand is not a label of the nominal left-hand attribute, if the operand cannot be
     *     parsed as a number, or if the regular expression does not compile
     */
    @Override
    public void init(Instances structure, Environment env) {
      super.init(structure, env);

      m_resolvedLhsName = m_lhsAttributeName;
      m_resolvedRhsOperand = m_rhsOperand;
      try {
        m_resolvedLhsName = m_env.substitute(m_resolvedLhsName);
        m_resolvedRhsOperand = m_env.substitute(m_resolvedRhsOperand);
      } catch (Exception ignored) {
        // Deliberately best effort: when substitution fails, carry on with whatever text has
        // been resolved so far.
      }

      // On the left-hand side the special labels only have to be a prefix of the name
      Attribute lhs = resolveAttributeSpec(structure, m_resolvedLhsName, true);
      if (lhs == null) {
        throw new IllegalArgumentException(
            "Data does not contain attribute " + "\"" + m_resolvedLhsName + "\"");
      }
      m_lhsAttIndex = lhs.index();

      if (m_rhsIsAttribute) {
        // On the right-hand side the special labels have to match the whole operand
        Attribute rhs = resolveAttributeSpec(structure, m_resolvedRhsOperand, false);
        if (rhs == null) {
          throw new IllegalArgumentException(
              "Data does not contain attribute " + "\"" + m_resolvedRhsOperand + "\"");
        }
        m_rhsAttIndex = rhs.index();
      } else if (m_operator != ExpressionType.CONTAINS
          && m_operator != ExpressionType.STARTSWITH
          && m_operator != ExpressionType.ENDSWITH
          && m_operator != ExpressionType.REGEX
          && m_operator != ExpressionType.ISMISSING) {
        // make sure the operand is parseable as a number (unless missing has
        // been specified - equals only)
        if (lhs.isNominal()) {
          // For a nominal attribute the operand is a label; compare by its index
          m_numericOperand = lhs.indexOfValue(m_resolvedRhsOperand);

          if (m_numericOperand < 0) {
            throw new IllegalArgumentException(
                "Unknown nominal value '"
                    + m_resolvedRhsOperand
                    + "' for attribute '"
                    + lhs.name()
                    + "'");
          }
        } else {
          try {
            m_numericOperand = Double.parseDouble(m_resolvedRhsOperand);
          } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                "\"" + m_resolvedRhsOperand + "\" is not parseable as a number!");
          }
        }
      }

      if (m_operator == ExpressionType.REGEX) {
        m_regexPattern = Pattern.compile(m_resolvedRhsOperand);
      }
    }

    /**
     * Looks up an attribute that is given as a special label ({@code /first}, {@code /last}), as a
     * 1-based index or as a name, trying the three forms in that order.
     *
     * @param structure the header to search
     * @param spec the attribute specification
     * @param prefixLabels if true a special label only has to be a prefix of {@code spec},
     *     otherwise it has to match {@code spec} completely
     * @return the matching attribute, or null if there is none
     */
    private Attribute resolveAttributeSpec(
        Instances structure, String spec, boolean prefixLabels) {
      Attribute found = null;
      if (matchesSpecialLabel(spec, "/first", prefixLabels)) {
        found = structure.attribute(0);
      } else if (matchesSpecialLabel(spec, "/last", prefixLabels)) {
        found = structure.attribute(structure.numAttributes() - 1);
      } else {
        try {
          int indx = Integer.parseInt(spec) - 1;
          // Only use the number as an index when it is in range; otherwise it may still be
          // the name of an attribute
          if (indx >= 0 && indx < structure.numAttributes()) {
            found = structure.attribute(indx);
          }
        } catch (NumberFormatException ignored) {
          // Not a number, so it can only be an attribute name
        }
      }
      return found != null ? found : structure.attribute(spec);
    }

    /**
     * Compares an attribute specification with a special label, ignoring case without going
     * through the default locale.
     *
     * @param spec the attribute specification
     * @param label the special label to compare with
     * @param prefixOnly if true {@code spec} only has to start with {@code label}
     * @return true if {@code spec} matches {@code label}
     */
    private boolean matchesSpecialLabel(String spec, String label, boolean prefixOnly) {
      return prefixOnly
          ? spec.regionMatches(true, 0, label, 0, label.length())
          : spec.equalsIgnoreCase(label);
    }
  /**
   * Walks through every example held in {@code dados} and prints information about it to standard
   * output: the attribute values as an array of doubles, then the name, numeric value and value
   * text of each attribute. As a demonstration it also sets attribute 0 of every example to 1,
   * printing the value before and after the change. Does nothing when {@code dados} is null.
   */
  public void percorrerDados() {

    if (dados == null) {
      return;
    }

    /* Each example in the data is represented in Weka by the Instance class, so dados, of type
     * Instances, is a collection of Instance objects and offers methods to reach every example
     * in the data set.
     */
    for (int idx = 0; idx < dados.numInstances(); idx++) {
      // Fetch the example at position idx. The first and the last example can be fetched too,
      // and examples can be deleted, among other things.
      Instance current = dados.instance(idx);

      /* An Instance is made up of the attributes of the data set. They can be visited one by
       * one, or a specific attribute can be read (get) or written (set).
       */

      // All attribute values can be turned into an array of doubles
      double[] values = current.toDoubleArray();

      System.out.println("Valores para o exemplo " + idx);
      System.out.print("Array de atributos: ");
      for (double v : values) {
        System.out.print(v + " ");
      }
      System.out.println();

      // Visit every attribute and print what is known about it
      for (int a = 0; a < current.numAttributes(); a++) {
        Attribute attribute = current.attribute(a);
        double numeric = current.value(attribute);
        String text = attribute.value((int) numeric);
        System.out.println("Valor do atributo " + attribute.name() + ":" + numeric + " - " + text);
      }
      System.out.println();

      // Change attribute 0 to another of its possible values, showing it before and after
      Attribute firstAttribute = current.attribute(0);
      double before = current.value(firstAttribute);
      System.out.println("Valor antigo, em double: " + before);
      System.out.println("Valor antigo, em nome: " + firstAttribute.value((int) before));

      int novoValor = 1;
      current.setValue(firstAttribute, novoValor);

      double after = current.value(firstAttribute);
      System.out.println("Valor novo, em nome: " + firstAttribute.value((int) after));

      System.out.println();
      System.out.println();
    }
  }
Beispiel #27
0
  /**
   * Builds the mapping between the fields of a mining schema and the attributes of incoming data.
   *
   * <p>Each mining schema field is matched by name to an attribute of {@code dataSet}; the index
   * of that attribute is stored in m_fieldsMap. For nominal and string fields,
   * m_nominalValueMaps additionally records, for every value of the incoming attribute, the
   * index of the same value in the mining schema, or UNKNOWN_NOMINAL_VALUE if the schema does not
   * have it. Differences in the number of values and unknown values are only reported as
   * warnings.
   *
   * <p>If the mining schema has a class attribute and {@code dataSet} has none, the class of
   * {@code dataSet} is set to the attribute with the same name.
   *
   * @param dataSet the incoming instances; their class attribute may be set by this constructor
   * @param miningSchema the mining schema to map the incoming instances to
   * @param log the logger that receives warnings; may be null, in which case warnings are
   *     printed to standard error
   * @throws Exception if a mining schema field has no attribute of the same name in
   *     {@code dataSet}, if the types of a matched pair differ, or if the class attributes of the
   *     two sides do not agree
   */
  public MappingInfo(Instances dataSet, MiningSchema miningSchema, Logger log) throws Exception {
    m_log = log;
    Instances fieldsI = miningSchema.getMiningSchemaAsInstances();

    m_fieldsMap = new int[fieldsI.numAttributes()];
    m_nominalValueMaps = new int[fieldsI.numAttributes()][];

    for (int i = 0; i < fieldsI.numAttributes(); i++) {
      Attribute miningSchemaAtt = fieldsI.attribute(i);
      String schemaAttName = miningSchemaAtt.name();
      boolean found = false;
      for (int j = 0; j < dataSet.numAttributes(); j++) {
        Attribute incomingAtt = dataSet.attribute(j);
        if (!incomingAtt.name().equals(schemaAttName)) {
          continue;
        }

        // check type match
        if (miningSchemaAtt.type() != incomingAtt.type()) {
          throw new Exception(
              "[MappingInfo] type mismatch for field "
                  + schemaAttName
                  + ". Mining schema type "
                  + miningSchemaAtt.toString()
                  + ". Incoming type "
                  + incomingAtt.toString()
                  + ".");
        }

        // check nominal values (number, names...)
        if (miningSchemaAtt.numValues() != incomingAtt.numValues()) {
          logMappingWarning(
              "[MappingInfo] WARNING: incoming nominal attribute "
                  + incomingAtt.name()
                  + " does not have the same "
                  + "number of values as the corresponding mining "
                  + "schema attribute.");
        }
        if (miningSchemaAtt.isNominal() || miningSchemaAtt.isString()) {
          // Values are matched by name, not by position: valuesMap translates the index of an
          // incoming value into the index of the same value in the mining schema
          int[] valuesMap = new int[incomingAtt.numValues()];
          for (int k = 0; k < incomingAtt.numValues(); k++) {
            String incomingNomVal = incomingAtt.value(k);
            int indexInSchema = miningSchemaAtt.indexOfValue(incomingNomVal);
            if (indexInSchema < 0) {
              logMappingWarning(
                  "[MappingInfo] WARNING: incoming nominal attribute "
                      + incomingAtt.name()
                      + " has value "
                      + incomingNomVal
                      + " that doesn't occur in the mining schema.");
              valuesMap[k] = UNKNOWN_NOMINAL_VALUE;
            } else {
              valuesMap[k] = indexInSchema;
            }
          }
          m_nominalValueMaps[i] = valuesMap;
        }

        found = true;
        m_fieldsMap[i] = j;
      }
      if (!found) {
        throw new Exception(
            "[MappingInfo] Unable to find a match for mining schema "
                + "attribute "
                + schemaAttName
                + " in the "
                + "incoming instances!");
      }
    }

    // check class attribute (if set)
    if (fieldsI.classIndex() >= 0) {
      if (dataSet.classIndex() < 0) {
        // first see if we can find a matching class
        String className = fieldsI.classAttribute().name();
        Attribute classMatch = dataSet.attribute(className);
        if (classMatch == null) {
          throw new Exception(
              "[MappingInfo] Can't find match for target field "
                  + className
                  + " in incoming instances!");
        }
        dataSet.setClass(classMatch);
      } else if (!fieldsI.classAttribute().name().equals(dataSet.classAttribute().name())) {
        throw new Exception(
            "[MappingInfo] class attribute in mining schema does not match "
                + "class attribute in incoming instances!");
      }
    }

    // Set up the textual description of the mapping
    fieldsMappingString(fieldsI, dataSet);
  }

  /**
   * Reports a warning through the logger if one was supplied, otherwise on standard error.
   *
   * @param warningString the text of the warning
   */
  private void logMappingWarning(String warningString) {
    if (m_log != null) {
      m_log.logMessage(warningString);
    } else {
      System.err.println(warningString);
    }
  }