Example #1
  /**
   * Set the output format. Takes the current input format (m_InputFormat) and calls
   * setOutputFormat(Instances) appropriately.
   */
  private void setOutputFormat() {

    Instances newData;
    FastVector newAtts, newVals;

    // Compute new attributes

    newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (!m_AttIndices.isInRange(j) || !att.isString()) {

        // We don't have to copy the attribute because the
        // attribute index remains unchanged.
        newAtts.addElement(att);
      } else {

        // Compute list of attribute values
        newVals = new FastVector(att.numValues());
        for (int i = 0; i < att.numValues(); i++) {
          newVals.addElement(att.value(i));
        }
        newAtts.addElement(new Attribute(att.name(), newVals));
      }
    }

    // Construct new header
    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());
    setOutputFormat(newData);
  }
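
This setOutputFormat() is the header-building step of a string-to-nominal style filter. Below is a minimal, self-contained sketch of how such a filter is typically driven from user code; it assumes the legacy FastVector-based Weka API and the stock weka.filters.unsupervised.attribute.StringToNominal, not necessarily the exact class the method above belongs to.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToNominal;

public class StringToNominalDemo {
  public static void main(String[] args) throws Exception {
    // Tiny dataset with a single string attribute.
    FastVector atts = new FastVector(1);
    atts.addElement(new Attribute("colour", (FastVector) null)); // string attribute
    Instances data = new Instances("demo", atts, 0);

    for (String v : new String[] {"red", "green", "red"}) {
      double[] row = {data.attribute(0).addStringValue(v)};
      data.add(new Instance(1.0, row));
    }

    // Batch filtering: the filter rebuilds its output header internally,
    // much like the setOutputFormat() shown above.
    StringToNominal filter = new StringToNominal();
    filter.setInputFormat(data);
    Instances converted = Filter.useFilter(data, filter);
    System.out.println(converted);
  }
}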
Example #2
  /**
   * Method that finds all large itemsets for the given set of instances.
   *
   * @param index the size of the item sets to be found
   * @exception Exception if an attribute is numeric
   */
  private void findLargeItemSets(int index) throws Exception {

    FastVector kMinusOneSets, kSets = new FastVector();
    Hashtable hashtable;
    int i = 0;
    // Find large itemsets
    // of length 1
    if (index == 1) {
      kSets = ItemSet.singletons(m_instances);
      ItemSet.upDateCounters(kSets, m_instances);
      kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
      if (kSets.size() == 0) return;
      m_Ls.addElement(kSets);
    }
    // of length > 1
    if (index > 1) {
      if (m_Ls.size() > 0) kSets = (FastVector) m_Ls.lastElement();
      m_Ls.removeAllElements();
      i = index - 2;
      kMinusOneSets = kSets;
      kSets = ItemSet.mergeAllItemSets(kMinusOneSets, i, m_instances.numInstances());
      hashtable = ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
      m_hashtables.addElement(hashtable);
      kSets = ItemSet.pruneItemSets(kSets, hashtable);
      ItemSet.upDateCounters(kSets, m_instances);
      kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
      if (kSets.size() == 0) return;
      m_Ls.addElement(kSets);
    }
  }
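
findLargeItemSets() is an internal step of an Apriori-style associator; from user code the mining is normally triggered through buildAssociations(). A minimal sketch, assuming the legacy FastVector API and the stock weka.associations.Apriori with default options:

import weka.associations.Apriori;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class AprioriDemo {
  public static void main(String[] args) throws Exception {
    // Two nominal "basket" attributes.
    FastVector breadVals = new FastVector(2);
    breadVals.addElement("y");
    breadVals.addElement("n");
    FastVector milkVals = new FastVector(2);
    milkVals.addElement("y");
    milkVals.addElement("n");

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("bread", breadVals));
    atts.addElement(new Attribute("milk", milkVals));
    Instances data = new Instances("basket", atts, 0);

    // Value index 0 = "y", index 1 = "n".
    double[][] rows = {{0, 0}, {0, 0}, {0, 1}, {1, 0}};
    for (double[] r : rows) {
      data.add(new Instance(1.0, r));
    }

    // The large-itemset search (the job of findLargeItemSets above) runs inside buildAssociations().
    Apriori apriori = new Apriori();
    apriori.buildAssociations(data);
    System.out.println(apriori);
  }
}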
Example #3
  /**
   * Set the labels for nominal attribute creation.
   *
   * @param labelList a comma separated list of labels
   * @throws IllegalArgumentException if the labelList was invalid
   */
  public void setNominalLabels(String labelList) {

    FastVector labels = new FastVector(10);

    // Split the labelList up into the vector
    int commaLoc;
    while ((commaLoc = labelList.indexOf(',')) >= 0) {
      String label = labelList.substring(0, commaLoc).trim();
      if (!label.equals("")) {
        labels.addElement(label);
      } else {
        throw new IllegalArgumentException(
            "Invalid label list at " + labelList.substring(commaLoc));
      }
      labelList = labelList.substring(commaLoc + 1);
    }
    String label = labelList.trim();
    if (!label.equals("")) {
      labels.addElement(label);
    }

    // If everything is OK, make the type change
    m_Labels = labels;
    if (labels.size() == 0) {
      m_AttributeType = Attribute.NUMERIC;
    } else {
      m_AttributeType = Attribute.NOMINAL;
    }
  }
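
A setNominalLabels() like this one is exposed by weka.filters.unsupervised.attribute.Add, which appends a new numeric or nominal attribute to a dataset. A usage sketch, assuming that filter and its setAttributeName()/setNominalLabels() setters:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;

public class AddNominalAttributeDemo {
  public static void main(String[] args) throws Exception {
    // One-attribute numeric dataset with a single row.
    FastVector atts = new FastVector(1);
    atts.addElement(new Attribute("weight"));
    Instances data = new Instances("demo", atts, 0);
    data.add(new Instance(1.0, new double[] {42.0}));

    Add add = new Add();
    add.setAttributeName("size");
    add.setNominalLabels("small,medium,large"); // parsed by a setNominalLabels() like the one above
    add.setInputFormat(data);
    Instances extended = Filter.useFilter(data, add); // "size" is appended, missing for existing rows
    System.out.println(extended);
  }
}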
  /**
   * Sets up the structure for the plot instances. Sets m_PlotInstances to null if instances are not
   * saved for visualization.
   *
   * @see #getSaveForVisualization()
   */
  protected void determineFormat() {
    FastVector hv;
    Attribute predictedClass;
    Attribute classAt;
    FastVector attVals;
    int i;

    if (!m_SaveForVisualization) {
      m_PlotInstances = null;
      return;
    }

    hv = new FastVector();

    classAt = m_Instances.attribute(m_ClassIndex);
    if (classAt.isNominal()) {
      attVals = new FastVector();
      for (i = 0; i < classAt.numValues(); i++) attVals.addElement(classAt.value(i));
      predictedClass = new Attribute("predicted" + classAt.name(), attVals);
    } else {
      predictedClass = new Attribute("predicted" + classAt.name());
    }

    for (i = 0; i < m_Instances.numAttributes(); i++) {
      if (i == m_Instances.classIndex()) hv.addElement(predictedClass);
      hv.addElement(m_Instances.attribute(i).copy());
    }

    m_PlotInstances =
        new Instances(m_Instances.relationName() + "_predicted", hv, m_Instances.numInstances());
    m_PlotInstances.setClassIndex(m_ClassIndex + 1);
  }
  /**
   * Adds the prediction intervals as additional attributes at the end. Since classifiers can return
   * a varying number of intervals per instance, the dataset is filled with missing values for
   * non-existing intervals.
   */
  protected void addPredictionIntervals() {
    int maxNum;
    int num;
    int i;
    int n;
    FastVector preds;
    FastVector atts;
    Instances data;
    Instance inst;
    Instance newInst;
    double[] values;
    double[][] predInt;

    // determine the maximum number of intervals
    maxNum = 0;
    preds = m_Evaluation.predictions();
    for (i = 0; i < preds.size(); i++) {
      num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
      if (num > maxNum) maxNum = num;
    }

    // create new header
    atts = new FastVector();
    for (i = 0; i < m_PlotInstances.numAttributes(); i++)
      atts.addElement(m_PlotInstances.attribute(i));
    for (i = 0; i < maxNum; i++) {
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width"));
    }
    data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());
    data.setClassIndex(m_PlotInstances.classIndex());

    // update data
    for (i = 0; i < m_PlotInstances.numInstances(); i++) {
      inst = m_PlotInstances.instance(i);
      // copy old values
      values = new double[data.numAttributes()];
      System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
      // add interval data
      predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
      for (n = 0; n < maxNum; n++) {
        if (n < predInt.length) {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0];
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1];
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0];
        } else {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue();
        }
      }
      // create new Instance
      newInst = new DenseInstance(inst.weight(), values);
      data.add(newInst);
    }

    m_PlotInstances = data;
  }
Example #6
 /**
  * Read the sparse feature vector data from the data file and convert it into Weka's instance
  * format.
  */
 public void readSparseFVsFromFile(
     File dataFile, int numDocs, boolean trainingMode, int numLabels, boolean surroundMode) {
   int numFeats = 0;
   int numClasses = 0;
   labelsFVDoc = new LabelsOfFeatureVectorDoc[numDocs];
   // Read the sparse FVs by using the method in MultiClassLearning class
   MultiClassLearning multiClassL = new MultiClassLearning();
   boolean isUsingDataFile = false;
   File tempFVDataFile = null;
   multiClassL.getDataFromFile(numDocs, dataFile, isUsingDataFile, tempFVDataFile);
   // Create the attributes.
   numFeats = multiClassL.dataFVinDoc.getTotalNumFeatures();
   FastVector attributes = new FastVector(numFeats + 1);
   for (int i = 0; i < numFeats; ++i)
     attributes.addElement(new Attribute(new Integer(i + 1).toString()));
   // Add class attribute.
   if (surroundMode) numClasses = 2 * numLabels + 1; // count the null too, as value -1.
   else numClasses = numLabels + 1;
   FastVector classValues = new FastVector(numClasses);
   classValues.addElement("-1"); // The first class for null class
   for (int i = 1; i < numClasses; ++i) classValues.addElement(new Integer(i).toString());
   attributes.addElement(new Attribute("Class", classValues));
   // Create the dataset with the capacity of all FVs (the actual number of FVs
   // may be larger than pre-specified, because of possible multiple labels) and
   // set the index of the class attribute
   instancesData =
       new Instances("SparseFVsData", attributes, multiClassL.dataFVinDoc.getNumTraining());
   instancesData.setClassIndex(instancesData.numAttributes() - 1);
   // Copy the data into the instances.
   for (int iDoc = 0; iDoc < multiClassL.dataFVinDoc.getNumTrainingDocs(); ++iDoc) {
     SparseFeatureVector[] fvs = multiClassL.dataFVinDoc.trainingFVinDoc[iDoc].getFvs();
     labelsFVDoc[iDoc] = new LabelsOfFeatureVectorDoc();
     labelsFVDoc[iDoc].multiLabels = multiClassL.dataFVinDoc.labelsFVDoc[iDoc].multiLabels;
     for (int i = 0; i < fvs.length; ++i) {
       // Object valueO = fvs[i].getValues();
       double[] values = new double[fvs[i].getLen()];
       int[] indexes = new int[fvs[i].getLen()];
       for (int j = 0; j < fvs[i].getLen(); ++j) {
         // values[j] = (double)fvs[i].values[j];
         values[j] = fvs[i].nodes[j].value;
         indexes[j] = fvs[i].nodes[j].index;
       }
       SparseInstance inst = new SparseInstance(1.0, values, indexes, 50000);
       inst.setDataset(instancesData);
       if (trainingMode && labelsFVDoc[iDoc].multiLabels[i].num > 0)
         for (int j1 = 0; j1 < labelsFVDoc[iDoc].multiLabels[i].num; ++j1) {
           inst.setClassValue((labelsFVDoc[iDoc].multiLabels[i].labels[j1])); // label
           // >0
           instancesData.add(inst);
         }
       else {
         inst.setClassValue("-1"); // set label as -1 for null
         instancesData.add(inst);
       }
     }
   }
   return;
 }
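
The core of the conversion above is the SparseInstance(weight, values, indices, maxNumValues) constructor, which stores only the attributes that actually occur in a feature vector. A self-contained sketch of the same pattern, assuming the legacy FastVector API:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.SparseInstance;

public class SparseInstanceDemo {
  public static void main(String[] args) {
    // Header: five numeric features plus a nominal class.
    int numFeats = 5;
    FastVector atts = new FastVector(numFeats + 1);
    for (int i = 0; i < numFeats; i++) {
      atts.addElement(new Attribute("f" + (i + 1)));
    }
    FastVector classVals = new FastVector(2);
    classVals.addElement("-1");
    classVals.addElement("1");
    atts.addElement(new Attribute("Class", classVals));

    Instances data = new Instances("sparseDemo", atts, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Only the features at indices 1 and 3 are non-zero.
    double[] values = {0.5, 1.25};
    int[] indexes = {1, 3};
    SparseInstance inst = new SparseInstance(1.0, values, indexes, data.numAttributes());
    inst.setDataset(data);
    inst.setClassValue("1");
    data.add(inst);

    System.out.println(data);
  }
}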
Example #7
  private List<Instance> myExtractKeyphrases(String document, int numOfPhrases) throws Exception {

    // Check whether there is actually any data
    //
    if (document.length() == 0 || document.equals("")) {
      throw new Exception("Couldn't find any data!");
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    List<Instance> myInstances = new ArrayList<Instance>();

    double[] newInst = new double[2];
    newInst[0] = (double) data.attribute(0).addStringValue(document);
    newInst[1] = Instance.missingValue();

    data.add(new Instance(1.0, newInst));

    m_KEAFilter.input(data.instance(0));

    data = data.stringFreeStructure();

    ke.setNumPhrases(numOfPhrases);

    int numPhrases = numOfPhrases; // ke.getNumPhrases();

    Instance[] topRankedInstances = new Instance[numPhrases];
    Instance inst;

    // Iterating over all extracted keyphrases (inst)
    while ((inst = m_KEAFilter.output()) != null) {
      int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;

      if (index < numPhrases) {
        topRankedInstances[index] = inst;
      }
    }

    double numExtracted = 0, numCorrect = 0;

    for (int i = 0; i < numPhrases; i++) {
      if (topRankedInstances[i] != null) {
        if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
          numExtracted += 1.0;
        }
        if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
          numCorrect += 1.0;
        }
        myInstances.add(topRankedInstances[i]);
      }
    }

    return myInstances;
  }
 public FastVector buildCosineAttributes() {
   FastVector attributes = new FastVector(2);
   attributes.addElement(new Attribute("cosine"));
   FastVector classVal = new FastVector();
   classVal.addElement("1");
   classVal.addElement("0");
   Attribute label = new Attribute("label", classVal);
   attributes.addElement(label);
   return attributes;
 }
 /** Constructor for the classifier; mainly declares the data format, the class labels and the number of classes. */
 public MessageClassify() throws Exception {
   String nameOfDataset = "MessageClassification";
   FastVector attributes = new FastVector(2);
   attributes.addElement(new Attribute("Message", (FastVector) null));
   FastVector classValues = new FastVector(2); // class label vector; there are two classes
   classValues.addElement("0");
   classValues.addElement("1");
   attributes.addElement(new Attribute("Class", classValues));
   instances = new Instances(nameOfDataset, attributes, 100); // think of an instance as a row and an attribute as a column
   instances.setClassIndex(instances.numAttributes() - 1); // the column of the instances that holds the class label
 }
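
Only the constructor of MessageClassify is shown; the sketch below (a hypothetical helper, not part of the original class) shows how a single message and its label could be appended to this header, using the string attribute's addStringValue() and the nominal class values declared above:

 /**
  * Hypothetical helper: converts one message plus its label ("0" or "1") into a row of the
  * header built by the constructor above.
  */
 private void addMessage(String message, String classLabel) {
   double[] vals = new double[2];
   // Store the message text in the string attribute and keep the returned index.
   vals[0] = instances.attribute(0).addStringValue(message);
   // Map the label onto its position in the nominal class attribute.
   vals[1] = instances.attribute(1).indexOfValue(classLabel);
   instances.add(new Instance(1.0, vals));
 }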
Example #10
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then this
   * method will be called from batchFinished().
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    Instances data;
    Instances result;
    FastVector atts;
    FastVector values;
    HashSet hash;
    int i;
    int n;
    boolean isDate;
    Instance inst;
    Vector sorted;

    m_Cols.setUpper(inputFormat.numAttributes() - 1);
    data = new Instances(inputFormat);
    atts = new FastVector();
    for (i = 0; i < data.numAttributes(); i++) {
      if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) {
        atts.addElement(data.attribute(i));
        continue;
      }

      // date attribute?
      isDate = (data.attribute(i).type() == Attribute.DATE);

      // determine all distinct values of this attribute in the dataset
      hash = new HashSet();
      for (n = 0; n < data.numInstances(); n++) {
        inst = data.instance(n);
        if (inst.isMissing(i)) continue;

        if (isDate) hash.add(inst.stringValue(i));
        else hash.add(new Double(inst.value(i)));
      }

      // sort values
      sorted = new Vector();
      for (Object o : hash) sorted.add(o);
      Collections.sort(sorted);

      // create attribute from sorted values
      values = new FastVector();
      for (Object o : sorted) {
        if (isDate) values.addElement(o.toString());
        else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS));
      }
      atts.addElement(new Attribute(data.attribute(i).name(), values));
    }

    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }
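
This determineOutputFormat() mirrors what weka.filters.unsupervised.attribute.NumericToNominal does: each selected numeric attribute is replaced by a nominal one built from its distinct observed values. A usage sketch, assuming that filter and the legacy FastVector API:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;

public class NumericToNominalDemo {
  public static void main(String[] args) throws Exception {
    FastVector atts = new FastVector(1);
    atts.addElement(new Attribute("rating"));
    Instances data = new Instances("demo", atts, 0);
    for (double v : new double[] {1, 2, 2, 3}) {
      data.add(new Instance(1.0, new double[] {v}));
    }

    // The filter collects the distinct values of "rating" (as in the code above)
    // and rebuilds the header with a nominal attribute in its place.
    NumericToNominal filter = new NumericToNominal();
    filter.setInputFormat(data);
    Instances nominal = Filter.useFilter(data, filter);
    System.out.println(nominal);
  }
}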
Example #11
 private static Instances createSet(int l) {
   FastVector attributes = new FastVector(1 + l);
   FastVector vals = new FastVector(3);
   vals.addElement("S1");
   vals.addElement("S2");
   vals.addElement("S3");
   Attribute classAttribute = new Attribute("Sense", vals);
   attributes.addElement(classAttribute);
   for (int i = 0; i < l; i++) {
     attributes.addElement(new Attribute(i + 1 + ""));
   }
   Instances set = new Instances("Rel", attributes, 1 + l);
   set.setClassIndex(0);
   return set;
 }
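
createSet() only produces an empty header with the class ("Sense") at index 0; the sketch below (a hypothetical companion method in the same class, with made-up feature values) shows how a labelled example could be added to it:

 private static Instances createOneRowSet() {
   // Hypothetical usage of createSet(): one labelled example with three features.
   Instances train = createSet(3);
   double[] vals = new double[1 + 3];
   vals[0] = train.classAttribute().indexOfValue("S2"); // class value "S2"
   vals[1] = 0.1;
   vals[2] = 0.7;
   vals[3] = 0.2;
   train.add(new Instance(1.0, vals));
   return train;
 }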
Example #12
  private static Instances convertToInstances(
      final IScope scope,
      final IList<String> attributes,
      final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
      throws GamaRuntimeException {
    FastVector attribs = new FastVector();
    for (String att : attributes) {
      attribs.addElement(new Attribute(att));
    }
    Instances dataset =
        new Instances(scope.getAgentScope().getName(), attribs, agents.length(scope));
    for (IAgent ag : agents.iterable(scope)) {

      int nb = attributes.size();
      double vals[] = new double[nb];
      for (int i = 0; i < nb; i++) {
        String attrib = attributes.get(i);
        Double var = Cast.asFloat(scope, ag.getDirectVarValue(scope, attrib));
        vals[i] = var;
      }
      Instance instance = new Instance(1, vals);
      dataset.add(instance);
    }
    return dataset;
  }
Example #13
  public weka.core.Instances toWekaInstances() {
    // attributes
    FastVector wattrs = new FastVector();
    Iterator itr = attributes.iterator();
    while (itr.hasNext()) {
      Attribute attr = (Attribute) itr.next();
      wattrs.addElement(attr.toWekaAttribute());
    }
    // data instances
    weka.core.Instances winsts = new weka.core.Instances(name, wattrs, instances.size());
    itr = instances.iterator();

    while (itr.hasNext()) {
      Instance inst = (Instance) itr.next();
      Iterator itrval = inst.getValues().iterator();
      Iterator itrmis = inst.getMissing().iterator();
      double[] vals = new double[wattrs.size()];
      for (int i = 0; i < wattrs.size(); i++) {
        double val = (Double) itrval.next();
        if ((Boolean) itrmis.next()) {
          vals[i] = weka.core.Instance.missingValue();
        } else {
          vals[i] = val;
        }
      }
      weka.core.Instance winst = new weka.core.Instance(1, vals);
      winst.setDataset(winsts);
      winsts.add(winst);
    }
    winsts.setClassIndex(this.class_index);
    return winsts;
  }
Example #14
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    String string1 = "\tThe required number of rules. (default = " + (m_numRules - 5) + ")";
    FastVector newVector = new FastVector(1);

    newVector.addElement(new Option(string1, "N", 1, "-N <required number of rules output>"));
    return newVector.elements();
  }
Example #15
  /**
   * Determines the output format based on the input format and returns this.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances result;
    Attribute att;
    Attribute attSorted;
    FastVector atts;
    FastVector values;
    Vector<String> sorted;
    int i;
    int n;

    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);

    // determine sorted indices
    atts = new FastVector();
    m_NewOrder = new int[inputFormat.numAttributes()][];
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      att = inputFormat.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) {
        m_NewOrder[i] = new int[0];
        atts.addElement(inputFormat.attribute(i).copy());
        continue;
      }

      // sort labels
      sorted = new Vector<String>();
      for (n = 0; n < att.numValues(); n++) sorted.add(att.value(n));
      Collections.sort(sorted, m_Comparator);

      // determine new indices
      m_NewOrder[i] = new int[att.numValues()];
      values = new FastVector();
      for (n = 0; n < att.numValues(); n++) {
        m_NewOrder[i][n] = sorted.indexOf(att.value(n));
        values.addElement(sorted.get(n));
      }
      attSorted = new Attribute(att.name(), values);
      attSorted.setWeight(att.weight());
      atts.addElement(attSorted);
    }

    // generate new header
    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }
Example #16
 private FastVector attFromStream(StreamElement data) {
   FastVector fv = new FastVector();
   for (int i = 0; i < data.getFieldNames().length; i++) {
     Attribute a = new Attribute(data.getFieldNames()[i]);
     fv.addElement(a);
   }
   return fv;
 }
  /**
   * Process a classifier's prediction for an instance and update a set of plotting instances and
   * additional plotting info. m_PlotShapes for nominal class datasets holds shape types (actual data
   * points have automatic shape type assignment; classifier error data points have box shape type).
   * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSizes
   * stores the error (which is later converted to shape size values).
   *
   * @param toPredict the actual data point
   * @param classifier the classifier
   * @param eval the evaluation object to use for evaluating the classifier on the instance to
   *     predict
   * @see #m_PlotShapes
   * @see #m_PlotSizes
   * @see #m_PlotInstances
   */
  public void process(Instance toPredict, Classifier classifier, Evaluation eval) {
    double pred;
    double[] values;
    int i;

    try {
      pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict);

      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
        toPredict =
            ((weka.classifiers.misc.InputMappedClassifier) classifier)
                .constructMappedInstance(toPredict);
      }

      if (!m_SaveForVisualization) return;

      if (m_PlotInstances != null) {
        values = new double[m_PlotInstances.numAttributes()];
        for (i = 0; i < m_PlotInstances.numAttributes(); i++) {
          if (i < toPredict.classIndex()) {
            values[i] = toPredict.value(i);
          } else if (i == toPredict.classIndex()) {
            values[i] = pred;
            values[i + 1] = toPredict.value(i);
            i++;
          } else {
            values[i] = toPredict.value(i - 1);
          }
        }

        m_PlotInstances.add(new DenseInstance(1.0, values));

        if (toPredict.classAttribute().isNominal()) {
          if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) {
            m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
          } else if (pred != toPredict.classValue()) {
            // set to default error point shape
            m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE));
          } else {
            // otherwise set to constant (automatically assigned) point shape
            m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
          }
          m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE));
        } else {
          // store the error (to be converted to a point size later)
          Double errd = null;
          if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) {
            errd = new Double(pred - toPredict.classValue());
            m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE));
          } else {
            // missing shape if actual class not present or prediction is missing
            m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE));
          }
          m_PlotSizes.addElement(errd);
        }
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
Example #18
  /**
   * Add a rule to the ruleset and update the stats
   *
   * @param lastRule the rule to be added
   */
  public void addAndUpdate(Rule lastRule) {
    if (m_Ruleset == null) m_Ruleset = new FastVector();
    m_Ruleset.addElement(lastRule);

    Instances data = (m_Filtered == null) ? m_Data : ((Instances[]) m_Filtered.lastElement())[1];
    double[] stats = new double[6];
    double[] classCounts = new double[m_Data.classAttribute().numValues()];
    Instances[] filtered = computeSimpleStats(m_Ruleset.size() - 1, data, stats, classCounts);

    if (m_Filtered == null) m_Filtered = new FastVector();
    m_Filtered.addElement(filtered);

    if (m_SimpleStats == null) m_SimpleStats = new FastVector();
    m_SimpleStats.addElement(stats);

    if (m_Distributions == null) m_Distributions = new FastVector();
    m_Distributions.addElement(classCounts);
  }
Example #19
  /**
   * Trains the classifier on the array of Signal objects. Implementations of this method should
   * also produce an ordered list of the class names which can be returned with the <code>
   * getClassNames</code> method.
   *
   * @param inputData the Signal array that the model should be trained on.
   * @throws noMetadataException Thrown if there is no class metadata to train the Gaussian model
   *     with
   */
  public void train(Signal[] inputData) {

    List classNamesList = new ArrayList();
    for (int i = 0; i < inputData.length; i++) {
      try {
        String className = inputData[i].getStringMetadata(Signal.PROP_CLASS);
        if ((className != null) && (!classNamesList.contains(className))) {
          classNamesList.add(className);
        }
      } catch (noMetadataException ex) {
        throw new RuntimeException("No class metadata found to train model on!", ex);
      }
    }
    Collections.sort(classNamesList);
    classnames = (String[]) classNamesList.toArray(new String[classNamesList.size()]);

    FastVector classValues = new FastVector(classnames.length);
    for (int i = 0; i < classnames.length; i++) {
      classValues.addElement(classnames[i]);
    }
    classAttribute = new Attribute(Signal.PROP_CLASS, classValues);
    Instances trainingDataSet =
        new Instances(Signal2Instance.convert(inputData[0], classAttribute));

    if (inputData.length > 1) {
      for (int i = 1; i < inputData.length; i++) {
        Instances aSignalInstance = Signal2Instance.convert(inputData[i], classAttribute);
        for (int j = 0; j < aSignalInstance.numInstances(); j++)
          trainingDataSet.add(aSignalInstance.instance(j));
      }
    }

    trainingDataSet.setClass(classAttribute);

    inputData = null;
    theRule = new MISMO();

    // parse options
    StringTokenizer stOption = new StringTokenizer(this.MISMOOptions, " ");
    String[] options = new String[stOption.countTokens()];
    for (int i = 0; i < options.length; i++) {
      options[i] = stOption.nextToken();
    }

    try {
      theRule.setOptions(options);
    } catch (Exception ex) {
      throw new RuntimeException("Failed to set MISMO classifier options!", ex);
    }
    try {
      theRule.buildClassifier(trainingDataSet);
      System.out.println("WEKA: outputting MISMO classifier; " + theRule.globalInfo());
    } catch (Exception ex) {
      throw new RuntimeException("Failed to train classifier!", ex);
    }
  }
Example #20
  /**
   * Filter the data according to the ruleset and compute the basic stats: coverage/uncoverage,
   * true/false positive/negatives of each rule
   */
  public void countData() {
    if ((m_Filtered != null) || (m_Ruleset == null) || (m_Data == null)) return;

    int size = m_Ruleset.size();
    m_Filtered = new FastVector(size);
    m_SimpleStats = new FastVector(size);
    m_Distributions = new FastVector(size);
    Instances data = new Instances(m_Data);

    for (int i = 0; i < size; i++) {
      double[] stats = new double[6]; // 6 statistics parameters
      double[] classCounts = new double[m_Data.classAttribute().numValues()];
      Instances[] filtered = computeSimpleStats(i, data, stats, classCounts);
      m_Filtered.addElement(filtered);
      m_SimpleStats.addElement(stats);
      m_Distributions.addElement(classCounts);
      data = filtered[1]; // Data not covered
    }
  }
Example #21
  /** Set the output format. Changes the format of the specified date attribute. */
  private void setOutputFormat() {

    // Create new attributes
    FastVector newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (j == m_AttIndex.getIndex()) {
        newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern()));
      } else {
        newAtts.addElement(att.copy());
      }
    }

    // Create new header
    Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());
    m_OutputAttribute = newData.attribute(m_AttIndex.getIndex());
    setOutputFormat(newData);
  }
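
The Attribute(name, dateFormatPattern) constructor used above creates a date attribute whose values are parsed and rendered through that SimpleDateFormat pattern. A small self-contained sketch, assuming the legacy FastVector API:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class DateAttributeDemo {
  public static void main(String[] args) throws Exception {
    FastVector atts = new FastVector(1);
    // Same constructor style as in the filter above: a name plus a SimpleDateFormat pattern.
    atts.addElement(new Attribute("when", "yyyy-MM-dd"));
    Instances data = new Instances("dates", atts, 0);

    // Date values are stored as doubles (milliseconds since the epoch).
    double stamp = data.attribute(0).parseDate("2011-04-03");
    data.add(new Instance(1.0, new double[] {stamp}));

    System.out.println(data.attribute(0).formatDate(stamp)); // prints 2011-04-03
  }
}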
Example #22
  /**
   * Set the output format. Takes the current input format (m_InputFormat) and calls
   * setOutputFormat(Instances) appropriately.
   */
  private void setOutputFormat() {
    Instances newData;
    FastVector newAtts;

    // Compute new attributes
    newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);

      if (!att.isNominal() || !m_AttIndex.isInRange(j)) newAtts.addElement(att);
      else newAtts.addElement(new Attribute(att.name(), (FastVector) null));
    }

    // Construct new header
    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());

    setOutputFormat(newData);
  }
  private static Instances initializeAttributes() {

    String nameOfDataset = "Badges";

    Instances instances;

    FastVector attributes = new FastVector(9);
    for (String featureName : features) {
      attributes.addElement(new Attribute(featureName, zeroOne));
    }
    Attribute classLabel = new Attribute("Class", labels);
    // labels is a FastVector of '+' and '-'
    attributes.addElement(classLabel);

    instances = new Instances(nameOfDataset, attributes, 0);

    instances.setClass(classLabel);

    return instances;
  }
  private void initialiseRelation() {
    minAttribute = new Attribute("min");
    maxAttribute = new Attribute("max");
    meanAttribute = new Attribute("mean");
    stdDevAttribute = new Attribute("stdDev");

    FastVector activities = new FastVector(2);
    activities.addElement("walking");
    activities.addElement("running");
    activityAttribute = new Attribute("activity", activities);

    FastVector features = new FastVector(NUMBER_OF_ATTRIBUTES);
    features.addElement(minAttribute);
    features.addElement(maxAttribute);
    features.addElement(meanAttribute);
    features.addElement(stdDevAttribute);
    features.addElement(activityAttribute);

    instances = new Instances("Activity", features, 0);

    loadExistingData();
  }
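
initialiseRelation() only builds the header; the sketch below (a hypothetical helper for the same class) shows how one windowed sample could be appended, using the attribute fields declared above:

  /** Hypothetical helper: appends one feature window together with its activity label. */
  private void addSample(double min, double max, double mean, double stdDev, String activity) {
    Instance sample = new Instance(instances.numAttributes()); // all values missing initially
    sample.setDataset(instances);
    sample.setValue(minAttribute, min);
    sample.setValue(maxAttribute, max);
    sample.setValue(meanAttribute, mean);
    sample.setValue(stdDevAttribute, stdDev);
    sample.setValue(activityAttribute, activity); // "walking" or "running"
    instances.add(sample);
  }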
Example #25
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then
   * this method will be called from batchFinished() after the call of preprocess(Instances), in which,
   * e.g., statistics for the actual processing step can be gathered.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances result;
    FastVector atts;
    int i;
    int numAtts;
    Vector<Integer> indices;
    Vector<Integer> subset;
    Random rand;
    int index;

    // determine the number of attributes
    numAtts = inputFormat.numAttributes();
    if (inputFormat.classIndex() > -1) numAtts--;

    if (m_NumAttributes < 1) {
      numAtts = (int) Math.round((double) numAtts * m_NumAttributes);
    } else {
      if (m_NumAttributes < numAtts) numAtts = (int) m_NumAttributes;
    }
    if (getDebug()) System.out.println("# of atts: " + numAtts);

    // determine random indices
    indices = new Vector<Integer>();
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      if (i == inputFormat.classIndex()) continue;
      indices.add(i);
    }

    subset = new Vector<Integer>();
    rand = new Random(m_Seed);
    for (i = 0; i < numAtts; i++) {
      index = rand.nextInt(indices.size());
      subset.add(indices.get(index));
      indices.remove(index);
    }
    Collections.sort(subset);
    if (inputFormat.classIndex() > -1) subset.add(inputFormat.classIndex());
    if (getDebug()) System.out.println("indices: " + subset);

    // generate output format
    atts = new FastVector();
    m_Indices = new int[subset.size()];
    for (i = 0; i < subset.size(); i++) {
      atts.addElement(inputFormat.attribute(subset.get(i)));
      m_Indices[i] = subset.get(i);
    }
    result = new Instances(inputFormat.relationName(), atts, 0);
    if (inputFormat.classIndex() > -1) result.setClassIndex(result.numAttributes() - 1);

    return result;
  }
  static {
    features = new String[] {"firstName", "lastName"};

    List<String> ff = new ArrayList<String>();

    for (String f : features) {
      for (int i = 0; i < 9; i++) {
        for (char letter = 'a'; letter <= 'z'; letter++) {
          ff.add(f + i + "=" + letter);
        }
      }
    }

    features = ff.toArray(new String[ff.size()]);

    zeroOne = new FastVector(2);
    zeroOne.addElement("1");
    zeroOne.addElement("0");

    labels = new FastVector(2);
    labels.addElement("+");
    labels.addElement("-");
  }
Example #27
  /**
   * Count data from the position index in the ruleset assuming that given data are not covered by
   * the rules in position 0...(index-1), and the statistics of these rules are provided.<br>
   * This procedure is typically useful when a temporary object of RuleStats is constructed in order
   * to efficiently calculate the relative DL of the rule at position index, so the other statistics
   * are not needed.
   *
   * @param index the given position
   * @param uncovered the data not covered by rules before index
   * @param prevRuleStats the provided stats of previous rules
   */
  public void countData(int index, Instances uncovered, double[][] prevRuleStats) {
    if ((m_Filtered != null) || (m_Ruleset == null)) return;

    int size = m_Ruleset.size();
    m_Filtered = new FastVector(size);
    m_SimpleStats = new FastVector(size);
    Instances[] data = new Instances[2];
    data[1] = uncovered;

    for (int i = 0; i < index; i++) {
      m_SimpleStats.addElement(prevRuleStats[i]);
      if (i + 1 == index) m_Filtered.addElement(data);
      else m_Filtered.addElement(new Object()); // Stuff sth.
    }

    for (int j = index; j < size; j++) {
      double[] stats = new double[6]; // 6 statistics parameters
      Instances[] filtered = computeSimpleStats(j, data[1], stats, null);
      m_Filtered.addElement(filtered);
      m_SimpleStats.addElement(stats);
      data = filtered; // Data not covered
    }
  }
Example #28
 public void init() {
   // 1. set up attributes
   attributes = new FastVector();
   // - numeric: 1
   attributes.addElement(new Attribute("IsMarket"));
   // - numeric: 2
   attributes.addElement(new Attribute("IsMonth"));
   // - numeric: 3
   attributes.addElement(new Attribute("IsCategory"));
   // - numeric: 4
   attributes.addElement(new Attribute("IsCompany"));
   // - numeric: 5
   attributes.addElement(new Attribute("IsBrand"));
    // - numeric: 6
   attributes.addElement(new Attribute("OfferValue"));
    // - nominal: 7
   attributeReturn = new FastVector();
   attributeReturn.addElement("f");
   attributeReturn.addElement("t");
   attributes.addElement(new Attribute("Return", attributeReturn));
 }
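
init() only declares the attribute vector; the sketch below (hypothetical, same legacy API) shows how one offer could be assembled against this header, with the nominal Return value looked up by name:

  /** Hypothetical helper: builds the dataset from the attributes above and adds one labelled offer. */
  private Instances buildOneRowDataset() {
    Instances data = new Instances("offers", attributes, 0);
    data.setClassIndex(data.numAttributes() - 1); // "Return" is the class

    double[] vals = new double[data.numAttributes()];
    vals[0] = 1;     // IsMarket
    vals[1] = 6;     // IsMonth
    vals[2] = 3;     // IsCategory
    vals[3] = 12;    // IsCompany
    vals[4] = 4;     // IsBrand
    vals[5] = 19.99; // OfferValue
    vals[6] = data.attribute(6).indexOfValue("t"); // Return
    data.add(new Instance(1.0, vals));
    return data;
  }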
Example #29
  /**
   * Parses a given list of options. Valid options are:
   *
   * <p>-D <br>
   * Turn on debugging output.
   *
   * <p>-S seed <br>
   * Random number seed (default 1).
   *
   * <p>-B classifierstring <br>
   * Classifierstring should contain the full class name of a scheme included for selection followed
   * by options to the classifier (required, option should be used once for each classifier).
   *
   * <p>-X num_folds <br>
   * Use cross validation error as the basis for classifier selection (default 0, i.e., use the
   * error on the training data instead).
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setDebug(Utils.getFlag('D', options));

    String numFoldsString = Utils.getOption('X', options);
    if (numFoldsString.length() != 0) {
      setNumFolds(Integer.parseInt(numFoldsString));
    } else {
      setNumFolds(0);
    }

    String randomString = Utils.getOption('S', options);
    if (randomString.length() != 0) {
      setSeed(Integer.parseInt(randomString));
    } else {
      setSeed(1);
    }

    // Iterate through the schemes
    FastVector classifiers = new FastVector();
    while (true) {
      String classifierString = Utils.getOption('B', options);
      if (classifierString.length() == 0) {
        break;
      }
      String[] classifierSpec = Utils.splitOptions(classifierString);
      if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
      }
      String classifierName = classifierSpec[0];
      classifierSpec[0] = "";
      classifiers.addElement(Classifier.forName(classifierName, classifierSpec));
    }
    if (classifiers.size() <= 1) {
      throw new Exception("At least two classifiers must be specified" + " with the -B option.");
    } else {
      Classifier[] classifiersArray = new Classifier[classifiers.size()];
      for (int i = 0; i < classifiersArray.length; i++) {
        classifiersArray[i] = (Classifier) classifiers.elementAt(i);
      }
      setClassifiers(classifiersArray);
    }
  }
Example #30
 public boolean batchFinished() throws Exception {
   Instances input = getInputFormat();
   String relation = input.relationName();
   Instances output = new Instances(relation);
   int numAttributes = input.numAttributes();
   int numInstances = input.numInstances();
   for (int i = 0; i < numAttributes; i++) {
     FastVector vector = new FastVector();
     for (int j = 0; j < numInstances; j++) {
       double value = input.instance(j).value(i);
       String string = String.valueOf(value);
       if (vector.indexOf(string) == -1) vector.addElement(string);
     }
     Attribute attribute = new Attribute(input.attribute(i).name(), vector);
     output.appendAttribute(attribute);
   }
   setOutputFormat(output);
   for (int i = 0; i < numInstances; i++) push(input.instance(i));
   return super.batchFinished();
 }