/**
   * Initializes the ranges using all instances of the dataset. Sets m_Ranges.
   *
   * @return the ranges
   */
  public double[][] initializeRanges() {
    if (m_Data == null) {
      m_Ranges = null;
      return m_Ranges;
    }

    int numAtt = m_Data.numAttributes();
    double[][] ranges = new double[numAtt][3];

    if (m_Data.numInstances() <= 0) {
      initializeRangesEmpty(numAtt, ranges);
      m_Ranges = ranges;
      return m_Ranges;
    } else {
      // initialize ranges using the first instance
      updateRangesFirst(m_Data.instance(0), numAtt, ranges);
    }

    // update ranges, starting from the second
    for (int i = 1; i < m_Data.numInstances(); i++) {
      updateRanges(m_Data.instance(i), numAtt, ranges);
    }

    m_Ranges = ranges;

    return m_Ranges;
  }
  /**
   * initializes the algorithm
   *
   * @param data the data to work with
   * @throws Exception if m_SVM is null
   */
  protected void init(Instances data) throws Exception {
    if (m_SVM == null) {
      throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
    }
    m_C = m_SVM.getC();
    m_data = data;
    m_classIndex = data.classIndex();
    m_nInstances = data.numInstances();

    // Initialize kernel
    m_kernel = Kernel.makeCopy(m_SVM.getKernel());
    m_kernel.buildKernel(data);

    // init m_target
    m_target = new double[m_nInstances];
    for (int i = 0; i < m_nInstances; i++) {
      m_target[i] = data.instance(i).classValue();
    }

    m_random = new Random(m_nSeed);

    //		initialize alpha and alpha* array to all zero
    m_alpha = new double[m_target.length];
    m_alphaStar = new double[m_target.length];

    m_supportVectors = new SMOset(m_nInstances);

    m_b = 0.0;
    m_nEvals = 0;
    m_nCacheHits = -1;
  }
Exemple #3
0
  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    if (!(m_Classifier instanceof WeightedInstancesHandler)) {
      throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
      System.err.println(
          "Cannot build model (only class attribute present in data!), "
              + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(instances);
      return;
    } else {
      m_ZeroR = null;
    }

    m_Train = new Instances(instances, 0, instances.numInstances());

    m_NNSearch.setInstances(m_Train);
  }
Exemple #4
0
  // 构造一个tri-trainer分类器。
  public Tritrainer(
      String classifier, String trainingIns_File, String testIns_File, double precentage) {
    try {
      this.classifier1 = (Classifier) Class.forName(classifier).newInstance();
      this.classifier2 = (Classifier) Class.forName(classifier).newInstance();
      this.classifier3 = (Classifier) Class.forName(classifier).newInstance();

      Instances trainingInstances = Util.getInstances(trainingIns_File);

      // 将trainIns_File按照precentage和(1-precentage)的比例切割成labeledIns和unlabeledIns;
      int length = trainingInstances.numInstances();
      int i = new Double(length * precentage).intValue();
      labeledIns = new Instances(trainingInstances, 0);
      for (int j = 0; j < i; j++) {
        labeledIns.add(trainingInstances.firstInstance());
        trainingInstances.delete(0);
      }
      unlabeledIns = trainingInstances;
      testIns = Util.getInstances(testIns_File);

      Init();
    } catch (Exception e) {

    }
  }
Exemple #5
0
 // 将样本集中裁剪提取成m个样本组成的集合;
 public void SubSample(Instances inst, int m) {
   inst.randomize(new Random());
   while (inst.numInstances() != m) {
     inst.delete(0);
   }
   // System.out.println("subsample:=" + inst.numInstances() + " m:=" + m );
 }
Exemple #6
0
  //	计算h1,h2分类器共同的分类错误率;
  public double measureBothError(Classifier h1, Classifier h2, Instances test) {
    int m = test.numInstances();
    double value1, value2, value;
    int error = 0, total = 0;
    try {
      for (int i = 0; i < m; i++) {
        value = test.instance(i).classValue();
        value1 = h1.classifyInstance(test.instance(i));
        value2 = h2.classifyInstance(test.instance(i));

        // 两分类器做出相同决策
        if (value1 == value2) {
          // 两分类器做出相同决策的样本数量
          total++;

          // 两分类器做出相同错误决策
          if (value != value1) {
            //	两分类器做出相同错误决策的样本数量
            error++;
          }
        }
      }
    } catch (Exception e) {
      System.out.println(e);
    }
    // System.out.println("m:=" + m);
    // System.out.println("error:=" + error +"; total:=" + total);

    // 两个分类器的分类错误率= 两分类器做出相同错误决策的样本数量/两分类器做出相同决策的样本数量
    return (error * 1.0) / total;
  }
Exemple #7
0
  private static IList<IList<IAgent>> clusteringUsingWeka(
      final IScope scope,
      final Clusterer clusterer,
      final IList<String> attributes,
      final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
      throws GamaRuntimeException {
    Instances dataset = convertToInstances(scope, attributes, agents);
    try {
      clusterer.buildClusterer(dataset);

      IList<IList<IAgent>> groupes = GamaListFactory.create(Types.LIST.of(Types.AGENT));

      for (int i = 0; i < clusterer.numberOfClusters(); i++) {
        groupes.add(GamaListFactory.<IAgent>create(Types.AGENT));
      }
      for (int i = 0; i < dataset.numInstances(); i++) {
        Instance inst = dataset.instance(i);
        int clusterIndex = -1;
        clusterIndex = clusterer.clusterInstance(inst);
        IList<IAgent> groupe = groupes.get(clusterIndex);
        groupe.add(agents.get(scope, i));
      }
      return groupes;
    } catch (Exception e) {
      return null;
    }
  }
Exemple #8
0
 /**
  * Private function to compute default number of accurate instances in the specified data for the
  * consequent of the rule
  *
  * @param data the data in question
  * @return the default accuracy number
  */
 private double computeDefAccu(Instances data) {
   double defAccu = 0;
   for (int i = 0; i < data.numInstances(); i++) {
     Instance inst = data.instance(i);
     if ((int) inst.classValue() == (int) m_Consequent) defAccu += inst.weight();
   }
   return defAccu;
 }
Exemple #9
0
  /**
   * Computes the entropy of a dataset.
   *
   * @param data the data for which entropy is to be computed
   * @return the entropy of the data's class distribution
   * @throws Exception if computation fails
   */
  private double computeEntropy(Instances data) throws Exception {

    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
      if (classCounts[j] > 0) {
        entropy -= classCounts[j] * Utils.log2(classCounts[j]);
      }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
  }
  public int getClusterNumber(String objectID) {
    int datasetIndex = -1;

    for (int i = 0; i < m_Sequences.numInstances(); i++) {
      if (objectID.equals(m_Sequences.instance(i).stringValue(0))) datasetIndex = i;
    }

    return cluster[datasetIndex];
  }
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this
   * method will be called from batchFinished().
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    Instances data;
    Instances result;
    FastVector atts;
    FastVector values;
    HashSet hash;
    int i;
    int n;
    boolean isDate;
    Instance inst;
    Vector sorted;

    m_Cols.setUpper(inputFormat.numAttributes() - 1);
    data = new Instances(inputFormat);
    atts = new FastVector();
    for (i = 0; i < data.numAttributes(); i++) {
      if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) {
        atts.addElement(data.attribute(i));
        continue;
      }

      // date attribute?
      isDate = (data.attribute(i).type() == Attribute.DATE);

      // determine all available attribtues in dataset
      hash = new HashSet();
      for (n = 0; n < data.numInstances(); n++) {
        inst = data.instance(n);
        if (inst.isMissing(i)) continue;

        if (isDate) hash.add(inst.stringValue(i));
        else hash.add(new Double(inst.value(i)));
      }

      // sort values
      sorted = new Vector();
      for (Object o : hash) sorted.add(o);
      Collections.sort(sorted);

      // create attribute from sorted values
      values = new FastVector();
      for (Object o : sorted) {
        if (isDate) values.addElement(o.toString());
        else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS));
      }
      atts.addElement(new Attribute(data.attribute(i).name(), values));
    }

    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }
Exemple #12
0
  protected void initMinMax(Instances data) {
    m_Min = new double[data.numAttributes()];
    m_Max = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
      m_Min[i] = m_Max[i] = Double.NaN;
    }

    for (int i = 0; i < data.numInstances(); i++) {
      updateMinMax(data.instance(i));
    }
  }
Exemple #13
0
  /**
   * Labels the artificially generated data.
   *
   * @param artData the artificially generated instances
   * @exception Exception if instances cannot be labeled successfully
   */
  protected void labelData(Instances artData) throws Exception {
    Instance curr;
    double[] probs;

    for (int i = 0; i < artData.numInstances(); i++) {
      curr = artData.instance(i);
      // compute the class membership probs predicted by the current ensemble
      probs = distributionForInstance(curr);
      // select class label inversely proportional to the ensemble predictions
      curr.setClassValue(inverseLabel(probs));
    }
  }
Exemple #14
0
  /**
   * Computes the error in classification on the given data.
   *
   * @param data the instances to be classified
   * @return classification error
   * @exception Exception if error can not be computed successfully
   */
  protected double computeError(Instances data) throws Exception {
    double error = 0.0;
    int numInstances = data.numInstances();
    Instance curr;

    for (int i = 0; i < numInstances; i++) {
      curr = data.instance(i);
      // Check if the instance has been misclassified
      if (curr.classValue() != ((int) classifyInstance(curr))) error++;
    }
    return (error / numInstances);
  }
Exemple #15
0
  /**
   * Computes information gain for an attribute.
   *
   * @param data the data for which info gain is to be computed
   * @param att the attribute
   * @return the information gain for the given attribute and data
   * @throws Exception if computation fails
   */
  private double computeInfoGain(Instances data, Attribute att) throws Exception {

    double infoGain = computeEntropy(data);
    Instances[] splitData = splitData(data, att);
    for (int j = 0; j < att.numValues(); j++) {
      if (splitData[j].numInstances() > 0) {
        infoGain -=
            ((double) splitData[j].numInstances() / (double) data.numInstances())
                * computeEntropy(splitData[j]);
      }
    }
    return infoGain;
  }
Exemple #16
0
  /**
   * Implements the splitData function. This procedure is to split the data into bags according to
   * the nominal attribute value The infoGain for each bag is also calculated.
   *
   * @param data the data to be split
   * @param defAcRt the default accuracy rate for data
   * @param cl the class label to be predicted
   * @return the array of data after split
   */
  public Instances[] splitData(Instances data, double defAcRt, double cl) {
    int bag = att.numValues();
    Instances[] splitData = new Instances[bag];

    for (int x = 0; x < bag; x++) {
      splitData[x] = new Instances(data, data.numInstances());
      accurate[x] = 0;
      coverage[x] = 0;
    }

    for (int x = 0; x < data.numInstances(); x++) {
      Instance inst = data.instance(x);
      if (!inst.isMissing(att)) {
        int v = (int) inst.value(att);
        splitData[v].add(inst);
        coverage[v] += inst.weight();
        if ((int) inst.classValue() == (int) cl) accurate[v] += inst.weight();
      }
    }

    for (int x = 0; x < bag; x++) {
      double t = coverage[x] + 1.0;
      double p = accurate[x] + 1.0;
      double infoGain =
          // Utils.eq(defAcRt, 1.0) ?
          // accurate[x]/(double)numConds :
          accurate[x] * (Utils.log2(p / t) - Utils.log2(defAcRt));

      if (infoGain > maxInfoGain) {
        maxInfoGain = infoGain;
        cover = coverage[x];
        accu = accurate[x];
        accuRate = p / t;
        value = (double) x;
      }
    }

    return splitData;
  }
  /**
   * Initializes a gain ratio attribute evaluator. Discretizes all attributes that are numeric.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the evaluator has not been generated successfully
   */
  public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = data;
    m_classIndex = m_trainInstances.classIndex();
    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();
    Discretize disTransform = new Discretize();
    disTransform.setUseBetterEncoding(true);
    disTransform.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, disTransform);
    m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();
  }
Exemple #18
0
  /**
   * Splits a dataset according to the values of a nominal attribute.
   *
   * @param data the data which is to be split
   * @param att the attribute to be used for splitting
   * @return the sets of instances produced by the split
   */
  private Instances[] splitData(Instances data, Attribute att) {

    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
      splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
      splitData[i].compactify();
    }
    return splitData;
  }
  /**
   * Generates a clusterer by the mean of spectral clustering algorithm.
   *
   * @param data set of instances serving as training data
   * @exception Exception if the clusterer has not been generated successfully
   */
  public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    int n = data.numInstances();
    int k = data.numAttributes();
    DoubleMatrix2D w;
    if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n);
    else w = DoubleFactory2D.dense.make(n, n);
    double[][] v1 = new double[n][];
    for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray();
    v = DoubleFactory2D.dense.make(v1);
    double sigma_sq = sigma * sigma;
    // Sets up similarity matrix
    for (int i = 0; i < n; i++)
      for (int j = i; j < n; j++) {
        /*double dist = distnorm2(v.viewRow(i), v.viewRow(j));
        if((r == -1) || (dist < r)) {
          double sim = Math.exp(- (dist * dist) / (2 * sigma_sq));
          w.set(i, j, sim);
          w.set(j, i, sim);
        }*/
        /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)};
        System.out.println(key[0]);
        System.out.println(key[1]);
        System.out.println(simScoreMap.containsKey(key));
        Double simValue = simScoreMap.get(key);*/

        double sim = sim_matrix[i][j];
        w.set(i, j, sim);
        w.set(j, i, sim);
      }

    // Partitions points
    int[][] p = partition(w, alpha_star);

    // Deploys results
    numOfClusters = p.length;
    cluster = new int[n];
    for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i;

    // System.out.println("Final partition:");
    // UtilsJS.printMatrix(p);
    // System.out.println("Cluster:\n");
    // UtilsJS.printArray(cluster);
    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
    //  System.out.println("Num clusters:\t"+this.numOfClusters);
  }
Exemple #20
0
 // 通过h1,h2分类器学习样本集,将h1,h2分类决策相同的样本放入L中,得到标记集合;
 public void updateL(Classifier h1, Classifier h2, Instances L, Instances test) {
   int length = unlabeledIns.numInstances();
   double value1 = 0.0, value2 = 0.0;
   try {
     for (int i = 0; i < length; i++) {
       value1 = h1.classifyInstance(test.instance(i));
       value2 = h2.classifyInstance(test.instance(i));
       if (value1 == value2) {
         // 当两个分类器做出相同决策时重新标记样本的类别;
         test.instance(i).setClassValue(value1);
         L.add(test.instance(i));
       }
     }
   } catch (Exception e) {
     System.out.println(e);
   }
   // return false;
 }
  /**
   * Processes the given data (may change the provided dataset) and returns the modified version.
   * This method is called in batchFinished().
   *
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    Instances result;
    int i;
    int n;
    double[] values;
    String value;
    Instance inst;
    Instance newInst;

    // we need the complete input data!
    if (!isFirstBatchDone()) setOutputFormat(determineOutputFormat(getInputFormat()));

    result = new Instances(getOutputFormat());

    for (i = 0; i < instances.numInstances(); i++) {
      inst = instances.instance(i);
      values = inst.toDoubleArray();

      for (n = 0; n < values.length; n++) {
        if (!m_Cols.isInRange(n) || !instances.attribute(n).isNumeric() || inst.isMissing(n))
          continue;

        // get index of value
        if (instances.attribute(n).type() == Attribute.DATE) value = inst.stringValue(n);
        else value = Utils.doubleToString(inst.value(n), MAX_DECIMALS);

        values[n] = result.attribute(n).indexOfValue(value);
      }

      // generate new instance
      if (inst instanceof SparseInstance) newInst = new SparseInstance(inst.weight(), values);
      else newInst = new DenseInstance(inst.weight(), values);

      // copy possible string, relational values
      newInst.setDataset(getOutputFormat());
      copyValues(newInst, false, inst.dataset(), getOutputFormat());

      result.add(newInst);
    }

    return result;
  }
Exemple #22
0
  /**
   * Method for building an Id3 tree.
   *
   * @param data the training data
   * @exception Exception if decision tree can't be built successfully
   */
  private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
      m_Attribute = null;
      m_ClassValue = Utils.missingValue();
      m_Distribution = new double[data.numClasses()];
      return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
      Attribute att = (Attribute) attEnum.nextElement();
      infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
      m_Attribute = null;
      m_Distribution = new double[data.numClasses()];
      Enumeration instEnum = data.enumerateInstances();
      while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
      }
      Utils.normalize(m_Distribution);
      m_ClassValue = Utils.maxIndex(m_Distribution);
      m_ClassAttribute = data.classAttribute();
    } else {
      Instances[] splitData = splitData(data, m_Attribute);
      m_Successors = new Id3[m_Attribute.numValues()];
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        m_Successors[j] = new Id3();
        m_Successors[j].makeTree(splitData[j]);
      }
    }
  }
Exemple #23
0
  /**
   * Generates a clusterer. Has to initialize all fields of the clusterer that are not being set via
   * options.
   *
   * @param data set of instances serving as training data
   * @exception Exception if the clusterer has not been generated successfully
   */
  public void buildClusterer(Instances data) throws Exception {

    // long start = System.currentTimeMillis();
    if (data.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(data);
    m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

    initMinMax(m_instances);

    m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

    int n = m_instances.numInstances();
    Random r = new Random(m_Seed);
    boolean[] selected = new boolean[n];
    double[] minDistance = new double[n];

    for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE;

    int firstI = r.nextInt(n);
    m_ClusterCentroids.add(m_instances.instance(firstI));
    selected[firstI] = true;

    updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI));

    if (m_NumClusters > n) m_NumClusters = n;

    for (int i = 1; i < m_NumClusters; i++) {
      int nextI = farthestAway(minDistance, selected);
      m_ClusterCentroids.add(m_instances.instance(nextI));
      selected[nextI] = true;
      updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI));
    }

    m_instances = new Instances(m_instances, 0);
    // long end = System.currentTimeMillis();
    // System.out.println("Clustering Time = " + (end-start));
  }
  /**
   * Initializes the ranges of a subset of the instances of this dataset. Therefore m_Ranges is not
   * set.
   *
   * @param instList list of indexes of the subset
   * @return the ranges
   * @throws Exception if something goes wrong
   */
  public double[][] initializeRanges(int[] instList) throws Exception {
    if (m_Data == null) {
      throw new Exception("No instances supplied.");
    }

    int numAtt = m_Data.numAttributes();
    double[][] ranges = new double[numAtt][3];

    if (m_Data.numInstances() <= 0) {
      initializeRangesEmpty(numAtt, ranges);
      return ranges;
    } else {
      // initialize ranges using the first instance
      updateRangesFirst(m_Data.instance(instList[0]), numAtt, ranges);
      // update ranges, starting from the second
      for (int i = 1; i < instList.length; i++) {
        updateRanges(m_Data.instance(instList[i]), numAtt, ranges);
      }
    }
    return ranges;
  }
  /**
   * Generates an attribute evaluator. Has to initialise all fields of the evaluator that are not
   * being set via options.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the evaluator has not been generated successfully
   */
  public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = new Instances(data);
    m_trainInstances.deleteWithMissingClass();

    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();

    // if the data has no decision feature, m_classIndex is negative
    m_classIndex = m_trainInstances.classIndex();

    // supervised
    if (m_classIndex >= 0) {
      m_isNumeric = m_trainInstances.attribute(m_classIndex).isNumeric();

      if (m_isNumeric) {
        m_DecisionSimilarity = m_Similarity;
      } else m_DecisionSimilarity = m_SimilarityEq;
    }

    m_Similarity.setInstances(m_trainInstances);
    m_DecisionSimilarity.setInstances(m_trainInstances);
    m_SimilarityEq.setInstances(m_trainInstances);
    m_composition = m_Similarity.getTNorm();

    m_FuzzyMeasure.set(
        m_Similarity,
        m_DecisionSimilarity,
        m_TNorm,
        m_composition,
        m_Implicator,
        m_SNorm,
        m_numInstances,
        m_numAttribs,
        m_classIndex,
        m_trainInstances);
  }
Exemple #26
0
  public void calculateConfidences(Instances data) throws Exception {
    RipperRule tempRule = (RipperRule) this.copy();

    while (tempRule.hasAntds()) {
      double acc = 0;
      double cov = 0;
      for (int i = 0; i < data.numInstances(); i++) {
        double membershipValue = tempRule.coverageDegree(data.instance(i));
        cov += membershipValue;
        if (m_Consequent == data.instance(i).classValue()) {
          acc += membershipValue;
        }
      }

      // m-estimate
      double m = 2.0;
      ((Antd) this.m_Antds.elementAt((int) tempRule.size() - 1)).m_confidence =
          (acc + m * (aprioriDistribution[(int) m_Consequent] / Utils.sum(aprioriDistribution)))
              / (cov + m);
      tempRule.m_Antds.removeElementAt(tempRule.m_Antds.size() - 1);
    }
  }
Exemple #27
0
 /**
  * Add new instances to the given set of instances.
  *
  * @param data given instances
  * @param newData set of instances to add to given instances
  */
 protected void addInstances(Instances data, Instances newData) {
   for (int i = 0; i < newData.numInstances(); i++) data.add(newData.instance(i));
 }
Exemple #28
0
 /**
  * Removes a specified number of instances from the given set of instances.
  *
  * @param data given instances
  * @param numRemove number of instances to delete from the given instances
  */
 protected void removeInstances(Instances data, int numRemove) {
   int num = data.numInstances();
   for (int i = num - 1; i > num - 1 - numRemove; i--) {
     data.delete(i);
   }
 }
Exemple #29
0
  /**
   * Build Decorate classifier
   *
   * @param data the training data to be used for generating the classifier
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }
    if (data.checkForStringAttributes()) {
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    if (data.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    if (m_NumIterations < m_DesiredSize)
      throw new Exception("Max number of iterations must be >= desired ensemble size!");

    // initialize random number generator
    if (m_Seed == -1) m_Random = new Random();
    else m_Random = new Random(m_Seed);

    int i = 1; // current committee size
    int numTrials = 1; // number of Decorate iterations
    Instances divData = new Instances(data); // local copy of data - diversity data
    divData.deleteWithMissingClass();
    Instances artData = null; // artificial data

    // compute number of artficial instances to add at each iteration
    int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances());
    if (artSize == 0) artSize = 1; // atleast add one random example
    computeStats(data); // Compute training data stats for creating artificial examples

    // initialize new committee
    m_Committee = new Vector();
    Classifier newClassifier = m_Classifier;
    newClassifier.buildClassifier(divData);
    m_Committee.add(newClassifier);
    double eComm = computeError(divData); // compute ensemble error
    if (m_Debug)
      System.out.println(
          "Initialize:\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm);

    // repeat till desired committee size is reached OR the max number of iterations is exceeded
    while (i < m_DesiredSize && numTrials < m_NumIterations) {
      // Generate artificial training examples
      artData = generateArtificialData(artSize, data);

      // Label artificial examples
      labelData(artData);
      addInstances(divData, artData); // Add new artificial data

      // Build new classifier
      Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
      newClassifier = tmp[0];
      newClassifier.buildClassifier(divData);
      // Remove all the artificial data
      removeInstances(divData, artSize);

      // Test if the new classifier should be added to the ensemble
      m_Committee.add(newClassifier); // add new classifier to current committee
      double currError = computeError(divData);
      if (currError <= eComm) { // adding the new member did not increase the error
        i++;
        eComm = currError;
        if (m_Debug)
          System.out.println(
              "Iteration: "
                  + (1 + numTrials)
                  + "\tClassifier "
                  + i
                  + " added to ensemble. Ensemble error = "
                  + eComm);
      } else { // reject the current classifier because it increased the ensemble error
        m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member
      }
      numTrials++;
    }
  }
Exemple #30
0
  public static void main(String args[]) {
    Timers timer = new Timers();
    try {
      // Get the data set path.
      String referenceFile = Utils.getOption('r', args);
      String queryFile = Utils.getOption('q', args);
      if (referenceFile.length() == 0)
        throw new IllegalArgumentException(
            "Required option: File containing" + "the reference dataset.");

      // Load input dataset.
      DataSource source = new DataSource(referenceFile);
      Instances referenceData = source.getDataSet();

      Instances queryData = null;
      if (queryFile.length() != 0) {
        source = new DataSource(queryFile);
        queryData = source.getDataSet();
      }

      timer.StartTimer("total_time");

      // Get all the parameters.
      String leafSize = Utils.getOption('l', args);
      String neighbors = Utils.getOption('k', args);

      // Validate options.
      int k = 0;
      if (neighbors.length() == 0) {
        throw new IllegalArgumentException(
            "Required option: Number of " + "furthest neighbors to find.");
      } else {
        k = Integer.parseInt(neighbors);
        if (k < 1 || k > referenceData.numInstances())
          throw new IllegalArgumentException("[Fatal] Invalid k");
      }

      int l = 20;
      if (leafSize.length() != 0) l = Integer.parseInt(leafSize);

      // Create KDTree.
      KDTree tree = new KDTree();
      tree.setMaxInstInLeaf(l);
      tree.setInstances(referenceData);

      // Perform All K-Nearest-Neighbors.
      if (queryFile.length() != 0) {
        for (int i = 0; i < queryData.numInstances(); i++) {
          Instances out = tree.kNearestNeighbours(queryData.instance(i), k);
        }
      } else {
        for (int i = 0; i < referenceData.numInstances(); i++) {
          Instances out = tree.kNearestNeighbours(referenceData.instance(i), k);
        }
      }

      timer.StopTimer("total_time");
      timer.PrintTimer("total_time");
    } catch (IOException e) {
      System.err.println(USAGE);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }