Beispiel #1
0
  /**
   * Refits the regression on a reduced copy of the data. After buildWeight has run, every instance
   * of m_Data whose entry in m_weight is zero is dropped from the copy (m_RLSData) and a new
   * LinearRegression is built on what remains. When no instance remains, the current regression is
   * reused as the result instead.
   *
   * @throws Exception if building fails
   */
  private void buildRLSRegression() throws Exception {

    buildWeight();
    m_RLSData = new Instances(m_Data);

    // Walk backwards so that removing an instance never shifts the position of an instance that
    // still has to be checked; m_weight is indexed by the position in the original data.
    for (int idx = m_RLSData.numInstances() - 1; idx >= 0; idx--) {
      if (m_weight[idx] == 0) {
        m_RLSData.delete(idx);
      }
    }

    if (m_RLSData.numInstances() != 0) {
      m_ls = new LinearRegression();
      m_ls.setOptions(new String[] {"-S", "1"});
      m_ls.buildClassifier(m_RLSData);
      m_currentRegression = m_ls;
      return;
    }

    // Every instance was removed: nothing to fit, so fall back to the current regression.
    System.err.println("rls regression unbuilt");
    m_ls = m_currentRegression;
  }
Beispiel #2
0
  /**
   * Computes the squared residual of every instance in m_Data under the current regression. The
   * per-instance values are stored in m_Residuals and their sum is accumulated in m_SSR.
   *
   * @throws Exception if an error occurs
   */
  private void findResiduals() throws Exception {

    m_SSR = 0;
    final int count = m_Data.numInstances();
    m_Residuals = new double[count];

    for (int row = 0; row < count; row++) {
      double predicted = m_currentRegression.classifyInstance(m_Data.instance(row));
      double actual = m_Data.instance(row).value(m_Data.classAttribute());
      double error = predicted - actual;
      m_Residuals[row] = error * error;
      m_SSR += m_Residuals[row];
    }
  }
Beispiel #3
0
  /**
   * Sets m_samples, the number of samples to use.
   *
   * <p>For a sample size of 7 or more the count is fixed at 3000. For smaller sample sizes the
   * count is the number of combinations of m_samplesize instances when the dataset has fewer
   * instances than the table entry for that sample size, and m_samplesize * 500 otherwise.
   *
   * @throws Exception if an error occurs
   */
  private void getSamples() throws Exception {

    // Instance-count limits, indexed by (m_samplesize - 1).
    int[] limits = {500, 50, 22, 17, 15, 14};

    if (m_samplesize >= 7) {
      m_samples = 3000;
    } else if (m_Data.numInstances() < limits[m_samplesize - 1]) {
      m_samples = combinations(m_Data.numInstances(), m_samplesize);
    } else {
      m_samples = m_samplesize * 500;
    }

    if (!m_debug) {
      return;
    }
    System.out.println("m_samplesize: " + m_samplesize);
    System.out.println("m_samples: " + m_samples);
    System.out.println("m_randomseed: " + m_randomseed);
  }
 /**
  * Creates a new <code>TestSetEvent</code>
  *
  * @param source the source of the event
  * @param testSet the test instances
  */
 public TestSetEvent(Object source, Instances testSet) {
   super(source);
   m_testSet = testSet;
   if (m_testSet != null && m_testSet.numInstances() == 0) {
     m_structureOnly = true;
   }
 }
Beispiel #5
0
  /**
   * Calculates the performance stats for the desired class and returns the results as a set of
   * Instances.
   *
   * <p>The threshold curve is computed first; each of its rows then contributes two data points
   * that share the row's threshold: (0, false positive rate) and (1, 1 - true positive rate).
   *
   * @param predictions the predictions to base the curve on
   * @param classIndex index of the class of interest.
   * @return datapoints as a set of instances, or null if there are no predictions or the class
   *     index lies outside the distribution of the first prediction.
   */
  public Instances getCurve(FastVector predictions, int classIndex) {

    if (predictions.size() == 0) {
      return null;
    }
    NominalPrediction first = (NominalPrediction) predictions.elementAt(0);
    if (first.distribution().length <= classIndex) {
      return null;
    }

    Instances thresholdData = new ThresholdCurve().getCurve(predictions, classIndex);
    Instances result = makeHeader();

    // Column positions of the values needed from the threshold curve.
    int fpCol = thresholdData.attribute(ThresholdCurve.FP_RATE_NAME).index();
    int tpCol = thresholdData.attribute(ThresholdCurve.TP_RATE_NAME).index();
    int thresholdCol = thresholdData.attribute(ThresholdCurve.THRESHOLD_NAME).index();

    int rows = thresholdData.numInstances();
    for (int row = 0; row < rows; row++) {
      Instance source = thresholdData.instance(row);
      double fpRate = source.value(fpCol);
      double tpRate = source.value(tpCol);
      double threshold = source.value(thresholdCol);

      // Two points per threshold: one with first value 0, one with first value 1.
      result.add(new Instance(1.0, new double[] {0, fpRate, threshold}));
      result.add(new Instance(1.0, new double[] {1, 1.0 - tpRate, threshold}));
    }

    return result;
  }
Beispiel #6
0
  /**
   * Signify that this batch of input to the filter is finished. If the filter requires all
   * instances prior to filtering, output() may now be called to retrieve the filtered instances.
   *
   * <p>If no means have been computed yet, the mean and standard deviation of every numeric
   * non-class attribute are taken from the buffered input, and the buffered instances are then
   * converted.
   *
   * @return true if there are instances pending output
   * @exception Exception if an error occurs
   * @exception IllegalStateException if no input structure has been defined
   */
  public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (m_Means == null) {
      Instances buffered = getInputFormat();
      int attCount = buffered.numAttributes();
      m_Means = new double[attCount];
      m_StdDevs = new double[attCount];

      for (int att = 0; att < attCount; att++) {
        // Statistics are only gathered for numeric attributes other than the class.
        if (!buffered.attribute(att).isNumeric() || (buffered.classIndex() == att)) {
          continue;
        }
        m_Means[att] = buffered.meanOrMode(att);
        m_StdDevs[att] = Math.sqrt(buffered.variance(att));
      }

      // Convert pending input instances
      for (int row = 0; row < buffered.numInstances(); row++) {
        convertInstance(buffered.instance(row));
      }
    }

    // Free memory
    flushInput();
    m_NewBatch = true;

    boolean outputWaiting = numPendingOutput() != 0;
    return outputWaiting;
  }
Beispiel #7
0
  /**
   * Builds a weight function removing instances with an abnormally high scaled residual.
   *
   * <p>The residuals are recomputed, a scale factor is derived from m_bestMedian as
   * 1.4826 * (1 + 5 / (n - p)) * sqrt(m_bestMedian), with n the number of instances and p the
   * number of attributes of m_Data, and each instance gets weight 1.0 if its residual divided by
   * that scale is below 2.5 and weight 0.0 otherwise.
   *
   * @throws Exception if weight building fails
   */
  private void buildWeight() throws Exception {

    findResiduals();

    // The correction term 5 / (n - p) must be evaluated in floating point. With integer operands
    // it was truncated (to 0 whenever n - p > 5) and threw an ArithmeticException when n == p.
    int degreesOfFreedom = m_Data.numInstances() - m_Data.numAttributes();
    m_scalefactor = 1.4826 * (1 + 5.0 / degreesOfFreedom) * Math.sqrt(m_bestMedian);

    m_weight = new double[m_Residuals.length];
    for (int i = 0; i < m_Residuals.length; i++) {
      // m_Residuals holds squared residuals, hence the square root before scaling.
      m_weight[i] = ((Math.sqrt(m_Residuals[i]) / m_scalefactor < 2.5) ? 1.0 : 0.0);
    }
  }
Beispiel #8
0
  /**
   * Returns a string suitable for passing to RemoveRange consisting of m_samplesize indices.
   *
   * <p>Each index is drawn independently from 1 to data.numInstances() inclusive, so the same
   * index may occur more than once. The indices are separated by commas and the string ends with
   * a newline.
   *
   * @param data dataset from which to take indices
   * @return string of indices suitable for passing to RemoveRange
   * @throws IllegalArgumentException if indices are requested from a dataset without instances
   */
  private String selectIndices(Instances data) {

    int total = data.numInstances();
    if (m_samplesize > 0 && total <= 0) {
      // Previously this case spun forever, because the only value that could be drawn was 0.
      throw new IllegalArgumentException("Cannot select indices from an empty dataset");
    }

    StringBuilder text = new StringBuilder();
    for (int i = 0; i < m_samplesize; i++) {
      // (int) (u * total) lies in 0..total-1; adding 1 gives a 1-based index in 1..total.
      // Rejecting 0 instead, as before, could never produce the last index and never terminated
      // for a single-instance dataset.
      int index = (int) (m_random.nextDouble() * total) + 1;
      text.append(index);
      text.append((i < m_samplesize - 1) ? "," : "\n");
    }
    return text.toString();
  }
  /**
   * Converts the header info of the given set of instances into a set of item sets (singletons).
   * The ordering of values in the header file determines the lexicographic order.
   *
   * <p>One item set is created per attribute value. Its item array has one slot per attribute,
   * all set to -1 except the slot of the owning attribute, which holds the value index.
   *
   * @param instances the set of instances whose header info is to be used
   * @return a set of item sets, each containing a single item
   * @exception Exception if singletons can't be generated successfully
   */
  public static FastVector singletons(Instances instances) throws Exception {

    FastVector result = new FastVector();
    int attCount = instances.numAttributes();

    for (int att = 0; att < attCount; att++) {
      if (instances.attribute(att).isNumeric()) {
        throw new Exception("Can't handle numeric attributes!");
      }

      int valueCount = instances.attribute(att).numValues();
      for (int val = 0; val < valueCount; val++) {
        ItemSet single = new AprioriItemSet(instances.numInstances());

        // Start with every slot marked as unset, then record the one item.
        int[] items = new int[attCount];
        for (int slot = 0; slot < items.length; slot++) {
          items[slot] = -1;
        }
        items[att] = val;

        single.m_items = items;
        result.addElement(single);
      }
    }
    return result;
  }