/**
 * Rebuilds the regression on a copy of the data from which every instance
 * that buildWeight gave a zero weight has been removed. If no instance
 * survives, the current regression is kept as the result.
 *
 * @throws Exception if building fails
 */
private void buildRLSRegression() throws Exception {

  buildWeight();

  m_RLSData = new Instances(m_Data);

  // 'source' walks the weights of the original data, 'kept' is the matching
  // position in the shrinking copy; deleting leaves 'kept' where it is.
  int kept = 0;
  for (int source = 0; kept < m_RLSData.numInstances(); source++) {
    if (m_weight[source] == 0) {
      m_RLSData.delete(kept);
    } else {
      kept++;
    }
  }

  if (m_RLSData.numInstances() == 0) {
    // nothing left to fit: fall back to the regression we already have
    System.err.println("rls regression unbuilt");
    m_ls = m_currentRegression;
    return;
  }

  m_ls = new LinearRegression();
  m_ls.setOptions(new String[] {"-S", "1"});
  m_ls.buildClassifier(m_RLSData);
  m_currentRegression = m_ls;
}
/**
 * Computes the squared residual of every instance in m_Data under the
 * current regression, storing them in m_Residuals and their total in m_SSR.
 *
 * @throws Exception if an error occurs
 */
private void findResiduals() throws Exception {

  final int numInstances = m_Data.numInstances();

  m_SSR = 0;
  m_Residuals = new double[numInstances];

  for (int idx = 0; idx < numInstances; idx++) {
    double predicted = m_currentRegression.classifyInstance(m_Data.instance(idx));
    double actual = m_Data.instance(idx).value(m_Data.classAttribute());
    double error = predicted - actual;

    m_Residuals[idx] = error * error;
    m_SSR += m_Residuals[idx];
  }
}
/**
 * Determines how many samples to draw and stores the result in m_samples.
 * Sample sizes of 7 or more always use 3000 samples; smaller sizes use
 * either the number of combinations (for small datasets) or 500 samples
 * per unit of sample size.
 *
 * @throws Exception if an error occurs
 */
private void getSamples() throws Exception {

  // dataset-size thresholds, indexed by (m_samplesize - 1)
  final int[] thresholds = {500, 50, 22, 17, 15, 14};

  if (m_samplesize >= 7) {
    m_samples = 3000;
  } else if (m_Data.numInstances() < thresholds[m_samplesize - 1]) {
    m_samples = combinations(m_Data.numInstances(), m_samplesize);
  } else {
    m_samples = m_samplesize * 500;
  }

  if (!m_debug) {
    return;
  }
  System.out.println("m_samplesize: " + m_samplesize);
  System.out.println("m_samples: " + m_samples);
  System.out.println("m_randomseed: " + m_randomseed);
}
/** * Creates a new <code>TestSetEvent</code> * * @param source the source of the event * @param testSet the test instances */ public TestSetEvent(Object source, Instances testSet) { super(source); m_testSet = testSet; if (m_testSet != null && m_testSet.numInstances() == 0) { m_structureOnly = true; } }
/**
 * Calculates the performance stats for the desired class and returns the
 * results as a set of Instances. Every threshold-curve point contributes two
 * rows: one with values (0, FP rate, threshold) and one with values
 * (1, 1 - TP rate, threshold).
 *
 * @param predictions the predictions to base the curve on
 * @param classIndex index of the class of interest.
 * @return datapoints as a set of instances, or null if there are no
 *         predictions or the class index is outside the first prediction's
 *         distribution.
 */
public Instances getCurve(FastVector predictions, int classIndex) {

  if (predictions.size() == 0) {
    return null;
  }
  NominalPrediction first = (NominalPrediction) predictions.elementAt(0);
  if (first.distribution().length <= classIndex) {
    return null;
  }

  ThresholdCurve thresholdCurve = new ThresholdCurve();
  Instances thresholdPoints = thresholdCurve.getCurve(predictions, classIndex);

  Instances result = makeHeader();
  final int fpIndex = thresholdPoints.attribute(ThresholdCurve.FP_RATE_NAME).index();
  final int tpIndex = thresholdPoints.attribute(ThresholdCurve.TP_RATE_NAME).index();
  final int thresholdIndex = thresholdPoints.attribute(ThresholdCurve.THRESHOLD_NAME).index();

  for (int row = 0; row < thresholdPoints.numInstances(); row++) {
    Instance point = thresholdPoints.instance(row);
    double fpRate = point.value(fpIndex);
    double tpRate = point.value(tpIndex);
    double threshold = point.value(thresholdIndex);

    // row with first value 0, carrying the false positive rate
    result.add(new Instance(1.0, new double[] {0, fpRate, threshold}));
    // row with first value 1, carrying 1 - true positive rate
    result.add(new Instance(1.0, new double[] {1, 1.0 - tpRate, threshold}));
  }

  return result;
}
/** * Signify that this batch of input to the filter is finished. If the filter requires all * instances prior to filtering, output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @exception Exception if an error occurs * @exception IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_Means == null) { Instances input = getInputFormat(); m_Means = new double[input.numAttributes()]; m_StdDevs = new double[input.numAttributes()]; for (int i = 0; i < input.numAttributes(); i++) { if (input.attribute(i).isNumeric() && (input.classIndex() != i)) { m_Means[i] = input.meanOrMode(i); m_StdDevs[i] = Math.sqrt(input.variance(i)); } } // Convert pending input instances for (int i = 0; i < input.numInstances(); i++) { convertInstance(input.instance(i)); } } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
/**
 * Builds a weight function removing instances with an abnormally high scaled
 * residual. Residuals are recomputed first; then every instance whose
 * residual, divided by the scale factor, is below 2.5 gets weight 1 and every
 * other instance gets weight 0.
 *
 * <p>The scale factor is
 * <code>1.4826 * (1 + 5 / (n - p)) * sqrt(m_bestMedian)</code>, where n is the
 * number of instances and p the number of attributes of m_Data. The
 * <code>5 / (n - p)</code> term is evaluated in floating point; in integer
 * arithmetic it truncated to 0 whenever n - p exceeded 5.
 *
 * @throws ArithmeticException if the data has as many attributes as
 *         instances, which leaves the correction term undefined (the integer
 *         division used previously failed with the same exception type)
 * @throws Exception if weight building fails
 */
private void buildWeight() throws Exception {

  findResiduals();

  int degreesOfFreedom = m_Data.numInstances() - m_Data.numAttributes();
  if (degreesOfFreedom == 0) {
    throw new ArithmeticException(
        "Cannot compute scale factor: number of instances equals number of attributes");
  }

  // 5.0 forces floating-point division so the correction term is not truncated
  m_scalefactor = 1.4826 * (1 + 5.0 / degreesOfFreedom) * Math.sqrt(m_bestMedian);

  m_weight = new double[m_Residuals.length];
  for (int i = 0; i < m_Residuals.length; i++) {
    // m_Residuals holds squared residuals, hence the square root
    double scaledResidual = Math.sqrt(m_Residuals[i]) / m_scalefactor;
    m_weight[i] = (scaledResidual < 2.5) ? 1.0 : 0.0;
  }
}
/**
 * Returns a string suitable for passing to RemoveRange consisting of
 * m_samplesize randomly drawn indices, separated by commas and terminated by
 * a newline. Indices are drawn with replacement, so duplicates are possible.
 *
 * <p>Each index lies in 1..data.numInstances() inclusive (presumably
 * RemoveRange indices are 1-based; the earlier implementation rejected 0 for
 * that reason). Drawing the index directly, instead of redrawing until a
 * non-zero value appears, lets the last instance be selected and guarantees
 * termination for datasets with a single instance.
 *
 * @param data dataset from which to take indices
 * @return string of indices suitable for passing to RemoveRange
 * @throws IllegalArgumentException if indices are requested from an empty
 *         dataset
 */
private String selectIndices(Instances data) {

  final int numInstances = data.numInstances();
  if (m_samplesize > 0 && numInstances < 1) {
    throw new IllegalArgumentException("Cannot select indices from an empty dataset");
  }

  StringBuffer text = new StringBuffer();
  for (int i = 0; i < m_samplesize; i++) {
    // assumes m_random.nextDouble() returns a value in [0, 1), giving 1..numInstances
    int index = 1 + (int) (m_random.nextDouble() * numInstances);
    text.append(Integer.toString(index));
    if (i < m_samplesize - 1) {
      text.append(",");
    } else {
      text.append("\n");
    }
  }
  return text.toString();
}
/**
 * Converts the header info of the given set of instances into a set of item
 * sets (singletons): one item set per (attribute, value) pair, with every
 * other attribute position set to -1. Item sets are generated attribute by
 * attribute and, within an attribute, in the order of its values in the
 * header, which determines the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instances) throws Exception {

  final int numAttributes = instances.numAttributes();
  FastVector singletonSets = new FastVector();

  for (int att = 0; att < numAttributes; att++) {
    if (instances.attribute(att).isNumeric()) {
      throw new Exception("Can't handle numeric attributes!");
    }

    final int numValues = instances.attribute(att).numValues();
    for (int value = 0; value < numValues; value++) {
      // start from "no item" everywhere, then set the one attribute
      int[] items = new int[numAttributes];
      for (int pos = 0; pos < numAttributes; pos++) {
        items[pos] = -1;
      }
      items[att] = value;

      ItemSet itemSet = new AprioriItemSet(instances.numInstances());
      itemSet.m_items = items;
      singletonSets.addElement(itemSet);
    }
  }

  return singletonSets;
}