/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  if (!(m_Classifier instanceof WeightedInstancesHandler)) {
    throw new IllegalArgumentException("Classifier must be a "
        + "WeightedInstancesHandler!");
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  instances = new Instances(instances);
  instances.deleteWithMissingClass();

  // only class? -> build ZeroR model
  if (instances.numAttributes() == 1) {
    System.err.println(
        "Cannot build model (only class attribute present in data!), "
            + "using ZeroR model instead!");
    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(instances);
    return;
  } else {
    m_ZeroR = null;
  }

  m_Train = new Instances(instances, 0, instances.numInstances());

  m_NNSearch.setInstances(m_Train);
}
/**
 * Splits the given set of instances into subsets.
 *
 * @exception Exception if something goes wrong
 */
public final Instances[] split(Instances data) throws Exception {

  Instances[] instances = new Instances[m_numSubsets];
  double[] weights;
  double newWeight;
  Instance instance;
  int subset, i, j;

  for (j = 0; j < m_numSubsets; j++)
    instances[j] = new Instances((Instances) data, data.numInstances());

  for (i = 0; i < data.numInstances(); i++) {
    instance = ((Instances) data).instance(i);
    weights = weights(instance);
    subset = whichSubset(instance);
    if (subset > -1)
      instances[subset].add(instance);
    else
      for (j = 0; j < m_numSubsets; j++)
        if (Utils.gr(weights[j], 0)) {
          newWeight = weights[j] * instance.weight();
          instances[j].add(instance);
          instances[j].lastInstance().setWeight(newWeight);
        }
  }

  for (j = 0; j < m_numSubsets; j++)
    instances[j].compactify();

  return instances;
}
/**
 * Returns a vector with the column names of the dataset that are listed in "list". If a column
 * cannot be found or the list is empty, the names from the default list are returned.
 *
 * @param list comma-separated list of attribute names
 * @param defaultList the default list of attribute names
 * @param inst the instances to get the attribute names from
 * @return a vector containing attribute names
 */
protected Vector determineColumnNames(String list, String defaultList, Instances inst) {
  Vector result;
  Vector atts;
  StringTokenizer tok;
  int i;
  String item;

  // get attribute names
  atts = new Vector();
  for (i = 0; i < inst.numAttributes(); i++)
    atts.add(inst.attribute(i).name().toLowerCase());

  // process list
  result = new Vector();
  tok = new StringTokenizer(list, ",");
  while (tok.hasMoreTokens()) {
    item = tok.nextToken().toLowerCase();
    if (atts.contains(item)) {
      result.add(item);
    } else {
      result.clear();
      break;
    }
  }

  // do we have to return defaults?
  if (result.size() == 0) {
    tok = new StringTokenizer(defaultList, ",");
    while (tok.hasMoreTokens())
      result.add(tok.nextToken().toLowerCase());
  }

  return result;
}
public void testTypical() {
  Instances result = useFilter();
  // The filter adds five attributes; the number of instances shouldn't change
  assertEquals(m_Instances.numAttributes() + 5, result.numAttributes());
  assertEquals(m_Instances.numInstances(), result.numInstances());

  // Eibe can enhance this to check the binarizing is correct.
}
/**
 * Generate artificial training examples.
 *
 * @param artSize size of examples set to create
 * @param data training data
 * @return the set of unlabeled artificial examples
 */
protected Instances generateArtificialData(int artSize, Instances data) {
  int numAttributes = data.numAttributes();
  Instances artData = new Instances(data, artSize);
  double[] att;
  Instance artInstance;

  for (int i = 0; i < artSize; i++) {
    att = new double[numAttributes];
    for (int j = 0; j < numAttributes; j++) {
      if (data.attribute(j).isNominal()) {
        // Select nominal value based on the frequency of occurrence in the training data
        double[] stats = (double[]) m_AttributeStats.get(j);
        att[j] = (double) selectIndexProbabilistically(stats);
      } else if (data.attribute(j).isNumeric()) {
        // Generate numeric value from the Gaussian distribution
        // defined by the mean and std dev of the attribute
        double[] stats = (double[]) m_AttributeStats.get(j);
        att[j] = (m_Random.nextGaussian() * stats[1]) + stats[0];
      } else
        System.err.println("Decorate can only handle numeric and nominal values.");
    }
    artInstance = new Instance(1.0, att);
    artData.add(artInstance);
  }
  return artData;
}
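/*
 * A minimal usage sketch for the method above, which belongs to Weka's Decorate ensemble
 * (weka.classifiers.meta.Decorate; in recent Weka versions this is an optional package).
 * The ARFF file name is an assumption; the default artificial-data settings are used.
 */
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.meta.Decorate;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DecorateDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("data.arff"); // hypothetical nominal-class data set
    data.setClassIndex(data.numAttributes() - 1);

    Decorate decorate = new Decorate(); // internally calls generateArtificialData()
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(decorate, data, 10, new Random(1));
    System.out.println(eval.toSummaryString());
  }
}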
public static void wekaAlgorithms(Instances data) throws Exception {
  classifier = new FilteredClassifier(); // meta-classifier wrapping the chosen base learner
  classifier.setClassifier(new NaiveBayes());
  // classifier.setClassifier(new J48());
  // classifier.setClassifier(new RandomForest());
  // classifier.setClassifier(new ZeroR());
  // classifier.setClassifier(new IBk());
  data.setClassIndex(data.numAttributes() - 1);

  Evaluation eval = new Evaluation(data);
  int folds = 10;
  eval.crossValidateModel(classifier, data, folds, new Random(1));

  System.out.println("===== Evaluating on filtered (training) dataset =====");
  System.out.println(eval.toSummaryString());
  System.out.println(eval.toClassDetailsString());

  double[][] mat = eval.confusionMatrix();
  System.out.println("========= Confusion Matrix =========");
  for (int i = 0; i < mat.length; i++) {
    for (int j = 0; j < mat.length; j++) {
      System.out.print(mat[i][j] + " ");
    }
    System.out.println(" ");
  }
}
/** trains the classifier */
@Override
public void train() throws Exception {
  if (_train.classIndex() == -1)
    _train.setClassIndex(_train.numAttributes() - 1);
  _cl.buildClassifier(_train);

  // evaluate classifier and print some statistics
  evaluate();
}
/**
 * initializes the algorithm
 *
 * @param data the data to work with
 * @throws Exception if m_SVM is null
 */
protected void init(Instances data) throws Exception {
  if (m_SVM == null) {
    throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
  }
  m_C = m_SVM.getC();
  m_data = data;
  m_classIndex = data.classIndex();
  m_nInstances = data.numInstances();

  // Initialize kernel
  m_kernel = Kernel.makeCopy(m_SVM.getKernel());
  m_kernel.buildKernel(data);

  // init m_target
  m_target = new double[m_nInstances];
  for (int i = 0; i < m_nInstances; i++) {
    m_target[i] = data.instance(i).classValue();
  }

  m_random = new Random(m_nSeed);

  // initialize alpha and alpha* array to all zero
  m_alpha = new double[m_target.length];
  m_alphaStar = new double[m_target.length];

  m_supportVectors = new SMOset(m_nInstances);

  m_b = 0.0;
  m_nEvals = 0;
  m_nCacheHits = -1;
}
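/*
 * A minimal sketch of how the optimizer above is normally driven: through
 * weka.classifiers.functions.SMOreg, which hands its kernel and complexity constant C to the
 * optimizer's init(). The ARFF file name is an assumption.
 */
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.SMOreg;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SMOregDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("regression.arff"); // hypothetical numeric-class data set
    data.setClassIndex(data.numAttributes() - 1);

    SMOreg svm = new SMOreg();
    svm.setC(1.0); // complexity constant, read by init() via m_SVM.getC()

    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(svm, data, 10, new Random(1));
    System.out.println(eval.toSummaryString());
  }
}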
/**
 * wrap up various variables to save memory and do some housekeeping after optimization has
 * finished.
 *
 * @throws Exception if something goes wrong
 */
protected void wrapUp() throws Exception {
  m_target = null;

  m_nEvals = m_kernel.numEvals();
  m_nCacheHits = m_kernel.numCacheHits();

  if ((m_SVM.getKernel() instanceof PolyKernel)
      && ((PolyKernel) m_SVM.getKernel()).getExponent() == 1.0) {
    // convert alpha's to weights
    double[] weights = new double[m_data.numAttributes()];
    for (int k = m_supportVectors.getNext(-1); k != -1; k = m_supportVectors.getNext(k)) {
      for (int j = 0; j < weights.length; j++) {
        if (j != m_classIndex) {
          weights[j] += (m_alpha[k] - m_alphaStar[k]) * m_data.instance(k).value(j);
        }
      }
    }
    m_weights = weights;

    // release memory
    m_alpha = null;
    m_alphaStar = null;
    m_kernel = null;
  }
  m_bModelBuilt = true;
}
/**
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
  Instances isTrainingSet = createSet(4);
  Instance instance1 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
  Instance instance2 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
  Instance instance22 = createInstance(new double[] {0, 0, 0, 0}, "S3", isTrainingSet);
  isTrainingSet.add(instance1);
  isTrainingSet.add(instance2);
  isTrainingSet.add(instance22);

  Instances isTestingSet = createSet(4);
  Instance instance3 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", isTrainingSet);
  Instance instance4 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", isTrainingSet);
  isTestingSet.add(instance3);
  isTestingSet.add(instance4);

  // Create a BayesNet classifier
  Classifier cModel = (Classifier) new BayesNet(); // M5P
  cModel.buildClassifier(isTrainingSet);

  // Test the model
  Evaluation eTest = new Evaluation(isTrainingSet);
  eTest.evaluateModel(cModel, isTestingSet);

  // Print the result à la Weka explorer:
  String strSummary = eTest.toSummaryString();
  System.out.println(strSummary);

  // Get the likelihood of each class
  // fDistribution[0] is the probability of being “positive”
  // fDistribution[1] is the probability of being “negative”
  double[] fDistribution = cModel.distributionForInstance(instance4);
  for (int i = 0; i < fDistribution.length; i++) {
    System.out.println(fDistribution[i]);
  }
}
/**
 * Build the associator on the filtered data.
 *
 * @param data the training data
 * @throws Exception if the Associator could not be built successfully
 */
public void buildAssociations(Instances data) throws Exception {
  if (m_Associator == null)
    throw new Exception("No base associator has been set!");

  // create copy and set class-index
  data = new Instances(data);
  if (getClassIndex() == 0) {
    data.setClassIndex(data.numAttributes() - 1);
  } else {
    data.setClassIndex(getClassIndex() - 1);
  }

  if (getClassIndex() != -1) {
    // remove instances with missing class
    data.deleteWithMissingClass();
  }

  m_Filter.setInputFormat(data); // filter capabilities are checked here
  data = Filter.useFilter(data, m_Filter);

  // can associator handle the data?
  getAssociator().getCapabilities().testWithFail(data);

  m_FilteredInstances = data.stringFreeStructure();
  m_Associator.buildAssociations(data);
}
/**
 * Determines and returns (if possible) the structure (internally the header) of the data set as
 * an empty set of instances.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if an error occurs
 */
public Instances getStructure() throws IOException {
  if (getDirectory() == null) {
    throw new IOException("No directory/source has been specified");
  }

  // determine class labels, i.e., sub-dirs
  if (m_structure == null) {
    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classes = new ArrayList<String>();

    File dir = new File(directoryPath);
    String[] subdirs = dir.list();

    for (int i = 0; i < subdirs.length; i++) {
      File subdir = new File(directoryPath + File.separator + subdirs[i]);
      if (subdir.isDirectory())
        classes.add(subdirs[i]);
    }

    atts.add(new Attribute("text", (ArrayList<String>) null));
    if (m_OutputFilename)
      atts.add(new Attribute("filename", (ArrayList<String>) null));
    // make sure that the name of the class attribute is unlikely to
    // clash with any attribute created via the StringToWordVector filter
    atts.add(new Attribute("@@class@@", classes));

    String relName = directoryPath.replaceAll("/", "_");
    relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
    m_structure = new Instances(relName, atts, 0);
    m_structure.setClassIndex(m_structure.numAttributes() - 1);
  }

  return m_structure;
}
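/*
 * A minimal sketch of using weka.core.converters.TextDirectoryLoader, whose getStructure() is
 * shown above. The directory name is an assumption; it is expected to contain one sub-directory
 * per class, each holding plain-text files.
 */
import java.io.File;
import weka.core.Instances;
import weka.core.converters.TextDirectoryLoader;

public class TextDirectoryDemo {
  public static void main(String[] args) throws Exception {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File("text_corpus")); // hypothetical root directory

    Instances structure = loader.getStructure(); // header only: text, (filename,) @@class@@
    System.out.println(structure);

    Instances data = loader.getDataSet(); // the full data set
    System.out.println(data.numInstances() + " documents loaded");
  }
}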
/**
 * Sets instances that should be stored.
 *
 * @param instances the instances
 */
@Override
public void setInstances(Instances instances) {
  m_ClassIndex.setUpper(instances.numAttributes() - 1);
  instances.setClassIndex(m_ClassIndex.getIndex());
  super.setInstances(instances);
}
private double calcNodeScorePlain(int nNode) {
  Instances instances = m_BayesNet.m_Instances;
  ParentSet oParentSet = m_BayesNet.getParentSet(nNode);

  // determine cardinality of parent set & reserve space for frequency counts
  int nCardinality = oParentSet.getCardinalityOfParents();
  int numValues = instances.attribute(nNode).numValues();
  int[] nCounts = new int[nCardinality * numValues];

  // initialize (don't need this?)
  for (int iParent = 0; iParent < nCardinality * numValues; iParent++) {
    nCounts[iParent] = 0;
  }

  // estimate distributions
  Enumeration enumInsts = instances.enumerateInstances();

  while (enumInsts.hasMoreElements()) {
    Instance instance = (Instance) enumInsts.nextElement();

    // updateClassifier;
    double iCPT = 0;

    for (int iParent = 0; iParent < oParentSet.getNrOfParents(); iParent++) {
      int nParent = oParentSet.getParent(iParent);
      iCPT = iCPT * instances.attribute(nParent).numValues() + instance.value(nParent);
    }

    nCounts[numValues * ((int) iCPT) + (int) instance.value(nNode)]++;
  }

  return calcScoreOfCounts(nCounts, nCardinality, numValues, instances);
} // CalcNodeScore
/** tests whether a URL can be loaded (via setURL(URL)). */
public void testURLSourcedLoader() {
  Instances data;

  if (!(getLoader() instanceof URLSourcedLoader)) {
    return;
  }

  try {
    // save
    m_Saver.setInstances(m_Instances);
    m_Saver.setFile(new File(m_ExportFilename));
    m_Saver.writeBatch();

    // load
    ((URLSourcedLoader) m_Loader).setURL(new File(m_ExportFilename).toURI().toURL().toString());
    data = m_Loader.getDataSet();

    // compare data
    try {
      if (m_Instances.classIndex() != data.classIndex()) {
        data.setClassIndex(m_Instances.classIndex());
      }
      compareDatasets(m_Instances, data);
    } catch (Exception e) {
      fail("URL load failed (datasets differ): " + e.toString());
    }
  } catch (Exception e) {
    e.printStackTrace();
    fail("URL load failed: " + e.toString());
  }
}
private static void writePredictedDistributions(
    Classifier c, Instances data, int idIndex, Writer out) throws Exception {
  // header
  out.write("id");
  for (int i = 0; i < data.numClasses(); i++) {
    out.write(",\"");
    out.write(data.classAttribute().value(i).replaceAll("[\"\\\\]", "_"));
    out.write("\"");
  }
  out.write("\n");

  // data
  for (int i = 0; i < data.numInstances(); i++) {
    final String id = data.instance(i).stringValue(idIndex);
    double[] distribution = c.distributionForInstance(data.instance(i));

    // final String label = data.attribute(classIndex).value();
    out.write(id);
    for (double probability : distribution) {
      out.write(",");
      out.write(String.valueOf(probability > 1e-5 ? (float) probability : 0f));
    }
    out.write("\n");
  }
}
/** tests whether data can be loaded via setSource() with a file stream. */
public void testLoaderWithStream() {
  Instances data;

  try {
    // save
    m_Saver.setInstances(m_Instances);
    m_Saver.setFile(new File(m_ExportFilename));
    m_Saver.writeBatch();

    // load
    m_Loader.setSource(new FileInputStream(new File(m_ExportFilename)));
    data = m_Loader.getDataSet();

    // compare data
    try {
      if (m_Instances.classIndex() != data.classIndex()) {
        data.setClassIndex(m_Instances.classIndex());
      }
      compareDatasets(m_Instances, data);
    } catch (Exception e) {
      fail("File stream loading failed (datasets differ): " + e.toString());
    }
  } catch (Exception e) {
    e.printStackTrace();
    fail("File stream loading failed: " + e.toString());
  }
}
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input instance structure (any instances
 *     contained in the object are ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws UnsupportedAttributeTypeException if selected attributes are not numeric or nominal.
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  if ((instanceInfo.classIndex() > 0) && (!getFillWithMissing())) {
    throw new IllegalArgumentException(
        "TimeSeriesTranslate: Need to fill in missing values "
            + "using appropriate option when class index is set.");
  }
  super.setInputFormat(instanceInfo);
  // Create the output buffer
  Instances outputFormat = new Instances(instanceInfo, 0);
  for (int i = 0; i < instanceInfo.numAttributes(); i++) {
    if (i != instanceInfo.classIndex()) {
      if (m_SelectedCols.isInRange(i)) {
        if (outputFormat.attribute(i).isNominal() || outputFormat.attribute(i).isNumeric()) {
          outputFormat.renameAttribute(
              i,
              outputFormat.attribute(i).name()
                  + (m_InstanceRange < 0 ? '-' : '+')
                  + Math.abs(m_InstanceRange));
        } else {
          throw new UnsupportedAttributeTypeException(
              "Only numeric and nominal attributes may be " + " manipulated in time series.");
        }
      }
    }
  }
  outputFormat.setClassIndex(instanceInfo.classIndex());
  setOutputFormat(outputFormat);
  return true;
}
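/*
 * A minimal sketch of applying weka.filters.unsupervised.attribute.TimeSeriesTranslate, whose
 * setInputFormat() is shown above. The ARFF file name and the attribute range are assumptions.
 */
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.TimeSeriesTranslate;

public class TimeSeriesTranslateDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("series.arff"); // hypothetical time-series data

    TimeSeriesTranslate filter = new TimeSeriesTranslate();
    filter.setAttributeIndices("first-last"); // attributes to shift
    filter.setInstanceRange(-1); // take each value from the previous instance
    filter.setFillWithMissing(true); // pad the boundary with missing values
    filter.setInputFormat(data);

    Instances shifted = Filter.useFilter(data, filter);
    System.out.println(shifted);
  }
}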
/**
 * Analyses the decision points according to the context specified. Furthermore, the context is
 * provided with a visualization of the analysis result.
 *
 * @param cda the cluster decision analyzer that provides the learning instances and receives the
 *     result and evaluation visualizations
 */
public void analyse(ClusterDecisionAnalyzer cda) {
  clusterDecisionAnalyzer = cda;

  // create empty data set with attribute information
  Instances data = cda.getDataInfo();

  // in case no single learning instance can be provided (as decision point is never
  // reached, or decision classes cannot be specified properly) --> do not call algorithm
  if (data.numInstances() == 0) {
    System.out.println("No learning instances available");
  }
  // actually solve the classification problem
  else {
    try {
      myClassifier.buildClassifier(data);
      // build up result visualization
      cda.setResultVisualization(createResultVisualization());
      cda.setEvaluationVisualization(createEvaluationVisualization(data));
    } catch (Exception ex) {
      ex.printStackTrace();
      cda.setResultVisualization(
          createMessagePanel("Error while solving the classification problem"));
    }
  }
}
/**
 * Calculates the area under the precision-recall curve (AUPRC).
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the PRC area, or Double.NaN if you don't pass in a ThresholdCurve generated Instances.
 */
public static double getPRCArea(Instances tcurve) {
  final int n = tcurve.numInstances();
  if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) {
    return Double.NaN;
  }

  final int pInd = tcurve.attribute(PRECISION_NAME).index();
  final int rInd = tcurve.attribute(RECALL_NAME).index();
  final double[] pVals = tcurve.attributeToDoubleArray(pInd);
  final double[] rVals = tcurve.attributeToDoubleArray(rInd);

  double area = 0;
  double xlast = rVals[n - 1];

  // start from the first real p/r pair (not the artificial zero point)
  for (int i = n - 2; i >= 0; i--) {
    double recallDelta = rVals[i] - xlast;
    area += (pVals[i] * recallDelta);
    xlast = rVals[i];
  }

  if (area == 0) {
    return Utils.missingValue();
  }
  return area;
}
/**
 * Returns a string containing java source code equivalent to the test made at this node. The
 * instance being tested is called "i".
 *
 * @param index index of the nominal value tested
 * @param data the data containing instance structure info
 * @return a value of type 'String'
 */
public final String sourceExpression(int index, Instances data) {
  StringBuffer expr = null;
  if (index < 0) {
    return "i[" + m_attIndex + "] == null";
  }
  if (data.attribute(m_attIndex).isNominal()) {
    if (index == 0) {
      expr = new StringBuffer("i[");
    } else {
      expr = new StringBuffer("!i[");
    }
    expr.append(m_attIndex).append("]");
    expr.append(".equals(\"")
        .append(data.attribute(m_attIndex).value((int) m_splitPoint))
        .append("\")");
  } else {
    expr = new StringBuffer("((Double) i[");
    expr.append(m_attIndex).append("])");
    if (index == 0) {
      expr.append(".doubleValue() <= ").append(m_splitPoint);
    } else {
      expr.append(".doubleValue() > ").append(m_splitPoint);
    }
  }
  return expr.toString();
}
/**
 * Calculates the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the ROC area, or Double.NaN if you don't pass in a ThresholdCurve generated Instances.
 */
public static double getROCArea(Instances tcurve) {
  final int n = tcurve.numInstances();
  if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) {
    return Double.NaN;
  }
  final int tpInd = tcurve.attribute(TRUE_POS_NAME).index();
  final int fpInd = tcurve.attribute(FALSE_POS_NAME).index();
  final double[] tpVals = tcurve.attributeToDoubleArray(tpInd);
  final double[] fpVals = tcurve.attributeToDoubleArray(fpInd);

  double area = 0.0, cumNeg = 0.0;
  final double totalPos = tpVals[0];
  final double totalNeg = fpVals[0];
  for (int i = 0; i < n; i++) {
    double cip, cin;
    if (i < n - 1) {
      cip = tpVals[i] - tpVals[i + 1];
      cin = fpVals[i] - fpVals[i + 1];
    } else {
      cip = tpVals[n - 1];
      cin = fpVals[n - 1];
    }
    area += cip * (cumNeg + (0.5 * cin));
    cumNeg += cin;
  }
  area /= (totalNeg * totalPos);
  return area;
}
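/*
 * A minimal sketch showing how getROCArea() and getPRCArea() are typically fed: cross-validated
 * predictions are turned into a threshold curve for one class value. The ARFF file name and the
 * choice of NaiveBayes are assumptions.
 */
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.evaluation.ThresholdCurve;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CurveAreaDemo {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("data.arff"); // hypothetical two-class data set
    data.setClassIndex(data.numAttributes() - 1);

    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(new NaiveBayes(), data, 10, new Random(1));

    // threshold curve for the first class value (index 0)
    Instances curve = new ThresholdCurve().getCurve(eval.predictions(), 0);
    System.out.println("ROC area: " + ThresholdCurve.getROCArea(curve));
    System.out.println("PRC area: " + ThresholdCurve.getPRCArea(curve));
  }
}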
/**
 * Find all the instances in the dataset covered/not covered by the rule at the given index, and
 * store the corresponding simple statistics and predicted class distributions in the given double
 * arrays, which can be obtained by getSimpleStats() and getDistributions().<br>
 *
 * @param index the given index, assuming correct
 * @param insts the dataset to be covered by the rule
 * @param stats the given double array to hold stats, side-effected
 * @param dist the given array to hold class distributions, side-effected; if null, the
 *     distribution is not computed
 * @return the instances covered and not covered by the rule
 */
private Instances[] computeSimpleStats(
    int index, Instances insts, double[] stats, double[] dist) {
  Rule rule = (Rule) m_Ruleset.elementAt(index);

  Instances[] data = new Instances[2];
  data[0] = new Instances(insts, insts.numInstances());
  data[1] = new Instances(insts, insts.numInstances());

  for (int i = 0; i < insts.numInstances(); i++) {
    Instance datum = insts.instance(i);
    double weight = datum.weight();
    if (rule.covers(datum)) {
      data[0].add(datum); // Covered by this rule
      stats[0] += weight; // Coverage
      if ((int) datum.classValue() == (int) rule.getConsequent())
        stats[2] += weight; // True positives
      else
        stats[4] += weight; // False positives
      if (dist != null)
        dist[(int) datum.classValue()] += weight;
    } else {
      data[1].add(datum); // Not covered by this rule
      stats[1] += weight;
      if ((int) datum.classValue() != (int) rule.getConsequent())
        stats[3] += weight; // True negatives
      else
        stats[5] += weight; // False negatives
    }
  }

  return data;
}
/**
 * Tests the ThresholdCurve generation from the command line. The classifier is currently
 * hardcoded. Pipe in an arff file.
 *
 * @param args currently ignored
 */
public static void main(String[] args) {
  try {
    Instances inst = new Instances(new java.io.InputStreamReader(System.in));
    if (false) {
      System.out.println(ThresholdCurve.getNPointPrecision(inst, 11));
    } else {
      inst.setClassIndex(inst.numAttributes() - 1);
      ThresholdCurve tc = new ThresholdCurve();
      EvaluationUtils eu = new EvaluationUtils();
      Classifier classifier = new weka.classifiers.functions.Logistic();
      FastVector predictions = new FastVector();
      for (int i = 0; i < 2; i++) { // Do two runs.
        eu.setSeed(i);
        predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
        // System.out.println("\n\n\n");
      }
      Instances result = tc.getCurve(predictions);
      System.out.println(result);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
public static double CA(Instances odata, int[] clusters) {
  double result = 0;
  double[] tmpdclass = odata.attributeToDoubleArray(odata.numAttributes() - 1);
  int[] oclass = new int[odata.numInstances()];
  for (int i = 0; i < tmpdclass.length; ++i) {
    oclass[i] = (int) tmpdclass[i];
  }

  int[] tmpclass = oclass.clone();
  int[] tmpclusters = clusters.clone();
  Arrays.sort(tmpclusters);
  Arrays.sort(tmpclass);

  // contingency table: M[class][cluster] = number of instances of that class in that cluster
  int[][] M =
      new int[tmpclass[tmpclass.length - 1] + 1][tmpclusters[tmpclusters.length - 1] + 1];
  for (int i = 0; i < clusters.length; ++i) {
    M[oclass[i]][clusters[i]]++;
  }

  for (int i = 0; i < M.length; ++i) {
    System.out.println(Arrays.toString(M[i]));
  }

  // for each class, find the cluster it most frequently falls into and store that index in M[i][0]
  for (int i = 0; i < M.length; ++i) {
    int maxindex = 0;
    for (int j = 1; j < M[i].length; ++j) {
      if (M[i][j] > M[i][maxindex]) maxindex = j;
    }
    M[i][0] = maxindex;
  }

  // count instances whose cluster matches the majority cluster of their class
  for (int i = 0; i < oclass.length; ++i) {
    if (M[oclass[i]][0] == clusters[i]) result++;
  }

  return (double) result / (double) odata.numInstances();
}
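/*
 * A minimal sketch of how CA() above might be used: cluster the data (with the class attribute
 * removed) using SimpleKMeans and score the assignments against the last attribute. The ARFF
 * file name and the number of clusters are assumptions, and this main() is assumed to live in
 * the same class as CA(); it needs weka.clusterers.SimpleKMeans, weka.core.Instances,
 * weka.core.converters.ConverterUtils.DataSource, weka.filters.Filter and
 * weka.filters.unsupervised.attribute.Remove.
 */
public static void main(String[] args) throws Exception {
  Instances data = DataSource.read("data.arff"); // hypothetical data set, class is last attribute

  // remove the class attribute before clustering
  Remove remove = new Remove();
  remove.setAttributeIndices("" + data.numAttributes()); // 1-based index of the last attribute
  remove.setInputFormat(data);
  Instances noClass = Filter.useFilter(data, remove);

  SimpleKMeans km = new SimpleKMeans();
  km.setNumClusters(3); // assumed number of classes
  km.buildClusterer(noClass);

  int[] clusters = new int[noClass.numInstances()];
  for (int i = 0; i < noClass.numInstances(); i++) {
    clusters[i] = km.clusterInstance(noClass.instance(i));
  }
  System.out.println("CA = " + CA(data, clusters)); // calls the method above
}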
/**
 * Returns a string representation of the classifier.
 *
 * @return a string representation of the classifier
 */
public String toString() {
  StringBuffer result =
      new StringBuffer(
          "The independent probability of a class\n--------------------------------------\n");

  for (int c = 0; c < m_numClasses; c++)
    result
        .append(m_headerInfo.classAttribute().value(c))
        .append("\t")
        .append(Double.toString(m_probOfClass[c]))
        .append("\n");

  result.append(
      "\nThe probability of a word given the class\n-----------------------------------------\n\t");

  for (int c = 0; c < m_numClasses; c++)
    result.append(m_headerInfo.classAttribute().value(c)).append("\t");

  result.append("\n");

  for (int w = 0; w < m_numAttributes; w++) {
    result.append(m_headerInfo.attribute(w).name()).append("\t");
    for (int c = 0; c < m_numClasses; c++)
      result.append(Double.toString(Math.exp(m_probOfWordGivenClass[c][w]))).append("\t");
    result.append("\n");
  }

  return result.toString();
}
/**
 * Calculates the centroid pivot of a node based on the list of points that it contains (the two
 * lists of its children are provided).
 *
 * @param list1 The point index list of first child.
 * @param list2 The point index list of second child.
 * @param insts The insts object on which the tree is being built (for header information).
 * @return The centroid pivot of the node.
 */
public Instance calcPivot(MyIdxList list1, MyIdxList list2, Instances insts) {
  int classIdx = m_Instances.classIndex();
  double[] attrVals = new double[insts.numAttributes()];

  Instance temp;
  for (int i = 0; i < list1.length(); i++) {
    temp = insts.instance(((ListNode) list1.get(i)).idx);
    for (int k = 0; k < temp.numValues(); k++) {
      if (temp.index(k) == classIdx)
        continue;
      attrVals[k] += temp.valueSparse(k);
    }
  }

  for (int j = 0; j < list2.length(); j++) {
    temp = insts.instance(((ListNode) list2.get(j)).idx);
    for (int k = 0; k < temp.numValues(); k++) {
      if (temp.index(k) == classIdx)
        continue;
      attrVals[k] += temp.valueSparse(k);
    }
  }

  for (int j = 0, numInsts = list1.length() + list2.length(); j < attrVals.length; j++) {
    attrVals[j] /= numInsts;
  }
  temp = new DenseInstance(1.0, attrVals);
  return temp;
}
/** test the batch saving/loading (via setFile(File)). */
public void testBatch() {
  Instances data;

  try {
    // save
    m_Saver.setInstances(m_Instances);
    m_Saver.setFile(new File(m_ExportFilename));
    m_Saver.writeBatch();

    // load
    ((AbstractFileLoader) m_Loader).setFile(new File(m_ExportFilename));
    data = m_Loader.getDataSet();

    // compare data
    try {
      if (m_Instances.classIndex() != data.classIndex()) {
        data.setClassIndex(m_Instances.classIndex());
      }
      compareDatasets(m_Instances, data);
    } catch (Exception e) {
      fail("Batch load failed (datasets differ): " + e.toString());
    }
  } catch (Exception e) {
    e.printStackTrace();
    fail("Batch save/load failed: " + e.toString());
  }
}
/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
@Override
public boolean batchFinished() throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (!m_firstBatchFinished) {
    Instances filtered;
    if (m_numOfCrossValidationFolds < 2) {
      filtered = cleanseTrain(getInputFormat());
    } else {
      filtered = cleanseCross(getInputFormat());
    }
    for (int i = 0; i < filtered.numInstances(); i++) {
      push(filtered.instance(i));
    }
    m_firstBatchFinished = true;
    flushInput();
  }
  m_NewBatch = true;
  return (numPendingOutput() != 0);
}
/**
 * Aggregate an object with this one
 *
 * @param toAggregate the object to aggregate
 * @return the result of aggregation
 * @throws Exception if the supplied object can't be aggregated for some reason
 */
@Override
public Logistic aggregate(Logistic toAggregate) throws Exception {

  if (m_numModels == Integer.MIN_VALUE) {
    throw new Exception(
        "Can't aggregate further - model has already been " + "aggregated and finalized");
  }

  if (m_Par == null) {
    throw new Exception("No model built yet, can't aggregate");
  }

  if (!m_structure.equalHeaders(toAggregate.m_structure)) {
    throw new Exception(
        "Can't aggregate - data headers don't match: "
            + m_structure.equalHeadersMsg(toAggregate.m_structure));
  }

  for (int i = 0; i < m_Par.length; i++) {
    for (int j = 0; j < m_Par[i].length; j++) {
      m_Par[i][j] += toAggregate.m_Par[i][j];
    }
  }

  m_numModels++;

  return this;
}
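/*
 * A minimal sketch of the Aggregateable API that the method above belongs to: two Logistic
 * models trained on separate partitions with identical headers are combined into an averaged
 * model. The partition file names are assumptions.
 */
import weka.classifiers.functions.Logistic;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class LogisticAggregationDemo {
  public static void main(String[] args) throws Exception {
    Instances part1 = DataSource.read("part1.arff"); // hypothetical partitions with
    Instances part2 = DataSource.read("part2.arff"); // identical attribute headers
    part1.setClassIndex(part1.numAttributes() - 1);
    part2.setClassIndex(part2.numAttributes() - 1);

    Logistic l1 = new Logistic();
    l1.buildClassifier(part1);
    Logistic l2 = new Logistic();
    l2.buildClassifier(part2);

    l1.aggregate(l2);         // sums the coefficient matrices (m_Par), as shown above
    l1.finalizeAggregation(); // averages over the number of aggregated models
    System.out.println(l1);
  }
}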