/**
 * Parses a given list of options. Valid options are:
 *
 * <p>-I num <br>
 * The number of iterations to be performed. (default 1)
 *
 * <p>-E num <br>
 * The exponent for the polynomial kernel. (default 1)
 *
 * <p>-S num <br>
 * The seed for the random number generator. (default 1)
 *
 * <p>-M num <br>
 * The maximum number of alterations allowed. (default 10000)
 *
 * <p>An option that is absent (empty string returned by {@code Utils.getOption}) resets the
 * corresponding field to its default value.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option cannot be retrieved; a value that is not a valid number
 *     raises a {@link NumberFormatException}
 */
public void setOptions(String[] options) throws Exception {
  String iterationsString = Utils.getOption('I', options);
  m_NumIterations = (iterationsString.length() != 0) ? Integer.parseInt(iterationsString) : 1;

  String exponentsString = Utils.getOption('E', options);
  // Double.parseDouble replaces the deprecated "new Double(String).doubleValue()" round trip.
  m_Exponent = (exponentsString.length() != 0) ? Double.parseDouble(exponentsString) : 1.0;

  String seedString = Utils.getOption('S', options);
  m_Seed = (seedString.length() != 0) ? Integer.parseInt(seedString) : 1;

  String alterationsString = Utils.getOption('M', options);
  m_MaxK = (alterationsString.length() != 0) ? Integer.parseInt(alterationsString) : 10000;
}
/**
 * Parses a given list of options. Valid options are:
 *
 * <p>-W classname <br>
 * Specify the full class name of a weak classifier as the basis for bagging (required).
 *
 * <p>-I num <br>
 * Set the number of bagging iterations (default 10).
 *
 * <p>-S seed <br>
 * Random number seed for resampling (default 1).
 *
 * <p>-P num <br>
 * Size of each bag, as a percentage of the training size (default 100).
 *
 * <p>-O <br>
 * Compute out of bag error.
 *
 * <p>Options after -- are passed to the designated classifier.
 *
 * <p>Only -P and -O are read here; the array is then handed to the superclass.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  final String bagSizeOption = Utils.getOption('P', options);
  final int bagSizePercent = (bagSizeOption.length() == 0) ? 100 : Integer.parseInt(bagSizeOption);
  setBagSizePercent(bagSizePercent);

  final boolean outOfBagRequested = Utils.getFlag('O', options);
  setCalcOutOfBag(outOfBagRequested);

  // Everything that is left is handled by the superclass.
  super.setOptions(options);
}
/** * Performs a cross-validation for a DensityBasedClusterer clusterer on a set of instances. * * @param clustererString a string naming the class of the clusterer * @param data the data on which the cross-validation is to be performed * @param numFolds the number of folds for the cross-validation * @param options the options to the clusterer * @param random a random number generator * @return a string containing the cross validated log likelihood * @exception Exception if a clusterer could not be generated */ public static String crossValidateModel( String clustererString, Instances data, int numFolds, String[] options, Random random) throws Exception { Clusterer clusterer = null; Instances train, test; String[] savedOptions = null; double foldAv; double CvAv = 0.0; double[] tempDist; StringBuffer CvString = new StringBuffer(); if (options != null) { savedOptions = new String[options.length]; } data = new Instances(data); // create clusterer try { clusterer = (Clusterer) Class.forName(clustererString).newInstance(); } catch (Exception e) { throw new Exception("Can't find class with name " + clustererString + '.'); } if (!(clusterer instanceof DensityBasedClusterer)) { throw new Exception(clustererString + " must be a distrinbution " + "clusterer."); } // Save options if (options != null) { System.arraycopy(options, 0, savedOptions, 0, options.length); } // Parse options if (clusterer instanceof OptionHandler) { try { ((OptionHandler) clusterer).setOptions(savedOptions); Utils.checkForRemainingOptions(savedOptions); } catch (Exception e) { throw new Exception("Can't parse given options in " + "cross-validation!"); } } CvAv = crossValidateModel((DensityBasedClusterer) clusterer, data, numFolds, random); CvString.append( "\n" + numFolds + " fold CV Log Likelihood: " + Utils.doubleToString(CvAv, 6, 4) + "\n"); return CvString.toString(); }
/** * Evaluates cluster assignments with respect to actual class labels. Assumes that m_Clusterer has * been trained and tested on inst (minus the class). * * @param inst the instances (including class) to evaluate with respect to * @exception Exception if something goes wrong */ private void evaluateClustersWithRespectToClass(Instances inst) throws Exception { int numClasses = inst.classAttribute().numValues(); int[][] counts = new int[m_numClusters][numClasses]; int[] clusterTotals = new int[m_numClusters]; double[] best = new double[m_numClusters + 1]; double[] current = new double[m_numClusters + 1]; for (int i = 0; i < inst.numInstances(); i++) { counts[(int) m_clusterAssignments[i]][(int) inst.instance(i).classValue()]++; clusterTotals[(int) m_clusterAssignments[i]]++; } best[m_numClusters] = Double.MAX_VALUE; mapClasses(0, counts, clusterTotals, current, best, 0); m_clusteringResults.append("\n\nClass attribute: " + inst.classAttribute().name() + "\n"); m_clusteringResults.append("Classes to Clusters:\n"); String matrixString = toMatrixString(counts, clusterTotals, inst); m_clusteringResults.append(matrixString).append("\n"); int Cwidth = 1 + (int) (Math.log(m_numClusters) / Math.log(10)); // add the minimum error assignment for (int i = 0; i < m_numClusters; i++) { if (clusterTotals[i] > 0) { m_clusteringResults.append("Cluster " + Utils.doubleToString((double) i, Cwidth, 0)); m_clusteringResults.append(" <-- "); if (best[i] < 0) { m_clusteringResults.append("No class\n"); } else { m_clusteringResults.append(inst.classAttribute().value((int) best[i])).append("\n"); } } } m_clusteringResults.append( "\nIncorrectly clustered instances :\t" + best[m_numClusters] + "\t" + (Utils.doubleToString((best[m_numClusters] / inst.numInstances() * 100.0), 8, 4)) + " %\n"); // copy the class assignments m_classToCluster = new int[m_numClusters]; for (int i = 0; i < m_numClusters; i++) { m_classToCluster[i] = (int) best[i]; } }
/**
 * Returns a "confusion" style matrix of classes to clusters assignments.
 *
 * <p>The header row lists the indices of all non-empty clusters; each following row holds the
 * per-cluster counts of one class value followed by that class value's name. Clusters whose
 * total is zero are left out of every row.
 *
 * @param counts the counts of classes for each cluster
 * @param clusterTotals total number of examples in each cluster
 * @param inst the training instances (with class)
 * @return the formatted matrix
 * @exception Exception if matrix can't be generated
 */
private String toMatrixString(int[][] counts, int[] clusterTotals, Instances inst)
    throws Exception {
  // The largest single count determines how wide a column has to be.
  int largestCount = 0;
  for (int cluster = 0; cluster < m_numClusters; cluster++) {
    for (int count : counts[cluster]) {
      largestCount = Math.max(largestCount, count);
    }
  }
  final int countDigits = (int) (Math.log(largestCount) / Math.log(10));
  final int clusterDigits = (int) (Math.log(m_numClusters) / Math.log(10));
  final int columnWidth = 1 + Math.max(countDigits, clusterDigits);

  final StringBuilder matrix = new StringBuilder();
  matrix.append("\n");

  // Header: indices of the clusters that actually contain instances.
  for (int cluster = 0; cluster < m_numClusters; cluster++) {
    if (clusterTotals[cluster] <= 0) {
      continue;
    }
    matrix.append(" ").append(Utils.doubleToString((double) cluster, columnWidth, 0));
  }
  matrix.append(" <-- assigned to cluster\n");

  // One row per class value.
  final int classCount = counts[0].length;
  for (int classIndex = 0; classIndex < classCount; classIndex++) {
    for (int cluster = 0; cluster < m_numClusters; cluster++) {
      if (clusterTotals[cluster] <= 0) {
        continue;
      }
      matrix
          .append(" ")
          .append(Utils.doubleToString((double) counts[cluster][classIndex], columnWidth, 0));
    }
    matrix.append(" | ").append(inst.classAttribute().value(classIndex)).append("\n");
  }
  return matrix.toString();
}
/** * Creates a new dataset of the same size using random sampling with replacement according to the * given weight vector. The weights of the instances in the new dataset are set to one. The length * of the weight vector has to be the same as the number of instances in the dataset, and all * weights have to be positive. * * @param data the data to be sampled from * @param random a random number generator * @param sampled indicating which instance has been sampled * @return the new dataset * @exception IllegalArgumentException if the weights array is of the wrong length or contains * negative weights. */ public final Instances resampleWithWeights(Instances data, Random random, boolean[] sampled) { double[] weights = new double[data.numInstances()]; for (int i = 0; i < weights.length; i++) { weights[i] = data.instance(i).weight(); } Instances newData = new Instances(data, data.numInstances()); if (data.numInstances() == 0) { return newData; } double[] probabilities = new double[data.numInstances()]; double sumProbs = 0, sumOfWeights = Utils.sum(weights); for (int i = 0; i < data.numInstances(); i++) { sumProbs += random.nextDouble(); probabilities[i] = sumProbs; } Utils.normalize(probabilities, sumProbs / sumOfWeights); // Make sure that rounding errors don't mess things up probabilities[data.numInstances() - 1] = sumOfWeights; int k = 0; int l = 0; sumProbs = 0; while ((k < data.numInstances() && (l < data.numInstances()))) { if (weights[l] < 0) { throw new IllegalArgumentException("Weights have to be positive."); } sumProbs += weights[l]; while ((k < data.numInstances()) && (probabilities[k] <= sumProbs)) { newData.add(data.instance(l)); sampled[l] = true; newData.instance(k).setWeight(1); k++; } l++; } return newData; }
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * <p>For a numeric class the single returned entry is the mean of the base classifiers'
 * predictions. Otherwise the base classifiers' distributions are summed and normalized, unless
 * the sum is zero, in which case the all-zero array is returned unchanged.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  final double[] sums = new double[instance.numClasses()];
  final boolean numericClass = instance.classAttribute().isNumeric();

  for (int member = 0; member < m_NumIterations; member++) {
    if (numericClass) {
      sums[0] += m_Classifiers[member].classifyInstance(instance);
      continue;
    }
    final double[] memberProbs = m_Classifiers[member].distributionForInstance(instance);
    for (int cls = 0; cls < memberProbs.length; cls++) {
      sums[cls] += memberProbs[cls];
    }
  }

  if (numericClass) {
    sums[0] /= m_NumIterations;
    return sums;
  }
  if (!Utils.eq(Utils.sum(sums), 0)) {
    Utils.normalize(sums);
  }
  return sums;
}
/**
 * Returns description of the bagged classifier: the textual form of every base classifier,
 * followed by the out of bag error when its calculation is enabled.
 *
 * @return description of the bagged classifier as a string
 */
@Override
public String toString() {
  if (m_Classifiers == null) {
    return "Bagging: No model built yet.";
  }

  final StringBuilder description = new StringBuilder("All the base classifiers: \n\n");
  for (int member = 0; member < m_Classifiers.length; member++) {
    description.append(m_Classifiers[member].toString()).append("\n\n");
  }

  if (m_CalcOutOfBag) {
    description
        .append("Out of bag error: ")
        .append(Utils.doubleToString(m_OutOfBagError, 4))
        .append("\n\n");
  }
  return description.toString();
}
/**
 * Formats a number via {@code Utils.doubleToString}, choosing width and precision from the
 * number's magnitude.
 *
 * <p>The precision is 1 for values without a fractional part; otherwise it is derived from the
 * absolute base-10 logarithm of the value, and falls back to 1 when that would exceed 5.
 *
 * @param num the number to format
 * @return the formatted number
 */
private static String numToString(double num) {
  final double magnitude = Math.abs(num);
  final int whole = (int) magnitude;
  final double fraction = magnitude - whole;

  // Digit estimate for the integer part.
  final int integerDigits;
  if (whole > 0) {
    integerDigits = (int) (Math.log(whole) / Math.log(10));
  } else {
    integerDigits = 1;
  }

  int precision;
  if (fraction > 0) {
    precision = (int) Math.abs(Math.log(magnitude) / Math.log(10)) + 2;
  } else {
    precision = 1;
  }
  if (precision > 5) {
    precision = 1;
  }

  return reconcile.weka.core.Utils.doubleToString(num, integerDigits + 1 + precision, precision);
}
/** * Print the cluster statistics for either the training or the testing data. * * @param clusterer the clusterer to use for generating statistics. * @return a string containing cluster statistics. * @exception if statistics can't be generated. */ private static String printClusterStats(Clusterer clusterer, String fileName) throws Exception { StringBuffer text = new StringBuffer(); int i = 0; int cnum; double loglk = 0.0; double[] dist; double temp; int cc = clusterer.numberOfClusters(); double[] instanceStats = new double[cc]; int unclusteredInstances = 0; if (fileName.length() != 0) { BufferedReader inStream = null; try { inStream = new BufferedReader(new FileReader(fileName)); } catch (Exception e) { throw new Exception("Can't open file " + e.getMessage() + '.'); } Instances inst = new Instances(inStream, 1); while (inst.readInstance(inStream)) { try { cnum = clusterer.clusterInstance(inst.instance(0)); if (clusterer instanceof DensityBasedClusterer) { loglk += ((DensityBasedClusterer) clusterer).logDensityForInstance(inst.instance(0)); // temp = Utils.sum(dist); } instanceStats[cnum]++; } catch (Exception e) { unclusteredInstances++; } inst.delete(0); i++; } /* // count the actual number of used clusters int count = 0; for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { count++; } } if (count > 0) { double [] tempStats = new double [count]; count=0; for (i=0;i<cc;i++) { if (instanceStats[i] > 0) { tempStats[count++] = instanceStats[i]; } } instanceStats = tempStats; cc = instanceStats.length; } */ int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1); int numInstFieldWidth = (int) ((Math.log(i) / Math.log(10)) + 1); double sum = Utils.sum(instanceStats); loglk /= sum; text.append("Clustered Instances\n"); for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { text.append( Utils.doubleToString((double) i, clustFieldWidth, 0) + " " + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) + " (" + Utils.doubleToString((instanceStats[i] / sum * 
100.0), 3, 0) + "%)\n"); } } if (unclusteredInstances > 0) { text.append("\nUnclustered Instances : " + unclusteredInstances); } if (clusterer instanceof DensityBasedClusterer) { text.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n"); } } return text.toString(); }
/** * Evaluates a clusterer with the options given in an array of strings. It takes the string * indicated by "-t" as training file, the string indicated by "-T" as test file. If the test file * is missing, a stratified ten-fold cross-validation is performed (distribution clusterers only). * Using "-x" you can change the number of folds to be used, and using "-s" the random seed. If * the "-p" option is present it outputs the classification for each test instance. If you provide * the name of an object file using "-l", a clusterer will be loaded from the given file. If you * provide the name of an object file using "-d", the clusterer built from the training data will * be saved to the given file. * * @param clusterer machine learning clusterer * @param options the array of string containing the options * @exception Exception if model could not be evaluated successfully * @return a string describing the results */ public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception { int seed = 1, folds = 10; boolean doXval = false; Instances train = null; Instances test = null; Random random; String trainFileName, testFileName, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString; String[] savedOptions = null; boolean printClusterAssignments = false; Range attributesToOutput = null; ObjectInputStream objectInputStream = null; ObjectOutputStream objectOutputStream = null; StringBuffer text = new StringBuffer(); int theClass = -1; // class based evaluation of clustering try { if (Utils.getFlag('h', options)) { throw new Exception("Help requested."); } // Get basic options (options the same for all clusterers // printClusterAssignments = Utils.getFlag('p', options); objectInputFileName = Utils.getOption('l', options); objectOutputFileName = Utils.getOption('d', options); trainFileName = Utils.getOption('t', options); testFileName = Utils.getOption('T', options); // Check -p option try { 
attributeRangeString = Utils.getOption('p', options); } catch (Exception e) { throw new Exception( e.getMessage() + "\nNOTE: the -p option has changed. " + "It now expects a parameter specifying a range of attributes " + "to list with the predictions. Use '-p 0' for none."); } if (attributeRangeString.length() != 0) { printClusterAssignments = true; if (!attributeRangeString.equals("0")) attributesToOutput = new Range(attributeRangeString); } if (trainFileName.length() == 0) { if (objectInputFileName.length() == 0) { throw new Exception("No training file and no object " + "input file given."); } if (testFileName.length() == 0) { throw new Exception("No training file and no test file given."); } } else { if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) { throw new Exception("Can't use both train and model file " + "unless -p specified."); } } seedString = Utils.getOption('s', options); if (seedString.length() != 0) { seed = Integer.parseInt(seedString); } foldsString = Utils.getOption('x', options); if (foldsString.length() != 0) { folds = Integer.parseInt(foldsString); doXval = true; } } catch (Exception e) { throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer)); } try { if (trainFileName.length() != 0) { train = new Instances(new BufferedReader(new FileReader(trainFileName))); String classString = Utils.getOption('c', options); if (classString.length() != 0) { if (classString.compareTo("last") == 0) { theClass = train.numAttributes(); } else if (classString.compareTo("first") == 0) { theClass = 1; } else { theClass = Integer.parseInt(classString); } if (doXval || testFileName.length() != 0) { throw new Exception("Can only do class based evaluation on the " + "training data"); } if (objectInputFileName.length() != 0) { throw new Exception("Can't load a clusterer and do class based " + "evaluation"); } } if (theClass != -1) { if (theClass < 1 || theClass > train.numAttributes()) { throw new Exception("Class is out of 
range!"); } if (!train.attribute(theClass - 1).isNominal()) { throw new Exception("Class must be nominal!"); } train.setClassIndex(theClass - 1); } } if (objectInputFileName.length() != 0) { objectInputStream = new ObjectInputStream(new FileInputStream(objectInputFileName)); } if (objectOutputFileName.length() != 0) { objectOutputStream = new ObjectOutputStream(new FileOutputStream(objectOutputFileName)); } } catch (Exception e) { throw new Exception("ClusterEvaluation: " + e.getMessage() + '.'); } // Save options if (options != null) { savedOptions = new String[options.length]; System.arraycopy(options, 0, savedOptions, 0, options.length); } if (objectInputFileName.length() != 0) { Utils.checkForRemainingOptions(options); } // Set options for clusterer if (clusterer instanceof OptionHandler) { ((OptionHandler) clusterer).setOptions(options); } Utils.checkForRemainingOptions(options); if (objectInputFileName.length() != 0) { // Load the clusterer from file clusterer = (Clusterer) objectInputStream.readObject(); objectInputStream.close(); } else { // Build the clusterer if no object file provided if (theClass == -1) { clusterer.buildClusterer(train); } else { Remove removeClass = new Remove(); removeClass.setAttributeIndices("" + theClass); removeClass.setInvertSelection(false); removeClass.setInputFormat(train); Instances clusterTrain = Filter.useFilter(train, removeClass); clusterer.buildClusterer(clusterTrain); ClusterEvaluation ce = new ClusterEvaluation(); ce.setClusterer(clusterer); ce.evaluateClusterer(train); return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString(); } } /* Output cluster predictions only (for the test data if specified, otherwise for the training data */ if (printClusterAssignments) { return printClusterings(clusterer, train, testFileName, attributesToOutput); } text.append(clusterer.toString()); text.append( "\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName)); 
if (testFileName.length() != 0) { text.append( "\n\n=== Clustering stats for testing data ===\n\n" + printClusterStats(clusterer, testFileName)); } if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0) && (objectInputFileName.length() == 0)) { // cross validate the log likelihood on the training data random = new Random(seed); random.setSeed(seed); train.randomize(random); text.append( crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random)); } // Save the clusterer if an object output file is provided if (objectOutputFileName.length() != 0) { objectOutputStream.writeObject(clusterer); objectOutputStream.flush(); objectOutputStream.close(); } return text.toString(); }
/** * Evaluate the clusterer on a set of instances. Calculates clustering statistics and stores * cluster assigments for the instances in m_clusterAssignments * * @param test the set of instances to cluster * @exception Exception if something goes wrong */ public void evaluateClusterer(Instances test) throws Exception { int i = 0; int cnum; double loglk = 0.0; double[] dist; double temp; int cc = m_Clusterer.numberOfClusters(); m_numClusters = cc; int numInstFieldWidth = (int) ((Math.log(test.numInstances()) / Math.log(10)) + 1); double[] instanceStats = new double[cc]; m_clusterAssignments = new double[test.numInstances()]; Instances testCopy = test; boolean hasClass = (testCopy.classIndex() >= 0); int unclusteredInstances = 0; // If class is set then do class based evaluation as well if (hasClass) { if (testCopy.classAttribute().isNumeric()) { throw new Exception("ClusterEvaluation: Class must be nominal!"); } Remove removeClass = new Remove(); removeClass.setAttributeIndices("" + (testCopy.classIndex() + 1)); removeClass.setInvertSelection(false); removeClass.setInputFormat(testCopy); testCopy = Filter.useFilter(testCopy, removeClass); } for (i = 0; i < testCopy.numInstances(); i++) { cnum = -1; try { if (m_Clusterer instanceof DensityBasedClusterer) { loglk += ((DensityBasedClusterer) m_Clusterer).logDensityForInstance(testCopy.instance(i)); // temp = Utils.sum(dist); // Utils.normalize(dist); cnum = m_Clusterer.clusterInstance(testCopy.instance(i)); // Utils.maxIndex(dist); m_clusterAssignments[i] = (double) cnum; } else { cnum = m_Clusterer.clusterInstance(testCopy.instance(i)); m_clusterAssignments[i] = (double) cnum; } } catch (Exception e) { unclusteredInstances++; } if (cnum != -1) { instanceStats[cnum]++; } } /* // count the actual number of used clusters int count = 0; for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { count++; } } if (count > 0) { double [] tempStats = new double [count]; double [] map = new double [m_clusterAssignments.length]; 
count=0; for (i=0;i<cc;i++) { if (instanceStats[i] > 0) { tempStats[count] = instanceStats[i]; map[i] = count; count++; } } instanceStats = tempStats; cc = instanceStats.length; for (i=0;i<m_clusterAssignments.length;i++) { m_clusterAssignments[i] = map[(int)m_clusterAssignments[i]]; } } */ double sum = Utils.sum(instanceStats); loglk /= sum; m_logL = loglk; m_clusteringResults.append(m_Clusterer.toString()); m_clusteringResults.append("Clustered Instances\n\n"); int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1); for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { m_clusteringResults.append( Utils.doubleToString((double) i, clustFieldWidth, 0) + " " + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) + " (" + Utils.doubleToString((instanceStats[i] / sum * 100.0), 3, 0) + "%)\n"); } } if (unclusteredInstances > 0) { m_clusteringResults.append("\nUnclustered instances : " + unclusteredInstances); } if (m_Clusterer instanceof DensityBasedClusterer) { m_clusteringResults.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n"); } if (hasClass) { evaluateClustersWithRespectToClass(test); } }
/**
 * Creates a new instance of an associator given its class name and (optional) arguments to pass
 * to its setOptions method. If the associator implements OptionHandler and the options parameter
 * is non-null, the associator will have its options set.
 *
 * <p>The work is delegated to {@code Utils.forName}; this method only casts the result.
 *
 * @param associatorName the fully qualified class name of the associator
 * @param options an array of options suitable for passing to setOptions. May be null.
 * @return the newly created associator, ready for use.
 * @exception Exception if the associator name is invalid, or the options supplied are not
 *     acceptable to the associator
 */
public static Associator forName(String associatorName, String[] options) throws Exception {
  final Object created = Utils.forName(Associator.class, associatorName, options);
  return (Associator) created;
}
/** Main method. */ public static void main(String[] args) { try { String[] options = args; StRipShort classifier = new StRipShort(); InstancesShort train = null, tempTrain, test = null, template = null; int seed = 1, folds = 10, classIndex = -1; String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString; boolean IRstatistics = false, noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false, printComplexityStatistics = false, printGraph = false, classStatistics = false, printSource = false; StringBuffer text = new StringBuffer(); BufferedReader trainReader = null, testReader = null; ObjectInputStream objectInputStream = null; CostMatrix costMatrix = null; StringBuffer schemeOptionsText = null; Range attributesToOutput = null; long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0; classIndexString = Utils.getOption('c', options); if (classIndexString.length() != 0) { classIndex = Integer.parseInt(classIndexString); } trainFileName = Utils.getOption('t', options); objectInputFileName = Utils.getOption('l', options); objectOutputFileName = Utils.getOption('d', options); testFileName = Utils.getOption('T', options); if (trainFileName.length() == 0) { if (objectInputFileName.length() == 0) { throw new Exception("No training file and no object " + "input file given."); } if (testFileName.length() == 0) { throw new Exception("No training file and no test " + "file given."); } } try { if (trainFileName.length() != 0) { trainReader = new BufferedReader(new FileReader(trainFileName)); } if (testFileName.length() != 0) { testReader = new BufferedReader(new FileReader(testFileName)); } if (objectInputFileName.length() != 0) { InputStream is = new FileInputStream(objectInputFileName); if (objectInputFileName.endsWith(".gz")) { is = new GZIPInputStream(is); } objectInputStream = new ObjectInputStream(is); } } catch 
(Exception e) { throw new Exception("Can't open file " + e.getMessage() + '.'); } if (testFileName.length() != 0) { template = test = new InstancesShort(testReader, 1); if (classIndex != -1) { test.setClassIndex(classIndex - 1); } else { test.setClassIndex(test.numAttributes() - 1); } if (classIndex > test.numAttributes()) { throw new Exception("Index of class attribute too large."); } } seedString = Utils.getOption('s', options); if (seedString.length() != 0) { seed = Integer.parseInt(seedString); } foldsString = Utils.getOption('x', options); if (foldsString.length() != 0) { folds = Integer.parseInt(foldsString); } classStatistics = Utils.getFlag('i', options); noOutput = Utils.getFlag('o', options); trainStatistics = !Utils.getFlag('v', options); printComplexityStatistics = Utils.getFlag('k', options); printMargins = Utils.getFlag('r', options); printGraph = Utils.getFlag('g', options); sourceClass = Utils.getOption('z', options); printSource = (sourceClass.length() != 0); for (int i = 0; i < options.length; i++) { if (options[i].length() != 0) { if (schemeOptionsText == null) { schemeOptionsText = new StringBuffer(); } if (options[i].indexOf(' ') != -1) { schemeOptionsText.append('"' + options[i] + "\" "); } else { schemeOptionsText.append(options[i] + " "); } } } classifier.setOptions(options); Utils.checkForRemainingOptions(options); train = new ModifiedInstancesShort(trainReader); if (classIndex != -1) { train.setClassIndex(classIndex - 1); } else { train.setClassIndex(train.numAttributes() - 1); } train.cleanUpValues(); // System.err.println(train); classifier.buildClassifier(train); } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } }