/** * Sets the options. * * @param options the options * @throws Exception if invalid option */ @Override public void setOptions(String[] options) throws Exception { String tmpStr; super.setOptions(options); tmpStr = Utils.getOption('a', options); if (tmpStr.length() != 0) { setNumAttributes(Integer.parseInt(tmpStr)); } else { setNumAttributes(defaultNumAttributes()); } setClassFlag(Utils.getFlag('c', options)); tmpStr = Utils.getOption('b', options); setBooleanIndices(tmpStr); m_booleanCols.setUpper(getNumAttributes() - 1); tmpStr = Utils.getOption('m', options); setNominalIndices(tmpStr); m_nominalCols.setUpper(getNumAttributes() - 1); // check indices tmpStr = checkIndices(); if (tmpStr.length() > 0) { throw new IllegalArgumentException(tmpStr); } }
/** * Process a classifier's prediction for an instance and update a set of plotting instances and * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data * points have automatic shape type assignment; classifier error data points have box shape type). * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize * stores the error (which is later converted to shape size values). * * @param toPredict the actual data point * @param classifier the classifier * @param eval the evaluation object to use for evaluating the classifier on the instance to * predict * @see #m_PlotShapes * @see #m_PlotSizes * @see #m_PlotInstances */ public void process(Instance toPredict, Classifier classifier, Evaluation eval) { double pred; double[] values; int i; try { pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict); if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { toPredict = ((weka.classifiers.misc.InputMappedClassifier) classifier) .constructMappedInstance(toPredict); } if (!m_SaveForVisualization) return; if (m_PlotInstances != null) { values = new double[m_PlotInstances.numAttributes()]; for (i = 0; i < m_PlotInstances.numAttributes(); i++) { if (i < toPredict.classIndex()) { values[i] = toPredict.value(i); } else if (i == toPredict.classIndex()) { values[i] = pred; values[i + 1] = toPredict.value(i); i++; } else { values[i] = toPredict.value(i - 1); } } m_PlotInstances.add(new DenseInstance(1.0, values)); if (toPredict.classAttribute().isNominal()) { if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) { m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } else if (pred != toPredict.classValue()) { // set to default error point shape m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE)); } else { // otherwise set to constant (automatically assigned) point shape m_PlotShapes.addElement(new 
Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE)); } else { // store the error (to be converted to a point size later) Double errd = null; if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) { errd = new Double(pred - toPredict.classValue()); m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } else { // missing shape if actual class not present or prediction is missing m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } m_PlotSizes.addElement(errd); } } } catch (Exception ex) { ex.printStackTrace(); } }
/** * Parses a given list of options. * * <p> * <!-- options-start --> * Valid options are: * * <p> * * <pre> -M * Minimize expected misclassification cost. Default is to * reweight training instances according to costs per class</pre> * * <pre> -C <cost file name> * File name of a cost matrix to use. If this is not supplied, * a cost matrix will be loaded on demand. The name of the * on-demand file is the relation name of the training data * plus ".cost", and the path to the on-demand file is * specified with the -N option.</pre> * * <pre> -N <directory> * Name of a directory to search for cost files when loading * costs on demand (default current directory).</pre> * * <pre> -cost-matrix <matrix> * The cost matrix in Matlab single line format.</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <!-- options-end --> * Options after -- are passed to the designated classifier. * * <p> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setMinimizeExpectedCost(Utils.getFlag('M', options)); String costFile = Utils.getOption('C', options); if (costFile.length() != 0) { try { setCostMatrix(new CostMatrix(new BufferedReader(new FileReader(costFile)))); } catch (Exception ex) { // now flag as possible old format cost matrix. 
Delay cost matrix // loading until buildClassifer is called setCostMatrix(null); } setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED, TAGS_MATRIX_SOURCE)); m_CostFile = costFile; } else { setCostMatrixSource(new SelectedTag(MATRIX_ON_DEMAND, TAGS_MATRIX_SOURCE)); } String demandDir = Utils.getOption('N', options); if (demandDir.length() != 0) { setOnDemandDirectory(new File(demandDir)); } String cost_matrix = Utils.getOption("cost-matrix", options); if (cost_matrix.length() != 0) { StringWriter writer = new StringWriter(); CostMatrix.parseMatlab(cost_matrix).write(writer); setCostMatrix(new CostMatrix(new StringReader(writer.toString()))); setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED, TAGS_MATRIX_SOURCE)); } super.setOptions(options); }
/**
 * Parses a given list of options.
 *
 * <p>Recognized options: {@code -L <double>} — the epsilon parameter in the
 * epsilon-insensitive loss function (default 1.0e-3); {@code -W <double>} — the random
 * number seed (default 1).
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String epsilonStr = Utils.getOption('L', options);
  setEpsilonParameter(epsilonStr.length() != 0 ? Double.parseDouble(epsilonStr) : 1.0e-3);

  String seedStr = Utils.getOption('W', options);
  setSeed(seedStr.length() != 0 ? Integer.parseInt(seedStr) : 1);
}
/** * Returns a textual description of this classifier. * * @return a textual description of this classifier. */ @Override public String toString() { if (m_probOfClass == null) { return "NaiveBayesMultinomialText: No model built yet.\n"; } StringBuffer result = new StringBuffer(); // build a master dictionary over all classes HashSet<String> master = new HashSet<String>(); for (int i = 0; i < m_data.numClasses(); i++) { LinkedHashMap<String, Count> classDict = m_probOfWordGivenClass.get(i); for (String key : classDict.keySet()) { master.add(key); } } result.append("Dictionary size: " + master.size()).append("\n\n"); result.append("The independent frequency of a class\n"); result.append("--------------------------------------\n"); for (int i = 0; i < m_data.numClasses(); i++) { result .append(m_data.classAttribute().value(i)) .append("\t") .append(Double.toString(m_probOfClass[i])) .append("\n"); } result.append("\nThe frequency of a word given the class\n"); result.append("-----------------------------------------\n"); for (int i = 0; i < m_data.numClasses(); i++) { result.append(Utils.padLeft(m_data.classAttribute().value(i), 11)).append("\t"); } result.append("\n"); Iterator<String> masterIter = master.iterator(); while (masterIter.hasNext()) { String word = masterIter.next(); for (int i = 0; i < m_data.numClasses(); i++) { LinkedHashMap<String, Count> classDict = m_probOfWordGivenClass.get(i); Count c = classDict.get(word); if (c == null) { result.append("<laplace=1>\t"); } else { result.append(Utils.padLeft(Double.toString(c.m_count), 11)).append("\t"); } } result.append(word); result.append("\n"); } return result.toString(); }
/**
 * Returns a description of the bias-variance decomposition results.
 *
 * @return the bias-variance decomposition results as a string
 */
public String toString() {
  if (getClassifier() == null) {
    return "Invalid setup";
  }

  StringBuilder buf = new StringBuilder("\nBias-Variance Decomposition\n");

  buf.append("\nClassifier : ").append(getClassifier().getClass().getName());
  if (getClassifier() instanceof OptionHandler) {
    buf.append(Utils.joinOptions(((OptionHandler) m_Classifier).getOptions()));
  }
  buf.append("\nData File : ").append(getDataFileName());
  buf.append("\nClass Index : ");
  // an index of 0 denotes the last attribute
  if (getClassIndex() == 0) {
    buf.append("last");
  } else {
    buf.append(getClassIndex());
  }
  buf.append("\nTraining Pool: ").append(getTrainPoolSize());
  buf.append("\nIterations : ").append(getTrainIterations());
  buf.append("\nSeed : ").append(getSeed());
  buf.append("\nError : ").append(Utils.doubleToString(getError(), 6, 4));
  buf.append("\nSigma^2 : ").append(Utils.doubleToString(getSigma(), 6, 4));
  buf.append("\nBias^2 : ").append(Utils.doubleToString(getBias(), 6, 4));
  buf.append("\nVariance : ").append(Utils.doubleToString(getVariance(), 6, 4));

  return buf.append("\n").toString();
}
/**
 * Returns a textual description of the clustering: the number of clusters, and for each
 * cluster its size, prior probability and the per-attribute probability given the cluster.
 *
 * @return the model description as a string
 */
@Override
public String toString() {
  StringBuffer text = new StringBuffer();
  text.append("\nsIB\n===\n");
  text.append("\nNumber of clusters: " + m_numCluster + "\n");

  for (int cluster = 0; cluster < m_numCluster; cluster++) {
    text.append(
        "\nCluster: "
            + cluster
            + " Size : "
            + bestT.size(cluster)
            + " Prior probability: "
            + Utils.doubleToString(bestT.Pt[cluster], 4)
            + "\n\n");
    for (int att = 0; att < m_numAttributes; att++) {
      text.append("Attribute: " + m_data.attribute(att).name() + "\n");
      text.append(
          "Probability given the cluster = "
              + Utils.doubleToString(bestT.Py_t.get(att, cluster), 4)
              + "\n");
    }
  }

  return text.toString();
}
/**
 * Parses a given list of options.
 *
 * <p>Recognized options: {@code -F <filter specification>} — full class name of the filter
 * to use followed by its options, e.g. {@code "weka.filters.unsupervised.attribute.Remove -V
 * -R 1,2"}; {@code -c <the class index>} — the class index, where {@code first} maps to 1,
 * {@code last} maps to 0, and -1 means unset (the default); {@code -W} — full name of the
 * base associator (remaining options are handled by the superclass and passed on to it).
 *
 * <p>NOTE(review): the option help elsewhere advertises a MultiFilter/ReplaceMissingValues
 * default, but the code falls back to supervised Discretize — confirm which is intended.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String filterStr = Utils.getOption('F', options);
  if (filterStr.length() > 0) {
    String[] filterSpec = Utils.splitOptions(filterStr);
    if (filterSpec.length == 0) {
      throw new IllegalArgumentException("Invalid filter specification string");
    }
    String filterName = filterSpec[0];
    filterSpec[0] = "";
    setFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec));
  } else {
    setFilter(new weka.filters.supervised.attribute.Discretize());
  }

  String classStr = Utils.getOption('c', options);
  if (classStr.length() > 0) {
    // 1-based index; 0 denotes the last attribute
    if (classStr.equalsIgnoreCase("last")) {
      setClassIndex(0);
    } else if (classStr.equalsIgnoreCase("first")) {
      setClassIndex(1);
    } else {
      setClassIndex(Integer.parseInt(classStr));
    }
  } else {
    setClassIndex(-1);
  }

  super.setOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>Recognized options: {@code -S <num>} — random number seed (default 1); {@code -P
 * <percentage>} — percentage of SMOTE instances to create (default 100.0); {@code -K
 * <nearest-neighbors>} — number of nearest neighbors to use (default 5); {@code -C
 * <value-index>} — index of the nominal class value to SMOTE (default: auto-detect the
 * non-empty minority class).
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String seedStr = Utils.getOption('S', options);
  if (seedStr.length() != 0) {
    setRandomSeed(Integer.parseInt(seedStr));
  } else {
    setRandomSeed(1);
  }

  String percentageStr = Utils.getOption('P', options);
  if (percentageStr.length() != 0) {
    // Double.parseDouble instead of the deprecated Double(String) constructor
    setPercentage(Double.parseDouble(percentageStr));
  } else {
    setPercentage(100.0);
  }

  String nnStr = Utils.getOption('K', options);
  if (nnStr.length() != 0) {
    setNearestNeighbors(Integer.parseInt(nnStr));
  } else {
    setNearestNeighbors(5);
  }

  String classValueIndexStr = Utils.getOption('C', options);
  if (classValueIndexStr.length() != 0) {
    setClassValue(classValueIndexStr);
  } else {
    // no explicit class value: fall back to auto-detecting the minority class
    m_DetectMinorityClass = true;
  }
}
/**
 * Parses the options for this object.
 *
 * <p>Recognized options: {@code -D} — debugging output (handled by the superclass);
 * {@code -A <Haar>} — the algorithm to use (default HAAR); {@code -P <Zero>} — the padding
 * to use (default ZERO); {@code -F <filter specification>} — the preprocessing filter
 * (classname and options; default: MultiFilter with ReplaceMissingValues and Normalize).
 *
 * @param options the options to use
 * @throws Exception if the option setting fails
 */
public void setOptions(String[] options) throws Exception {
  super.setOptions(options);

  String algorithmStr = Utils.getOption("A", options);
  if (algorithmStr.length() != 0) {
    setAlgorithm(new SelectedTag(algorithmStr, TAGS_ALGORITHM));
  } else {
    setAlgorithm(new SelectedTag(ALGORITHM_HAAR, TAGS_ALGORITHM));
  }

  String paddingStr = Utils.getOption("P", options);
  if (paddingStr.length() != 0) {
    setPadding(new SelectedTag(paddingStr, TAGS_PADDING));
  } else {
    setPadding(new SelectedTag(PADDING_ZERO, TAGS_PADDING));
  }

  String[] filterSpec = Utils.splitOptions(Utils.getOption("F", options));
  if (filterSpec.length != 0) {
    // first token is the filter class name, remainder are its options
    String filterName = filterSpec[0];
    filterSpec[0] = "";
    setFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec));
  } else {
    // default preprocessing: replace missing values, then normalize
    MultiFilter defaultFilter = new MultiFilter();
    defaultFilter.setFilters(
        new Filter[] {
          new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
          new weka.filters.unsupervised.attribute.Normalize()
        });
    setFilter(defaultFilter);
  }
}
/**
 * Gets the current settings of the classifier.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  Vector<String> result = new Vector<String>();

  result.add("-C");
  result.add("" + getC());

  result.add("-N");
  result.add("" + m_filterType);

  // optimizer and kernel are serialized as "classname options"
  result.add("-I");
  result.add(
      ""
          + getRegOptimizer().getClass().getName()
          + " "
          + Utils.joinOptions(getRegOptimizer().getOptions()));

  result.add("-K");
  result.add(
      "" + getKernel().getClass().getName() + " " + Utils.joinOptions(getKernel().getOptions()));

  Collections.addAll(result, super.getOptions());

  // toArray with an empty array: no redundant cast, no pre-sizing needed
  return result.toArray(new String[0]);
}
/**
 * Parses a given list of options. Valid options are:
 *
 * <p>-D <br>
 * If set, clusterer is run in debug mode and may output additional info to the console.
 *
 * <p>-do-not-check-capabilities <br>
 * If set, clusterer capabilities are not checked before clusterer is built (use with caution).
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  // let option handlers declared anywhere in the class hierarchy consume their options first
  Option.setOptionsForHierarchy(options, this, AbstractClusterer.class);

  boolean debug = Utils.getFlag("output-debug-info", options);
  setDebug(debug);

  boolean skipCapabilitiesCheck = Utils.getFlag("do-not-check-capabilities", options);
  setDoNotCheckCapabilities(skipCapabilitiesCheck);
}
/**
 * Parses a given list of options.
 *
 * <p>Recognized options: {@code -A} — the nearest neighbour search algorithm to use
 * (default: weka.core.neighboursearch.LinearNNSearch); {@code -K <number of neighbours>} —
 * the number of neighbours used to set the kernel bandwidth (default: all); {@code -U
 * <number of weighting method>} — the weighting kernel shape: 0=Linear, 1=Epanechnikov,
 * 2=Tricube, 3=Inverse, 4=Gaussian (default 0 = Linear). Debug ({@code -D}) and base
 * classifier ({@code -W}) options are handled by the superclass.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String knnStr = Utils.getOption('K', options);
  // -1 means "use all instances" for the bandwidth
  setKNN(knnStr.length() != 0 ? Integer.parseInt(knnStr) : -1);

  String kernelStr = Utils.getOption('U', options);
  setWeightingKernel(kernelStr.length() != 0 ? Integer.parseInt(kernelStr) : LINEAR);

  String searchStr = Utils.getOption('A', options);
  if (searchStr.length() != 0) {
    String[] searchSpec = Utils.splitOptions(searchStr);
    if (searchSpec.length == 0) {
      throw new Exception("Invalid NearestNeighbourSearch algorithm specification string.");
    }
    String className = searchSpec[0];
    searchSpec[0] = "";
    setNearestNeighbourSearchAlgorithm(
        (NearestNeighbourSearch)
            Utils.forName(NearestNeighbourSearch.class, className, searchSpec));
  } else {
    this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
  }

  super.setOptions(options);
}
/**
 * Classifies the given test instance. The instance has to belong to a dataset when it's being
 * classified. Note that a classifier MUST implement either this or distributionForInstance().
 *
 * @param instance the instance to be classified
 * @return the predicted most likely class for the instance or Utils.missingValue() if no
 *     prediction is made
 * @exception Exception if an error occurred during the prediction
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  double[] dist = distributionForInstance(instance);
  if (dist == null) {
    throw new Exception("Null distribution predicted");
  }

  switch (instance.classAttribute().type()) {
    case Attribute.NOMINAL:
      // pick the class with the highest probability; all-zero means no prediction
      int bestIndex = 0;
      double bestProb = 0;
      for (int i = 0; i < dist.length; i++) {
        if (dist[i] > bestProb) {
          bestProb = dist[i];
          bestIndex = i;
        }
      }
      return (bestProb > 0) ? bestIndex : Utils.missingValue();
    case Attribute.NUMERIC:
    case Attribute.DATE:
      // numeric prediction is stored in the first slot of the distribution
      return dist[0];
    default:
      return Utils.missingValue();
  }
}
/**
 * Outputs the linear regression model as a string of the form
 * {@code class = c1 * att1 + c2 * att2 + ... + intercept}.
 *
 * @return the model as string
 */
public String toString() {
  if (m_TransformedData == null) {
    return "Linear Regression: No model built yet.";
  }
  try {
    // StringBuilder instead of StringBuffer: purely local, no synchronization needed
    StringBuilder text = new StringBuilder();
    int column = 0;
    boolean first = true;

    text.append("\nLinear Regression Model\n\n");
    text.append(m_TransformedData.classAttribute().name() + " =\n\n");
    // one "coefficient * attribute" term per selected (non-class) attribute
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
      if ((i != m_ClassIndex) && (m_SelectedAttributes[i])) {
        if (!first) {
          text.append(" +\n");
        } else {
          first = false;
        }
        text.append(Utils.doubleToString(m_Coefficients[column], 12, 4) + " * ");
        text.append(m_TransformedData.attribute(i).name());
        column++;
      }
    }
    // final coefficient is the intercept
    text.append(" +\n" + Utils.doubleToString(m_Coefficients[column], 12, 4));

    return text.toString();
  } catch (Exception e) {
    return "Can't print Linear Regression!";
  }
}
/** * Parses a given list of options. Valid options are: * * <p>-B classifierstring <br> * Classifierstring should contain the full class name of a scheme included for selection followed * by options to the classifier (required, option should be used once for each classifier). * * <p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { // Iterate through the schemes Vector classifiers = new Vector(); while (true) { String classifierString = Utils.getOption('B', options); if (classifierString.length() == 0) { break; } String[] classifierSpec = Utils.splitOptions(classifierString); if (classifierSpec.length == 0) { throw new IllegalArgumentException("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; classifiers.addElement(Classifier.forName(classifierName, classifierSpec)); } if (classifiers.size() == 0) { classifiers.addElement(new weka.classifiers.rules.ZeroR()); } Classifier[] classifiersArray = new Classifier[classifiers.size()]; for (int i = 0; i < classifiersArray.length; i++) { classifiersArray[i] = (Classifier) classifiers.elementAt(i); } setClassifiers(classifiersArray); }
/**
 * Parses a given list of options.
 *
 * <p>Recognized options: {@code -C <col>} — the attribute index (default last); {@code -F
 * <value index>} — the first value's index (default first); {@code -S <value index>} — the
 * second value's index (default last).
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String attIndexStr = Utils.getOption('C', options);
  setAttributeIndex(attIndexStr.length() != 0 ? attIndexStr : "last");

  String firstValStr = Utils.getOption('F', options);
  setFirstValueIndex(firstValStr.length() != 0 ? firstValStr : "first");

  String secondValStr = Utils.getOption('S', options);
  setSecondValueIndex(secondValStr.length() != 0 ? secondValStr : "last");

  // re-apply the input format so the new settings take effect immediately
  if (getInputFormat() != null) {
    setInputFormat(getInputFormat());
  }
}
/**
 * Equality test between the left-hand-side attribute value and either another attribute's
 * value or a numeric constant. Two missing values compare equal; one missing value never
 * equals a present value or a constant.
 *
 * @param inst the instance to test
 * @param lhsAttIndex index of the left-hand-side attribute
 * @param rhsOperand unused for this operator
 * @param numericOperand the constant compared against when the RHS is not an attribute
 * @param regexPattern unused for this operator
 * @param rhsIsAttribute true if the right-hand side is another attribute
 * @param rhsAttIndex index of the right-hand-side attribute (when rhsIsAttribute)
 * @return true if the two operands are considered equal
 */
@Override
boolean evaluate(
    Instance inst,
    int lhsAttIndex,
    String rhsOperand,
    double numericOperand,
    Pattern regexPattern,
    boolean rhsIsAttribute,
    int rhsAttIndex) {
  if (rhsIsAttribute) {
    boolean lhsMissing = inst.isMissing(lhsAttIndex);
    boolean rhsMissing = inst.isMissing(rhsAttIndex);
    if (lhsMissing && rhsMissing) {
      return true; // both missing: treated as equal
    }
    if (lhsMissing || rhsMissing) {
      return false; // exactly one missing: never equal
    }
    return Utils.eq(inst.value(lhsAttIndex), inst.value(rhsAttIndex));
  }

  // constant on the right-hand side
  return !inst.isMissing(lhsAttIndex) && Utils.eq(inst.value(lhsAttIndex), numericOperand);
}
/**
 * Computes average class values for each attribute and value.
 *
 * <p>For every nominal attribute, accumulates the (instance-weighted) class-value sum and
 * weight count per attribute value, converts each to a weighted mean, and stores in
 * m_Indices the attribute-value ordering obtained by sorting those means. Values never
 * observed fall back to the overall mean (sum / totalCounts). Instances with a missing
 * class or a missing value for the attribute are skipped. Non-nominal attributes keep
 * empty entries.
 */
private void computeAverageClassValues() {
  double totalCounts, sum;
  Instance instance;
  double[] counts;

  // avgClassValues[j][v]: weighted sum (later mean) of class values for value v of attribute j
  double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (att.isNominal()) {
      avgClassValues[j] = new double[att.numValues()];
      counts = new double[att.numValues()];
      // accumulate weighted class-value sums and weights per attribute value
      for (int i = 0; i < getInputFormat().numInstances(); i++) {
        instance = getInputFormat().instance(i);
        if (!instance.classIsMissing() && (!instance.isMissing(j))) {
          counts[(int) instance.value(j)] += instance.weight();
          avgClassValues[j][(int) instance.value(j)] += instance.weight() * instance.classValue();
        }
      }
      sum = Utils.sum(avgClassValues[j]);
      totalCounts = Utils.sum(counts);
      if (Utils.gr(totalCounts, 0)) {
        for (int k = 0; k < att.numValues(); k++) {
          if (Utils.gr(counts[k], 0)) {
            // observed value: weighted mean class value
            avgClassValues[j][k] /= counts[k];
          } else {
            // unobserved value: fall back to the overall mean class value
            avgClassValues[j][k] = sum / totalCounts;
          }
        }
      }
      // ordering of attribute values by ascending average class value
      m_Indices[j] = Utils.sort(avgClassValues[j]);
    }
  }
}
/** * Create the options array to pass to the classifier. The parameter values and positions are * taken from m_ClassifierOptions and m_CVParams. * * @return the options array */ protected String[] createOptions() { String[] options = new String[m_ClassifierOptions.length + 2 * m_CVParams.size()]; int start = 0, end = options.length; // Add the cross-validation parameters and their values for (int i = 0; i < m_CVParams.size(); i++) { CVParameter cvParam = (CVParameter) m_CVParams.elementAt(i); double paramValue = cvParam.m_ParamValue; if (cvParam.m_RoundParam) { // paramValue = (double)((int) (paramValue + 0.5)); paramValue = Math.rint(paramValue); } if (cvParam.m_AddAtEnd) { options[--end] = "" + Utils.doubleToString(paramValue, 4); options[--end] = "-" + cvParam.m_ParamChar; } else { options[start++] = "-" + cvParam.m_ParamChar; options[start++] = "" + Utils.doubleToString(paramValue, 4); } } // Add the static parameters System.arraycopy(m_ClassifierOptions, 0, options, start, m_ClassifierOptions.length); return options; }
@Override public void setOptions(String[] options) throws Exception { // these are options to the hadoop map task (especially the -names-file) String existing = Utils.getOption("existing-header", options); setPathToExistingHeader(existing); String attNames = Utils.getOption('A', options); setAttributeNames(attNames); String namesFile = Utils.getOption("names-file", options); setAttributeNamesFile(namesFile); String outputName = Utils.getOption("header-file-name", options); setOutputHeaderFileName(outputName); super.setOptions(options); // any options to pass on to the underlying Weka csv to arff map task? CSVToARFFHeaderMapTask tempMap = new CSVToARFFHeaderMapTask(); tempMap.setOptions(options); String optsToWekaMapTask = Utils.joinOptions(tempMap.getOptions()); if (!DistributedJobConfig.isEmpty(optsToWekaMapTask)) { setCsvToArffTaskOptions(optsToWekaMapTask); } }
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -B &lt;num&gt;
 *  Manual blend setting (default 20%)
 * </pre>
 *
 * <pre> -E
 *  Enable entropic auto-blend setting (symbolic class only)
 * </pre>
 *
 * <pre> -M &lt;char&gt;
 *  Specify the missing value treatment mode (default a)
 *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
 * </pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  // NOTE(review): removed the unused local "debug" string the original declared.

  String blendStr = Utils.getOption('B', options);
  if (blendStr.length() != 0) {
    setGlobalBlend(Integer.parseInt(blendStr));
  }

  setEntropicAutoBlend(Utils.getFlag('E', options));

  String missingModeStr = Utils.getOption('M', options);
  if (missingModeStr.length() != 0) {
    switch (missingModeStr.charAt(0)) {
      case 'd':
        setMissingMode(new SelectedTag(M_DELETE, TAGS_MISSING));
        break;
      case 'm':
        setMissingMode(new SelectedTag(M_MAXDIFF, TAGS_MISSING));
        break;
      case 'n':
        setMissingMode(new SelectedTag(M_NORMAL, TAGS_MISSING));
        break;
      case 'a': // 'a' and any unrecognized character both mean "average"
      default:
        setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
    }
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.</pre>
 *
 * <pre> -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
 *  Direction of search. (default = 1).</pre>
 *
 * <pre> -N &lt;num&gt;
 *  Number of non-improving nodes to
 *  consider before terminating search.</pre>
 *
 * <pre> -S &lt;num&gt;
 *  Size of lookup cache for evaluated subsets.
 *  Expressed as a multiple of the number of
 *  attributes in the data set. (default = 1)</pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

  resetOptions();

  String value = Utils.getOption('P', options);
  if (value.length() != 0) {
    setStartSet(value);
  }

  // Search direction defaults to forward selection when -D is absent.
  value = Utils.getOption('D', options);
  setDirection(value.length() != 0
      ? new SelectedTag(Integer.parseInt(value), TAGS_SELECTION)
      : new SelectedTag(SELECTION_FORWARD, TAGS_SELECTION));

  value = Utils.getOption('N', options);
  if (value.length() != 0) {
    setSearchTermination(Integer.parseInt(value));
  }

  value = Utils.getOption('S', options);
  if (value.length() != 0) {
    setLookupCacheSize(Integer.parseInt(value));
  }

  // Undocumented debugging flag.
  m_debug = Utils.getFlag('Z', options);
}
/**
 * Gets the current settings of FuzzyRoughSubsetEval.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions() {
  Vector<String> opts = new Vector<String>();

  // Each scheme option value is "<class name> <scheme's own options>", trimmed
  // in case the scheme has no options of its own.
  String measureSpec =
      m_FuzzyMeasure.getClass().getName() + " " + Utils.joinOptions(m_FuzzyMeasure.getOptions());
  opts.add("-Z");
  opts.add(measureSpec.trim());

  String implicatorSpec =
      m_Implicator.getClass().getName() + " " + Utils.joinOptions(m_Implicator.getOptions());
  opts.add("-I");
  opts.add(implicatorSpec.trim());

  String tnormSpec =
      m_TNorm.getClass().getName() + " " + Utils.joinOptions(m_TNorm.getOptions());
  opts.add("-T");
  opts.add(tnormSpec.trim());

  String similaritySpec =
      m_Similarity.getClass().getName() + " " + Utils.joinOptions(m_Similarity.getOptions());
  opts.add("-R");
  opts.add(similaritySpec.trim());

  return opts.toArray(new String[opts.size()]);
}
/** * Calculate the potential to decrease DL of the ruleset, i.e. the possible DL that could be * decreased by deleting the rule whose index and simple statstics are given. If there's no * potentials (i.e. smOrEq 0 && error rate < 0.5), it returns NaN. * * <p>The way this procedure does is copied from original RIPPER implementation and is quite * bizzare because it does not update the following rules' stats recursively any more when testing * each rule, which means it assumes after deletion no data covered by the following rules (or * regards the deleted rule as the last rule). Reasonable assumption? * * <p> * * @param index the index of the rule in m_Ruleset to be deleted * @param expFPOverErr expected FP/(FP+FN) * @param rulesetStat the simple statistics of the ruleset, updated if the rule should be deleted * @param ruleStat the simple statistics of the rule to be deleted * @param checkErr whether check if error rate >= 0.5 * @return the potential DL that could be decreased */ public double potential( int index, double expFPOverErr, double[] rulesetStat, double[] ruleStat, boolean checkErr) { // System.out.println("!!!inside potential: "); // Restore the stats if deleted double pcov = rulesetStat[0] - ruleStat[0]; double puncov = rulesetStat[1] + ruleStat[0]; double pfp = rulesetStat[4] - ruleStat[4]; double pfn = rulesetStat[5] + ruleStat[2]; double dataDLWith = dataDL(expFPOverErr, rulesetStat[0], rulesetStat[1], rulesetStat[4], rulesetStat[5]); double theoryDLWith = theoryDL(index); double dataDLWithout = dataDL(expFPOverErr, pcov, puncov, pfp, pfn); double potential = dataDLWith + theoryDLWith - dataDLWithout; double err = ruleStat[4] / ruleStat[0]; /*System.out.println("!!!"+dataDLWith +" | "+ theoryDLWith + " | " +dataDLWithout+"|"+ruleStat[4] + " / " + ruleStat[0]); */ boolean overErr = Utils.grOrEq(err, 0.5); if (!checkErr) overErr = false; if (Utils.grOrEq(potential, 0.0) || overErr) { // If deleted, update ruleset stats. 
Other stats do not matter rulesetStat[0] = pcov; rulesetStat[1] = puncov; rulesetStat[4] = pfp; rulesetStat[5] = pfn; return potential; } else return Double.NaN; }
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -C &lt;col&gt;
 *  Index of the attribute to be changed
 *  (default last attribute)</pre>
 *
 * <pre> -M
 *  Treat missing values as an extra value
 * </pre>
 *
 * <pre> -P &lt;num&gt;
 *  Specify the percentage of noise introduced
 *  to the data (default 10)</pre>
 *
 * <pre> -S &lt;num&gt;
 *  Specify the random number seed (default 1)</pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

  String indexString = Utils.getOption('C', options);
  if (indexString.length() != 0) {
    setAttributeIndex(indexString);
  } else {
    setAttributeIndex("last");
  }

  // FIX: reset to false when -M is absent, consistent with every other
  // option here falling back to its default (the original only ever set true).
  setUseMissing(Utils.getFlag('M', options));

  String percentString = Utils.getOption('P', options);
  if (percentString.length() != 0) {
    // Accept fractional input but truncate to an int percentage.
    setPercent((int) Double.parseDouble(percentString));
  } else {
    setPercent(10);
  }

  String seedString = Utils.getOption('S', options);
  if (seedString.length() != 0) {
    setRandomSeed(Integer.parseInt(seedString));
  } else {
    setRandomSeed(1);
  }
}
/** * Adds the prediction intervals as additional attributes at the end. Since classifiers can * returns varying number of intervals per instance, the dataset is filled with missing values for * non-existing intervals. */ protected void addPredictionIntervals() { int maxNum; int num; int i; int n; FastVector preds; FastVector atts; Instances data; Instance inst; Instance newInst; double[] values; double[][] predInt; // determine the maximum number of intervals maxNum = 0; preds = m_Evaluation.predictions(); for (i = 0; i < preds.size(); i++) { num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length; if (num > maxNum) maxNum = num; } // create new header atts = new FastVector(); for (i = 0; i < m_PlotInstances.numAttributes(); i++) atts.addElement(m_PlotInstances.attribute(i)); for (i = 0; i < maxNum; i++) { atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary")); atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary")); atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width")); } data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances()); data.setClassIndex(m_PlotInstances.classIndex()); // update data for (i = 0; i < m_PlotInstances.numInstances(); i++) { inst = m_PlotInstances.instance(i); // copy old values values = new double[data.numAttributes()]; System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes()); // add interval data predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals(); for (n = 0; n < maxNum; n++) { if (n < predInt.length) { values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0]; values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1]; values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0]; } else { values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue(); values[m_PlotInstances.numAttributes() + n * 3 + 1] = 
Utils.missingValue(); values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue(); } } // create new Instance newInst = new DenseInstance(inst.weight(), values); data.add(newInst); } m_PlotInstances = data; }
/**
 * Convert a single instance over. The converted instance is added to the end of the output queue.
 *
 * <p>Each numeric attribute in range is replaced either by a single bin-index
 * attribute (multi-value mode) or by one 0/1 indicator attribute per cut
 * point (binary mode); all other attributes are copied through unchanged.
 * Missing input values stay missing in the output.
 *
 * @param instance the instance to convert
 */
protected void convertInstance(Instance instance) {
  // index tracks the write position in the OUTPUT value array, which can be
  // longer than the input when binary discretization expands attributes.
  int index = 0;
  double[] vals = new double[outputFormatPeek().numAttributes()];
  // Copy and convert the values
  for (int i = 0; i < getInputFormat().numAttributes(); i++) {
    if (m_DiscretizeCols.isInRange(i) && getInputFormat().attribute(i).isNumeric()) {
      int j;
      double currentVal = instance.value(i);
      if (m_CutPoints[i] == null) {
        // No cut points were found for this attribute: everything falls into
        // a single bin (0), missing stays missing.
        if (instance.isMissing(i)) {
          vals[index] = Utils.missingValue();
        } else {
          vals[index] = 0;
        }
        index++;
      } else {
        if (!m_MakeBinary) {
          // Multi-value mode: output the index of the first cut point the
          // value does not exceed (or numCutPoints if it exceeds them all).
          if (instance.isMissing(i)) {
            vals[index] = Utils.missingValue();
          } else {
            for (j = 0; j < m_CutPoints[i].length; j++) {
              if (currentVal <= m_CutPoints[i][j]) {
                break;
              }
            }
            vals[index] = j;
          }
          index++;
        } else {
          // Binary mode: one indicator per cut point — 0 if the value is at
          // or below the cut point, 1 if above; missing propagates to all.
          for (j = 0; j < m_CutPoints[i].length; j++) {
            if (instance.isMissing(i)) {
              vals[index] = Utils.missingValue();
            } else if (currentVal <= m_CutPoints[i][j]) {
              vals[index] = 0;
            } else {
              vals[index] = 1;
            }
            index++;
          }
        }
      }
    } else {
      // Attribute not discretized: copy its value straight through.
      vals[index] = instance.value(i);
      index++;
    }
  }
  // Preserve sparseness of the input representation.
  Instance inst = null;
  if (instance instanceof SparseInstance) {
    inst = new SparseInstance(instance.weight(), vals);
  } else {
    inst = new DenseInstance(instance.weight(), vals);
  }
  // setDataset before and after copyValues: copyValues needs both source and
  // target headers; the second call re-attaches the output header.
  inst.setDataset(getOutputFormat());
  copyValues(inst, false, instance.dataset(), getOutputFormat());
  inst.setDataset(getOutputFormat());
  push(inst);
}
/**
 * Parses a given list of options: -A (alpha star), -S (sigma), -R (r) and
 * the -M flag for using a sparse matrix representation.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

  String value = Utils.getOption('A', options);
  if (value.length() != 0) {
    setAlphaStar(Double.parseDouble(value));
  }

  value = Utils.getOption('S', options);
  if (value.length() != 0) {
    setSigma(Double.parseDouble(value));
  }

  value = Utils.getOption('R', options);
  if (value.length() != 0) {
    setR(Double.parseDouble(value));
  }

  setUseSparseMatrix(Utils.getFlag('M', options));
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <p>
 *
 * <pre> -D
 *  Enables debugging output (if available) to be printed.
 *  (default: off)</pre>
 *
 * <pre> -no-checks
 *  Turns off all checks - use with caution!
 *  (default: checks on)</pre>
 *
 * <pre> -C &lt;num&gt;
 *  The size of the cache (a prime number), 0 for full cache and
 *  -1 to turn it off.
 *  (default: 250007)</pre>
 *
 * <pre> -E &lt;num&gt;
 *  The Exponent to use.
 *  (default: 1.0)</pre>
 *
 * <pre> -L
 *  Use lower-order terms.
 *  (default: no)</pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

  // Exponent defaults to 1.0 (linear) when -E is absent.
  String expStr = Utils.getOption('E', options);
  setExponent(expStr.length() != 0 ? Double.parseDouble(expStr) : 1.0);

  setUseLowerOrder(Utils.getFlag('L', options));

  // Remaining options (debug, checks, cache size) are handled by the superclass.
  super.setOptions(options);
}