/**
 * Computes, for every nominal attribute of the input format, the weighted average class value
 * of each attribute value, and stores the ordering of those averages in {@code m_Indices}.
 *
 * <p>Instances with a missing class or a missing value for the attribute are skipped. A value
 * that carries no weight receives the attribute's overall weighted average instead. Entries of
 * {@code m_Indices} for non-nominal attributes are left as empty arrays.
 *
 * @throws Exception declared for callers; propagated from the helpers used here
 */
private void computeAverageClassValues() throws Exception {
  int attributeCount = getInputFormat().numAttributes();
  double[][] classMeans = new double[attributeCount][0];
  m_Indices = new int[attributeCount][0];

  for (int attIndex = 0; attIndex < attributeCount; attIndex++) {
    M5Attribute attribute = getInputFormat().attribute(attIndex);
    if (!attribute.isNominal()) {
      continue;
    }

    int valueCount = attribute.numValues();
    classMeans[attIndex] = new double[valueCount];
    double[] weightPerValue = new double[valueCount];

    // Accumulate the weight and the weighted class sum of every attribute value.
    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      M5Instance current = getInputFormat().instance(row);
      if (current.classIsMissing() || current.isMissing(attIndex)) {
        continue;
      }
      int valueIndex = (int) current.value(attIndex);
      weightPerValue[valueIndex] += current.weight();
      classMeans[attIndex][valueIndex] += current.weight() * current.classValue();
    }

    // Both totals must be taken before the sums are turned into averages below.
    double weightedClassSum = M5StaticUtils.sum(classMeans[attIndex]);
    double totalWeight = M5StaticUtils.sum(weightPerValue);

    if (M5StaticUtils.gr(totalWeight, 0)) {
      for (int v = 0; v < valueCount; v++) {
        if (M5StaticUtils.gr(weightPerValue[v], 0)) {
          classMeans[attIndex][v] /= weightPerValue[v];
        } else {
          // Unseen value: fall back to the overall weighted average.
          classMeans[attIndex][v] = weightedClassSum / totalWeight;
        }
      }
    }

    m_Indices[attIndex] = M5StaticUtils.sort(classMeans[attIndex]);
  }
}
/** * Converts carriage returns and new lines in a string into \r and \n. Backquotes the following * characters: ` " \ \t and % * * @param string the string * @return the converted string */ public static String backQuoteChars(String string) { int index; StringBuffer newStringBuffer; // replace each of the following characters with the backquoted version char charsFind[] = {'\\', '\'', '\t', '"', '%'}; String charsReplace[] = {"\\\\", "\\'", "\\t", "\\\"", "\\%"}; for (int i = 0; i < charsFind.length; i++) { if (string.indexOf(charsFind[i]) != -1) { newStringBuffer = new StringBuffer(); while ((index = string.indexOf(charsFind[i])) != -1) { if (index > 0) { newStringBuffer.append(string.substring(0, index)); } newStringBuffer.append(charsReplace[i]); if ((index + 1) < string.length()) { string = string.substring(index + 1); } else { string = ""; } } newStringBuffer.append(string); string = newStringBuffer.toString(); } } return M5StaticUtils.convertNewLines(string); }
/**
 * Computes {@code c * log2(c)} for an integer {@code c}.
 *
 * @param c an integer value
 * @return {@code c * log2(c)}, or 0 when {@code c} is 0 (the logarithm is not evaluated then)
 */
public static double xlogx(int c) {
  return (c == 0) ? 0.0 : c * M5StaticUtils.log2((double) c);
}
/**
 * Parses the options for this object. Valid options are:
 *
 * <p>-N <br>
 * If binary attributes are to be coded as nominal ones.
 *
 * <p>When an input format has already been set, it is set again after the flag is applied.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  boolean binaryAsNominal = M5StaticUtils.getFlag('N', options);
  setBinaryAttributesNominal(binaryAsNominal);

  if (getInputFormat() == null) {
    return;
  }
  setInputFormat(getInputFormat());
}
/** * Sorts a given array of doubles in ascending order and returns an array of integers with the * positions of the elements of the original array in the sorted array. The sort is stable (Equal * elements remain in their original order.) Occurrences of Double.NaN are treated as * Double.MAX_VALUE * * @param array this array is not changed by the method! * @return an array of integers with the positions in the sorted array. */ public static int[] stableSort(double[] array) { int[] index = new int[array.length]; int[] newIndex = new int[array.length]; int[] helpIndex; int numEqual; array = (double[]) array.clone(); for (int i = 0; i < index.length; i++) { index[i] = i; if (Double.isNaN(array[i])) { array[i] = Double.MAX_VALUE; } } quickSort(array, index, 0, array.length - 1); // Make sort stable int i = 0; while (i < index.length) { numEqual = 1; for (int j = i + 1; ((j < index.length) && M5StaticUtils.eq(array[index[i]], array[index[j]])); j++) { numEqual++; } if (numEqual > 1) { helpIndex = new int[numEqual]; for (int j = 0; j < numEqual; j++) { helpIndex[j] = i + j; } quickSort(index, helpIndex, 0, numEqual - 1); for (int j = 0; j < numEqual; j++) { newIndex[i + j] = index[helpIndex[j]]; } i += numEqual; } else { newIndex[i] = index[i]; i++; } } return newIndex; }
/**
 * Creates a new instance of an object given its class name and (optional) arguments to pass to
 * its setOptions method. If the created object is an {@code M5} and the options parameter is
 * non-null, the object will have its options set, after which any options left over are
 * reported through {@code M5StaticUtils.checkForRemainingOptions}. Example use:
 *
 * <p><code> <pre>
 * String classifierName = M5StaticUtils.getOption('W', options);
 * Classifier c = (Classifier)M5StaticUtils.forName(Classifier.class,
 *                                                  classifierName,
 *                                                  options);
 * setClassifier(c);
 * </pre></code>
 *
 * @param classType the class that the instantiated object should be assignable to -- an exception
 *     is thrown if this is not the case
 * @param className the fully qualified class name of the object
 * @param options an array of options suitable for passing to setOptions. May be null.
 * @return the newly created object, ready for use.
 * @exception Exception if the class name is invalid (the original lookup failure is attached as
 *     the cause), or if the class is not assignable to the desired class type, or the options
 *     supplied are not acceptable to the object
 */
public static Object forName(Class classType, String className, String[] options)
    throws Exception {
  Class<?> c;
  try {
    c = Class.forName(className);
  } catch (Exception ex) {
    // Keep the original failure as the cause so the real reason is not lost.
    throw new Exception("Can't find class called: " + className, ex);
  }

  if (!classType.isAssignableFrom(c)) {
    throw new Exception(classType.getName() + " is not assignable from " + className);
  }

  Object o = c.newInstance();
  if ((o instanceof M5) && (options != null)) {
    ((M5) o).setOptions(options);
    M5StaticUtils.checkForRemainingOptions(options);
  }
  return o;
}
/**
 * Displays a representation of this estimator: the number of kernels, the standard deviation
 * and precision, followed by the kernel means and, unless all weights are one, the weights.
 *
 * @return the textual description, terminated by a newline
 */
public String toString() {
  // Built with a StringBuilder because the value lists are appended in loops.
  StringBuilder result =
      new StringBuilder(
          m_NumValues
              + " Normal Kernels. \nStandardDev = "
              + M5StaticUtils.doubleToString(m_StandardDev, 6, 4)
              + " Precision = "
              + m_Precision);

  if (m_NumValues == 0) {
    result.append(" \nMean = 0");
  } else {
    result.append(" \nMeans =");
    for (int i = 0; i < m_NumValues; i++) {
      result.append(" ").append(m_Values[i]);
    }
    if (!m_AllWeightsOne) {
      result.append("\nWeights = ");
      for (int i = 0; i < m_NumValues; i++) {
        result.append(" ").append(m_Weights[i]);
      }
    }
  }
  return result.append("\n").toString();
}
/** * Main method for testing this class. * * @param ops some dummy options */ public static void main(String[] ops) { double[] doubles = {4.5, 6.7, Double.NaN, 3.4, 4.8, 1.2, 3.4}; int[] ints = {12, 6, 2, 18, 16, 6, 7, 5}; try { // Option handling System.out.println("First option split up:"); if (ops.length > 0) { String[] firstOptionSplitUp = M5StaticUtils.splitOptions(ops[0]); for (int i = 0; i < firstOptionSplitUp.length; i++) { System.out.println(firstOptionSplitUp[i]); } } System.out.println("Partitioned options: "); String[] partitionedOptions = M5StaticUtils.partitionOptions(ops); for (int i = 0; i < partitionedOptions.length; i++) { System.out.println(partitionedOptions[i]); } System.out.println("Get flag -f: " + M5StaticUtils.getFlag('f', ops)); System.out.println("Get option -o: " + M5StaticUtils.getOption('o', ops)); System.out.println("Checking for remaining options... "); M5StaticUtils.checkForRemainingOptions(ops); // Statistics System.out.println("Original array (doubles): "); for (int i = 0; i < doubles.length; i++) { System.out.print(doubles[i] + " "); } System.out.println(); System.out.println("Original array (ints): "); for (int i = 0; i < ints.length; i++) { System.out.print(ints[i] + " "); } System.out.println(); System.out.println( "Correlation: " + M5StaticUtils.correlation(doubles, doubles, doubles.length)); System.out.println("Mean: " + M5StaticUtils.mean(doubles)); System.out.println("Variance: " + M5StaticUtils.variance(doubles)); System.out.println("Sum (doubles): " + M5StaticUtils.sum(doubles)); System.out.println("Sum (ints): " + M5StaticUtils.sum(ints)); System.out.println("Max index (doubles): " + M5StaticUtils.maxIndex(doubles)); System.out.println("Max index (ints): " + M5StaticUtils.maxIndex(ints)); System.out.println("Min index (doubles): " + M5StaticUtils.minIndex(doubles)); System.out.println("Min index (ints): " + M5StaticUtils.minIndex(ints)); // Sorting and normalizing System.out.println("Sorted array (doubles): "); int[] 
sorted = M5StaticUtils.sort(doubles); for (int i = 0; i < doubles.length; i++) { System.out.print(doubles[sorted[i]] + " "); } System.out.println(); System.out.println("Normalized array (doubles): "); M5StaticUtils.normalize(doubles); for (int i = 0; i < doubles.length; i++) { System.out.print(doubles[i] + " "); } System.out.println(); System.out.println("Normalized again (doubles): "); M5StaticUtils.normalize(doubles, M5StaticUtils.sum(doubles)); for (int i = 0; i < doubles.length; i++) { System.out.print(doubles[i] + " "); } System.out.println(); // Pretty-printing System.out.println("-4.58: " + M5StaticUtils.doubleToString(-4.57826535, 2)); System.out.println("-6.78: " + M5StaticUtils.doubleToString(-6.78214234, 6, 2)); // Comparisons System.out.println("5.70001 == 5.7 ? " + M5StaticUtils.eq(5.70001, 5.7)); System.out.println("5.70001 > 5.7 ? " + M5StaticUtils.gr(5.70001, 5.7)); System.out.println("5.70001 >= 5.7 ? " + M5StaticUtils.grOrEq(5.70001, 5.7)); System.out.println("5.7 < 5.70001 ? " + M5StaticUtils.sm(5.7, 5.70001)); System.out.println("5.7 <= 5.70001 ? " + M5StaticUtils.smOrEq(5.7, 5.70001)); // Math System.out.println("Info (ints): " + M5StaticUtils.info(ints)); System.out.println("log2(4.6): " + M5StaticUtils.log2(4.6)); System.out.println("5 * log(5): " + M5StaticUtils.xlogx(5)); System.out.println("5.5 rounded: " + M5StaticUtils.round(5.5)); System.out.println( "5.55555 rounded to 2 decimal places: " + M5StaticUtils.roundDouble(5.55555, 2)); } catch (Exception e) { e.printStackTrace(); } }
/** * Method for testing filters ability to process multiple batches. * * @param options should contain the following arguments:<br> * -i (first) input file <br> * -o (first) output file <br> * -r (second) input file <br> * -s (second) output file <br> * -c class_index <br> * or -h for help on options * @exception Exception if something goes wrong or the user requests help on command options */ public static void batchFilterFile(NominalToBinaryFilter filter, String[] options) throws Exception { M5Instances firstData = null; M5Instances secondData = null; Reader firstInput = null; Reader secondInput = null; PrintWriter firstOutput = null; PrintWriter secondOutput = null; boolean helpRequest; try { helpRequest = M5StaticUtils.getFlag('h', options); String fileName = M5StaticUtils.getOption('i', options); if (fileName.length() != 0) { firstInput = new BufferedReader(new FileReader(fileName)); } else { throw new Exception("No first input file given.\n"); } fileName = M5StaticUtils.getOption('r', options); if (fileName.length() != 0) { secondInput = new BufferedReader(new FileReader(fileName)); } else { throw new Exception("No second input file given.\n"); } fileName = M5StaticUtils.getOption('o', options); if (fileName.length() != 0) { firstOutput = new PrintWriter(new FileOutputStream(fileName)); } else { firstOutput = new PrintWriter(System.out); } fileName = M5StaticUtils.getOption('s', options); if (fileName.length() != 0) { secondOutput = new PrintWriter(new FileOutputStream(fileName)); } else { secondOutput = new PrintWriter(System.out); } String classIndex = M5StaticUtils.getOption('c', options); if (filter instanceof NominalToBinaryFilter) { ((NominalToBinaryFilter) filter).setOptions(options); } M5StaticUtils.checkForRemainingOptions(options); if (helpRequest) { throw new Exception("Help requested.\n"); } firstData = new M5Instances(firstInput, 1); secondData = new M5Instances(secondInput, 1); if (!secondData.equalHeaders(firstData)) { throw new 
Exception("Input file formats differ.\n"); } if (classIndex.length() != 0) { if (classIndex.equals("first")) { firstData.setClassIndex(0); secondData.setClassIndex(0); } else if (classIndex.equals("last")) { firstData.setClassIndex(firstData.numAttributes() - 1); secondData.setClassIndex(secondData.numAttributes() - 1); } else { firstData.setClassIndex(Integer.parseInt(classIndex) - 1); secondData.setClassIndex(Integer.parseInt(classIndex) - 1); } } } catch (Exception ex) { String filterOptions = ""; // Output the error and also the valid options if (filter instanceof NominalToBinaryFilter) { filterOptions += "\nFilter options:\n\n"; Enumeration enume = ((NominalToBinaryFilter) filter).listOptions(); while (enume.hasMoreElements()) { Information option = (Information) enume.nextElement(); filterOptions += option.synopsis() + '\n' + option.description() + "\n"; } } String genericOptions = "\nGeneral options:\n\n" + "-h\n" + "\tGet help on available options.\n" + "-i <filename>\n" + "\tThe file containing first input instances.\n" + "-o <filename>\n" + "\tThe file first output instances will be written to.\n" + "-r <filename>\n" + "\tThe file containing second input instances.\n" + "-s <filename>\n" + "\tThe file second output instances will be written to.\n" + "-c <class index>\n" + "\tThe number of the attribute to use as the class.\n" + "\t\"first\" and \"last\" are also valid entries.\n" + "\tIf not supplied then no class is assigned.\n"; throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions); } boolean printedHeader = false; if (filter.setInputFormat(firstData)) { firstOutput.println(filter.getOutputFormat().toString()); printedHeader = true; } // Pass all the instances to the filter while (firstData.readInstance(firstInput)) { if (filter.input(firstData.instance(0))) { if (!printedHeader) { throw new Error("Filter didn't return true from setInputFormat() " + "earlier!"); } firstOutput.println(filter.output().toString()); } 
firstData.delete(0); } // Say that input has finished, and print any pending output instances if (filter.batchFinished()) { if (!printedHeader) { firstOutput.println(filter.getOutputFormat().toString()); } while (filter.numPendingOutput() > 0) { firstOutput.println(filter.output().toString()); } } if (firstOutput != null) { firstOutput.close(); } printedHeader = false; if (filter.isOutputFormatDefined()) { secondOutput.println(filter.getOutputFormat().toString()); printedHeader = true; } // Pass all the second instances to the filter while (secondData.readInstance(secondInput)) { if (filter.input(secondData.instance(0))) { if (!printedHeader) { throw new Error("Filter didn't return true from" + " isOutputFormatDefined() earlier!"); } secondOutput.println(filter.output().toString()); } secondData.delete(0); } // Say that input has finished, and print any pending output instances if (filter.batchFinished()) { if (!printedHeader) { secondOutput.println(filter.getOutputFormat().toString()); } while (filter.numPendingOutput() > 0) { secondOutput.println(filter.output().toString()); } } if (secondOutput != null) { secondOutput.close(); } }
/** * Method for testing filters. * * @param options should contain the following arguments: <br> * -i input_file <br> * -o output_file <br> * -c class_index <br> * or -h for help on options * @exception Exception if something goes wrong or the user requests help on command options */ public static void filterFile(NominalToBinaryFilter filter, String[] options) throws Exception { boolean debug = false; M5Instances data = null; Reader input = null; PrintWriter output = null; boolean helpRequest; try { helpRequest = M5StaticUtils.getFlag('h', options); if (M5StaticUtils.getFlag('d', options)) { debug = true; } String infileName = M5StaticUtils.getOption('i', options); String outfileName = M5StaticUtils.getOption('o', options); String classIndex = M5StaticUtils.getOption('c', options); if (filter instanceof NominalToBinaryFilter) { ((NominalToBinaryFilter) filter).setOptions(options); } M5StaticUtils.checkForRemainingOptions(options); if (helpRequest) { throw new Exception("Help requested.\n"); } if (infileName.length() != 0) { input = new BufferedReader(new FileReader(infileName)); } else { input = new BufferedReader(new InputStreamReader(System.in)); } if (outfileName.length() != 0) { output = new PrintWriter(new FileOutputStream(outfileName)); } else { output = new PrintWriter(System.out); } data = new M5Instances(input, 1); if (classIndex.length() != 0) { if (classIndex.equals("first")) { data.setClassIndex(0); } else if (classIndex.equals("last")) { data.setClassIndex(data.numAttributes() - 1); } else { data.setClassIndex(Integer.parseInt(classIndex) - 1); } } } catch (Exception ex) { String filterOptions = ""; // Output the error and also the valid options if (filter instanceof NominalToBinaryFilter) { filterOptions += "\nFilter options:\n\n"; Enumeration enuma = ((NominalToBinaryFilter) filter).listOptions(); while (enuma.hasMoreElements()) { Information option = (Information) enuma.nextElement(); filterOptions += option.synopsis() + '\n' + option.description() 
+ "\n"; } } String genericOptions = "\nGeneral options:\n\n" + "-h\n" + "\tGet help on available options.\n" + "\t(use -b -h for help on batch mode.)\n" + "-i <file>\n" + "\tThe name of the file containing input instances.\n" + "\tIf not supplied then instances will be read from stdin.\n" + "-o <file>\n" + "\tThe name of the file output instances will be written to.\n" + "\tIf not supplied then instances will be written to stdout.\n" + "-c <class index>\n" + "\tThe number of the attribute to use as the class.\n" + "\t\"first\" and \"last\" are also valid entries.\n" + "\tIf not supplied then no class is assigned.\n"; throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions); } if (debug) { System.err.println("Setting input format"); } boolean printedHeader = false; if (filter.setInputFormat(data)) { if (debug) { System.err.println("Getting output format"); } output.println(filter.getOutputFormat().toString()); printedHeader = true; } // Pass all the instances to the filter while (data.readInstance(input)) { if (debug) { System.err.println("Input instance to filter"); } if (filter.input(data.instance(0))) { if (debug) { System.err.println("Filter said collect immediately"); } if (!printedHeader) { throw new Error("Filter didn't return true from setInputFormat() " + "earlier!"); } if (debug) { System.err.println("Getting output instance"); } output.println(filter.output().toString()); } data.delete(0); } // Say that input has finished, and print any pending output instances if (debug) { System.err.println("Setting end of batch"); } if (filter.batchFinished()) { if (debug) { System.err.println("Filter said collect output"); } if (!printedHeader) { if (debug) { System.err.println("Getting output format"); } output.println(filter.getOutputFormat().toString()); } if (debug) { System.err.println("Getting output instance"); } while (filter.numPendingOutput() > 0) { output.println(filter.output().toString()); if (debug) { System.err.println("Getting 
output instance"); } } } if (debug) { System.err.println("Done"); } if (output != null) { output.close(); } }