Пример #1
0
  /**
   * Computes, for every nominal attribute of the input format, the weighted mean class value
   * observed for each attribute value, and stores the ordering of those means (as returned by
   * {@code M5StaticUtils.sort}) in {@code m_Indices}.
   *
   * <p>Instances whose class value or attribute value is missing are ignored. An attribute value
   * that was never observed receives the overall weighted class mean for that attribute.
   * Non-nominal attributes keep an empty index array.
   *
   * @throws Exception declared by the original signature; propagated from the calls made here
   */
  private void computeAverageClassValues() throws Exception {

    final int attributeCount = getInputFormat().numAttributes();
    double[][] classMeans = new double[attributeCount][0];
    m_Indices = new int[attributeCount][0];

    for (int attIndex = 0; attIndex < attributeCount; attIndex++) {
      M5Attribute attribute = getInputFormat().attribute(attIndex);
      if (!attribute.isNominal()) {
        continue;
      }

      final int valueCount = attribute.numValues();
      double[] weightedSums = new double[valueCount];
      double[] weights = new double[valueCount];
      classMeans[attIndex] = weightedSums;

      // Accumulate weight and weight * class value per attribute value.
      final int instanceCount = getInputFormat().numInstances();
      for (int row = 0; row < instanceCount; row++) {
        M5Instance current = getInputFormat().instance(row);
        if (current.classIsMissing() || current.isMissing(attIndex)) {
          continue;
        }
        int valueIndex = (int) current.value(attIndex);
        double weight = current.weight();
        weights[valueIndex] += weight;
        weightedSums[valueIndex] += weight * current.classValue();
      }

      // Totals must be taken before the sums are turned into means below.
      double overallSum = M5StaticUtils.sum(weightedSums);
      double overallWeight = M5StaticUtils.sum(weights);
      if (M5StaticUtils.gr(overallWeight, 0)) {
        for (int v = 0; v < valueCount; v++) {
          if (M5StaticUtils.gr(weights[v], 0)) {
            weightedSums[v] = weightedSums[v] / weights[v];
          } else {
            // Unseen value: fall back to the overall weighted class mean.
            weightedSums[v] = overallSum / overallWeight;
          }
        }
      }
      m_Indices[attIndex] = M5StaticUtils.sort(weightedSums);
    }
  }
Пример #2
0
  /**
   * Escapes special characters in a string by prefixing them with a backslash.
   *
   * <p>The following characters are escaped here: backslash ({@code \}), single quote ({@code '}),
   * tab (written as {@code \t}), double quote ({@code "}) and percent ({@code %}). The result is
   * then passed to {@code M5StaticUtils.convertNewLines}, which handles carriage returns and new
   * lines.
   *
   * @param string the string to convert; must not be null
   * @return the converted string
   */
  public static String backQuoteChars(String string) {

    // A single left-to-right pass is equivalent to escaping one character class at a time:
    // backslashes are handled as they are met, and no replacement is ever rescanned.
    final int length = string.length();
    StringBuilder escaped = new StringBuilder(length + 16);
    for (int i = 0; i < length; i++) {
      char ch = string.charAt(i);
      switch (ch) {
        case '\\':
          escaped.append("\\\\");
          break;
        case '\'':
          escaped.append("\\'");
          break;
        case '\t':
          escaped.append("\\t");
          break;
        case '"':
          escaped.append("\\\"");
          break;
        case '%':
          escaped.append("\\%");
          break;
        default:
          escaped.append(ch);
          break;
      }
    }

    return M5StaticUtils.convertNewLines(escaped.toString());
  }
Пример #3
0
  /**
   * Computes {@code c * log2(c)} for an integer {@code c}, using {@code M5StaticUtils.log2}.
   *
   * @param c an integer value
   * @return {@code c * log2(c)}, or 0 when {@code c} is 0 (the logarithm is not evaluated then)
   */
  public static double xlogx(int c) {

    return (c == 0) ? 0.0 : c * M5StaticUtils.log2((double) c);
  }
Пример #4
0
  /**
   * Parses the options for this object. Valid options are:
   *
   * <p>-N <br>
   * If binary attributes are to be coded as nominal ones.
   *
   * <p>When an input format is already set, it is set again after the flag has been applied.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    boolean codeBinaryAsNominal = M5StaticUtils.getFlag('N', options);
    setBinaryAttributesNominal(codeBinaryAsNominal);

    if (getInputFormat() == null) {
      return;
    }
    // NOTE(review): presumably re-applied so the filter picks up the new setting -- confirm.
    setInputFormat(getInputFormat());
  }
Пример #5
0
  /**
   * Sorts a copy of the given array of doubles with {@code quickSort} and returns the resulting
   * index array, post-processed so that elements comparing equal (per {@code M5StaticUtils.eq})
   * appear in ascending order of their original index, i.e. the sort is stable. Occurrences of
   * Double.NaN are treated as Double.MAX_VALUE.
   *
   * @param array the values to sort; this array is not changed by the method
   * @return an array of indices into the original array describing the sorted order
   */
  public static int[] stableSort(double[] array) {

    final int length = array.length;
    int[] order = new int[length];
    int[] stableOrder = new int[length];

    // Work on a private copy so NaN substitution never touches the caller's data.
    double[] values = (double[]) array.clone();
    for (int pos = 0; pos < length; pos++) {
      order[pos] = pos;
      if (Double.isNaN(values[pos])) {
        values[pos] = Double.MAX_VALUE;
      }
    }
    quickSort(values, order, 0, length - 1);

    // Make the sort stable: within each run of equal values, order by original index.
    int runStart = 0;
    while (runStart < length) {
      int runEnd = runStart + 1;
      while ((runEnd < length)
          && M5StaticUtils.eq(values[order[runStart]], values[order[runEnd]])) {
        runEnd++;
      }
      int runLength = runEnd - runStart;

      if (runLength > 1) {
        // Positions of the run inside 'order', sorted by the original indices stored there.
        int[] runPositions = new int[runLength];
        for (int k = 0; k < runLength; k++) {
          runPositions[k] = runStart + k;
        }
        quickSort(order, runPositions, 0, runLength - 1);
        for (int k = 0; k < runLength; k++) {
          stableOrder[runStart + k] = order[runPositions[k]];
        }
      } else {
        stableOrder[runStart] = order[runStart];
      }
      runStart = runEnd;
    }

    return stableOrder;
  }
Пример #6
0
  /**
   * Creates a new instance of an object given its class name and (optional) arguments to pass to
   * its setOptions method. If the created object is an instance of {@code M5} and the options
   * parameter is non-null, the object will have its options set, after which
   * {@code M5StaticUtils.checkForRemainingOptions} is applied to the array. Example use:
   *
   * <p><code> <pre>
   * String classifierName = M5StaticUtils.getOption('W', options);
   * Classifier c = (Classifier)M5StaticUtils.forName(Classifier.class,
   *                                          classifierName,
   *                                          options);
   * setClassifier(c);
   * </pre></code>
   *
   * @param classType the class that the instantiated object should be assignable to -- an exception
   *     is thrown if this is not the case
   * @param className the fully qualified class name of the object
   * @param options an array of options suitable for passing to setOptions. May be null. Any options
   *     accepted by the object will be removed from the array.
   * @return the newly created object, ready for use.
   * @exception Exception if the class name is invalid, or if the class is not assignable to the
   *     desired class type, or the options supplied are not acceptable to the object
   */
  public static Object forName(Class classType, String className, String[] options)
      throws Exception {

    Class c = null;
    try {
      c = Class.forName(className);
    } catch (Exception ex) {
      // Keep the original failure as the cause so the real reason is not lost.
      throw new Exception("Can't find class called: " + className, ex);
    }
    if (!classType.isAssignableFrom(c)) {
      throw new Exception(classType.getName() + " is not assignable from " + className);
    }
    // Requires an accessible no-argument constructor; failures propagate to the caller.
    Object o = c.newInstance();
    if ((o instanceof M5) && (options != null)) {
      ((M5) o).setOptions(options);
      M5StaticUtils.checkForRemainingOptions(options);
    }
    return o;
  }
Пример #7
0
  /**
   * Returns a textual representation of this estimator: the number of kernels, the standard
   * deviation and precision, followed by the kernel means and, unless all weights are one, the
   * kernel weights.
   *
   * @return the description, terminated by a newline
   */
  public String toString() {

    // A builder avoids re-copying the whole text on every appended mean/weight.
    StringBuilder text = new StringBuilder();
    text.append(m_NumValues)
        .append(" Normal Kernels. \nStandardDev = ")
        .append(M5StaticUtils.doubleToString(m_StandardDev, 6, 4))
        .append(" Precision = ")
        .append(m_Precision);

    if (m_NumValues == 0) {
      text.append("  \nMean = 0");
    } else {
      text.append("  \nMeans =");
      for (int i = 0; i < m_NumValues; i++) {
        text.append(" ").append(m_Values[i]);
      }
      if (!m_AllWeightsOne) {
        text.append("\nWeights = ");
        for (int i = 0; i < m_NumValues; i++) {
          text.append(" ").append(m_Weights[i]);
        }
      }
    }
    return text.append("\n").toString();
  }
Пример #8
0
  /**
   * Demo entry point that exercises the utility methods of {@code M5StaticUtils} (option
   * handling, statistics, sorting, normalizing, formatting, comparisons and math) and prints the
   * results to standard output. Any exception is reported via its stack trace.
   *
   * @param ops command-line options used to demonstrate the option-handling helpers
   */
  public static void main(String[] ops) {

    final double[] sampleDoubles = {4.5, 6.7, Double.NaN, 3.4, 4.8, 1.2, 3.4};
    final int[] sampleInts = {12, 6, 2, 18, 16, 6, 7, 5};

    try {

      // ---- Option handling ----
      System.out.println("First option split up:");
      if (ops.length > 0) {
        String[] pieces = M5StaticUtils.splitOptions(ops[0]);
        for (String piece : pieces) {
          System.out.println(piece);
        }
      }
      System.out.println("Partitioned options: ");
      String[] partitioned = M5StaticUtils.partitionOptions(ops);
      for (String part : partitioned) {
        System.out.println(part);
      }
      System.out.println("Get flag -f: " + M5StaticUtils.getFlag('f', ops));
      System.out.println("Get option -o: " + M5StaticUtils.getOption('o', ops));
      System.out.println("Checking for remaining options... ");
      M5StaticUtils.checkForRemainingOptions(ops);

      // ---- Statistics ----
      System.out.println("Original array (doubles): ");
      for (double value : sampleDoubles) {
        System.out.print(value + " ");
      }
      System.out.println();
      System.out.println("Original array (ints): ");
      for (int value : sampleInts) {
        System.out.print(value + " ");
      }
      System.out.println();
      double selfCorrelation =
          M5StaticUtils.correlation(sampleDoubles, sampleDoubles, sampleDoubles.length);
      System.out.println("Correlation: " + selfCorrelation);
      System.out.println("Mean: " + M5StaticUtils.mean(sampleDoubles));
      System.out.println("Variance: " + M5StaticUtils.variance(sampleDoubles));
      System.out.println("Sum (doubles): " + M5StaticUtils.sum(sampleDoubles));
      System.out.println("Sum (ints): " + M5StaticUtils.sum(sampleInts));
      System.out.println("Max index (doubles): " + M5StaticUtils.maxIndex(sampleDoubles));
      System.out.println("Max index (ints): " + M5StaticUtils.maxIndex(sampleInts));
      System.out.println("Min index (doubles): " + M5StaticUtils.minIndex(sampleDoubles));
      System.out.println("Min index (ints): " + M5StaticUtils.minIndex(sampleInts));

      // ---- Sorting and normalizing ----
      System.out.println("Sorted array (doubles): ");
      int[] ranking = M5StaticUtils.sort(sampleDoubles);
      for (int rank = 0; rank < sampleDoubles.length; rank++) {
        System.out.print(sampleDoubles[ranking[rank]] + " ");
      }
      System.out.println();
      System.out.println("Normalized array (doubles): ");
      M5StaticUtils.normalize(sampleDoubles);
      for (double value : sampleDoubles) {
        System.out.print(value + " ");
      }
      System.out.println();
      System.out.println("Normalized again (doubles): ");
      M5StaticUtils.normalize(sampleDoubles, M5StaticUtils.sum(sampleDoubles));
      for (double value : sampleDoubles) {
        System.out.print(value + " ");
      }
      System.out.println();

      // ---- Pretty-printing ----
      System.out.println("-4.58: " + M5StaticUtils.doubleToString(-4.57826535, 2));
      System.out.println("-6.78: " + M5StaticUtils.doubleToString(-6.78214234, 6, 2));

      // ---- Comparisons ----
      System.out.println("5.70001 == 5.7 ? " + M5StaticUtils.eq(5.70001, 5.7));
      System.out.println("5.70001 > 5.7 ? " + M5StaticUtils.gr(5.70001, 5.7));
      System.out.println("5.70001 >= 5.7 ? " + M5StaticUtils.grOrEq(5.70001, 5.7));
      System.out.println("5.7 < 5.70001 ? " + M5StaticUtils.sm(5.7, 5.70001));
      System.out.println("5.7 <= 5.70001 ? " + M5StaticUtils.smOrEq(5.7, 5.70001));

      // ---- Math ----
      System.out.println("Info (ints): " + M5StaticUtils.info(sampleInts));
      System.out.println("log2(4.6): " + M5StaticUtils.log2(4.6));
      System.out.println("5 * log(5): " + M5StaticUtils.xlogx(5));
      System.out.println("5.5 rounded: " + M5StaticUtils.round(5.5));
      double roundedToTwoPlaces = M5StaticUtils.roundDouble(5.55555, 2);
      System.out.println("5.55555 rounded to 2 decimal places: " + roundedToTwoPlaces);
    } catch (Exception failure) {
      failure.printStackTrace();
    }
  }
Пример #9
0
  /**
   * Method for testing filters ability to process multiple batches.
   *
   * <p>The first input is pushed through the filter and written to the first output, then the
   * second input is pushed through the same filter and written to the second output. All files
   * opened here are closed before the method returns, also on failure. When an output defaults to
   * standard output it is only flushed, never closed, so that later output (including the second
   * batch) is not lost.
   *
   * @param filter the filter to run both batches through
   * @param options should contain the following arguments:<br>
   *     -i (first) input file <br>
   *     -o (first) output file <br>
   *     -r (second) input file <br>
   *     -s (second) output file <br>
   *     -c class_index <br>
   *     or -h for help on options
   * @exception Exception if something goes wrong or the user requests help on command options
   */
  public static void batchFilterFile(NominalToBinaryFilter filter, String[] options)
      throws Exception {

    M5Instances firstData = null;
    M5Instances secondData = null;
    Reader firstInput = null;
    Reader secondInput = null;
    PrintWriter firstOutput = null;
    PrintWriter secondOutput = null;
    // True when the writer wraps System.out; such a writer must be flushed, not closed.
    boolean firstToStdout = false;
    boolean secondToStdout = false;

    try {
      try {
        boolean helpRequest = M5StaticUtils.getFlag('h', options);

        String fileName = M5StaticUtils.getOption('i', options);
        if (fileName.length() != 0) {
          firstInput = new BufferedReader(new FileReader(fileName));
        } else {
          throw new Exception("No first input file given.\n");
        }

        fileName = M5StaticUtils.getOption('r', options);
        if (fileName.length() != 0) {
          secondInput = new BufferedReader(new FileReader(fileName));
        } else {
          throw new Exception("No second input file given.\n");
        }

        fileName = M5StaticUtils.getOption('o', options);
        if (fileName.length() != 0) {
          firstOutput = new PrintWriter(new FileOutputStream(fileName));
        } else {
          firstOutput = new PrintWriter(System.out);
          firstToStdout = true;
        }

        fileName = M5StaticUtils.getOption('s', options);
        if (fileName.length() != 0) {
          secondOutput = new PrintWriter(new FileOutputStream(fileName));
        } else {
          secondOutput = new PrintWriter(System.out);
          secondToStdout = true;
        }
        String classIndex = M5StaticUtils.getOption('c', options);

        if (filter != null) {
          filter.setOptions(options);
        }
        M5StaticUtils.checkForRemainingOptions(options);

        if (helpRequest) {
          throw new Exception("Help requested.\n");
        }
        firstData = new M5Instances(firstInput, 1);
        secondData = new M5Instances(secondInput, 1);
        if (!secondData.equalHeaders(firstData)) {
          throw new Exception("Input file formats differ.\n");
        }
        if (classIndex.length() != 0) {
          if (classIndex.equals("first")) {
            firstData.setClassIndex(0);
            secondData.setClassIndex(0);
          } else if (classIndex.equals("last")) {
            firstData.setClassIndex(firstData.numAttributes() - 1);
            secondData.setClassIndex(secondData.numAttributes() - 1);
          } else {
            // The option is 1-based, class indices are 0-based.
            firstData.setClassIndex(Integer.parseInt(classIndex) - 1);
            secondData.setClassIndex(Integer.parseInt(classIndex) - 1);
          }
        }
      } catch (Exception ex) {
        String filterOptions = "";
        // Output the error and also the valid options
        if (filter != null) {
          filterOptions += "\nFilter options:\n\n";
          Enumeration enume = filter.listOptions();
          while (enume.hasMoreElements()) {
            Information option = (Information) enume.nextElement();
            filterOptions += option.synopsis() + '\n' + option.description() + "\n";
          }
        }

        String genericOptions =
            "\nGeneral options:\n\n"
                + "-h\n"
                + "\tGet help on available options.\n"
                + "-i <filename>\n"
                + "\tThe file containing first input instances.\n"
                + "-o <filename>\n"
                + "\tThe file first output instances will be written to.\n"
                + "-r <filename>\n"
                + "\tThe file containing second input instances.\n"
                + "-s <filename>\n"
                + "\tThe file second output instances will be written to.\n"
                + "-c <class index>\n"
                + "\tThe number of the attribute to use as the class.\n"
                + "\t\"first\" and \"last\" are also valid entries.\n"
                + "\tIf not supplied then no class is assigned.\n";

        // Same message as before, but the original failure is kept as the cause.
        throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions, ex);
      }

      boolean printedHeader = false;
      if (filter.setInputFormat(firstData)) {
        firstOutput.println(filter.getOutputFormat().toString());
        printedHeader = true;
      }

      // Pass all the instances to the filter
      while (firstData.readInstance(firstInput)) {
        if (filter.input(firstData.instance(0))) {
          if (!printedHeader) {
            throw new Error("Filter didn't return true from setInputFormat() " + "earlier!");
          }
          firstOutput.println(filter.output().toString());
        }
        firstData.delete(0);
      }

      // Say that input has finished, and print any pending output instances
      if (filter.batchFinished()) {
        if (!printedHeader) {
          firstOutput.println(filter.getOutputFormat().toString());
        }
        while (filter.numPendingOutput() > 0) {
          firstOutput.println(filter.output().toString());
        }
      }

      // First batch is complete. Closing a writer around System.out would close stdout itself
      // and silently drop the second batch if it also goes to stdout, so only flush it.
      if (firstToStdout) {
        firstOutput.flush();
      } else {
        firstOutput.close();
      }

      printedHeader = false;
      if (filter.isOutputFormatDefined()) {
        secondOutput.println(filter.getOutputFormat().toString());
        printedHeader = true;
      }
      // Pass all the second instances to the filter
      while (secondData.readInstance(secondInput)) {
        if (filter.input(secondData.instance(0))) {
          if (!printedHeader) {
            throw new Error("Filter didn't return true from" + " isOutputFormatDefined() earlier!");
          }
          secondOutput.println(filter.output().toString());
        }
        secondData.delete(0);
      }

      // Say that input has finished, and print any pending output instances
      if (filter.batchFinished()) {
        if (!printedHeader) {
          secondOutput.println(filter.getOutputFormat().toString());
        }
        while (filter.numPendingOutput() > 0) {
          secondOutput.println(filter.output().toString());
        }
      }
    } finally {
      // Release everything that was opened, whether or not processing succeeded.
      // PrintWriter.close() is idempotent, so the first output may be closed twice safely.
      if (firstOutput != null) {
        if (firstToStdout) {
          firstOutput.flush();
        } else {
          firstOutput.close();
        }
      }
      if (secondOutput != null) {
        if (secondToStdout) {
          secondOutput.flush();
        } else {
          secondOutput.close();
        }
      }
      if (firstInput != null) {
        try {
          firstInput.close();
        } catch (Exception ignored) {
          // A failure while closing a reader must not mask the primary outcome.
        }
      }
      if (secondInput != null) {
        try {
          secondInput.close();
        } catch (Exception ignored) {
          // A failure while closing a reader must not mask the primary outcome.
        }
      }
    }
  }
Пример #10
0
  /**
   * Method for testing filters.
   *
   * <p>Reads instances from the input, passes them through the filter and prints the filter's
   * output format and output instances. Files opened here are closed before the method returns,
   * also on failure; standard input is left open and standard output is only flushed, so the
   * caller can keep using them.
   *
   * @param filter the filter to run the input through
   * @param options should contain the following arguments: <br>
   *     -i input_file <br>
   *     -o output_file <br>
   *     -c class_index <br>
   *     or -h for help on options. The flag -d additionally enables progress messages on
   *     standard error.
   * @exception Exception if something goes wrong or the user requests help on command options
   */
  public static void filterFile(NominalToBinaryFilter filter, String[] options) throws Exception {

    boolean debug = false;
    M5Instances data = null;
    Reader input = null;
    PrintWriter output = null;
    // Only file-backed streams are closed here; stdin/stdout belong to the caller.
    boolean inputFromFile = false;
    boolean outputToFile = false;

    try {
      try {
        boolean helpRequest = M5StaticUtils.getFlag('h', options);

        if (M5StaticUtils.getFlag('d', options)) {
          debug = true;
        }
        String infileName = M5StaticUtils.getOption('i', options);
        String outfileName = M5StaticUtils.getOption('o', options);
        String classIndex = M5StaticUtils.getOption('c', options);

        if (filter != null) {
          filter.setOptions(options);
        }

        M5StaticUtils.checkForRemainingOptions(options);
        if (helpRequest) {
          throw new Exception("Help requested.\n");
        }
        if (infileName.length() != 0) {
          input = new BufferedReader(new FileReader(infileName));
          inputFromFile = true;
        } else {
          input = new BufferedReader(new InputStreamReader(System.in));
        }
        if (outfileName.length() != 0) {
          output = new PrintWriter(new FileOutputStream(outfileName));
          outputToFile = true;
        } else {
          output = new PrintWriter(System.out);
        }

        data = new M5Instances(input, 1);
        if (classIndex.length() != 0) {
          if (classIndex.equals("first")) {
            data.setClassIndex(0);
          } else if (classIndex.equals("last")) {
            data.setClassIndex(data.numAttributes() - 1);
          } else {
            // The option is 1-based, class indices are 0-based.
            data.setClassIndex(Integer.parseInt(classIndex) - 1);
          }
        }
      } catch (Exception ex) {
        String filterOptions = "";
        // Output the error and also the valid options
        if (filter != null) {
          filterOptions += "\nFilter options:\n\n";
          Enumeration enuma = filter.listOptions();
          while (enuma.hasMoreElements()) {
            Information option = (Information) enuma.nextElement();
            filterOptions += option.synopsis() + '\n' + option.description() + "\n";
          }
        }

        String genericOptions =
            "\nGeneral options:\n\n"
                + "-h\n"
                + "\tGet help on available options.\n"
                + "\t(use -b -h for help on batch mode.)\n"
                + "-i <file>\n"
                + "\tThe name of the file containing input instances.\n"
                + "\tIf not supplied then instances will be read from stdin.\n"
                + "-o <file>\n"
                + "\tThe name of the file output instances will be written to.\n"
                + "\tIf not supplied then instances will be written to stdout.\n"
                + "-c <class index>\n"
                + "\tThe number of the attribute to use as the class.\n"
                + "\t\"first\" and \"last\" are also valid entries.\n"
                + "\tIf not supplied then no class is assigned.\n";

        // Same message as before, but the original failure is kept as the cause.
        throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions, ex);
      }

      if (debug) {
        System.err.println("Setting input format");
      }
      boolean printedHeader = false;
      if (filter.setInputFormat(data)) {
        if (debug) {
          System.err.println("Getting output format");
        }
        output.println(filter.getOutputFormat().toString());
        printedHeader = true;
      }

      // Pass all the instances to the filter
      while (data.readInstance(input)) {
        if (debug) {
          System.err.println("Input instance to filter");
        }
        if (filter.input(data.instance(0))) {
          if (debug) {
            System.err.println("Filter said collect immediately");
          }
          if (!printedHeader) {
            throw new Error("Filter didn't return true from setInputFormat() " + "earlier!");
          }
          if (debug) {
            System.err.println("Getting output instance");
          }
          output.println(filter.output().toString());
        }
        data.delete(0);
      }

      // Say that input has finished, and print any pending output instances
      if (debug) {
        System.err.println("Setting end of batch");
      }
      if (filter.batchFinished()) {
        if (debug) {
          System.err.println("Filter said collect output");
        }
        if (!printedHeader) {
          if (debug) {
            System.err.println("Getting output format");
          }
          output.println(filter.getOutputFormat().toString());
        }
        if (debug) {
          System.err.println("Getting output instance");
        }
        while (filter.numPendingOutput() > 0) {
          output.println(filter.output().toString());
          if (debug) {
            System.err.println("Getting output instance");
          }
        }
      }
      if (debug) {
        System.err.println("Done");
      }
    } finally {
      if (output != null) {
        if (outputToFile) {
          output.close();
        } else {
          // Closing a writer around System.out would close stdout for the whole program.
          output.flush();
        }
      }
      if (inputFromFile && (input != null)) {
        try {
          input.close();
        } catch (Exception ignored) {
          // A failure while closing the reader must not mask the primary outcome.
        }
      }
    }
  }