Exemple #1
0
  /**
   * Prepares the fields used by this class: for each of {@code testIns}, {@code labeledIns} and
   * {@code unlabeledIns} the last attribute is made the class attribute, and the three classifier
   * fields are stored in slots 0 to 2 of {@code class_Array}.
   */
  private void Init() {
    // The class attribute is always the last column of each data set.
    final int testClassPos = testIns.numAttributes() - 1;
    testIns.setClassIndex(testClassPos);

    final int labeledClassPos = labeledIns.numAttributes() - 1;
    labeledIns.setClassIndex(labeledClassPos);

    final int unlabeledClassPos = unlabeledIns.numAttributes() - 1;
    unlabeledIns.setClassIndex(unlabeledClassPos);

    // Collect the classifiers so they can be addressed by position.
    class_Array[0] = classifier1;
    class_Array[1] = classifier2;
    class_Array[2] = classifier3;
  }
Exemple #2
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -i &lt;the input file&gt;
   * The input file</pre>
   *
   * <pre> -o &lt;the output file&gt;
   * The output file</pre>
   *
   * <pre> -c &lt;the class index&gt;
   * The class index</pre>
   *
   * <!-- options-end -->
   *
   * <p>The class index may be given as {@code first}, {@code last} or as an integer that is
   * passed unchanged to {@code Instances.setClassIndex}; when the option is absent (or parses to
   * -1) the last attribute is used. If the output file name does not end with this saver's file
   * extension, the extension is appended, replacing everything after the last '.' if there is one.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported; in particular an {@link IOException} if no
   *     input file is given, the input file cannot be loaded, or the output file cannot be set,
   *     and a {@link NumberFormatException} if the class index is neither "first", "last" nor an
   *     integer
   */
  public void setOptions(String[] options) throws Exception {

    String outputString = Utils.getOption('o', options);
    String inputString = Utils.getOption('i', options);
    String indexString = Utils.getOption('c', options);

    ArffLoader loader = new ArffLoader();

    resetOptions();

    // parse index; -1 is the marker for "use the last attribute"
    int index = -1;
    if (indexString.length() != 0) {
      if (indexString.equals("first")) {
        index = 0;
      } else if (!indexString.equals("last")) {
        index = Integer.parseInt(indexString);
      }
    }

    if (inputString.length() == 0) {
      throw new IOException("No data set to save.");
    }
    try {
      File input = new File(inputString);
      loader.setFile(input);
      Instances inst = loader.getDataSet();
      if (index == -1) {
        inst.setClassIndex(inst.numAttributes() - 1);
      } else {
        inst.setClassIndex(index);
      }
      setInstances(inst);
    } catch (Exception ex) {
      // keep the original exception as cause so the stack trace is not lost
      throw new IOException(
          "No data set loaded. Data set has to be arff format (Reason: " + ex.toString() + ").",
          ex);
    }

    if (outputString.length() != 0) {
      // add appropriate file extension
      if (!outputString.endsWith(getFileExtension())) {
        int lastDot = outputString.lastIndexOf('.');
        if (lastDot != -1) {
          outputString = outputString.substring(0, lastDot) + getFileExtension();
        } else {
          outputString = outputString + getFileExtension();
        }
      }
      try {
        File output = new File(outputString);
        setFile(output);
      } catch (Exception ex) {
        // keep the original exception as cause; the message alone gives no reason
        throw new IOException("Cannot create output file.", ex);
      }
    }

    // apply the class index to whatever data set the saver now holds
    if (index == -1) {
      index = getInstances().numAttributes() - 1;
    }
    getInstances().setClassIndex(index);
  }
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this
   * method will be called from batchFinished().
   *
   * <p>Attributes that are outside the selected column range or are not numeric are copied
   * unchanged. Every other attribute is replaced by an attribute of the same name whose value
   * list consists of the distinct, non-missing values found in {@code inputFormat}, in ascending
   * order. Date attributes contribute their string representation (and are therefore ordered as
   * strings); all other numeric attributes are ordered numerically and formatted with
   * {@code Utils.doubleToString(value, MAX_DECIMALS)}.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format, an empty data set with the relation name and class index of
   *     {@code inputFormat}
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    m_Cols.setUpper(inputFormat.numAttributes() - 1);
    Instances data = new Instances(inputFormat);
    FastVector atts = new FastVector();
    for (int i = 0; i < data.numAttributes(); i++) {
      Attribute att = data.attribute(i);
      if (!m_Cols.isInRange(i) || !att.isNumeric()) {
        atts.addElement(att);
        continue;
      }

      // collect the distinct values of this attribute, sort them, and turn them into labels
      FastVector values = new FastVector();
      if (att.type() == Attribute.DATE) {
        // date attribute: work on the string representation of each value
        HashSet<String> distinct = new HashSet<String>();
        for (int n = 0; n < data.numInstances(); n++) {
          Instance inst = data.instance(n);
          if (!inst.isMissing(i)) {
            distinct.add(inst.stringValue(i));
          }
        }
        Vector<String> sorted = new Vector<String>(distinct);
        Collections.sort(sorted);
        for (String label : sorted) {
          values.addElement(label);
        }
      } else {
        HashSet<Double> distinct = new HashSet<Double>();
        for (int n = 0; n < data.numInstances(); n++) {
          Instance inst = data.instance(n);
          if (!inst.isMissing(i)) {
            distinct.add(Double.valueOf(inst.value(i)));
          }
        }
        Vector<Double> sorted = new Vector<Double>(distinct);
        Collections.sort(sorted);
        for (Double value : sorted) {
          values.addElement(Utils.doubleToString(value.doubleValue(), MAX_DECIMALS));
        }
      }

      // create attribute from sorted values
      atts.addElement(new Attribute(att.name(), values));
    }

    Instances result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());

    return result;
  }
  /**
   * Main method for testing this class. Builds a small data set "race" with two numeric
   * attributes and one nominal class attribute, creates a {@code SparseInstance} for it, and
   * prints the result of a series of operations on that instance and on a copy of it to standard
   * out. Any exception is caught and its stack trace printed.
   *
   * @param options the command line arguments; they are not read by this method
   */
  public static void main(String[] options) {

    try {

      // Create numeric attributes "length" and "weight"
      Attribute length = new Attribute("length");
      Attribute weight = new Attribute("weight");

      // Create vector to hold nominal values "first", "second", "third"
      ArrayList<String> my_nominal_values = new ArrayList<String>(3);
      my_nominal_values.add("first");
      my_nominal_values.add("second");
      my_nominal_values.add("third");

      // Create nominal attribute "position"
      Attribute position = new Attribute("position", my_nominal_values);

      // Create vector of the above attributes
      ArrayList<Attribute> attributes = new ArrayList<Attribute>(3);
      attributes.add(length);
      attributes.add(weight);
      attributes.add(position);

      // Create the empty dataset "race" with above attributes
      Instances race = new Instances("race", attributes, 0);

      // Make position the class attribute
      race.setClassIndex(position.index());

      // Create empty instance with three attribute values
      SparseInstance inst = new SparseInstance(3);

      // Set instance's values for the attributes "length", "weight", and "position"
      inst.setValue(length, 5.3);
      inst.setValue(weight, 300);
      inst.setValue(position, "first");

      // Set instance's dataset to be the dataset "race"
      inst.setDataset(race);

      // Print the instance
      System.out.println("The instance: " + inst);

      // Print the first attribute
      System.out.println("First attribute: " + inst.attribute(0));

      // Print the class attribute
      System.out.println("Class attribute: " + inst.classAttribute());

      // Print the class index
      System.out.println("Class index: " + inst.classIndex());

      // Say if class is missing
      System.out.println("Class is missing: " + inst.classIsMissing());

      // Print the instance's class value in internal format
      System.out.println("Class value (internal format): " + inst.classValue());

      // Print a shallow copy of this instance
      SparseInstance copy = (SparseInstance) inst.copy();
      System.out.println("Shallow copy: " + copy);

      // Set dataset for shallow copy
      copy.setDataset(inst.dataset());
      System.out.println("Shallow copy with dataset set: " + copy);

      // Print out all values in internal format
      System.out.print("All stored values in internal format: ");
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to zero
      // NOTE(review): this loop only terminates if setting a stored value to 0 reduces
      // numValues() -- presumably SparseInstance drops zero entries; confirm.
      System.out.print("All values set to zero: ");
      while (inst.numValues() > 0) {
        inst.setValueSparse(0, 0);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Set all values to one
      System.out.print("All values set to one: ");
      for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, 1);
      }
      for (int i = 0; i < inst.numValues(); i++) {
        if (i > 0) {
          System.out.print(",");
        }
        System.out.print(inst.valueSparse(i));
      }
      System.out.println();

      // Unset dataset for copy, delete first attribute, and insert it again
      copy.setDataset(null);
      copy.deleteAttributeAt(0);
      copy.insertAttributeAt(0);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with first attribute deleted and inserted: " + copy);

      // Same for second attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(1);
      copy.insertAttributeAt(1);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with second attribute deleted and inserted: " + copy);

      // Same for last attribute
      copy.setDataset(null);
      copy.deleteAttributeAt(2);
      copy.insertAttributeAt(2);
      copy.setDataset(inst.dataset());
      System.out.println("Copy with third attribute deleted and inserted: " + copy);

      // Enumerate attributes (leaving out the class attribute)
      System.out.println("Enumerating attributes (leaving out class):");
      Enumeration enu = inst.enumerateAttributes();
      while (enu.hasMoreElements()) {
        Attribute att = (Attribute) enu.nextElement();
        System.out.println(att);
      }

      // Headers are equivalent?
      System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

      // Test for missing values: by attribute, by attribute index, and via the raw value
      // (the third line checks "length" again, this time through Utils.isMissingValue)
      System.out.println("Length of copy missing: " + copy.isMissing(length));
      System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
      System.out.println("Length of copy missing: " + Utils.isMissingValue(copy.value(length)));

      // Prints number of attributes and classes
      System.out.println("Number of attributes: " + copy.numAttributes());
      System.out.println("Number of classes: " + copy.numClasses());

      // Replace missing values, passing one replacement value per attribute
      double[] meansAndModes = {2, 3, 0};
      copy.replaceMissingValues(meansAndModes);
      System.out.println("Copy with missing value replaced: " + copy);

      // Setting and getting values and weights
      copy.setClassMissing();
      System.out.println("Copy with missing class: " + copy);
      copy.setClassValue(0);
      System.out.println("Copy with class value set to first value: " + copy);
      copy.setClassValue("third");
      System.out.println("Copy with class value set to \"third\": " + copy);
      copy.setMissing(1);
      System.out.println("Copy with second attribute set to be missing: " + copy);
      copy.setMissing(length);
      System.out.println("Copy with length set to be missing: " + copy);
      copy.setValue(0, 0);
      System.out.println("Copy with first attribute set to 0: " + copy);
      copy.setValue(weight, 1);
      System.out.println("Copy with weight attribute set to 1: " + copy);
      copy.setValue(position, "second");
      System.out.println("Copy with position set to \"second\": " + copy);
      copy.setValue(2, "first");
      System.out.println("Copy with last attribute set to \"first\": " + copy);
      System.out.println("Current weight of instance copy: " + copy.weight());
      copy.setWeight(2);
      System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
      System.out.println("Last value of copy: " + copy.toString(2));
      System.out.println("Value of position for copy: " + copy.toString(position));
      System.out.println("Last value of copy (internal format): " + copy.value(2));
      System.out.println("Value of position for copy (internal format): " + copy.value(position));
    } catch (Exception e) {
      // This is a demo entry point: report the failure and fall through to exit
      e.printStackTrace();
    }
  }
Exemple #5
0
  /**
   * Writes a batch of instances in C4.5 format: first the names file (class labels followed by
   * one line per non-class attribute), then the data file (one comma-separated line per instance
   * with the class value last). If no class index is set, the last attribute is used as class
   * and a notice is printed to standard error.
   *
   * @throws IOException if saving in batch mode is not possible: there are no instances, the
   *     class attribute is numeric, incremental saving is already in progress, no output file is
   *     set, or the data file cannot be created
   */
  public void writeBatch() throws IOException {

    Instances instances = getInstances();

    if (instances == null) {
      throw new IOException("No instances to save");
    }
    if (instances.classIndex() == -1) {
      instances.setClassIndex(instances.numAttributes() - 1);
      System.err.println("No class specified. Last attribute is used as class attribute.");
    }
    // the class index is fixed from here on, so look it up once
    final int classIndex = instances.classIndex();
    final Attribute classAttribute = instances.attribute(classIndex);
    if (classAttribute.isNumeric()) {
      throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    }
    if (getRetrieval() == INCREMENTAL) {
      throw new IOException("Batch and incremental saving cannot be mixed.");
    }

    setRetrieval(BATCH);
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }
    setWriteMode(WRITE);

    // print names file: class labels first ...
    setFileExtension(".names");
    PrintWriter outW = new PrintWriter(getWriter());
    final int numClassValues = classAttribute.numValues();
    for (int i = 0; i < numClassValues; i++) {
      outW.write(classAttribute.value(i));
      outW.write(i < numClassValues - 1 ? "," : ".\n");
    }
    // ... then one "name: type-or-values." line per non-class attribute
    for (int i = 0; i < instances.numAttributes(); i++) {
      if (i == classIndex) {
        continue;
      }
      Attribute att = instances.attribute(i);
      outW.write(att.name() + ": ");
      if (att.isNumeric() || att.isDate()) {
        outW.write("continuous.\n");
      } else {
        final int numValues = att.numValues();
        for (int j = 0; j < numValues; j++) {
          outW.write(att.value(j));
          outW.write(j < numValues - 1 ? "," : ".\n");
        }
      }
    }
    outW.flush();
    outW.close();

    // switch the output to the data file: same path, extension replaced by ".data"
    String out = retrieveFile().getAbsolutePath();
    setFileExtension(".data");
    out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
    File dataFile = new File(out);
    try {
      setFile(dataFile);
    } catch (Exception ex) {
      // keep the original exception as cause so the stack trace is not lost
      throw new IOException(
          "Cannot create data file, only names file created (Reason: " + ex.toString() + ").", ex);
    }
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException("Cannot create data file, only names file created.");
    }
    outW = new PrintWriter(getWriter());

    // print data file
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance row = instances.instance(i);
      for (int j = 0; j < row.numAttributes(); j++) {
        if (j == classIndex) {
          continue;
        }
        if (row.isMissing(j)) {
          outW.write("?,");
        } else if (instances.attribute(j).isNominal() || instances.attribute(j).isString()) {
          // the stored value is the index of the label
          outW.write(instances.attribute(j).value((int) row.value(j)) + ",");
        } else {
          outW.write("" + row.value(j) + ",");
        }
      }
      // write the class value
      if (row.isMissing(classIndex)) {
        outW.write("?");
      } else {
        outW.write(classAttribute.value((int) row.value(classIndex)));
      }
      outW.write("\n");
    }
    outW.flush();
    outW.close();

    // restore the default extension and reset the saver's writer state
    setFileExtension(".names");
    setWriteMode(WAIT);
    resetWriter();
    setWriteMode(CANCEL);
  }
Exemple #6
0
  /**
   * Saves an instance incrementally. Structure has to be set by using the setStructure() method
   * or setInstances() method.
   *
   * <p>On the first call with a structure available, the names file is written and the output is
   * switched to the data file; each non-null instance is then appended as one line of the data
   * file, and the writer is flushed once the internal counter exceeds 100 instances. Passing
   * {@code null} while in write mode flushes and closes the writer and resets the saver's
   * structure and writer.
   *
   * @param inst the instance to save, or {@code null} to finish writing
   * @throws IOException if an instance cannot be saved incrementally: the class attribute is
   *     numeric, the saver is not in incremental mode, no output file is set, the data file
   *     cannot be created, or no structure information is available
   */
  public void writeIncremental(Instance inst) throws IOException {

    int writeMode = getWriteMode();
    Instances structure = getInstances();

    if (structure != null) {
      if (structure.classIndex() == -1) {
        structure.setClassIndex(structure.numAttributes() - 1);
        System.err.println("No class specified. Last attribute is used as class attribute.");
      }
      if (structure.attribute(structure.classIndex()).isNumeric()) {
        throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
      }
    }
    if (getRetrieval() == BATCH || getRetrieval() == NONE) {
      throw new IOException("Batch and incremental saving cannot be mixed.");
    }
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }

    PrintWriter outW = new PrintWriter(getWriter());

    if (writeMode == WAIT) {
      if (structure == null) {
        setWriteMode(CANCEL);
        if (inst != null) {
          System.err.println("Structure(Header Information) has to be set in advance");
        }
      } else {
        setWriteMode(STRUCTURE_READY);
      }
      writeMode = getWriteMode();
    }
    if (writeMode == CANCEL) {
      outW.close();
      cancel();
    }
    if (writeMode == STRUCTURE_READY) {
      setWriteMode(WRITE);
      // write header: here names file -- class labels first ...
      final int classIndex = structure.classIndex();
      final Attribute classAttribute = structure.attribute(classIndex);
      final int numClassValues = classAttribute.numValues();
      for (int i = 0; i < numClassValues; i++) {
        outW.write(classAttribute.value(i));
        outW.write(i < numClassValues - 1 ? "," : ".\n");
      }
      // ... then one "name: type-or-values." line per non-class attribute
      for (int i = 0; i < structure.numAttributes(); i++) {
        if (i == classIndex) {
          continue;
        }
        Attribute att = structure.attribute(i);
        outW.write(att.name() + ": ");
        if (att.isNumeric() || att.isDate()) {
          outW.write("continuous.\n");
        } else {
          final int numValues = att.numValues();
          for (int j = 0; j < numValues; j++) {
            outW.write(att.value(j));
            outW.write(j < numValues - 1 ? "," : ".\n");
          }
        }
      }
      outW.flush();
      outW.close();

      writeMode = getWriteMode();

      // switch the output to the data file: same path, extension replaced by ".data"
      String out = retrieveFile().getAbsolutePath();
      setFileExtension(".data");
      out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
      File dataFile = new File(out);
      try {
        setFile(dataFile);
      } catch (Exception ex) {
        // report the reason (as writeBatch does) and keep the original exception as cause
        throw new IOException(
            "Cannot create data file, only names file created (Reason: " + ex.toString() + ").",
            ex);
      }
      if (retrieveFile() == null || getWriter() == null) {
        throw new IOException("Cannot create data file, only names file created.");
      }
      outW = new PrintWriter(getWriter());
    }
    if (writeMode == WRITE) {
      if (structure == null) {
        throw new IOException("No instances information available.");
      }
      if (inst != null) {
        // write instance: here data file
        final int classIndex = structure.classIndex();
        for (int j = 0; j < inst.numAttributes(); j++) {
          if (j == classIndex) {
            continue;
          }
          if (inst.isMissing(j)) {
            outW.write("?,");
          } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) {
            // the stored value is the index of the label
            outW.write(structure.attribute(j).value((int) inst.value(j)) + ",");
          } else {
            outW.write("" + inst.value(j) + ",");
          }
        }
        // write the class value
        if (inst.isMissing(classIndex)) {
          outW.write("?");
        } else {
          outW.write(structure.attribute(classIndex).value((int) inst.value(classIndex)));
        }
        outW.write("\n");
        // flushes every 100 instances
        m_incrementalCounter++;
        if (m_incrementalCounter > 100) {
          m_incrementalCounter = 0;
          outW.flush();
        }
      } else {
        // close: a null instance signals the end of the incremental run
        outW.flush();
        outW.close();
        setFileExtension(".names");
        m_incrementalCounter = 0;
        resetStructure();
        resetWriter();
      }
    }
  }