/**
 * Prepares the data sets and the classifier array.
 *
 * <p>Marks the last attribute of the test, labeled and unlabeled data sets as their class
 * attribute, then stores the three classifiers in slots 0 to 2 of {@code class_Array}.
 */
private void Init() {
  // The class attribute is always the last column of each data set.
  final int testClassIndex = testIns.numAttributes() - 1;
  testIns.setClassIndex(testClassIndex);

  final int labeledClassIndex = labeledIns.numAttributes() - 1;
  labeledIns.setClassIndex(labeledClassIndex);

  final int unlabeledClassIndex = unlabeledIns.numAttributes() - 1;
  unlabeledIns.setClassIndex(unlabeledClassIndex);

  class_Array[0] = classifier1;
  class_Array[1] = classifier2;
  class_Array[2] = classifier3;
}
/** * Parses a given list of options. * * <p> * <!-- options-start --> * Valid options are: * * <p> * * <pre> -i <the input file> * The input file</pre> * * <pre> -o <the output file> * The output file</pre> * * <pre> -c <the class index> * The class index</pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String outputString = Utils.getOption('o', options); String inputString = Utils.getOption('i', options); String indexString = Utils.getOption('c', options); ArffLoader loader = new ArffLoader(); resetOptions(); // parse index int index = -1; if (indexString.length() != 0) { if (indexString.equals("first")) index = 0; else { if (indexString.equals("last")) index = -1; else index = Integer.parseInt(indexString); } } if (inputString.length() != 0) { try { File input = new File(inputString); loader.setFile(input); Instances inst = loader.getDataSet(); if (index == -1) inst.setClassIndex(inst.numAttributes() - 1); else inst.setClassIndex(index); setInstances(inst); } catch (Exception ex) { throw new IOException( "No data set loaded. Data set has to be arff format (Reason: " + ex.toString() + ")."); } } else throw new IOException("No data set to save."); if (outputString.length() != 0) { // add appropriate file extension if (!outputString.endsWith(getFileExtension())) { if (outputString.lastIndexOf('.') != -1) outputString = (outputString.substring(0, outputString.lastIndexOf('.'))) + getFileExtension(); else outputString = outputString + getFileExtension(); } try { File output = new File(outputString); setFile(output); } catch (Exception ex) { throw new IOException("Cannot create output file."); } } if (index == -1) index = getInstances().numAttributes() - 1; getInstances().setClassIndex(index); }
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this * method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances data; Instances result; FastVector atts; FastVector values; HashSet hash; int i; int n; boolean isDate; Instance inst; Vector sorted; m_Cols.setUpper(inputFormat.numAttributes() - 1); data = new Instances(inputFormat); atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) { atts.addElement(data.attribute(i)); continue; } // date attribute? isDate = (data.attribute(i).type() == Attribute.DATE); // determine all available attribtues in dataset hash = new HashSet(); for (n = 0; n < data.numInstances(); n++) { inst = data.instance(n); if (inst.isMissing(i)) continue; if (isDate) hash.add(inst.stringValue(i)); else hash.add(new Double(inst.value(i))); } // sort values sorted = new Vector(); for (Object o : hash) sorted.add(o); Collections.sort(sorted); // create attribute from sorted values values = new FastVector(); for (Object o : sorted) { if (isDate) values.addElement(o.toString()); else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS)); } atts.addElement(new Attribute(data.attribute(i).name(), values)); } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/**
 * Main method for testing this class.
 *
 * <p>Builds a small dataset named "race" with two numeric attributes and one nominal class
 * attribute, creates a sparse instance for it and prints the results of a series of operations
 * on that instance and on a copy of it to standard output. Any exception is caught and its stack
 * trace is printed.
 *
 * @param options not read by this method
 */
public static void main(String[] options) {
  try {
    // Numeric attributes "length" and "weight"
    Attribute length = new Attribute("length");
    Attribute weight = new Attribute("weight");

    // Nominal attribute "position" with the labels "first", "second", "third"
    ArrayList<String> positionLabels = new ArrayList<String>(3);
    positionLabels.add("first");
    positionLabels.add("second");
    positionLabels.add("third");
    Attribute position = new Attribute("position", positionLabels);

    // Empty dataset "race" built from the three attributes, with position as class
    ArrayList<Attribute> header = new ArrayList<Attribute>(3);
    header.add(length);
    header.add(weight);
    header.add(position);
    Instances race = new Instances("race", header, 0);
    race.setClassIndex(position.index());

    // Instance with three attribute values, attached to "race"
    SparseInstance inst = new SparseInstance(3);
    inst.setValue(length, 5.3);
    inst.setValue(weight, 300);
    inst.setValue(position, "first");
    inst.setDataset(race);

    // Basic information about the instance and its class
    System.out.println("The instance: " + inst);
    System.out.println("First attribute: " + inst.attribute(0));
    System.out.println("Class attribute: " + inst.classAttribute());
    System.out.println("Class index: " + inst.classIndex());
    System.out.println("Class is missing: " + inst.classIsMissing());
    System.out.println("Class value (internal format): " + inst.classValue());

    // Shallow copy, first without and then with a dataset
    SparseInstance copy = (SparseInstance) inst.copy();
    System.out.println("Shallow copy: " + copy);
    copy.setDataset(inst.dataset());
    System.out.println("Shallow copy with dataset set: " + copy);

    // All stored values in internal format
    System.out.print("All stored values in internal format: ");
    printSparseValues(inst);

    // Set all values to zero: keep zeroing the first stored value until none is left
    System.out.print("All values set to zero: ");
    while (inst.numValues() > 0) {
      inst.setValueSparse(0, 0);
    }
    printSparseValues(inst);

    // Set all values to one
    System.out.print("All values set to one: ");
    for (int attIndex = 0; attIndex < inst.numAttributes(); attIndex++) {
      inst.setValue(attIndex, 1);
    }
    printSparseValues(inst);

    // Delete and re-insert the first, second and last attribute of the copy in turn
    deleteAndReinsertAttribute(copy, inst, 0);
    System.out.println("Copy with first attribute deleted and inserted: " + copy);
    deleteAndReinsertAttribute(copy, inst, 1);
    System.out.println("Copy with second attribute deleted and inserted: " + copy);
    deleteAndReinsertAttribute(copy, inst, 2);
    System.out.println("Copy with third attribute deleted and inserted: " + copy);

    // Enumerate attributes (leaving out the class attribute)
    System.out.println("Enumerating attributes (leaving out class):");
    for (Enumeration enu = inst.enumerateAttributes(); enu.hasMoreElements(); ) {
      Attribute att = (Attribute) enu.nextElement();
      System.out.println(att);
    }

    // Headers are equivalent?
    System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

    // Test for missing values
    System.out.println("Length of copy missing: " + copy.isMissing(length));
    System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
    System.out.println("Length of copy missing: " + Utils.isMissingValue(copy.value(length)));

    // Number of attributes and classes
    System.out.println("Number of attributes: " + copy.numAttributes());
    System.out.println("Number of classes: " + copy.numClasses());

    // Replace missing values
    double[] meansAndModes = {2, 3, 0};
    copy.replaceMissingValues(meansAndModes);
    System.out.println("Copy with missing value replaced: " + copy);

    // Setting and getting class values
    copy.setClassMissing();
    System.out.println("Copy with missing class: " + copy);
    copy.setClassValue(0);
    System.out.println("Copy with class value set to first value: " + copy);
    copy.setClassValue("third");
    System.out.println("Copy with class value set to \"third\": " + copy);

    // Setting attribute values, by index and by attribute
    copy.setMissing(1);
    System.out.println("Copy with second attribute set to be missing: " + copy);
    copy.setMissing(length);
    System.out.println("Copy with length set to be missing: " + copy);
    copy.setValue(0, 0);
    System.out.println("Copy with first attribute set to 0: " + copy);
    copy.setValue(weight, 1);
    System.out.println("Copy with weight attribute set to 1: " + copy);
    copy.setValue(position, "second");
    System.out.println("Copy with position set to \"second\": " + copy);
    copy.setValue(2, "first");
    System.out.println("Copy with last attribute set to \"first\": " + copy);

    // Instance weight
    System.out.println("Current weight of instance copy: " + copy.weight());
    copy.setWeight(2);
    System.out.println("Current weight of instance copy (set to 2): " + copy.weight());

    // Reading single values, as text and in internal format
    System.out.println("Last value of copy: " + copy.toString(2));
    System.out.println("Value of position for copy: " + copy.toString(position));
    System.out.println("Last value of copy (internal format): " + copy.value(2));
    System.out.println("Value of position for copy (internal format): " + copy.value(position));
  } catch (Exception e) {
    e.printStackTrace();
  }
}

/** Prints the stored values of {@code instance}, comma-separated, followed by a line break. */
private static void printSparseValues(SparseInstance instance) {
  String separator = "";
  for (int slot = 0; slot < instance.numValues(); slot++) {
    System.out.print(separator);
    System.out.print(instance.valueSparse(slot));
    separator = ",";
  }
  System.out.println();
}

/**
 * Detaches {@code target} from its dataset, deletes and re-inserts the attribute at {@code
 * position}, then attaches {@code target} to the dataset of {@code source}.
 */
private static void deleteAndReinsertAttribute(
    SparseInstance target, SparseInstance source, int position) {
  target.setDataset(null);
  target.deleteAttributeAt(position);
  target.insertAttributeAt(position);
  target.setDataset(source.dataset());
}
/** * Writes a Batch of instances * * @throws IOException throws IOException if saving in batch mode is not possible */ public void writeBatch() throws IOException { Instances instances = getInstances(); if (instances == null) throw new IOException("No instances to save"); if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); System.err.println("No class specified. Last attribute is used as class attribute."); } if (instances.attribute(instances.classIndex()).isNumeric()) throw new IOException("To save in C4.5 format the class attribute cannot be numeric."); if (getRetrieval() == INCREMENTAL) throw new IOException("Batch and incremental saving cannot be mixed."); setRetrieval(BATCH); if (retrieveFile() == null || getWriter() == null) { throw new IOException( "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option."); } setWriteMode(WRITE); // print names file setFileExtension(".names"); PrintWriter outW = new PrintWriter(getWriter()); for (int i = 0; i < instances.attribute(instances.classIndex()).numValues(); i++) { outW.write(instances.attribute(instances.classIndex()).value(i)); if (i < instances.attribute(instances.classIndex()).numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } for (int i = 0; i < instances.numAttributes(); i++) { if (i != instances.classIndex()) { outW.write(instances.attribute(i).name() + ": "); if (instances.attribute(i).isNumeric() || instances.attribute(i).isDate()) { outW.write("continuous.\n"); } else { Attribute temp = instances.attribute(i); for (int j = 0; j < temp.numValues(); j++) { outW.write(temp.value(j)); if (j < temp.numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } } } } outW.flush(); outW.close(); // print data file String out = retrieveFile().getAbsolutePath(); setFileExtension(".data"); out = out.substring(0, out.lastIndexOf('.')) + getFileExtension(); File namesFile = 
new File(out); try { setFile(namesFile); } catch (Exception ex) { throw new IOException( "Cannot create data file, only names file created (Reason: " + ex.toString() + ")."); } if (retrieveFile() == null || getWriter() == null) { throw new IOException("Cannot create data file, only names file created."); } outW = new PrintWriter(getWriter()); // print data file for (int i = 0; i < instances.numInstances(); i++) { Instance temp = instances.instance(i); for (int j = 0; j < temp.numAttributes(); j++) { if (j != instances.classIndex()) { if (temp.isMissing(j)) { outW.write("?,"); } else if (instances.attribute(j).isNominal() || instances.attribute(j).isString()) { outW.write(instances.attribute(j).value((int) temp.value(j)) + ","); } else { outW.write("" + temp.value(j) + ","); } } } // write the class value if (temp.isMissing(instances.classIndex())) { outW.write("?"); } else { outW.write( instances .attribute(instances.classIndex()) .value((int) temp.value(instances.classIndex()))); } outW.write("\n"); } outW.flush(); outW.close(); setFileExtension(".names"); setWriteMode(WAIT); outW = null; resetWriter(); setWriteMode(CANCEL); }
/** * Saves an instances incrementally. Structure has to be set by using the setStructure() method or * setInstances() method. * * @param inst the instance to save * @throws IOException throws IOEXception if an instance cannot be saved incrementally. */ public void writeIncremental(Instance inst) throws IOException { int writeMode = getWriteMode(); Instances structure = getInstances(); PrintWriter outW = null; if (structure != null) { if (structure.classIndex() == -1) { structure.setClassIndex(structure.numAttributes() - 1); System.err.println("No class specified. Last attribute is used as class attribute."); } if (structure.attribute(structure.classIndex()).isNumeric()) throw new IOException("To save in C4.5 format the class attribute cannot be numeric."); } if (getRetrieval() == BATCH || getRetrieval() == NONE) throw new IOException("Batch and incremental saving cannot be mixed."); if (retrieveFile() == null || getWriter() == null) { throw new IOException( "C4.5 format requires two files. 
Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option."); } outW = new PrintWriter(getWriter()); if (writeMode == WAIT) { if (structure == null) { setWriteMode(CANCEL); if (inst != null) System.err.println("Structure(Header Information) has to be set in advance"); } else setWriteMode(STRUCTURE_READY); writeMode = getWriteMode(); } if (writeMode == CANCEL) { if (outW != null) outW.close(); cancel(); } if (writeMode == STRUCTURE_READY) { setWriteMode(WRITE); // write header: here names file for (int i = 0; i < structure.attribute(structure.classIndex()).numValues(); i++) { outW.write(structure.attribute(structure.classIndex()).value(i)); if (i < structure.attribute(structure.classIndex()).numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } for (int i = 0; i < structure.numAttributes(); i++) { if (i != structure.classIndex()) { outW.write(structure.attribute(i).name() + ": "); if (structure.attribute(i).isNumeric() || structure.attribute(i).isDate()) { outW.write("continuous.\n"); } else { Attribute temp = structure.attribute(i); for (int j = 0; j < temp.numValues(); j++) { outW.write(temp.value(j)); if (j < temp.numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } } } } outW.flush(); outW.close(); writeMode = getWriteMode(); String out = retrieveFile().getAbsolutePath(); setFileExtension(".data"); out = out.substring(0, out.lastIndexOf('.')) + getFileExtension(); File namesFile = new File(out); try { setFile(namesFile); } catch (Exception ex) { throw new IOException("Cannot create data file, only names file created."); } if (retrieveFile() == null || getWriter() == null) { throw new IOException("Cannot create data file, only names file created."); } outW = new PrintWriter(getWriter()); } if (writeMode == WRITE) { if (structure == null) throw new IOException("No instances information available."); if (inst != null) { // write instance: here data file for (int j = 0; j < 
inst.numAttributes(); j++) { if (j != structure.classIndex()) { if (inst.isMissing(j)) { outW.write("?,"); } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) { outW.write(structure.attribute(j).value((int) inst.value(j)) + ","); } else { outW.write("" + inst.value(j) + ","); } } } // write the class value if (inst.isMissing(structure.classIndex())) { outW.write("?"); } else { outW.write( structure .attribute(structure.classIndex()) .value((int) inst.value(structure.classIndex()))); } outW.write("\n"); // flushes every 100 instances m_incrementalCounter++; if (m_incrementalCounter > 100) { m_incrementalCounter = 0; outW.flush(); } } else { // close if (outW != null) { outW.flush(); outW.close(); } setFileExtension(".names"); m_incrementalCounter = 0; resetStructure(); outW = null; resetWriter(); } } }