/** * Constructor that generates a sparse instance from the given instance. Reference to the dataset * is set to null. (ie. the instance doesn't have access to information about the attribute types) * * @param instance the instance from which the attribute values and the weight are to be copied */ public SparseInstance(Instance instance) { m_Weight = instance.weight(); m_Dataset = null; m_NumAttributes = instance.numAttributes(); if (instance instanceof SparseInstance) { m_AttValues = ((SparseInstance) instance).m_AttValues; m_Indices = ((SparseInstance) instance).m_Indices; } else { double[] tempValues = new double[instance.numAttributes()]; int[] tempIndices = new int[instance.numAttributes()]; int vals = 0; for (int i = 0; i < instance.numAttributes(); i++) { if (instance.value(i) != 0) { tempValues[vals] = instance.value(i); tempIndices[vals] = i; vals++; } } m_AttValues = new double[vals]; m_Indices = new int[vals]; System.arraycopy(tempValues, 0, m_AttValues, 0, vals); System.arraycopy(tempIndices, 0, m_Indices, 0, vals); } }
/** * Inserts an instance into the hash table * * @param inst instance to be inserted * @param instA to create the hash key from * @throws Exception if the instance can't be inserted */ private void insertIntoTable(Instance inst, double[] instA) throws Exception { double[] tempClassDist2; double[] newDist; DecisionTableHashKey thekey; if (instA != null) { thekey = new DecisionTableHashKey(instA); } else { thekey = new DecisionTableHashKey(inst, inst.numAttributes(), false); } // see if this one is already in the table tempClassDist2 = (double[]) m_entries.get(thekey); if (tempClassDist2 == null) { if (m_classIsNominal) { newDist = new double[m_theInstances.classAttribute().numValues()]; // Leplace estimation for (int i = 0; i < m_theInstances.classAttribute().numValues(); i++) { newDist[i] = 1.0; } newDist[(int) inst.classValue()] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } else { newDist = new double[2]; newDist[0] = inst.classValue() * inst.weight(); newDist[1] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } } else { // update the distribution for this instance if (m_classIsNominal) { tempClassDist2[(int) inst.classValue()] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } else { tempClassDist2[0] += (inst.classValue() * inst.weight()); tempClassDist2[1] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } } }
/** * Merges this instance with the given instance and returns the result. Dataset is set to null. * * @param inst the instance to be merged with this one * @return the merged instances */ public Instance mergeInstance(Instance inst) { int[] indices = new int[numValues() + inst.numValues()]; int m = 0; for (int j = 0; j < numValues(); j++) { indices[m++] = index(j); } for (int j = 0; j < inst.numValues(); j++) { if (inst.valueSparse(j) != 0) { indices[m++] = inst.index(j) + inst.numAttributes(); } } if (m != indices.length) { // Need to truncate int[] newInd = new int[m]; System.arraycopy(indices, 0, newInd, 0, m); indices = newInd; } return new BinarySparseInstance((float) 1.0, indices, numAttributes() + inst.numAttributes()); }
/** * Merges this instance with the given instance and returns the result. Dataset is set to null. * * @param inst the instance to be merged with this one * @return the merged instances */ public Instance mergeInstance(Instance inst) { double[] values = new double[numValues() + inst.numValues()]; int[] indices = new int[numValues() + inst.numValues()]; int m = 0; for (int j = 0; j < numValues(); j++, m++) { values[m] = valueSparse(j); indices[m] = index(j); } for (int j = 0; j < inst.numValues(); j++, m++) { values[m] = inst.valueSparse(j); indices[m] = numAttributes() + inst.index(j); } return new SparseInstance(1.0, values, indices, numAttributes() + inst.numAttributes()); }
/** * Updates the minimum and maximum values for all the attributes based on a new instance. * * @param instance the new instance */ private void updateMinMax(Instance instance) { for (int j = 0; j < instance.numAttributes(); j++) { if (Double.isNaN(m_Min[j])) { m_Min[j] = instance.value(j); m_Max[j] = instance.value(j); } else { if (instance.value(j) < m_Min[j]) { m_Min[j] = instance.value(j); } else { if (instance.value(j) > m_Max[j]) { m_Max[j] = instance.value(j); } } } } }
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if distribution can't be computed */ public double[] distributionForInstance(Instance instance) throws Exception { DecisionTableHashKey thekey; double[] tempDist; double[] normDist; m_disTransform.input(instance); m_disTransform.batchFinished(); instance = m_disTransform.output(); m_delTransform.input(instance); m_delTransform.batchFinished(); instance = m_delTransform.output(); thekey = new DecisionTableHashKey(instance, instance.numAttributes(), false); // if this one is not in the table if ((tempDist = (double[]) m_entries.get(thekey)) == null) { if (m_useIBk) { tempDist = m_ibk.distributionForInstance(instance); } else { if (!m_classIsNominal) { tempDist = new double[1]; tempDist[0] = m_majority; } else { tempDist = m_classPriors.clone(); /*tempDist = new double [m_theInstances.classAttribute().numValues()]; tempDist[(int)m_majority] = 1.0; */ } } } else { if (!m_classIsNominal) { normDist = new double[1]; normDist[0] = (tempDist[0] / tempDist[1]); tempDist = normDist; } else { // normalise distribution normDist = new double[tempDist.length]; System.arraycopy(tempDist, 0, normDist, 0, tempDist.length); Utils.normalize(normDist); tempDist = normDist; } } return tempDist; }
/** * Writes a Batch of instances * * @throws IOException throws IOException if saving in batch mode is not possible */ public void writeBatch() throws IOException { Instances instances = getInstances(); if (instances == null) throw new IOException("No instances to save"); if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); System.err.println("No class specified. Last attribute is used as class attribute."); } if (instances.attribute(instances.classIndex()).isNumeric()) throw new IOException("To save in C4.5 format the class attribute cannot be numeric."); if (getRetrieval() == INCREMENTAL) throw new IOException("Batch and incremental saving cannot be mixed."); setRetrieval(BATCH); if (retrieveFile() == null || getWriter() == null) { throw new IOException( "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option."); } setWriteMode(WRITE); // print names file setFileExtension(".names"); PrintWriter outW = new PrintWriter(getWriter()); for (int i = 0; i < instances.attribute(instances.classIndex()).numValues(); i++) { outW.write(instances.attribute(instances.classIndex()).value(i)); if (i < instances.attribute(instances.classIndex()).numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } for (int i = 0; i < instances.numAttributes(); i++) { if (i != instances.classIndex()) { outW.write(instances.attribute(i).name() + ": "); if (instances.attribute(i).isNumeric() || instances.attribute(i).isDate()) { outW.write("continuous.\n"); } else { Attribute temp = instances.attribute(i); for (int j = 0; j < temp.numValues(); j++) { outW.write(temp.value(j)); if (j < temp.numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } } } } outW.flush(); outW.close(); // print data file String out = retrieveFile().getAbsolutePath(); setFileExtension(".data"); out = out.substring(0, out.lastIndexOf('.')) + getFileExtension(); File namesFile = new File(out); try { setFile(namesFile); } catch (Exception ex) { throw new IOException( "Cannot create data file, only names file created (Reason: " + ex.toString() + ")."); } if (retrieveFile() == null || getWriter() == null) { throw new IOException("Cannot create data file, only names file created."); } outW = new PrintWriter(getWriter()); // print data file for (int i = 0; i < instances.numInstances(); i++) { Instance temp = instances.instance(i); for (int j = 0; j < temp.numAttributes(); j++) { if (j != instances.classIndex()) { if (temp.isMissing(j)) { outW.write("?,"); } else if (instances.attribute(j).isNominal() || instances.attribute(j).isString()) { outW.write(instances.attribute(j).value((int) temp.value(j)) + ","); } else { outW.write("" + temp.value(j) + ","); } } } // write the class value if (temp.isMissing(instances.classIndex())) { outW.write("?"); } else { outW.write( instances .attribute(instances.classIndex()) .value((int) temp.value(instances.classIndex()))); } outW.write("\n"); } outW.flush(); outW.close(); setFileExtension(".names"); setWriteMode(WAIT); outW = null; resetWriter(); setWriteMode(CANCEL); }
/** * Saves an instances incrementally. Structure has to be set by using the setStructure() method or * setInstances() method. * * @param inst the instance to save * @throws IOException throws IOEXception if an instance cannot be saved incrementally. */ public void writeIncremental(Instance inst) throws IOException { int writeMode = getWriteMode(); Instances structure = getInstances(); PrintWriter outW = null; if (structure != null) { if (structure.classIndex() == -1) { structure.setClassIndex(structure.numAttributes() - 1); System.err.println("No class specified. Last attribute is used as class attribute."); } if (structure.attribute(structure.classIndex()).isNumeric()) throw new IOException("To save in C4.5 format the class attribute cannot be numeric."); } if (getRetrieval() == BATCH || getRetrieval() == NONE) throw new IOException("Batch and incremental saving cannot be mixed."); if (retrieveFile() == null || getWriter() == null) { throw new IOException( "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option."); } outW = new PrintWriter(getWriter()); if (writeMode == WAIT) { if (structure == null) { setWriteMode(CANCEL); if (inst != null) System.err.println("Structure(Header Information) has to be set in advance"); } else setWriteMode(STRUCTURE_READY); writeMode = getWriteMode(); } if (writeMode == CANCEL) { if (outW != null) outW.close(); cancel(); } if (writeMode == STRUCTURE_READY) { setWriteMode(WRITE); // write header: here names file for (int i = 0; i < structure.attribute(structure.classIndex()).numValues(); i++) { outW.write(structure.attribute(structure.classIndex()).value(i)); if (i < structure.attribute(structure.classIndex()).numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } for (int i = 0; i < structure.numAttributes(); i++) { if (i != structure.classIndex()) { outW.write(structure.attribute(i).name() + ": "); if (structure.attribute(i).isNumeric() || structure.attribute(i).isDate()) { outW.write("continuous.\n"); } else { Attribute temp = structure.attribute(i); for (int j = 0; j < temp.numValues(); j++) { outW.write(temp.value(j)); if (j < temp.numValues() - 1) { outW.write(","); } else { outW.write(".\n"); } } } } } outW.flush(); outW.close(); writeMode = getWriteMode(); String out = retrieveFile().getAbsolutePath(); setFileExtension(".data"); out = out.substring(0, out.lastIndexOf('.')) + getFileExtension(); File namesFile = new File(out); try { setFile(namesFile); } catch (Exception ex) { throw new IOException("Cannot create data file, only names file created."); } if (retrieveFile() == null || getWriter() == null) { throw new IOException("Cannot create data file, only names file created."); } outW = new PrintWriter(getWriter()); } if (writeMode == WRITE) { if (structure == null) throw new IOException("No instances information available."); if (inst != null) { // write instance: here data file for (int j = 0; j < inst.numAttributes(); j++) { if (j != structure.classIndex()) { if (inst.isMissing(j)) { outW.write("?,"); } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) { outW.write(structure.attribute(j).value((int) inst.value(j)) + ","); } else { outW.write("" + inst.value(j) + ","); } } } // write the class value if (inst.isMissing(structure.classIndex())) { outW.write("?"); } else { outW.write( structure .attribute(structure.classIndex()) .value((int) inst.value(structure.classIndex()))); } outW.write("\n"); // flushes every 100 instances m_incrementalCounter++; if (m_incrementalCounter > 100) { m_incrementalCounter = 0; outW.flush(); } } else { // close if (outW != null) { outW.flush(); outW.close(); } setFileExtension(".names"); m_incrementalCounter = 0; resetStructure(); outW = null; resetWriter(); } } }