/**
   * Constructor that generates a sparse instance from the given instance. Reference to the dataset
   * is set to null. (ie. the instance doesn't have access to information about the attribute types)
   *
   * @param instance the instance from which the attribute values and the weight are to be copied
   */
  public SparseInstance(Instance instance) {

    m_Weight = instance.weight();
    m_Dataset = null;
    m_NumAttributes = instance.numAttributes();
    if (instance instanceof SparseInstance) {
      m_AttValues = ((SparseInstance) instance).m_AttValues;
      m_Indices = ((SparseInstance) instance).m_Indices;
    } else {
      double[] tempValues = new double[instance.numAttributes()];
      int[] tempIndices = new int[instance.numAttributes()];
      int vals = 0;
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (instance.value(i) != 0) {
          tempValues[vals] = instance.value(i);
          tempIndices[vals] = i;
          vals++;
        }
      }
      m_AttValues = new double[vals];
      m_Indices = new int[vals];
      System.arraycopy(tempValues, 0, m_AttValues, 0, vals);
      System.arraycopy(tempIndices, 0, m_Indices, 0, vals);
    }
  }
  /**
   * Inserts an instance into the hash table
   *
   * @param inst instance to be inserted
   * @param instA to create the hash key from
   * @throws Exception if the instance can't be inserted
   */
  private void insertIntoTable(Instance inst, double[] instA) throws Exception {

    double[] tempClassDist2;
    double[] newDist;
    DecisionTableHashKey thekey;

    if (instA != null) {
      thekey = new DecisionTableHashKey(instA);
    } else {
      thekey = new DecisionTableHashKey(inst, inst.numAttributes(), false);
    }

    // see if this one is already in the table
    tempClassDist2 = (double[]) m_entries.get(thekey);
    if (tempClassDist2 == null) {
      if (m_classIsNominal) {
        newDist = new double[m_theInstances.classAttribute().numValues()];

        // Leplace estimation
        for (int i = 0; i < m_theInstances.classAttribute().numValues(); i++) {
          newDist[i] = 1.0;
        }

        newDist[(int) inst.classValue()] = inst.weight();

        // add to the table
        m_entries.put(thekey, newDist);
      } else {
        newDist = new double[2];
        newDist[0] = inst.classValue() * inst.weight();
        newDist[1] = inst.weight();

        // add to the table
        m_entries.put(thekey, newDist);
      }
    } else {

      // update the distribution for this instance
      if (m_classIsNominal) {
        tempClassDist2[(int) inst.classValue()] += inst.weight();

        // update the table
        m_entries.put(thekey, tempClassDist2);
      } else {
        tempClassDist2[0] += (inst.classValue() * inst.weight());
        tempClassDist2[1] += inst.weight();

        // update the table
        m_entries.put(thekey, tempClassDist2);
      }
    }
  }
  /**
   * Merges this instance with the given instance and returns the result. Dataset is set to null.
   *
   * @param inst the instance to be merged with this one
   * @return the merged instances
   */
  public Instance mergeInstance(Instance inst) {

    int[] indices = new int[numValues() + inst.numValues()];

    int m = 0;
    for (int j = 0; j < numValues(); j++) {
      indices[m++] = index(j);
    }
    for (int j = 0; j < inst.numValues(); j++) {
      if (inst.valueSparse(j) != 0) {
        indices[m++] = inst.index(j) + inst.numAttributes();
      }
    }

    if (m != indices.length) {
      // Need to truncate
      int[] newInd = new int[m];
      System.arraycopy(indices, 0, newInd, 0, m);
      indices = newInd;
    }
    return new BinarySparseInstance((float) 1.0, indices, numAttributes() + inst.numAttributes());
  }
  /**
   * Merges this instance with the given instance and returns the result. Dataset is set to null.
   *
   * @param inst the instance to be merged with this one
   * @return the merged instances
   */
  public Instance mergeInstance(Instance inst) {

    double[] values = new double[numValues() + inst.numValues()];
    int[] indices = new int[numValues() + inst.numValues()];

    int m = 0;
    for (int j = 0; j < numValues(); j++, m++) {
      values[m] = valueSparse(j);
      indices[m] = index(j);
    }
    for (int j = 0; j < inst.numValues(); j++, m++) {
      values[m] = inst.valueSparse(j);
      indices[m] = numAttributes() + inst.index(j);
    }

    return new SparseInstance(1.0, values, indices, numAttributes() + inst.numAttributes());
  }
Exemple #5
0
  /**
   * Updates the minimum and maximum values for all the attributes based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax(Instance instance) {

    for (int j = 0; j < instance.numAttributes(); j++) {
      if (Double.isNaN(m_Min[j])) {
        m_Min[j] = instance.value(j);
        m_Max[j] = instance.value(j);
      } else {
        if (instance.value(j) < m_Min[j]) {
          m_Min[j] = instance.value(j);
        } else {
          if (instance.value(j) > m_Max[j]) {
            m_Max[j] = instance.value(j);
          }
        }
      }
    }
  }
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @throws Exception if distribution can't be computed
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    DecisionTableHashKey thekey;
    double[] tempDist;
    double[] normDist;

    m_disTransform.input(instance);
    m_disTransform.batchFinished();
    instance = m_disTransform.output();

    m_delTransform.input(instance);
    m_delTransform.batchFinished();
    instance = m_delTransform.output();

    thekey = new DecisionTableHashKey(instance, instance.numAttributes(), false);

    // if this one is not in the table
    if ((tempDist = (double[]) m_entries.get(thekey)) == null) {
      if (m_useIBk) {
        tempDist = m_ibk.distributionForInstance(instance);
      } else {
        if (!m_classIsNominal) {
          tempDist = new double[1];
          tempDist[0] = m_majority;
        } else {
          tempDist = m_classPriors.clone();
          /*tempDist = new double [m_theInstances.classAttribute().numValues()];
          tempDist[(int)m_majority] = 1.0; */
        }
      }
    } else {
      if (!m_classIsNominal) {
        normDist = new double[1];
        normDist[0] = (tempDist[0] / tempDist[1]);
        tempDist = normDist;
      } else {

        // normalise distribution
        normDist = new double[tempDist.length];
        System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
        Utils.normalize(normDist);
        tempDist = normDist;
      }
    }
    return tempDist;
  }
Exemple #7
0
  /**
   * Writes a Batch of instances
   *
   * @throws IOException throws IOException if saving in batch mode is not possible
   */
  public void writeBatch() throws IOException {

    Instances instances = getInstances();

    if (instances == null) throw new IOException("No instances to save");
    if (instances.classIndex() == -1) {
      instances.setClassIndex(instances.numAttributes() - 1);
      System.err.println("No class specified. Last attribute is used as class attribute.");
    }
    if (instances.attribute(instances.classIndex()).isNumeric())
      throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    if (getRetrieval() == INCREMENTAL)
      throw new IOException("Batch and incremental saving cannot be mixed.");

    setRetrieval(BATCH);
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }
    setWriteMode(WRITE);
    // print names file
    setFileExtension(".names");
    PrintWriter outW = new PrintWriter(getWriter());
    for (int i = 0; i < instances.attribute(instances.classIndex()).numValues(); i++) {
      outW.write(instances.attribute(instances.classIndex()).value(i));
      if (i < instances.attribute(instances.classIndex()).numValues() - 1) {
        outW.write(",");
      } else {
        outW.write(".\n");
      }
    }
    for (int i = 0; i < instances.numAttributes(); i++) {
      if (i != instances.classIndex()) {
        outW.write(instances.attribute(i).name() + ": ");
        if (instances.attribute(i).isNumeric() || instances.attribute(i).isDate()) {
          outW.write("continuous.\n");
        } else {
          Attribute temp = instances.attribute(i);
          for (int j = 0; j < temp.numValues(); j++) {
            outW.write(temp.value(j));
            if (j < temp.numValues() - 1) {
              outW.write(",");
            } else {
              outW.write(".\n");
            }
          }
        }
      }
    }
    outW.flush();
    outW.close();

    // print data file
    String out = retrieveFile().getAbsolutePath();
    setFileExtension(".data");
    out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
    File namesFile = new File(out);
    try {
      setFile(namesFile);
    } catch (Exception ex) {
      throw new IOException(
          "Cannot create data file, only names file created (Reason: " + ex.toString() + ").");
    }
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException("Cannot create data file, only names file created.");
    }
    outW = new PrintWriter(getWriter());
    // print data file
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance temp = instances.instance(i);
      for (int j = 0; j < temp.numAttributes(); j++) {
        if (j != instances.classIndex()) {
          if (temp.isMissing(j)) {
            outW.write("?,");
          } else if (instances.attribute(j).isNominal() || instances.attribute(j).isString()) {
            outW.write(instances.attribute(j).value((int) temp.value(j)) + ",");
          } else {
            outW.write("" + temp.value(j) + ",");
          }
        }
      }
      // write the class value
      if (temp.isMissing(instances.classIndex())) {
        outW.write("?");
      } else {
        outW.write(
            instances
                .attribute(instances.classIndex())
                .value((int) temp.value(instances.classIndex())));
      }
      outW.write("\n");
    }
    outW.flush();
    outW.close();
    setFileExtension(".names");
    setWriteMode(WAIT);
    outW = null;
    resetWriter();
    setWriteMode(CANCEL);
  }
Exemple #8
0
  /**
   * Saves an instances incrementally. Structure has to be set by using the setStructure() method or
   * setInstances() method.
   *
   * @param inst the instance to save
   * @throws IOException throws IOEXception if an instance cannot be saved incrementally.
   */
  public void writeIncremental(Instance inst) throws IOException {

    int writeMode = getWriteMode();
    Instances structure = getInstances();
    PrintWriter outW = null;

    if (structure != null) {
      if (structure.classIndex() == -1) {
        structure.setClassIndex(structure.numAttributes() - 1);
        System.err.println("No class specified. Last attribute is used as class attribute.");
      }
      if (structure.attribute(structure.classIndex()).isNumeric())
        throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    }
    if (getRetrieval() == BATCH || getRetrieval() == NONE)
      throw new IOException("Batch and incremental saving cannot be mixed.");
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }

    outW = new PrintWriter(getWriter());

    if (writeMode == WAIT) {
      if (structure == null) {
        setWriteMode(CANCEL);
        if (inst != null)
          System.err.println("Structure(Header Information) has to be set in advance");
      } else setWriteMode(STRUCTURE_READY);
      writeMode = getWriteMode();
    }
    if (writeMode == CANCEL) {
      if (outW != null) outW.close();
      cancel();
    }
    if (writeMode == STRUCTURE_READY) {
      setWriteMode(WRITE);
      // write header: here names file
      for (int i = 0; i < structure.attribute(structure.classIndex()).numValues(); i++) {
        outW.write(structure.attribute(structure.classIndex()).value(i));
        if (i < structure.attribute(structure.classIndex()).numValues() - 1) {
          outW.write(",");
        } else {
          outW.write(".\n");
        }
      }
      for (int i = 0; i < structure.numAttributes(); i++) {
        if (i != structure.classIndex()) {
          outW.write(structure.attribute(i).name() + ": ");
          if (structure.attribute(i).isNumeric() || structure.attribute(i).isDate()) {
            outW.write("continuous.\n");
          } else {
            Attribute temp = structure.attribute(i);
            for (int j = 0; j < temp.numValues(); j++) {
              outW.write(temp.value(j));
              if (j < temp.numValues() - 1) {
                outW.write(",");
              } else {
                outW.write(".\n");
              }
            }
          }
        }
      }
      outW.flush();
      outW.close();

      writeMode = getWriteMode();

      String out = retrieveFile().getAbsolutePath();
      setFileExtension(".data");
      out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
      File namesFile = new File(out);
      try {
        setFile(namesFile);
      } catch (Exception ex) {
        throw new IOException("Cannot create data file, only names file created.");
      }
      if (retrieveFile() == null || getWriter() == null) {
        throw new IOException("Cannot create data file, only names file created.");
      }
      outW = new PrintWriter(getWriter());
    }
    if (writeMode == WRITE) {
      if (structure == null) throw new IOException("No instances information available.");
      if (inst != null) {
        // write instance: here data file
        for (int j = 0; j < inst.numAttributes(); j++) {
          if (j != structure.classIndex()) {
            if (inst.isMissing(j)) {
              outW.write("?,");
            } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) {
              outW.write(structure.attribute(j).value((int) inst.value(j)) + ",");
            } else {
              outW.write("" + inst.value(j) + ",");
            }
          }
        }
        // write the class value
        if (inst.isMissing(structure.classIndex())) {
          outW.write("?");
        } else {
          outW.write(
              structure
                  .attribute(structure.classIndex())
                  .value((int) inst.value(structure.classIndex())));
        }
        outW.write("\n");
        // flushes every 100 instances
        m_incrementalCounter++;
        if (m_incrementalCounter > 100) {
          m_incrementalCounter = 0;
          outW.flush();
        }
      } else {
        // close
        if (outW != null) {
          outW.flush();
          outW.close();
        }
        setFileExtension(".names");
        m_incrementalCounter = 0;
        resetStructure();
        outW = null;
        resetWriter();
      }
    }
  }