Ejemplo n.º 1
0
  /**
   * Marks the current batch of input as complete. When no statistics have been computed yet, the
   * per-attribute mean and standard deviation are taken from the buffered input and every
   * buffered instance is converted, after which output() may be called to retrieve the filtered
   * instances.
   *
   * @return true if there are instances pending output
   * @exception Exception if an error occurs
   * @exception IllegalStateException if no input structure has been defined
   */
  public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    final boolean statisticsMissing = (m_Means == null);
    if (statisticsMissing) {
      Instances buffered = getInputFormat();
      int attributeCount = buffered.numAttributes();
      m_Means = new double[attributeCount];
      m_StdDevs = new double[attributeCount];

      // Statistics are only gathered for numeric, non-class attributes;
      // every other slot keeps the array default of 0.0.
      for (int att = 0; att < attributeCount; att++) {
        if (!buffered.attribute(att).isNumeric() || (buffered.classIndex() == att)) {
          continue;
        }
        m_Means[att] = buffered.meanOrMode(att);
        m_StdDevs[att] = Math.sqrt(buffered.variance(att));
      }

      // Push all instances that were buffered while waiting for the statistics
      int row = 0;
      while (row < buffered.numInstances()) {
        convertInstance(buffered.instance(row));
        row++;
      }
    }

    // Release the buffered input
    flushInput();

    m_NewBatch = true;
    boolean outputWaiting = numPendingOutput() != 0;
    return outputWaiting;
  }
Ejemplo n.º 2
0
  /**
   * Builds one single-item item set per attribute value found in the header of the given
   * instances. Item sets are produced attribute by attribute and, within an attribute, value by
   * value, so the ordering of values in the header determines the lexicographic order.
   *
   * @param instances the set of instances whose header info is to be used
   * @return a set of item sets, each containing a single item
   * @exception Exception if singletons can't be generated successfully (a numeric attribute is
   *     encountered)
   */
  public static FastVector singletons(Instances instances) throws Exception {

    FastVector singletonSets = new FastVector();

    for (int attr = 0; attr < instances.numAttributes(); attr++) {
      if (instances.attribute(attr).isNumeric()) {
        throw new Exception("Can't handle numeric attributes!");
      }

      int valueCount = instances.attribute(attr).numValues();
      for (int value = 0; value < valueCount; value++) {
        ItemSet single = new AprioriItemSet(instances.numInstances());

        // -1 marks "attribute not part of this item set"; only slot attr is set
        int[] items = new int[instances.numAttributes()];
        java.util.Arrays.fill(items, -1);
        items[attr] = value;

        single.m_items = items;
        singletonSets.addElement(single);
      }
    }
    return singletonSets;
  }
Ejemplo n.º 3
0
  /**
   * Builds a 0/1 weight function removing instances with an abnormally high scaled residual.
   *
   * <p>After refreshing the residuals via findResiduals(), a scale factor is computed as {@code
   * 1.4826 * (1 + 5 / (n - p)) * sqrt(m_bestMedian)}, where {@code n} is the number of instances
   * and {@code p} the number of attributes of m_Data. An instance gets weight 1.0 when the square
   * root of its residual divided by the scale factor is below 2.5, and weight 0.0 otherwise.
   *
   * @throws ArithmeticException if the number of instances equals the number of attributes (the
   *     correction term would divide by zero)
   * @throws Exception if weight building fails
   */
  private void buildWeight() throws Exception {

    findResiduals();

    int degreesOfFreedom = m_Data.numInstances() - m_Data.numAttributes();
    if (degreesOfFreedom == 0) {
      // The former integer division failed with ArithmeticException here; keep that
      // failure mode instead of silently producing an infinite scale factor.
      throw new ArithmeticException(
          "Cannot compute scale factor: number of instances equals number of attributes");
    }

    // 5.0 forces floating-point division. With the previous integer division the
    // term 5 / (n - p) truncated to 0 whenever |n - p| > 5, dropping the correction.
    // NOTE(review): presumably the finite-sample correction of the least median of
    // squares scale estimate (Rousseeuw & Leroy) - confirm against the reference.
    double correction = 1 + 5.0 / degreesOfFreedom;
    m_scalefactor = 1.4826 * correction * Math.sqrt(m_bestMedian);

    m_weight = new double[m_Residuals.length];
    for (int i = 0; i < m_Residuals.length; i++) {
      // sqrt is applied to each stored residual before scaling
      // (assumes m_Residuals holds squared residuals - TODO confirm)
      double scaledResidual = Math.sqrt(m_Residuals[i]) / m_scalefactor;
      m_weight[i] = (scaledResidual < 2.5) ? 1.0 : 0.0;
    }
  }
Ejemplo n.º 4
0
  /**
   * Command-line smoke test for CostCurve generation. An ARFF file is read from standard input,
   * the last attribute is used as the class, and the predictions of two 10-fold cross-validation
   * runs (seeds 0 and 1) of a hardcoded Logistic classifier are pooled and turned into a curve
   * that is printed to standard output.
   *
   * @param args currently ignored
   */
  public static void main(String[] args) {

    try {
      Instances data = new Instances(new java.io.InputStreamReader(System.in));
      data.setClassIndex(data.numAttributes() - 1);

      CostCurve curveBuilder = new CostCurve();
      EvaluationUtils evaluator = new EvaluationUtils();
      Classifier learner = new features.classifiers.functions.Logistic();
      FastVector pooledPredictions = new FastVector();

      // Do two runs, seeding each one with its run number.
      int run = 0;
      while (run < 2) {
        evaluator.setSeed(run);
        pooledPredictions.appendElements(evaluator.getCVPredictions(learner, data, 10));
        run++;
      }

      Instances curve = curveBuilder.getCurve(pooledPredictions);
      System.out.println(curve);
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
Ejemplo n.º 5
0
  /**
   * Accept an instance for processing by StreamableFilters only.
   *
   * <p>The event is handled in this order:
   *
   * <ol>
   *   <li>If {@code m_filterThread} is non-null, a message is logged (or printed to stderr when no
   *       log is set) and the event is dropped.
   *   <li>If the configured filter is not a {@code StreamableFilter}, all processing is stopped,
   *       the problem is logged, and the event is dropped.
   *   <li>{@code FORMAT_AVAILABLE}: the instance counter is reset, the filter is initialised with
   *       the event's structure and, if the filter already knows its output format, that format
   *       is forwarded to the instance listeners.
   *   <li>{@code BATCH_FINISHED}: the event's instance is pushed through the filter, the batch is
   *       finished, and any pending output instances are forwarded to the listeners.
   *   <li>Any other status: the instance is pushed through the filter and, if the filter produces
   *       an output instance, it is forwarded with the incoming event's status.
   * </ol>
   *
   * <p>Exceptions raised while filtering are not propagated; they stop processing and are logged
   * and/or printed.
   *
   * @param e an <code>InstanceEvent</code> value
   */
  public void acceptInstance(InstanceEvent e) {
    // to do!
    // Refuse incremental input while m_filterThread is set
    // (presumably a batch filtering job is in progress - TODO confirm).
    if (m_filterThread != null) {
      String messg =
          Messages.getInstance().getString("Filter_AcceptInstance_Mess_Text_First")
              + statusMessagePrefix()
              + Messages.getInstance().getString("Filter_AcceptInstance_Mess_Text_Second");
      if (m_log != null) {
        m_log.logMessage(messg);
        m_log.statusMessage(
            statusMessagePrefix()
                + Messages.getInstance()
                    .getString("Filter_AcceptInstance_StatusMessage_Text_First"));
      } else {
        System.err.println(messg);
      }
      return;
    }
    // Only streamable filters can be fed instance by instance.
    if (!(m_Filter instanceof StreamableFilter)) {
      stop(); // stop all processing
      if (m_log != null) {
        m_log.logMessage(
            Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_First")
                + statusMessagePrefix()
                + Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Second")
                + m_Filter.getClass().getName()
                + Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Third"));
        m_log.statusMessage(
            statusMessagePrefix()
                + Messages.getInstance()
                    .getString("Filter_AcceptInstance_StatusMessage_Text_Second"));
      }
      return;
    }
    // A new stream is starting: (re)initialise the filter with the incoming structure.
    if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) {
      try {
        m_instanceCount = 0;
        // notifyInstanceListeners(e);
        //	Instances dataset = e.getInstance().dataset();
        Instances dataset = e.getStructure();
        if (m_Filter instanceof SupervisedFilter) {
          // default to last column if no class is set
          if (dataset.classIndex() < 0) {
            dataset.setClassIndex(dataset.numAttributes() - 1);
          }
        }
        // initialize filter
        m_Filter.setInputFormat(dataset);
        // attempt to determine post-filtering
        // structure. If successful this can be passed on to instance
        // listeners as a new FORMAT_AVAILABLE event.
        m_structurePassedOn = false;
        try {
          if (m_Filter.isOutputFormatDefined()) {
            //	    System.err.println("Filter - passing on output format...");
            //	    System.err.println(m_Filter.getOutputFormat());
            m_ie.setStructure(m_Filter.getOutputFormat());
            notifyInstanceListeners(m_ie);
            m_structurePassedOn = true;
          }
        } catch (Exception ex) {
          stop(); // stop all processing
          if (m_log != null) {
            m_log.logMessage(
                Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Fourth")
                    + statusMessagePrefix()
                    + Messages.getInstance()
                        .getString("Filter_AcceptInstance_LogMessage_Text_Fifth")
                    + ex.getMessage());
            m_log.statusMessage(
                statusMessagePrefix()
                    + Messages.getInstance()
                        .getString("Filter_AcceptInstance_StatusMessage_Text_Third"));
          } else {
            // NOTE(review): unlike the logging branch, this stderr message does not
            // include any detail from ex.
            System.err.println(
                Messages.getInstance().getString("Filter_AcceptInstance_Error_Text_First")
                    + statusMessagePrefix()
                    + Messages.getInstance().getString("Filter_AcceptInstance_Error_Text_Second"));
          }
        }
      } catch (Exception ex) {
        // failures from setInputFormat (or the structure handling above) end up here
        ex.printStackTrace();
      }
      return;
    }

    if (e.getStatus() == InstanceEvent.BATCH_FINISHED) {
      // get the last instance (if available)
      try {
        if (m_log != null) {
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Fourth"));
        }
        if (m_Filter.input(e.getInstance())) {
          Instance filteredInstance = m_Filter.output();
          if (filteredInstance != null) {
            if (!m_structurePassedOn) {
              // pass on the new structure first
              m_ie.setStructure(new Instances(filteredInstance.dataset(), 0));
              notifyInstanceListeners(m_ie);
              m_structurePassedOn = true;
            }

            m_ie.setInstance(filteredInstance);

            // if there are instances pending for output don't want to send
            // a batch finished at this point...
            // System.err.println("Filter - in batch finished...");
            if (m_Filter.batchFinished() && m_Filter.numPendingOutput() > 0) {
              m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE);
            } else {
              m_ie.setStatus(e.getStatus());
            }
            notifyInstanceListeners(m_ie);
          }
        }
        if (m_log != null) {
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Fourth_Alpha"));
        }
      } catch (Exception ex) {
        stop(); // stop all processing
        if (m_log != null) {
          m_log.logMessage(
              Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Sixth")
                  + statusMessagePrefix()
                  + ex.getMessage());
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Fifth"));
        }
        ex.printStackTrace();
      }

      // check for any pending instances that we might need to pass on
      // NOTE(review): batchFinished() may already have been called in the try block
      // above, so the filter can see two batchFinished() calls for one batch, and this
      // section still runs after the catch above has called stop() - confirm both are
      // intended.
      try {
        if (m_Filter.batchFinished() && m_Filter.numPendingOutput() > 0) {
          if (m_log != null) {
            m_log.statusMessage(
                statusMessagePrefix()
                    + Messages.getInstance()
                        .getString("Filter_AcceptInstance_StatusMessage_Text_Sixth"));
          }
          Instance filteredInstance = m_Filter.output();
          if (filteredInstance != null) {
            if (!m_structurePassedOn) {
              // pass on the new structure first
              m_ie.setStructure(new Instances(filteredInstance.dataset(), 0));
              notifyInstanceListeners(m_ie);
              m_structurePassedOn = true;
            }

            m_ie.setInstance(filteredInstance);

            // TODO here is the problem I think
            // NOTE(review): this first pending instance is always sent as
            // INSTANCE_AVAILABLE; if it was the only pending one, the loop below does
            // not run and no BATCH_FINISHED is emitted from this section
            // (assuming output() consumes one pending instance - confirm).
            m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE);
            notifyInstanceListeners(m_ie);
          }
          // drain the remaining pending instances; the last one carries BATCH_FINISHED
          while (m_Filter.numPendingOutput() > 0) {
            filteredInstance = m_Filter.output();
            m_ie.setInstance(filteredInstance);
            //            System.err.println("Filter - sending pending...");
            if (m_Filter.numPendingOutput() == 0) {
              m_ie.setStatus(InstanceEvent.BATCH_FINISHED);
            } else {
              m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE);
            }
            notifyInstanceListeners(m_ie);
          }
          if (m_log != null) {
            m_log.statusMessage(
                statusMessagePrefix()
                    + Messages.getInstance()
                        .getString("Filter_AcceptInstance_StatusMessage_Text_Seventh"));
          }
        }
      } catch (Exception ex) {
        stop(); // stop all processing
        if (m_log != null) {
          m_log.logMessage(
              Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Seventh")
                  + statusMessagePrefix()
                  + ex.toString());
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Eighth"));
        }
        ex.printStackTrace();
      }
    } else {
      // pass instance through the filter
      try {
        if (!m_Filter.input(e.getInstance())) {
          //          System.err.println("Filter - inputing instance into filter...");
          /* if (m_log != null) {
            m_log.logMessage("ERROR : filter not ready to output instance");
          } */

          // quietly return. Filter might be able to output some instances
          // once the batch is finished.
          return;
        }

        // collect output instance.
        Instance filteredInstance = m_Filter.output();
        if (filteredInstance == null) {
          return;
        }
        m_instanceCount++;

        if (!m_structurePassedOn) {
          // pass on the new structure first
          m_ie.setStructure(new Instances(filteredInstance.dataset(), 0));
          notifyInstanceListeners(m_ie);
          m_structurePassedOn = true;
        }

        m_ie.setInstance(filteredInstance);
        m_ie.setStatus(e.getStatus());

        // progress report every 10000 output instances
        if (m_log != null && (m_instanceCount % 10000 == 0)) {
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Nineth")
                  + m_instanceCount
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Tenth"));
        }
        notifyInstanceListeners(m_ie);
      } catch (Exception ex) {
        stop(); // stop all processing
        if (m_log != null) {
          m_log.logMessage(
              Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Eighth")
                  + statusMessagePrefix()
                  + ex.toString());
          m_log.statusMessage(
              statusMessagePrefix()
                  + Messages.getInstance()
                      .getString("Filter_AcceptInstance_StatusMessage_Text_Eleventh"));
        }
        ex.printStackTrace();
      }
    }
  }
Ejemplo n.º 6
0
  /**
   * Renders this filter as the Java source of a standalone class. The generated class carries the
   * given name and embeds three lookup tables (which attributes are processed, the computed means
   * and the computed standard deviations) plus two methods with these signatures:
   *
   * <pre><code>
   * // converts one row
   * public static Object[] filter(Object[] i);
   * // converts a full dataset (first dimension is row index)
   * public static Object[][] filter(Object[][] i);
   * </code></pre>
   *
   * where the array <code>i</code> contains elements that are either Double, String, with missing
   * values represented as null. The generated code is public domain and comes with no warranty.
   *
   * @param className the name that should be given to the source class.
   * @param data the dataset used for initializing the filter
   * @return the object source described by a string
   * @throws Exception if the source can't be computed
   */
  public String toSource(String className, Instances data) throws Exception {
    // flag the attributes the filter touches: numeric ones other than the class
    final int attributeCount = data.numAttributes();
    final boolean[] touched = new boolean[attributeCount];
    for (int col = 0; col < attributeCount; col++) {
      touched[col] = data.attribute(col).isNumeric() && (col != data.classIndex());
    }

    StringBuilder src = new StringBuilder();

    // class header followed by the three embedded lookup tables
    src.append("class ").append(className).append(" {\n");
    src.append("\n");
    src.append("  /** lists which attributes will be processed */\n");
    src.append("  protected final static boolean[] PROCESS = new boolean[]{")
        .append(Utils.arrayToString(touched))
        .append("};\n");
    src.append("\n");
    src.append("  /** the computed means */\n");
    src.append("  protected final static double[] MEANS = new double[]{")
        .append(Utils.arrayToString(m_Means))
        .append("};\n");
    src.append("\n");
    src.append("  /** the computed standard deviations */\n");
    src.append("  protected final static double[] STDEVS = new double[]{")
        .append(Utils.arrayToString(m_StdDevs))
        .append("};\n");
    src.append("\n");

    // generated method: filter(Object[]) for a single row
    src.append("  /**\n")
        .append("   * filters a single row\n")
        .append("   * \n")
        .append("   * @param i the row to process\n")
        .append("   * @return the processed row\n")
        .append("   */\n")
        .append("  public static Object[] filter(Object[] i) {\n")
        .append("    Object[] result;\n")
        .append("\n")
        .append("    result = new Object[i.length];\n")
        .append("    for (int n = 0; n < i.length; n++) {\n")
        .append("      if (PROCESS[n] && (i[n] != null)) {\n")
        .append("        if (STDEVS[n] > 0)\n")
        .append("          result[n] = (((Double) i[n]) - MEANS[n]) / STDEVS[n];\n")
        .append("        else\n")
        .append("          result[n] = ((Double) i[n]) - MEANS[n];\n")
        .append("      }\n")
        .append("      else {\n")
        .append("        result[n] = i[n];\n")
        .append("      }\n")
        .append("    }\n")
        .append("\n")
        .append("    return result;\n")
        .append("  }\n")
        .append("\n");

    // generated method: filter(Object[][]) applying the row filter to every row
    src.append("  /**\n")
        .append("   * filters multiple rows\n")
        .append("   * \n")
        .append("   * @param i the rows to process\n")
        .append("   * @return the processed rows\n")
        .append("   */\n")
        .append("  public static Object[][] filter(Object[][] i) {\n")
        .append("    Object[][] result;\n")
        .append("\n")
        .append("    result = new Object[i.length][];\n")
        .append("    for (int n = 0; n < i.length; n++) {\n")
        .append("      result[n] = filter(i[n]);\n")
        .append("    }\n")
        .append("\n")
        .append("    return result;\n")
        .append("  }\n")
        .append("}\n");

    return src.toString();
  }