/** * Signify that this batch of input to the filter is finished. If the filter requires all * instances prior to filtering, output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @exception Exception if an error occurs * @exception IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_Means == null) { Instances input = getInputFormat(); m_Means = new double[input.numAttributes()]; m_StdDevs = new double[input.numAttributes()]; for (int i = 0; i < input.numAttributes(); i++) { if (input.attribute(i).isNumeric() && (input.classIndex() != i)) { m_Means[i] = input.meanOrMode(i); m_StdDevs[i] = Math.sqrt(input.variance(i)); } } // Convert pending input instances for (int i = 0; i < input.numInstances(); i++) { convertInstance(input.instance(i)); } } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
/** * Accept an instance for processing by StreamableFilters only * * @param e an <code>InstanceEvent</code> value */ public void acceptInstance(InstanceEvent e) { // to do! if (m_filterThread != null) { String messg = Messages.getInstance().getString("Filter_AcceptInstance_Mess_Text_First") + statusMessagePrefix() + Messages.getInstance().getString("Filter_AcceptInstance_Mess_Text_Second"); if (m_log != null) { m_log.logMessage(messg); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_First")); } else { System.err.println(messg); } return; } if (!(m_Filter instanceof StreamableFilter)) { stop(); // stop all processing if (m_log != null) { m_log.logMessage( Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_First") + statusMessagePrefix() + Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Second") + m_Filter.getClass().getName() + Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Third")); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Second")); } return; } if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) { try { m_instanceCount = 0; // notifyInstanceListeners(e); // Instances dataset = e.getInstance().dataset(); Instances dataset = e.getStructure(); if (m_Filter instanceof SupervisedFilter) { // defualt to last column if no class is set if (dataset.classIndex() < 0) { dataset.setClassIndex(dataset.numAttributes() - 1); } } // initialize filter m_Filter.setInputFormat(dataset); // attempt to determine post-filtering // structure. If successful this can be passed on to instance // listeners as a new FORMAT_AVAILABLE event. m_structurePassedOn = false; try { if (m_Filter.isOutputFormatDefined()) { // System.err.println("Filter - passing on output format..."); // System.err.println(m_Filter.getOutputFormat()); m_ie.setStructure(m_Filter.getOutputFormat()); notifyInstanceListeners(m_ie); m_structurePassedOn = true; } } catch (Exception ex) { stop(); // stop all processing if (m_log != null) { m_log.logMessage( Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Fourth") + statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_LogMessage_Text_Fifth") + ex.getMessage()); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Third")); } else { System.err.println( Messages.getInstance().getString("Filter_AcceptInstance_Error_Text_First") + statusMessagePrefix() + Messages.getInstance().getString("Filter_AcceptInstance_Error_Text_Second")); } } } catch (Exception ex) { ex.printStackTrace(); } return; } if (e.getStatus() == InstanceEvent.BATCH_FINISHED) { // get the last instance (if available) try { if (m_log != null) { m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Fourth")); } if (m_Filter.input(e.getInstance())) { Instance filteredInstance = m_Filter.output(); if (filteredInstance != null) { if (!m_structurePassedOn) { // pass on the new structure first m_ie.setStructure(new Instances(filteredInstance.dataset(), 0)); notifyInstanceListeners(m_ie); m_structurePassedOn = true; } m_ie.setInstance(filteredInstance); // if there are instances pending for output don't want to send // a batch finisehd at this point... // System.err.println("Filter - in batch finisehd..."); if (m_Filter.batchFinished() && m_Filter.numPendingOutput() > 0) { m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE); } else { m_ie.setStatus(e.getStatus()); } notifyInstanceListeners(m_ie); } } if (m_log != null) { m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Fourth_Alpha")); } } catch (Exception ex) { stop(); // stop all processing if (m_log != null) { m_log.logMessage( Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Sixth") + statusMessagePrefix() + ex.getMessage()); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Fifth")); } ex.printStackTrace(); } // check for any pending instances that we might need to pass on try { if (m_Filter.batchFinished() && m_Filter.numPendingOutput() > 0) { if (m_log != null) { m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Sixth")); } Instance filteredInstance = m_Filter.output(); if (filteredInstance != null) { if (!m_structurePassedOn) { // pass on the new structure first m_ie.setStructure(new Instances(filteredInstance.dataset(), 0)); notifyInstanceListeners(m_ie); m_structurePassedOn = true; } m_ie.setInstance(filteredInstance); // TODO here is the problem I think m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE); notifyInstanceListeners(m_ie); } while (m_Filter.numPendingOutput() > 0) { filteredInstance = m_Filter.output(); m_ie.setInstance(filteredInstance); // System.err.println("Filter - sending pending..."); if (m_Filter.numPendingOutput() == 0) { m_ie.setStatus(InstanceEvent.BATCH_FINISHED); } else { m_ie.setStatus(InstanceEvent.INSTANCE_AVAILABLE); } notifyInstanceListeners(m_ie); } if (m_log != null) { m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Seventh")); } } } catch (Exception ex) { stop(); // stop all processing if (m_log != null) { m_log.logMessage( Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Seventh") + statusMessagePrefix() + ex.toString()); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Eighth")); } ex.printStackTrace(); } } else { // pass instance through the filter try { if (!m_Filter.input(e.getInstance())) { // System.err.println("Filter - inputing instance into filter..."); /* if (m_log != null) { m_log.logMessage("ERROR : filter not ready to output instance"); } */ // quietly return. Filter might be able to output some instances // once the batch is finished. return; } // collect output instance. Instance filteredInstance = m_Filter.output(); if (filteredInstance == null) { return; } m_instanceCount++; if (!m_structurePassedOn) { // pass on the new structure first m_ie.setStructure(new Instances(filteredInstance.dataset(), 0)); notifyInstanceListeners(m_ie); m_structurePassedOn = true; } m_ie.setInstance(filteredInstance); m_ie.setStatus(e.getStatus()); if (m_log != null && (m_instanceCount % 10000 == 0)) { m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Nineth") + m_instanceCount + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Tenth")); } notifyInstanceListeners(m_ie); } catch (Exception ex) { stop(); // stop all processing if (m_log != null) { m_log.logMessage( Messages.getInstance().getString("Filter_AcceptInstance_LogMessage_Text_Eighth") + statusMessagePrefix() + ex.toString()); m_log.statusMessage( statusMessagePrefix() + Messages.getInstance() .getString("Filter_AcceptInstance_StatusMessage_Text_Eleventh")); } ex.printStackTrace(); } } }
/** * Returns a string that describes the filter as source. The filter will be contained in a class * with the given name (there may be auxiliary classes), and will contain two methods with these * signatures: * * <pre><code> * // converts one row * public static Object[] filter(Object[] i); * // converts a full dataset (first dimension is row index) * public static Object[][] filter(Object[][] i); * </code></pre> * * where the array <code>i</code> contains elements that are either Double, String, with missing * values represented as null. The generated code is public domain and comes with no warranty. * * @param className the name that should be given to the source class. * @param data the dataset used for initializing the filter * @return the object source described by a string * @throws Exception if the source can't be computed */ public String toSource(String className, Instances data) throws Exception { StringBuffer result; boolean[] process; int i; result = new StringBuffer(); // determine what attributes were processed process = new boolean[data.numAttributes()]; for (i = 0; i < data.numAttributes(); i++) { process[i] = (data.attribute(i).isNumeric() && (i != data.classIndex())); } result.append("class " + className + " {\n"); result.append("\n"); result.append(" /** lists which attributes will be processed */\n"); result.append( " protected final static boolean[] PROCESS = new boolean[]{" + Utils.arrayToString(process) + "};\n"); result.append("\n"); result.append(" /** the computed means */\n"); result.append( " protected final static double[] MEANS = new double[]{" + Utils.arrayToString(m_Means) + "};\n"); result.append("\n"); result.append(" /** the computed standard deviations */\n"); result.append( " protected final static double[] STDEVS = new double[]{" + Utils.arrayToString(m_StdDevs) + "};\n"); result.append("\n"); result.append(" /**\n"); result.append(" * filters a single row\n"); result.append(" * \n"); result.append(" * @param i the row to process\n"); result.append(" * @return the processed row\n"); result.append(" */\n"); result.append(" public static Object[] filter(Object[] i) {\n"); result.append(" Object[] result;\n"); result.append("\n"); result.append(" result = new Object[i.length];\n"); result.append(" for (int n = 0; n < i.length; n++) {\n"); result.append(" if (PROCESS[n] && (i[n] != null)) {\n"); result.append(" if (STDEVS[n] > 0)\n"); result.append(" result[n] = (((Double) i[n]) - MEANS[n]) / STDEVS[n];\n"); result.append(" else\n"); result.append(" result[n] = ((Double) i[n]) - MEANS[n];\n"); result.append(" }\n"); result.append(" else {\n"); result.append(" result[n] = i[n];\n"); result.append(" }\n"); result.append(" }\n"); result.append("\n"); result.append(" return result;\n"); result.append(" }\n"); result.append("\n"); result.append(" /**\n"); result.append(" * filters multiple rows\n"); result.append(" * \n"); result.append(" * @param i the rows to process\n"); result.append(" * @return the processed rows\n"); result.append(" */\n"); result.append(" public static Object[][] filter(Object[][] i) {\n"); result.append(" Object[][] result;\n"); result.append("\n"); result.append(" result = new Object[i.length][];\n"); result.append(" for (int n = 0; n < i.length; n++) {\n"); result.append(" result[n] = filter(i[n]);\n"); result.append(" }\n"); result.append("\n"); result.append(" return result;\n"); result.append(" }\n"); result.append("}\n"); return result.toString(); }