/** * Sorts the in-memory buffer * * @param write whether to write the sorted buffer to a temp file * @throws Exception if a problem occurs */ protected void sortBuffer(boolean write) throws Exception { String msg = statusMessagePrefix() + "Sorting in memory buffer...."; if (m_log != null) { m_log.statusMessage(msg); m_log.logMessage("[" + getCustomName() + "] " + msg); } Collections.sort(m_incrementalBuffer, m_sortComparator); if (!write) { return; } String tmpDir = m_tempDirectory; File tempFile = File.createTempFile("Sorter", ".tmp"); if (tmpDir != null && tmpDir.length() > 0) { try { tmpDir = m_env.substitute(tmpDir); File tempDir = new File(tmpDir); if (tempDir.exists() && tempDir.canWrite()) { String filename = tempFile.getName(); File newFile = new File(tmpDir + File.separator + filename); tempFile = newFile; tempFile.deleteOnExit(); } } catch (Exception ex) { } } if (!m_stopRequested.get()) { m_bufferFiles.add(tempFile); FileOutputStream fos = new FileOutputStream(tempFile); // GZIPOutputStream gzo = new GZIPOutputStream(fos); BufferedOutputStream bos = new BufferedOutputStream(fos, 50000); ObjectOutputStream oos = new ObjectOutputStream(bos); msg = statusMessagePrefix() + "Writing buffer to temp file " + m_bufferFiles.size() + "..."; if (m_log != null) { m_log.statusMessage(msg); m_log.logMessage("[" + getCustomName() + "] " + msg); } for (int i = 0; i < m_incrementalBuffer.size(); i++) { InstanceHolder temp = m_incrementalBuffer.get(i); temp.m_instance.setDataset(null); oos.writeObject(temp); if (i % (m_bufferSizeI / 10) == 0) { oos.reset(); } } bos.flush(); oos.close(); } m_incrementalBuffer.clear(); }
/** * Accept and process a data set event * * @param e a <code>DataSetEvent</code> value */ @Override public void acceptDataSet(DataSetEvent e) { m_busy = true; m_stopRequested.set(false); if (m_log != null && e.getDataSet().numInstances() > 0) { m_log.statusMessage(statusMessagePrefix() + "Sorting batch..."); } if (e.isStructureOnly()) { // nothing to sort! // just notify listeners of structure DataSetEvent d = new DataSetEvent(this, e.getDataSet()); notifyDataListeners(d); m_busy = false; return; } try { init(new Instances(e.getDataSet(), 0)); } catch (IllegalArgumentException ex) { if (m_log != null) { String message = "ERROR: There is a problem with the incoming instance structure"; // m_log.statusMessage(statusMessagePrefix() + message // + " - see log for details"); // m_log.logMessage(statusMessagePrefix() + message + " :" // + ex.getMessage()); stopWithErrorMessage(message, ex); m_busy = false; return; } } List<InstanceHolder> instances = new ArrayList<InstanceHolder>(); for (int i = 0; i < e.getDataSet().numInstances(); i++) { InstanceHolder h = new InstanceHolder(); h.m_instance = e.getDataSet().instance(i); instances.add(h); } Collections.sort(instances, m_sortComparator); Instances output = new Instances(e.getDataSet(), 0); for (int i = 0; i < instances.size(); i++) { output.add(instances.get(i).m_instance); } DataSetEvent d = new DataSetEvent(this, output); notifyDataListeners(d); if (m_log != null) { m_log.statusMessage(statusMessagePrefix() + "Finished."); } m_busy = false; }
/**
 * Accept a batch of instances and route it downstream. When an expression
 * tree is present, each instance is evaluated against it and collected into
 * a "true" or a "false" batch, each of which is sent to its downstream step
 * (if that step is connected). Without an expression tree the incoming data
 * set is forwarded unchanged to the "true" step.
 *
 * @param e the data set event to process
 */
@Override
public void acceptDataSet(DataSetEvent e) {
  m_busy = true;

  if (m_log != null && !e.isStructureOnly()) {
    m_log.statusMessage(statusMessagePrefix() + "Processing batch...");
  }

  init(new Instances(e.getDataSet(), 0));

  if (m_root == null) {
    // no expression to evaluate - everything goes to the "true" step
    if (m_indexOfTrueStep >= 0) {
      DataSetEvent passThrough = new DataSetEvent(this, e.getDataSet());
      ((DataSourceListener) m_downstream[m_indexOfTrueStep])
        .acceptDataSet(passThrough);
    }
  } else {
    Instances matched = new Instances(e.getDataSet(), 0);
    Instances unmatched = new Instances(e.getDataSet(), 0);

    for (int row = 0; row < e.getDataSet().numInstances(); row++) {
      Instance candidate = e.getDataSet().instance(row);
      if (m_root.evaluate(candidate, true)) {
        if (m_indexOfTrueStep >= 0) {
          matched.add(candidate);
        }
      } else if (m_indexOfFalseStep >= 0) {
        unmatched.add(candidate);
      }
    }

    if (m_indexOfTrueStep >= 0) {
      DataSetEvent trueEvent = new DataSetEvent(this, matched);
      ((DataSourceListener) m_downstream[m_indexOfTrueStep])
        .acceptDataSet(trueEvent);
    }

    if (m_indexOfFalseStep >= 0) {
      DataSetEvent falseEvent = new DataSetEvent(this, unmatched);
      ((DataSourceListener) m_downstream[m_indexOfFalseStep])
        .acceptDataSet(falseEvent);
    }
  }

  if (m_log != null && !e.isStructureOnly()) {
    m_log.statusMessage(statusMessagePrefix() + "Finished");
  }

  m_busy = false;
}
/** * Stops the step (and upstream ones) and then prints an error message and optional exception * message * * @param error the error message to print * @param ex the optional exception */ protected void stopWithErrorMessage(String error, Exception ex) { stop(); if (m_log != null) { m_log.statusMessage(statusMessagePrefix() + error + " - see log for details"); m_log.logMessage(statusMessagePrefix() + error + (ex != null ? " " + ex.getMessage() : "")); } }
/**
 * Stop this step: forwards the stop request to the listenee when it is a
 * {@code BeanCommon}, reports "Stopped" on the log (if any) and clears the
 * busy flag.
 */
@Override
public void stop() {
  // instanceof is false for null, so no separate null check is required
  if (m_listenee instanceof BeanCommon) {
    BeanCommon upstream = (BeanCommon) m_listenee;
    upstream.stop();
  }

  if (m_log != null) {
    m_log.statusMessage(statusMessagePrefix() + "Stopped");
  }

  m_busy = false;
}
@Override public void acceptInstance(InstanceEvent e) { m_busy = true; if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) { Instances structure = e.getStructure(); init(structure); if (m_log != null) { m_log.statusMessage(statusMessagePrefix() + "Processing stream..."); } // notify listeners of structure m_ie.setStructure(structure); if (m_indexOfTrueStep >= 0) { ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie); } if (m_indexOfFalseStep >= 0) { ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie); } } else { Instance inst = e.getInstance(); m_ie.setStatus(e.getStatus()); if (inst == null || e.getStatus() == InstanceEvent.BATCH_FINISHED) { if (inst != null) { // evaluate and notify boolean result = true; if (m_root != null) { result = m_root.evaluate(inst, true); } if (result) { if (m_indexOfTrueStep >= 0) { m_ie.setInstance(inst); ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie); } if (m_indexOfFalseStep >= 0) { m_ie.setInstance(null); ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie); } } else { if (m_indexOfFalseStep >= 0) { m_ie.setInstance(inst); ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie); } if (m_indexOfTrueStep >= 0) { m_ie.setInstance(null); ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie); } } } else { // notify both of end of stream m_ie.setInstance(null); if (m_indexOfTrueStep >= 0) { ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie); } if (m_indexOfFalseStep >= 0) { ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie); } } if (m_log != null) { m_log.statusMessage(statusMessagePrefix() + "Finished"); } } else { boolean result = true; if (m_root != null) { result = m_root.evaluate(inst, true); } m_ie.setInstance(inst); if (result) { if (m_indexOfTrueStep >= 0) { ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie); } } else { if 
(m_indexOfFalseStep >= 0) { ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie); } } } } m_busy = false; }
/** * Accept and process an instance event * * @param e an <code>InstanceEvent</code> value */ @Override public void acceptInstance(InstanceEvent e) { if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) { m_connectedFormat = e.getStructure(); m_stopRequested.set(false); try { init(new Instances(e.getStructure(), 0)); } catch (IllegalArgumentException ex) { if (m_log != null) { String message = "ERROR: There is a problem with the incoming instance structure"; // m_log.statusMessage(statusMessagePrefix() + message // + " - see log for details"); // m_log.logMessage(statusMessagePrefix() + message + " :" // + ex.getMessage()); stopWithErrorMessage(message, ex); // m_busy = false; return; } } String buffSize = m_bufferSize; try { buffSize = m_env.substitute(buffSize); m_bufferSizeI = Integer.parseInt(buffSize); } catch (Exception ex) { ex.printStackTrace(); } m_incrementalBuffer = new ArrayList<InstanceHolder>(m_bufferSizeI); m_bufferFiles = new ArrayList<File>(); m_streamCounter = 0; return; } m_busy = true; if (e.getInstance() != null) { if (m_streamCounter == 0) { if (m_log != null) { m_log.statusMessage(statusMessagePrefix() + "Starting streaming sort..."); m_log.logMessage( "[Sorter] " + statusMessagePrefix() + " Using streaming buffer size: " + m_bufferSizeI); } } InstanceHolder tempH = new InstanceHolder(); tempH.m_instance = e.getInstance(); tempH.m_fileNumber = -1; // unused here if (m_stringAttIndexes != null) { copyStringAttVals(tempH); } m_incrementalBuffer.add(tempH); m_streamCounter++; } if (e.getInstance() == null || e.getStatus() == InstanceEvent.BATCH_FINISHED) { emitBufferedInstances(); // thread will set busy to false and report done status when // complete return; } else if (m_incrementalBuffer.size() == m_bufferSizeI) { // time to sort and write this to a temp file try { sortBuffer(true); } catch (Exception ex) { String msg = statusMessagePrefix() + "ERROR: unable to write to temp file."; // if (m_log != null) { // m_log.statusMessage(msg); // 
m_log.logMessage("[" + getCustomName() + "] " + msg); // } stopWithErrorMessage(msg, ex); // ex.printStackTrace(); m_busy = false; return; } } m_busy = false; }