示例#1
0
  /**
   * Sorts the in-memory buffer with {@code m_sortComparator} and, if requested,
   * serializes the sorted contents to a new temporary file.
   *
   * <p>
   * When {@code write} is true and no stop has been requested, a temp file is
   * created (in the configured temp directory if it resolves to a writable
   * directory, otherwise in the JVM default temp directory), registered in
   * {@code m_bufferFiles}, and every buffered holder is written to it. The
   * buffer is cleared afterwards. When {@code write} is false the buffer is
   * only sorted and left intact.
   *
   * @param write whether to write the sorted buffer to a temp file
   * @throws Exception if the temp file cannot be created or written
   */
  protected void sortBuffer(boolean write) throws Exception {

    String msg = statusMessagePrefix() + "Sorting in memory buffer....";
    if (m_log != null) {
      m_log.statusMessage(msg);
      m_log.logMessage("[" + getCustomName() + "] " + msg);
    }

    Collections.sort(m_incrementalBuffer, m_sortComparator);

    if (!write) {
      return;
    }

    if (!m_stopRequested.get()) {

      // Resolve the user-configured temp directory; null means "use the
      // default temp directory" when handed to File.createTempFile().
      File targetDir = null;
      String tmpDir = m_tempDirectory;
      if (tmpDir != null && tmpDir.length() > 0) {
        try {
          tmpDir = m_env.substitute(tmpDir);

          File candidate = new File(tmpDir);
          if (candidate.isDirectory() && candidate.canWrite()) {
            targetDir = candidate;
          }
        } catch (Exception ex) {
          // best effort: fall back to the default temp directory, but say so
          if (m_log != null) {
            m_log.logMessage("[" + getCustomName() + "] " + statusMessagePrefix()
              + "WARNING: unable to resolve temp directory '" + m_tempDirectory
              + "' (" + ex.getMessage() + ") - using the default temp directory");
          }
        }
      }

      // Create the file directly in its final location so that no orphaned
      // empty file is left behind in the default temp directory.
      File tempFile = File.createTempFile("Sorter", ".tmp", targetDir);
      tempFile.deleteOnExit();

      m_bufferFiles.add(tempFile);

      msg = statusMessagePrefix() + "Writing buffer to temp file " + m_bufferFiles.size() + "...";
      if (m_log != null) {
        m_log.statusMessage(msg);
        m_log.logMessage("[" + getCustomName() + "] " + msg);
      }

      // Guard against a zero divisor when the buffer size is smaller than 10.
      int resetInterval = Math.max(1, m_bufferSizeI / 10);

      FileOutputStream fos = new FileOutputStream(tempFile);
      ObjectOutputStream oos = null;
      try {
        oos = new ObjectOutputStream(new BufferedOutputStream(fos, 50000));

        for (int i = 0; i < m_incrementalBuffer.size(); i++) {
          InstanceHolder temp = m_incrementalBuffer.get(i);
          temp.m_instance.setDataset(null);
          oos.writeObject(temp);
          if (i % resetInterval == 0) {
            // periodically drop the stream's back-reference table
            oos.reset();
          }
        }

        oos.flush();
      } finally {
        // always release the file handle, even if a write failed
        if (oos != null) {
          oos.close();
        } else {
          fos.close();
        }
      }
    }
    m_incrementalBuffer.clear();
  }
示例#2
0
  /**
   * Accept and process a data set event.
   *
   * <p>
   * A structure-only event is forwarded to the data listeners unchanged.
   * Otherwise the instances of the incoming data set are sorted with
   * {@code m_sortComparator} and a new data set holding them in sorted order
   * is passed to the data listeners. If {@code init} rejects the incoming
   * structure, the step is stopped with an error message and nothing is
   * forwarded.
   *
   * @param e a <code>DataSetEvent</code> value
   */
  @Override
  public void acceptDataSet(DataSetEvent e) {
    m_busy = true;
    m_stopRequested.set(false);

    try {
      Instances data = e.getDataSet();

      if (m_log != null && data.numInstances() > 0) {
        m_log.statusMessage(statusMessagePrefix() + "Sorting batch...");
      }

      if (e.isStructureOnly()) {
        // nothing to sort - just notify listeners of structure
        notifyDataListeners(new DataSetEvent(this, data));
        return;
      }

      try {
        init(new Instances(data, 0));
      } catch (IllegalArgumentException ex) {
        // Stop regardless of whether a log is attached; stopWithErrorMessage
        // performs its own null check before logging.
        stopWithErrorMessage("ERROR: There is a problem with the incoming instance structure", ex);
        return;
      }

      List<InstanceHolder> instances = new ArrayList<InstanceHolder>(data.numInstances());
      for (int i = 0; i < data.numInstances(); i++) {
        InstanceHolder h = new InstanceHolder();
        h.m_instance = data.instance(i);
        instances.add(h);
      }
      Collections.sort(instances, m_sortComparator);

      Instances output = new Instances(data, 0);
      for (int i = 0; i < instances.size(); i++) {
        output.add(instances.get(i).m_instance);
      }

      notifyDataListeners(new DataSetEvent(this, output));

      if (m_log != null) {
        m_log.statusMessage(statusMessagePrefix() + "Finished.");
      }
    } finally {
      // never leave the step flagged as busy, even if sorting or a listener throws
      m_busy = false;
    }
  }
示例#3
0
  /**
   * Accept a batch of instances and split it between the "true" and "false"
   * downstream steps.
   *
   * <p>
   * After {@code init} has been called with the batch header, every instance
   * is evaluated against {@code m_root}; instances that pass are collected for
   * the true step, the rest for the false step (each only if that step is
   * connected, i.e. its index is non-negative). If there is no expression
   * ({@code m_root} is null) the incoming data set is handed to the true step
   * as is.
   *
   * @param e the data set event to process
   */
  @Override
  public void acceptDataSet(DataSetEvent e) {

    m_busy = true;
    if (m_log != null && !e.isStructureOnly()) {
      m_log.statusMessage(statusMessagePrefix() + "Processing batch...");
    }

    init(new Instances(e.getDataSet(), 0));

    if (m_root == null) {
      // no expression to test: everything goes to the true step
      if (m_indexOfTrueStep >= 0) {
        DataSetEvent passThrough = new DataSetEvent(this, e.getDataSet());
        ((DataSourceListener) m_downstream[m_indexOfTrueStep]).acceptDataSet(passThrough);
      }
    } else {
      Instances accepted = new Instances(e.getDataSet(), 0);
      Instances rejected = new Instances(e.getDataSet(), 0);

      for (int idx = 0; idx < e.getDataSet().numInstances(); idx++) {
        Instance candidate = e.getDataSet().instance(idx);

        if (m_root.evaluate(candidate, true)) {
          if (m_indexOfTrueStep >= 0) {
            accepted.add(candidate);
          }
        } else if (m_indexOfFalseStep >= 0) {
          rejected.add(candidate);
        }
      }

      if (m_indexOfTrueStep >= 0) {
        DataSetEvent trueEvent = new DataSetEvent(this, accepted);
        ((DataSourceListener) m_downstream[m_indexOfTrueStep]).acceptDataSet(trueEvent);
      }

      if (m_indexOfFalseStep >= 0) {
        DataSetEvent falseEvent = new DataSetEvent(this, rejected);
        ((DataSourceListener) m_downstream[m_indexOfFalseStep]).acceptDataSet(falseEvent);
      }
    }

    if (m_log != null && !e.isStructureOnly()) {
      m_log.statusMessage(statusMessagePrefix() + "Finished");
    }

    m_busy = false;
  }
示例#4
0
 /**
  * Calls {@link #stop()} and then, if a log is attached, reports the error:
  * a short status message pointing to the log, plus a log entry that carries
  * the exception's message when an exception is supplied.
  *
  * @param error the error message to print
  * @param ex the optional exception (may be null)
  */
 protected void stopWithErrorMessage(String error, Exception ex) {
   stop();

   if (m_log == null) {
     // nowhere to report to
     return;
   }

   m_log.statusMessage(statusMessagePrefix() + error + " - see log for details");

   String detail = "";
   if (ex != null) {
     detail = " " + ex.getMessage();
   }
   m_log.logMessage(statusMessagePrefix() + error + detail);
 }
示例#5
0
  /**
   * Stops this step: forwards the stop request to the listenee when it is a
   * {@code BeanCommon}, reports "Stopped" to the log (if any) and clears the
   * busy flag.
   */
  @Override
  public void stop() {
    // instanceof is false for null, so this also covers a missing listenee
    if (m_listenee instanceof BeanCommon) {
      BeanCommon upstream = (BeanCommon) m_listenee;
      upstream.stop();
    }

    if (m_log != null) {
      String status = statusMessagePrefix() + "Stopped";
      m_log.statusMessage(status);
    }

    m_busy = false;
  }
示例#6
0
  /**
   * Accept a streamed instance event and route it to the "true" or "false"
   * downstream step.
   *
   * <p>
   * Three kinds of event are handled:
   * <ul>
   * <li>format available: {@code init} is called with the structure and the
   * structure is forwarded to both connected steps;</li>
   * <li>end of stream (null instance or {@code BATCH_FINISHED} status): a
   * final instance, if present, goes to the step selected by the expression
   * while the other step receives a null instance; with no final instance
   * both steps receive a null instance;</li>
   * <li>any other event: the instance is forwarded only to the step selected
   * by the expression.</li>
   * </ul>
   * With no expression ({@code m_root} is null) every instance counts as a
   * match. A step is only notified when its index is non-negative.
   *
   * @param e the instance event to process
   */
  @Override
  public void acceptInstance(InstanceEvent e) {
    m_busy = true;

    if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) {
      Instances header = e.getStructure();
      init(header);

      if (m_log != null) {
        m_log.statusMessage(statusMessagePrefix() + "Processing stream...");
      }

      // notify listeners of structure
      m_ie.setStructure(header);
      if (m_indexOfTrueStep >= 0) {
        ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie);
      }
      if (m_indexOfFalseStep >= 0) {
        ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie);
      }

      m_busy = false;
      return;
    }

    Instance incoming = e.getInstance();
    m_ie.setStatus(e.getStatus());

    boolean lastEvent = incoming == null || e.getStatus() == InstanceEvent.BATCH_FINISHED;

    if (!lastEvent) {
      // ordinary mid-stream instance: forward to the selected step only
      boolean matches = m_root == null || m_root.evaluate(incoming, true);
      m_ie.setInstance(incoming);

      int target = matches ? m_indexOfTrueStep : m_indexOfFalseStep;
      if (target >= 0) {
        ((InstanceListener) m_downstream[target]).acceptInstance(m_ie);
      }

      m_busy = false;
      return;
    }

    if (incoming == null) {
      // notify both of end of stream
      m_ie.setInstance(null);
      if (m_indexOfTrueStep >= 0) {
        ((InstanceListener) m_downstream[m_indexOfTrueStep]).acceptInstance(m_ie);
      }
      if (m_indexOfFalseStep >= 0) {
        ((InstanceListener) m_downstream[m_indexOfFalseStep]).acceptInstance(m_ie);
      }
    } else {
      // final instance: selected step gets it, the other gets a null instance
      boolean matches = m_root == null || m_root.evaluate(incoming, true);

      int selected = matches ? m_indexOfTrueStep : m_indexOfFalseStep;
      int other = matches ? m_indexOfFalseStep : m_indexOfTrueStep;

      if (selected >= 0) {
        m_ie.setInstance(incoming);
        ((InstanceListener) m_downstream[selected]).acceptInstance(m_ie);
      }
      if (other >= 0) {
        m_ie.setInstance(null);
        ((InstanceListener) m_downstream[other]).acceptInstance(m_ie);
      }
    }

    if (m_log != null) {
      m_log.statusMessage(statusMessagePrefix() + "Finished");
    }

    m_busy = false;
  }
示例#7
0
  /**
   * Accept and process an instance event.
   *
   * <p>
   * A format-available event resets the stop flag, calls {@code init} with the
   * new structure, parses the configured buffer size and allocates a fresh
   * in-memory buffer and temp-file list. Subsequent instances are appended to
   * the buffer; when the buffer reaches {@code m_bufferSizeI} entries it is
   * sorted and written to a temp file via {@code sortBuffer(true)}. A null
   * instance or a {@code BATCH_FINISHED} status triggers
   * {@code emitBufferedInstances()}.
   *
   * @param e an <code>InstanceEvent</code> value
   */
  @Override
  public void acceptInstance(InstanceEvent e) {

    if (e.getStatus() == InstanceEvent.FORMAT_AVAILABLE) {
      m_connectedFormat = e.getStructure();
      m_stopRequested.set(false);
      try {
        init(new Instances(e.getStructure(), 0));
      } catch (IllegalArgumentException ex) {
        // Stop regardless of whether a log is attached; stopWithErrorMessage
        // performs its own null check before logging.
        stopWithErrorMessage("ERROR: There is a problem with the incoming instance structure", ex);
        return;
      }

      String buffSize = m_bufferSize;
      try {
        buffSize = m_env.substitute(buffSize);
        m_bufferSizeI = Integer.parseInt(buffSize);
      } catch (Exception ex) {
        // m_bufferSizeI is not assigned on failure, so its previous value is used below
        ex.printStackTrace();
      }
      m_incrementalBuffer = new ArrayList<InstanceHolder>(m_bufferSizeI);
      m_bufferFiles = new ArrayList<File>();
      m_streamCounter = 0;

      return;
    }

    m_busy = true;

    if (e.getInstance() != null) {
      if (m_streamCounter == 0) {
        if (m_log != null) {
          m_log.statusMessage(statusMessagePrefix() + "Starting streaming sort...");
          m_log.logMessage(
              "[Sorter] "
                  + statusMessagePrefix()
                  + " Using streaming buffer size: "
                  + m_bufferSizeI);
        }
      }

      InstanceHolder tempH = new InstanceHolder();
      tempH.m_instance = e.getInstance();
      tempH.m_fileNumber = -1; // unused here
      if (m_stringAttIndexes != null) {
        copyStringAttVals(tempH);
      }
      m_incrementalBuffer.add(tempH);
      m_streamCounter++;
    }

    if (e.getInstance() == null || e.getStatus() == InstanceEvent.BATCH_FINISHED) {
      emitBufferedInstances();
      // thread will set busy to false and report done status when
      // complete
      return;
    } else if (m_incrementalBuffer.size() == m_bufferSizeI) {
      // time to sort and write this to a temp file
      try {
        sortBuffer(true);
      } catch (Exception ex) {
        // stopWithErrorMessage adds the status prefix itself, so pass the bare
        // message to avoid a doubled prefix in the status line and log
        stopWithErrorMessage("ERROR: unable to write to temp file.", ex);
        m_busy = false;
        return;
      }
    }

    m_busy = false;
  }