/**
 * Initialize the step.
 *
 * <p>Stores the supplied meta and data objects on this step, delegates to the
 * superclass initialization and, when that succeeds, switches the data object
 * to <code>PROC_MODE.DISABLED</code> if the transformation reports no previous
 * steps for this step.
 *
 * @param smi a <code>StepMetaInterface</code> value
 * @param sdi a <code>StepDataInterface</code> value
 * @return <code>true</code> if the superclass initialization succeeded,
 *         <code>false</code> otherwise
 */
public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
  m_meta = (ReservoirSamplingMeta) smi;
  m_data = (ReservoirSamplingData) sdi;

  // Nothing more to set up when the base class could not initialize.
  if (!super.init(smi, sdi)) {
    return false;
  }

  List<StepMeta> upstreamSteps = getTransMeta().findPreviousSteps(getStepMeta());
  boolean hasUpstream = upstreamSteps != null && !upstreamSteps.isEmpty();
  if (!hasUpstream) {
    // No previous steps were found, so mark the step as disabled.
    m_data.setProcessingMode(PROC_MODE.DISABLED);
  }
  return true;
}
/** * Process an incoming row of data. * * @param smi a <code>StepMetaInterface</code> value * @param sdi a <code>StepDataInterface</code> value * @return a <code>boolean</code> value * @exception KettleException if an error occurs */ public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { if (m_data.getProcessingMode() == PROC_MODE.DISABLED) { setOutputDone(); m_data.cleanUp(); return (false); } m_meta = (ReservoirSamplingMeta) smi; m_data = (ReservoirSamplingData) sdi; Object[] r = getRow(); // Handle the first row if (first) { first = false; if (r == null) // no input to be expected... { setOutputDone(); return false; } // Initialize the data object m_data.setOutputRowMeta(getInputRowMeta().clone()); String sampleSize = getTransMeta().environmentSubstitute(m_meta.getSampleSize()); String seed = getTransMeta().environmentSubstitute(m_meta.getSeed()); m_data.initialize(Integer.valueOf(sampleSize), Integer.valueOf(seed)); // no real reason to determine the output fields here // as we don't add/delete any fields } // end (if first) if (m_data.getProcessingMode() == PROC_MODE.PASSTHROUGH) { if (r == null) { setOutputDone(); m_data.cleanUp(); return (false); } putRow(m_data.getOutputRowMeta(), r); } else if (m_data.getProcessingMode() == PROC_MODE.SAMPLING) { if (r == null) { // Output the rows in the sample List<Object[]> samples = m_data.getSample(); int numRows = (samples != null) ? samples.size() : 0; logBasic( this.getStepname() + " Actual/Sample: " + numRows + "/" + m_data.m_k + " Seed:" + getTransMeta().environmentSubstitute(m_meta.m_randomSeed)); if (samples != null) { for (int i = 0; i < samples.size(); i++) { Object[] sample = samples.get(i); if (sample != null) { putRow(m_data.getOutputRowMeta(), sample); } else { // user probably requested more rows in // the sample than there were in total // in the end. 
Just break in this case break; } } } setOutputDone(); m_data.cleanUp(); return false; } // just pass the row to the data class for possible caching // in the sample m_data.processRow(r); } if (log.isRowLevel()) { log.logRowlevel(toString(), "Read row #" + getLinesRead() + " : " + r); } if (checkFeedback(getLinesRead())) { logBasic("Line number " + getLinesRead()); } return true; }