public boolean init(StepMetaInterface sii, StepDataInterface sdi) { if (super.init(sii, sdi)) { data.realSchemaName = environmentSubstitute(meta.getSchemaName()); data.realTableName = environmentSubstitute(meta.getTablename()); if (meta.getCacheSize() > 0) { data.cache = new HashMap<RowMetaAndData, Long>((int) (meta.getCacheSize() * 1.5)); } else { data.cache = new HashMap<RowMetaAndData, Long>(); } if (meta.getDatabaseMeta() == null) { logError( BaseMessages.getString(PKG, "CombinationLookup.Init.ConnectionMissing", getStepname())); return false; } data.db = new Database(this, meta.getDatabaseMeta()); data.db.shareVariablesWith(this); try { if (getTransMeta().isUsingUniqueConnections()) { synchronized (getTrans()) { data.db.connect(getTrans().getThreadName(), getPartitionID()); } } else { data.db.connect(getPartitionID()); } if (log.isDetailed()) logDetailed( BaseMessages.getString(PKG, "CombinationLookup.Log.ConnectedToDB")); // $NON-NLS-1$ data.db.setCommit(meta.getCommitSize()); return true; } catch (KettleDatabaseException dbe) { logError( BaseMessages.getString(PKG, "CombinationLookup.Log.UnableToConnectDB") + dbe.getMessage()); // $NON-NLS-1$ } } return false; }
/** This inserts new record into a junk dimension */ public Long combiInsert(RowMetaInterface rowMeta, Object[] row, Long val_key, Long val_crc) throws KettleDatabaseException { String debug = "Combination insert"; DatabaseMeta databaseMeta = meta.getDatabaseMeta(); try { if (data.prepStatementInsert == null) // first time: construct prepared statement { debug = "First: construct prepared statement"; data.insertRowMeta = new RowMeta(); /* Construct the SQL statement... * * INSERT INTO * d_test(keyfield, [crcfield,] keylookup[]) * VALUES(val_key, [val_crc], row values with keynrs[]) * ; */ String sql = ""; sql += "INSERT INTO " + data.schemaTable + ("( "); boolean comma = false; if (!isAutoIncrement()) // NO AUTOINCREMENT { sql += databaseMeta.quoteField(meta.getTechnicalKeyField()); data.insertRowMeta.addValueMeta( new ValueMeta(meta.getTechnicalKeyField(), ValueMetaInterface.TYPE_INTEGER)); comma = true; } else if (databaseMeta.needsPlaceHolder()) { sql += "0"; // placeholder on informix! Will be replaced in table by real autoinc value. data.insertRowMeta.addValueMeta( new ValueMeta(meta.getTechnicalKeyField(), ValueMetaInterface.TYPE_INTEGER)); comma = true; } if (meta.useHash()) { if (comma) sql += ", "; sql += databaseMeta.quoteField(meta.getHashField()); data.insertRowMeta.addValueMeta( new ValueMeta(meta.getHashField(), ValueMetaInterface.TYPE_INTEGER)); comma = true; } if (!Const.isEmpty(meta.getLastUpdateField())) { if (comma) sql += ", "; sql += databaseMeta.quoteField(meta.getLastUpdateField()); data.insertRowMeta.addValueMeta( new ValueMeta(meta.getLastUpdateField(), ValueMetaInterface.TYPE_DATE)); comma = true; } for (int i = 0; i < meta.getKeyLookup().length; i++) { if (comma) sql += ", "; sql += databaseMeta.quoteField(meta.getKeyLookup()[i]); data.insertRowMeta.addValueMeta(rowMeta.getValueMeta(data.keynrs[i])); comma = true; } sql += ") VALUES ("; comma = false; if (!isAutoIncrement()) { sql += '?'; comma = true; } if (meta.useHash()) { if (comma) sql += ','; sql += '?'; comma = true; } if (!Const.isEmpty(meta.getLastUpdateField())) { if (comma) sql += ','; sql += '?'; comma = true; } for (int i = 0; i < meta.getKeyLookup().length; i++) { if (comma) sql += ','; else comma = true; sql += '?'; } sql += " )"; String sqlStatement = sql; try { debug = "First: prepare statement"; if (isAutoIncrement()) { logDetailed("SQL with return keys: " + sqlStatement); data.prepStatementInsert = data.db .getConnection() .prepareStatement( databaseMeta.stripCR(sqlStatement), Statement.RETURN_GENERATED_KEYS); } else { logDetailed("SQL without return keys: " + sqlStatement); data.prepStatementInsert = data.db.getConnection().prepareStatement(databaseMeta.stripCR(sqlStatement)); } } catch (SQLException ex) { throw new KettleDatabaseException( "Unable to prepare combi insert statement : " + Const.CR + sqlStatement, ex); } catch (Exception ex) { throw new KettleDatabaseException( "Unable to prepare combi insert statement : " + Const.CR + sqlStatement, ex); } } debug = "Create new insert row rins"; Object[] insertRow = new Object[data.insertRowMeta.size()]; int insertIndex = 0; if (!isAutoIncrement()) { insertRow[insertIndex] = val_key; insertIndex++; } if (meta.useHash()) { insertRow[insertIndex] = val_crc; insertIndex++; } if (!Const.isEmpty(meta.getLastUpdateField())) { insertRow[insertIndex] = new Date(); insertIndex++; } for (int i = 0; i < data.keynrs.length; i++) { insertRow[insertIndex] = row[data.keynrs[i]]; insertIndex++; } if (log.isRowLevel()) logRowlevel("rins=" + data.insertRowMeta.getString(insertRow)); debug = "Set values on insert"; // INSERT NEW VALUE! data.db.setValues(data.insertRowMeta, insertRow, data.prepStatementInsert); debug = "Insert row"; data.db.insertRow(data.prepStatementInsert); debug = "Retrieve key"; if (isAutoIncrement()) { ResultSet keys = null; try { keys = data.prepStatementInsert.getGeneratedKeys(); // 1 key if (keys.next()) val_key = new Long(keys.getLong(1)); else { throw new KettleDatabaseException( "Unable to retrieve auto-increment of combi insert key : " + meta.getTechnicalKeyField() + ", no fields in resultset"); } } catch (SQLException ex) { throw new KettleDatabaseException( "Unable to retrieve auto-increment of combi insert key : " + meta.getTechnicalKeyField(), ex); } finally { try { if (keys != null) keys.close(); } catch (SQLException ex) { throw new KettleDatabaseException( "Unable to retrieve auto-increment of combi insert key : " + meta.getTechnicalKeyField(), ex); } } } } catch (Exception e) { logError(Const.getStackTracker(e)); throw new KettleDatabaseException( "Unexpected error in combination insert in part [" + debug + "] : " + e.toString(), e); } return val_key; }
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { Object[] r = getRow(); // Get row from input rowset & set row busy! if (r == null) // no more input to be expected... { setOutputDone(); return false; } if (first) { first = false; data.outputRowMeta = getInputRowMeta().clone(); meta.getFields(data.outputRowMeta, getStepname(), null, null, this); data.schemaTable = meta.getDatabaseMeta() .getQuotedSchemaTableCombination(data.realSchemaName, data.realTableName); determineTechKeyCreation(); // The indexes of the key values... // data.keynrs = new int[meta.getKeyField().length]; for (int i = 0; i < meta.getKeyField().length; i++) { data.keynrs[i] = getInputRowMeta().indexOfValue(meta.getKeyField()[i]); if (data.keynrs[i] < 0) // couldn't find field! { throw new KettleStepException( BaseMessages.getString( PKG, "CombinationLookup.Exception.FieldNotFound", meta.getKeyField()[i])); // $NON-NLS-1$ //$NON-NLS-2$ } } // Determine for each input field if we want it removed or not. // data.removeField = new boolean[getInputRowMeta().size()]; // Sort lookup values keys so that we // for (int i = 0; i < getInputRowMeta().size(); i++) { ValueMetaInterface valueMeta = getInputRowMeta().getValueMeta(i); // Is this one of the keys? int idx = Const.indexOfString(valueMeta.getName(), meta.getKeyField()); data.removeField[i] = idx >= 0; } // Determine the metadata row to calculate hashcodes. // data.hashRowMeta = new RowMeta(); for (int i = 0; i < meta.getKeyField().length; i++) { data.hashRowMeta.addValueMeta(getInputRowMeta().getValueMeta(data.keynrs[i])); // KEYi = ? } setCombiLookup(getInputRowMeta()); } try { Object[] outputRow = lookupValues(getInputRowMeta(), r); // add new values to the row in rowset[0]. putRow(data.outputRowMeta, outputRow); // copy row to output rowset(s); if (checkFeedback(getLinesRead())) { if (log.isBasic()) logBasic( BaseMessages.getString(PKG, "CombinationLookup.Log.LineNumber") + getLinesRead()); //$NON-NLS-1$ } } catch (KettleException e) { if (getStepMeta().isDoingErrorHandling()) { putError(getInputRowMeta(), r, 1L, Const.getStackTracker(e), null, "CBL001"); } else { logError( BaseMessages.getString(PKG, "CombinationLookup.Log.ErrorInStepRunning") + e.getMessage()); // $NON-NLS-1$ setErrors(1); stopAll(); setOutputDone(); // signal end to receiver(s) return false; } } return true; }
/** * CombinationLookup table: dimension table keys[]: which dim-fields do we use to look up key? * retval: name of the key to return */ public void setCombiLookup(RowMetaInterface inputRowMeta) throws KettleDatabaseException { DatabaseMeta databaseMeta = meta.getDatabaseMeta(); String sql = ""; boolean comma; data.lookupRowMeta = new RowMeta(); /* * SELECT <retval> * FROM <table> * WHERE ( ( <key1> = ? ) OR ( <key1> IS NULL AND ? IS NULL ) ) * AND ( ( <key2> = ? ) OR ( <key1> IS NULL AND ? IS NULL ) ) * ... * ; * * OR * * SELECT <retval> * FROM <table> * WHERE <crcfield> = ? * AND ( ( <key1> = ? ) OR ( <key1> IS NULL AND ? IS NULL ) ) * AND ( ( <key2> = ? ) OR ( <key1> IS NULL AND ? IS NULL ) ) * ... * ; * */ sql += "SELECT " + databaseMeta.quoteField(meta.getTechnicalKeyField()) + Const.CR; sql += "FROM " + data.schemaTable + Const.CR; sql += "WHERE "; comma = false; if (meta.useHash()) { sql += databaseMeta.quoteField(meta.getHashField()) + " = ? " + Const.CR; comma = true; data.lookupRowMeta.addValueMeta( new ValueMeta(meta.getHashField(), ValueMetaInterface.TYPE_INTEGER)); } else { sql += "( ( "; } for (int i = 0; i < meta.getKeyLookup().length; i++) { if (comma) { sql += " AND ( ( "; } else { comma = true; } sql += databaseMeta.quoteField(meta.getKeyLookup()[i]) + " = ? ) OR ( " + databaseMeta.quoteField(meta.getKeyLookup()[i]); data.lookupRowMeta.addValueMeta(inputRowMeta.getValueMeta(data.keynrs[i])); sql += " IS NULL AND "; if (databaseMeta.requiresCastToVariousForIsNull()) { sql += "CAST(? AS VARCHAR(256)) IS NULL"; } else { sql += "? IS NULL"; } // Add the ValueMeta for the null check, BUT cloning needed. // Otherwise the field gets renamed and gives problems when referenced by previous steps. data.lookupRowMeta.addValueMeta(inputRowMeta.getValueMeta(data.keynrs[i]).clone()); sql += " ) )"; sql += Const.CR; } try { if (log.isDebug()) logDebug("preparing combi-lookup statement:" + Const.CR + sql); data.prepStatementLookup = data.db.getConnection().prepareStatement(databaseMeta.stripCR(sql)); if (databaseMeta.supportsSetMaxRows()) { data.prepStatementLookup.setMaxRows(1); // alywas get only 1 line back! } } catch (SQLException ex) { throw new KettleDatabaseException("Unable to prepare combi-lookup statement", ex); } }
/** * Adds a row to the cache In case we are doing updates, we need to store the complete rows from * the database. These are the values we need to store * * <p>Key: - natural key fields Value: - Technical key - lookup fields / extra fields (allows us * to compare or retrieve) - Date_from - Date_to * * @param row * @param returnValues * @throws KettleValueException */ private void addToCache(RowMetaInterface rowMeta, Object[] row, Long tk) throws KettleValueException { // Short circuit if cache is disabled. if (meta.getCacheSize() == -1) return; // store it in the cache if needed. data.cache.put(new RowMetaAndData(rowMeta, row), tk); // check if the size is not too big... // Allow for a buffer overrun of 20% and then remove those 20% in one go. // Just to keep performance in track. // int tenPercent = meta.getCacheSize() / 10; if (meta.getCacheSize() > 0 && data.cache.size() > meta.getCacheSize() + tenPercent) { // Which cache entries do we delete here? // We delete those with the lowest technical key... // Those would arguably be the "oldest" dimension entries. // Oh well... Nothing is going to be perfect here... // // Getting the lowest 20% requires some kind of sorting algorithm and I'm not sure we want to // do that. // Sorting is slow and even in the best case situation we need to do 2 passes over the cache // entries... // // Perhaps we should get 20% random values and delete everything below the lowest but one TK. // List<RowMetaAndData> keys = new ArrayList<RowMetaAndData>(data.cache.keySet()); int sizeBefore = keys.size(); List<Long> samples = new ArrayList<Long>(); // Take 10 sample technical keys.... int stepsize = keys.size() / 5; if (stepsize < 1) stepsize = 1; // make sure we have no endless loop for (int i = 0; i < keys.size(); i += stepsize) { RowMetaAndData key = (RowMetaAndData) keys.get(i); Long value = (Long) data.cache.get(key); if (value != null) { samples.add(value); } } // Sort these 5 elements... Collections.sort(samples); // What is the smallest? // Take the second, not the fist in the list, otherwise we would be removing a single entry = // not good. if (samples.size() > 1) { data.smallestCacheKey = ((Long) samples.get(1)).longValue(); } else { // except when there is only one sample data.smallestCacheKey = ((Long) samples.get(0)).longValue(); } // Remove anything in the cache <= smallest. // This makes it almost single pass... // This algorithm is not 100% correct, but I guess it beats sorting the whole cache all the // time. // for (int i = 0; i < keys.size(); i++) { RowMetaAndData key = (RowMetaAndData) keys.get(i); Long value = (Long) data.cache.get(key); if (value != null) { if (value.longValue() <= data.smallestCacheKey) { data.cache.remove(key); // this one has to go. } } } int sizeAfter = data.cache.size(); logDetailed("Reduced the lookup cache from " + sizeBefore + " to " + sizeAfter + " rows."); } if (log.isRowLevel()) logRowlevel("Cache store: key=" + rowMeta.getString(row) + " key=" + tk); }