private void storeRowInCache(RowMetaInterface lookupMeta, Object[] lookupRow, Object[] add) { RowMetaAndData rowMetaAndData = new RowMetaAndData(lookupMeta, lookupRow); // DEinspanjer 2009-02-01 XXX: I want to write a test case to prove this point before checking // in. // /* Don't insert a row with a duplicate key into the cache. It doesn't seem // * to serve a useful purpose and can potentially cause the step to return // * different values over the life of the transformation (if the source DB rows change) // * Additionally, if using the load all data feature, re-inserting would reverse the order // * specified in the step. // */ // if (!data.look.containsKey(rowMetaAndData)) { // data.look.put(rowMetaAndData, new TimedRow(add)); // } data.look.put(rowMetaAndData, new TimedRow(add)); // See if we have to limit the cache_size. // Sample 10% of the rows in the cache. // Remove everything below the second lowest date. // That should on average remove more than 10% of the entries // It's not exact science, but it will be faster than the old algorithm // DEinspanjer 2009-02-01: If you had previously set a cache size and then turned on load all, // this // method would throw out entries if the previous cache size wasn't big enough. if (!meta.isLoadingAllDataInCache() && meta.getCacheSize() > 0 && data.look.size() > meta.getCacheSize()) { List<RowMetaAndData> keys = new ArrayList<RowMetaAndData>(data.look.keySet()); List<Date> samples = new ArrayList<Date>(); int incr = keys.size() / 10; if (incr == 0) { incr = 1; } for (int k = 0; k < keys.size(); k += incr) { RowMetaAndData key = keys.get(k); TimedRow timedRow = data.look.get(key); samples.add(timedRow.getLogDate()); } Collections.sort(samples); if (samples.size() > 1) { Date smallest = samples.get(1); // Everything below the smallest date goes away... for (int k = 0; k < keys.size(); k++) { RowMetaAndData key = keys.get(k); TimedRow timedRow = data.look.get(key); if (timedRow.getLogDate().compareTo(smallest) < 0) { data.look.remove(key); } } } } }
private Object[] getRowFromCache(RowMetaInterface lookupMeta, Object[] lookupRow) throws KettleException { if (data.allEquals) { // only do the hashtable lookup when all equals otherwise conditions >, <, <> will give wrong // results TimedRow timedRow = data.look.get(new RowMetaAndData(data.lookupMeta, lookupRow)); if (timedRow != null) { return timedRow.getRow(); } } else { // special handling of conditions <,>, <> etc. if (!data.hasDBCondition) { // e.g. LIKE not handled by this routine, yet // TODO: find an alternative way to look up the data based on the condition. // Not all conditions are "=" so we are going to have to evaluate row by row // A sorted list or index might be a good solution here... // Enumeration<RowMetaAndData> keys = data.look.keys(); while (keys.hasMoreElements()) { RowMetaAndData key = keys.nextElement(); // Now verify that the key is matching our conditions... // boolean match = true; int lookupIndex = 0; for (int i = 0; i < data.conditions.length && match; i++) { ValueMetaInterface cmpMeta = lookupMeta.getValueMeta(lookupIndex); Object cmpData = lookupRow[lookupIndex]; ValueMetaInterface keyMeta = key.getValueMeta(i); Object keyData = key.getData()[i]; switch (data.conditions[i]) { case DatabaseLookupMeta.CONDITION_EQ: match = (cmpMeta.compare(cmpData, keyMeta, keyData) == 0); break; case DatabaseLookupMeta.CONDITION_NE: match = (cmpMeta.compare(cmpData, keyMeta, keyData) != 0); break; case DatabaseLookupMeta.CONDITION_LT: match = (cmpMeta.compare(cmpData, keyMeta, keyData) > 0); break; case DatabaseLookupMeta.CONDITION_LE: match = (cmpMeta.compare(cmpData, keyMeta, keyData) >= 0); break; case DatabaseLookupMeta.CONDITION_GT: match = (cmpMeta.compare(cmpData, keyMeta, keyData) < 0); break; case DatabaseLookupMeta.CONDITION_GE: match = (cmpMeta.compare(cmpData, keyMeta, keyData) <= 0); break; case DatabaseLookupMeta.CONDITION_IS_NULL: match = keyMeta.isNull(keyData); break; case DatabaseLookupMeta.CONDITION_IS_NOT_NULL: match = !keyMeta.isNull(keyData); break; case DatabaseLookupMeta.CONDITION_BETWEEN: // Between key >= cmp && key <= cmp2 ValueMetaInterface cmpMeta2 = lookupMeta.getValueMeta(lookupIndex + 1); Object cmpData2 = lookupRow[lookupIndex + 1]; match = (keyMeta.compare(keyData, cmpMeta, cmpData) >= 0); if (match) { match = (keyMeta.compare(keyData, cmpMeta2, cmpData2) <= 0); } lookupIndex++; break; // TODO: add LIKE operator (think of changing the hasDBCondition logic then) default: match = false; data.hasDBCondition = true; // avoid looping in here the next time, also safety when a new condition // will be introduced break; } lookupIndex++; } if (match) { TimedRow timedRow = data.look.get(key); if (timedRow != null) { return timedRow.getRow(); } } } } } return null; }