/** Add the record to the index. */ public void index(Record record) { // FIXME: check if record is already indexed // allocate an ID for this record long id = store.makeNewRecordId(); store.registerRecord(id, record); // go through ID properties and register them for (Property p : config.getIdentityProperties()) for (String extid : record.getValues(p.getName())) store.registerId(id, extid); // go through lookup properties and register those for (Property p : config.getLookupProperties()) { String propname = p.getName(); for (String value : record.getValues(propname)) { String[] tokens = StringUtils.split(value); for (int ix = 0; ix < tokens.length; ix++) store.registerToken(id, propname, tokens[ix]); } } }
/**
 * Checks which properties a validated configuration reports as lookup
 * properties when no property overrides the default lookup behaviour.
 *
 * <p>Three properties are configured: "ID" (built with the name-only
 * constructor), "NAME" (lookup behaviour left untouched) and "EMAIL"
 * (lookup behaviour explicitly set to {@code Property.Lookup.DEFAULT}).
 * The test expects exactly NAME and EMAIL to be returned by
 * {@code getLookupProperties()}.
 */
@Test
public void testLookupPropertiesDefault() throws IOException {
  ExactComparator comp = new ExactComparator();

  // parameterized rather than raw, to avoid an unchecked conversion
  List<Property> props = new ArrayList<Property>();
  props.add(new PropertyImpl("ID"));
  Property name = new PropertyImpl("NAME", comp, 0.3, 0.8);
  props.add(name);
  Property email = new PropertyImpl("EMAIL", comp, 0.3, 0.8);
  email.setLookupBehaviour(Property.Lookup.DEFAULT);
  props.add(email);

  ConfigurationImpl config = new ConfigurationImpl();
  config.setThreshold(0.85);
  config.setProperties(props);

  config.validate();

  Collection<Property> lookups = config.getLookupProperties();
  assertEquals(2, lookups.size());
  assertTrue(lookups.contains(name));
  assertTrue(lookups.contains(email));
}
/**
 * Tokenizes lookup fields and returns all matching buckets in the index.
 *
 * <p>For each lookup property in the configuration, every value the
 * record holds for that property is split with {@code StringUtils.split}
 * and each token is looked up in the store. Tokens with no bucket, or
 * whose bucket has a {@code null} record array, are skipped. Buckets are
 * appended in the order they are found; no de-duplication is performed
 * here, so one bucket is added per matching token occurrence.
 *
 * @param record the record whose lookup property values are tokenized
 * @return the buckets found, possibly empty, never {@code null}
 */
private List<Bucket> lookup(Record record) {
  List<Bucket> buckets = new ArrayList<Bucket>();
  for (Property p : config.getLookupProperties()) {
    String propname = p.getName();
    Collection<String> values = record.getValues(propname);
    if (values == null)
      continue; // record has no values for this property

    for (String value : values) {
      for (String token : StringUtils.split(value)) {
        Bucket b = store.lookupToken(propname, token);
        if (b == null || b.records == null)
          continue; // nothing indexed under this token
        if (DEBUG)
          System.out.println(propname + ", " + token + ": " +
                             b.nextfree + " (" + b.getScore() + ")");
        buckets.add(b);
      }
    }
  }
  return buckets;
}