/** Compute number of correct pairs betwn src1 and src2, where src2>src1 */ protected int countCorrectPairs(MatchData data) { // count the number of times each id appears in each source */ Map counter = new HashMap(); for (int i = 0; i < data.numSources(); i++) { String src = data.getSource(i); for (int j = 0; j < data.numInstances(src); j++) { String id = data.getInstance(src, j).getId(); if (id != null) { IdKey key = new IdKey(id, src); Integer c = (Integer) counter.get(key); counter.put(key, (c == null ? new Integer(1) : new Integer(c.intValue() + 1))); } } } /* // show the counter for (Iterator i=counter.keySet().iterator(); i.hasNext(); ) { IdKey key = (IdKey) i.next(); System.out.println( key.src+"#"+key.id+" = "+counter.get(key) ); } */ // count the number of correct pairs int numCorrectPairs = 0; Set idsInSrc1 = new HashSet(); for (int i = 0; i < data.numSources(); i++) { String src1 = data.getSource(i); idsInSrc1.clear(); for (int j = 0; j < data.numInstances(src1); j++) { String id = data.getInstance(src1, j).getId(); idsInSrc1.add(id); for (int k = i + 1; k < data.numSources(); k++) { String src2 = data.getSource(k); Integer cInteger = (Integer) counter.get(new IdKey(id, src2)); if (cInteger != null) { numCorrectPairs += cInteger.intValue(); } // System.out.println( "src1:"+src1+" id:"+id+" src2:"+src2+" c:"+cInteger); } } if (clusterMode) { // count how often something in src1 can be matched correctly with something // else in src1 for (Iterator j = idsInSrc1.iterator(); j.hasNext(); ) { String id = (String) j.next(); Integer cInteger = (Integer) counter.get(new IdKey(id, src1)); int c = cInteger.intValue(); numCorrectPairs += c * (c - 1) / 2; } } } return numCorrectPairs; }
/**
 * Returns the instance at the current cursor position and moves the cursor
 * forward by one.
 *
 * <p>After the read, the cursor rolls over to the start of the following
 * source once the instance cursor has run past the current source. With
 * {@code KEEP_OLD_ITERATION_BUG} set, the rollover test is the strict
 * comparison {@code instanceCursor > numInstances}; otherwise it is
 * {@code instanceCursor >= numInstances}.
 *
 * @return the instance that was at the cursor position, as an Object
 */
public Object next() {
    Instance current = data.getInstance(src, instanceCursor++);

    // One rollover test; only the comparison operator depends on the flag.
    int available = data.numInstances(src);
    boolean pastEnd;
    if (KEEP_OLD_ITERATION_BUG) {
        pastEnd = instanceCursor > available;
    } else {
        pastEnd = instanceCursor >= available;
    }

    if (pastEnd) {
        sourceCursor++;
        instanceCursor = 0;
        // Leave src untouched when there is no further source to move to.
        if (sourceCursor < data.numSources()) {
            src = data.getSource(sourceCursor);
        }
    }
    return current;
}
/**
 * Reports whether the cursor currently points at a readable instance.
 *
 * @return {@code true} if the source cursor is within the number of sources
 *         and the instance cursor is within the current source's instances
 */
public boolean hasNext() {
    if (sourceCursor >= data.numSources()) {
        return false;
    }
    return instanceCursor < data.numInstances(src);
}