示例#1
0
  /**
   * Computes the number of correct pairs between src1 and src2, where src2 comes after src1 in
   * the source order of {@code data}.
   *
   * <p>Two instances form a correct pair when they carry the same non-null id. When
   * {@code clusterMode} is set, pairs of same-id instances inside a single source are counted as
   * well. Instances whose id is null are never counted.
   *
   * @param data the match data whose sources and instances are scanned
   * @return the total number of correct pairs
   */
  protected int countCorrectPairs(MatchData data) {
    // count the number of times each id appears in each source
    Map counter = new HashMap();
    for (int i = 0; i < data.numSources(); i++) {
      String src = data.getSource(i);
      for (int j = 0; j < data.numInstances(src); j++) {
        String id = data.getInstance(src, j).getId();
        if (id != null) {
          IdKey key = new IdKey(id, src);
          Integer c = (Integer) counter.get(key);
          counter.put(key, (c == null ? new Integer(1) : new Integer(c.intValue() + 1)));
        }
      }
    }

    // count the number of correct pairs
    int numCorrectPairs = 0;
    Set idsInSrc1 = new HashSet();
    for (int i = 0; i < data.numSources(); i++) {
      String src1 = data.getSource(i);
      idsInSrc1.clear();
      for (int j = 0; j < data.numInstances(src1); j++) {
        String id = data.getInstance(src1, j).getId();
        if (id == null) {
          // Null ids were never entered into the counter, so they cannot take part in a
          // correct pair. Skipping them also keeps null out of idsInSrc1, whose counter
          // lookup in the clusterMode branch would otherwise come back null.
          continue;
        }
        idsInSrc1.add(id);
        // every occurrence of this id in a later source is one correct pair
        for (int k = i + 1; k < data.numSources(); k++) {
          String src2 = data.getSource(k);
          Integer cInteger = (Integer) counter.get(new IdKey(id, src2));
          if (cInteger != null) {
            numCorrectPairs += cInteger.intValue();
          }
        }
      }
      if (clusterMode) {
        // count how often something in src1 can be matched correctly with something
        // else in src1: c occurrences of an id give c*(c-1)/2 unordered pairs
        for (Iterator j = idsInSrc1.iterator(); j.hasNext(); ) {
          String id = (String) j.next();
          Integer cInteger = (Integer) counter.get(new IdKey(id, src1));
          int c = cInteger.intValue();
          numCorrectPairs += c * (c - 1) / 2;
        }
      }
    }
    return numCorrectPairs;
  }
示例#2
0
 /**
  * Builds a MatchData from {@code argv[0]}, prints its {@code toString()} dump, and then prints
  * every element produced by its iterator, one per line.
  *
  * <p>Any exception raised while doing so is reported with a stack trace instead of being
  * propagated. A missing argument is reported on standard error and nothing else is printed.
  *
  * @param argv command-line arguments; {@code argv[0]} is handed to the MatchData constructor
  */
 public static void main(String[] argv) {
   // Fail with a readable message instead of an ArrayIndexOutOfBoundsException stack trace.
   if (argv == null || argv.length < 1) {
     System.err.println("missing argument: argv[0] is required to construct the MatchData");
     return;
   }
   try {
     MatchData md = new MatchData(argv[0]);
     System.out.println("Dump:");
     System.out.println(md.toString());
     System.out.println();
     System.out.println("Iteration:");
     for (Iterator i = md.getIterator(); i.hasNext(); ) {
       System.out.println(i.next().toString());
     }
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
示例#3
0
 /**
  * Returns the instance at the current cursor position (typed as Object) and advances the
  * cursor.
  *
  * <p>Once the instance cursor has run off the end of the current source, the iterator moves to
  * the start of the following source, if there is one. When KEEP_OLD_ITERATION_BUG is set, the
  * end-of-source test is the strict {@code >} rather than {@code >=}, which reproduces the
  * legacy iteration behavior.
  */
 public Object next() {
   Instance current = data.getInstance(src, instanceCursor++);
   int sizeOfSource = data.numInstances(src);
   boolean sourceExhausted =
       KEEP_OLD_ITERATION_BUG ? instanceCursor > sizeOfSource : instanceCursor >= sizeOfSource;
   if (sourceExhausted) {
     sourceCursor++;
     instanceCursor = 0;
     // only look up the next source name while the cursor is still in range
     if (sourceCursor < data.numSources()) {
       src = data.getSource(sourceCursor);
     }
   }
   return current;
 }
示例#4
0
 /**
  * Reports whether another instance is available: the source cursor must still be within the
  * sources of the data, and the instance cursor within the instances of the current source.
  */
 public boolean hasNext() {
   if (sourceCursor >= data.numSources()) {
     return false;
   }
   return instanceCursor < data.numInstances(src);
 }
示例#5
0
 /**
  * Creates an iterator positioned at the first instance of the first source of {@code data}.
  *
  * <p>If {@code data} has no sources at all, no source name is looked up and {@code src} stays
  * null; {@code hasNext()} then returns false through its source-bound check.
  *
  * @param data the match data to iterate over
  */
 public MatchIterator(MatchData data) {
   this.data = data;
   sourceCursor = 0;
   instanceCursor = 0;
   // Same range guard next() applies before calling getSource, so empty data is iterable.
   src = (sourceCursor < data.numSources()) ? data.getSource(sourceCursor) : null;
 }