/**
 * Collects every cell of {@code found} whose (row id, column id) position is shared with at
 * least one other cell in the same set.
 *
 * @param found the cells to inspect
 * @return a new set holding all cells that belong to a position occupied by two or more cells;
 *         empty when every position occurs once
 */
public static Set<RepairedCell> getDuplicates(Set<RepairedCell> found) {
    // Group by row id first and column id second. Concatenating the two ids into a single
    // string key is ambiguous: row 1 / column "1x" and row 11 / column "x" both yield "11x"
    // and would be reported as duplicates of each other.
    Map<Integer, Map<String, List<RepairedCell>>> byRow =
            new HashMap<Integer, Map<String, List<RepairedCell>>>();
    for (RepairedCell cell : found) {
        Map<String, List<RepairedCell>> byColumn = byRow.get(cell.getRowId());
        if (byColumn == null) {
            byColumn = new HashMap<String, List<RepairedCell>>();
            byRow.put(cell.getRowId(), byColumn);
        }
        List<RepairedCell> samePosition = byColumn.get(cell.getColumnId());
        if (samePosition == null) {
            samePosition = new ArrayList<RepairedCell>();
            byColumn.put(cell.getColumnId(), samePosition);
        }
        samePosition.add(cell);
    }

    // Only positions that collected more than one cell contribute to the result.
    Set<RepairedCell> rst = new HashSet<RepairedCell>();
    for (Map<String, List<RepairedCell>> byColumn : byRow.values()) {
        for (List<RepairedCell> samePosition : byColumn.values()) {
            if (samePosition.size() > 1) {
                rst.addAll(samePosition);
            }
        }
    }
    return rst;
}
/**
 * Returns the cells of {@code truth} whose (row id, column id) position does not occur in
 * {@code found}.
 *
 * <p>When {@code found} is empty, every cell of {@code truth} is returned, since none of them
 * was found.
 *
 * @param truth the expected cells
 * @param found the cells that were actually produced
 * @return a new set with the truth cells that have no cell at the same position in
 *         {@code found}
 */
public static Set<RepairedCell> getUnfound(Set<RepairedCell> truth, Set<RepairedCell> found) {
    // Index the found positions: row id -> column ids seen in that row.
    // An empty 'found' simply produces an empty index, so no special case is needed.
    Map<Integer, Set<String>> foundMap = new HashMap<Integer, Set<String>>();
    for (RepairedCell cell : found) {
        Set<String> columnIds = foundMap.get(cell.getRowId());
        if (columnIds == null) {
            columnIds = new HashSet<String>();
            foundMap.put(cell.getRowId(), columnIds);
        }
        columnIds.add(cell.getColumnId());
    }

    Set<RepairedCell> rst = new HashSet<RepairedCell>();
    for (RepairedCell cell : truth) {
        Set<String> columnIds = foundMap.get(cell.getRowId());
        if (columnIds == null || !columnIds.contains(cell.getColumnId())) {
            rst.add(cell);
        }
    }
    return rst;
}
/**
 * Computes the F1 score (harmonic mean of precision and recall) of {@code found} against
 * {@code truth}, matching cells by their (row id, column id) position.
 *
 * <p>Precision is the number of matched truth cells divided by {@code found.size()}; recall is
 * the same count divided by {@code truth.size()}. When the class-level {@code debug} flag is
 * set and {@code found} is non-empty, both values are printed to standard output.
 *
 * @param truth the expected cells
 * @param found the cells that were actually produced
 * @return the F1 score; {@code 0} when {@code found} is empty or when no truth cell is matched
 */
public static double findAccuracy(Set<RepairedCell> truth, Set<RepairedCell> found) {
    if (found.size() == 0) {
        return 0;
    }

    // Index the found positions: row id -> column ids seen in that row.
    Map<Integer, Set<String>> foundMap = new HashMap<Integer, Set<String>>();
    for (RepairedCell cell : found) {
        Set<String> columnIds = foundMap.get(cell.getRowId());
        if (columnIds == null) {
            columnIds = new HashSet<String>();
            foundMap.put(cell.getRowId(), columnIds);
        }
        columnIds.add(cell.getColumnId());
    }

    // Count the truth cells whose position also occurs in 'found'.
    int tAndF = 0;
    for (RepairedCell cell : truth) {
        Set<String> columnIds = foundMap.get(cell.getRowId());
        if (columnIds != null && columnIds.contains(cell.getColumnId())) {
            tAndF++;
        }
    }

    // tAndF > 0 implies both sets are non-empty, so neither division below can hit zero.
    double precision = 0;
    double recall = 0;
    if (tAndF > 0) {
        precision = tAndF * 1.0 / found.size();
        recall = tAndF * 1.0 / truth.size();
    }
    if (debug) System.out.println("find precision = " + precision + ", find recall = " + recall);

    // With no matches precision + recall is 0 and the harmonic mean would be 0.0 / 0.0 = NaN;
    // report a score of 0 instead.
    if (tAndF == 0) {
        return 0;
    }
    return 2 * precision * recall / (precision + recall);
}