예제 #1
0
  /**
   * Collects every cell in {@code found} whose (row id, column id) position is shared with at
   * least one other cell in {@code found}.
   *
   * @param found the cells to inspect
   * @return a new set containing all cells whose position occurs more than once; empty when no
   *     position repeats
   */
  public static Set<RepairedCell> getDuplicates(Set<RepairedCell> found) {
    Map<String, List<RepairedCell>> cellsByPosition = new HashMap<String, List<RepairedCell>>();

    for (RepairedCell cell : found) {
      // The separator keeps the key unambiguous. Plain concatenation would map
      // row 1 / column "2x" and row 12 / column "x" to the same key "12x" and
      // report two unrelated cells as duplicates.
      String key = cell.getRowId() + ":" + cell.getColumnId();

      List<RepairedCell> group = cellsByPosition.get(key);
      if (group == null) {
        group = new ArrayList<RepairedCell>();
        cellsByPosition.put(key, group);
      }
      group.add(cell);
    }

    Set<RepairedCell> rst = new HashSet<RepairedCell>();
    for (List<RepairedCell> group : cellsByPosition.values()) {
      // Only positions hit by more than one cell count as duplicates.
      if (group.size() > 1) {
        rst.addAll(group);
      }
    }
    return rst;
  }
예제 #2
0
  /**
   * Returns the cells of {@code truth} whose (row id, column id) position does not appear in
   * {@code found}.
   *
   * <p>Cells are compared by position only. When {@code found} is empty, every cell of
   * {@code truth} is returned, since none of them was found.
   *
   * @param truth the expected cells
   * @param found the cells that were actually produced
   * @return a new set with the truth cells that have no counterpart in {@code found}
   */
  public static Set<RepairedCell> getUnfound(Set<RepairedCell> truth, Set<RepairedCell> found) {
    // Index the found cells: row id -> column ids present in that row.
    HashMap<Integer, HashSet<String>> foundMap = new HashMap<Integer, HashSet<String>>();
    for (RepairedCell cell : found) {
      HashSet<String> columnIds = foundMap.get(cell.getRowId());
      if (columnIds == null) {
        columnIds = new HashSet<String>();
        foundMap.put(cell.getRowId(), columnIds);
      }
      columnIds.add(cell.getColumnId());
    }

    // No early exit on an empty 'found': with nothing found, the index is empty
    // and every truth cell is correctly reported as unfound.
    Set<RepairedCell> rst = new HashSet<RepairedCell>();
    for (RepairedCell cell : truth) {
      HashSet<String> columnIds = foundMap.get(cell.getRowId());
      if (columnIds == null || !columnIds.contains(cell.getColumnId())) {
        rst.add(cell);
      }
    }
    return rst;
  }
예제 #3
0
  /**
   * Computes the F1 score (harmonic mean of precision and recall) of {@code found} against
   * {@code truth}, matching cells by (row id, column id) position only.
   *
   * <p>Precision is the number of matched truth cells divided by {@code found.size()}; recall is
   * the same count divided by {@code truth.size()}. When the class-level {@code debug} flag is
   * set, both values are printed to standard output.
   *
   * @param truth the expected cells
   * @param found the cells that were actually produced
   * @return the F1 score, or 0 when {@code found} is empty, {@code truth} is empty, or no truth
   *     cell was matched (never NaN)
   */
  public static double findAccuracy(Set<RepairedCell> truth, Set<RepairedCell> found) {
    if (found.isEmpty()) {
      return 0;
    }

    // Index the found cells: row id -> column ids present in that row.
    HashMap<Integer, HashSet<String>> foundMap = new HashMap<Integer, HashSet<String>>();
    for (RepairedCell cell : found) {
      HashSet<String> columnIds = foundMap.get(cell.getRowId());
      if (columnIds == null) {
        columnIds = new HashSet<String>();
        foundMap.put(cell.getRowId(), columnIds);
      }
      columnIds.add(cell.getColumnId());
    }

    // Count the truth cells whose position was also found.
    int tAndF = 0;
    for (RepairedCell cell : truth) {
      HashSet<String> columnIds = foundMap.get(cell.getRowId());
      if (columnIds != null && columnIds.contains(cell.getColumnId())) {
        tAndF++;
      }
    }

    double precision = tAndF * 1.0 / found.size();
    // Guard the division: an empty truth set would otherwise make recall 0.0 / 0 = NaN.
    double recall = truth.isEmpty() ? 0.0 : tAndF * 1.0 / truth.size();
    if (debug) {
      System.out.println("find precision = " + precision + ", find recall = " + recall);
    }

    // With no matches, precision + recall is 0 and the F1 formula would yield NaN.
    if (tAndF == 0) {
      return 0;
    }
    return 2 * precision * recall / (precision + recall);
  }