コード例 #1
0
  /**
   * Partitions the entities of {@code umlsGroup} into groups of transitively matching entities.
   *
   * <p>The structure is matched against itself with a trigram matcher; every resulting annotation
   * between two different entity ids is treated as an undirected edge. The connected components
   * of that graph are computed by propagating the smallest id seen so far along the edges until a
   * full pass changes nothing.
   *
   * @param umlsGroup the structure whose entities are grouped; it is used as both source and
   *     target of the match
   * @param preRanAtts the properties handed to the match operator for both the source and the
   *     target side
   * @param size unused; retained so existing callers keep compiling
   * @param gi repository providing the match manager that executes the match
   * @return a map from group key to the ids in that group. For entities that take part in at
   *     least one annotation the key is the smallest id of their component; every other id of
   *     {@code umlsGroup.getObjIds()} is returned as a singleton group keyed by itself. If the
   *     match fails, the error is logged and all entities end up in singleton groups.
   */
  public static HashMap<Integer, List<Integer>> groupSimilarUMLSCons(
      EncodedEntityStructure umlsGroup,
      Set<GenericProperty> preRanAtts,
      int size,
      FormRepository gi) {

    EncodedAnnotationMapping set = new EncodedAnnotationMapping();
    // NOTE(review): the last constructor argument (0.35f) is presumably the minimum similarity
    // for a pair to be reported -- confirm against MatchOperator.
    MatchOperator mop =
        new MatchOperator(
            RegisteredMatcher.TRIGRAM_MATCHER,
            AggregationFunction.MAX,
            preRanAtts,
            preRanAtts,
            0.35f);
    ExecutionTree tree = new ExecutionTree();
    tree.addOperator(mop);
    try {
      set = gi.getMatchManager().matchEncoded(umlsGroup, umlsGroup, tree, null);
      log.debug("matched group");
    } catch (MatchingExecutionException e1) {
      // Best effort: continue with the empty mapping so every entity forms its own group.
      log.error("Self-matching failed; every entity is kept in its own group", e1);
    }

    // Drop self-matches. They are collected first because the mapping must not be modified
    // while its annotations are being iterated.
    HashSet<EntityAnnotation> toRemoveCorrs = new HashSet<EntityAnnotation>();
    for (EntityAnnotation c : set.getAnnotations()) {
      if (c.getSrcId() == c.getTargetId()) {
        toRemoveCorrs.add(c);
      }
    }
    for (EntityAnnotation c : toRemoveCorrs) {
      set.removeAnnotation(c.getSrcId(), c.getTargetId());
    }

    // Min-label propagation. Labels only ever decrease, so the loop reaches a fixed point; the
    // pass limit is kept purely as a safety net.
    HashMap<Integer, Node> nodeMap = new HashMap<Integer, Node>();
    boolean isChange = set.getNumberOfAnnotations() != 0;
    int counter = 0;
    while (isChange && counter < 10000) {
      counter++;
      // Must be reset per pass and only ever raised inside it: deciding on the last visited
      // annotation alone can stop the propagation before all labels have settled.
      isChange = false;
      for (EntityAnnotation cor : set.getAnnotations()) {
        if (cor.getSrcId() == cor.getTargetId()) {
          continue;
        }
        Node n = nodeMap.get(cor.getSrcId());
        if (n == null) {
          n = new Node();
          n.ownId = cor.getSrcId();
          n.minId = n.ownId;
          nodeMap.put(n.ownId, n);
        }

        Node n2 = nodeMap.get(cor.getTargetId());
        if (n2 == null) {
          n2 = new Node();
          n2.ownId = cor.getTargetId();
          n2.minId = n2.ownId;
          nodeMap.put(n2.ownId, n2);
        }

        int min = Math.min(n.minId, n2.minId);
        if (n.minId != min || n2.minId != min) {
          n.minId = min;
          n2.minId = min;
          isChange = true;
        }
      }
    }
    if (isChange) {
      log.warn("Label propagation stopped after " + counter + " passes; groups may be incomplete");
    }

    // Ids without any annotation never enter nodeMap; they become singleton groups below.
    HashSet<Integer> notInGroup = new HashSet<Integer>();
    for (int id : umlsGroup.getObjIds().keySet()) {
      notInGroup.add(id);
    }
    notInGroup.removeAll(nodeMap.keySet());

    HashMap<Integer, List<Integer>> umlsGroups = new HashMap<Integer, List<Integer>>();
    for (Entry<Integer, Node> e : nodeMap.entrySet()) {
      List<Integer> list = umlsGroups.get(e.getValue().minId);
      if (list == null) {
        list = new ArrayList<Integer>();
        umlsGroups.put(e.getValue().minId, list);
      }
      list.add(e.getKey());
    }
    for (Integer nig : notInGroup) {
      List<Integer> list = new ArrayList<Integer>();
      list.add(nig);
      umlsGroups.put(nig, list);
    }

    return umlsGroups;
  }
コード例 #2
0
  /**
   * Partitions the entities of {@code umlsGroup} into the connected components of its
   * self-match graph.
   *
   * <p>The structure is matched against itself with a trigram matcher configured with
   * {@code threshold}; every resulting annotation between two different entity ids is treated as
   * an undirected edge. Components are computed by propagating the smallest id seen so far along
   * the edges until a full pass changes nothing.
   *
   * @param umlsGroup the structure whose entities are grouped; it is used as both source and
   *     target of the match
   * @param preRanAtts the properties handed to the match operator for both the source and the
   *     target side
   * @param size unused; retained so existing callers keep compiling
   * @param gi repository providing the match manager that executes the match
   * @param threshold value passed as the last argument of the match operator (presumably the
   *     minimum similarity for a pair to be reported -- confirm against MatchOperator)
   * @return a map from group key to the ids in that group. For entities that take part in at
   *     least one annotation the key is the smallest id of their component; every other id of
   *     {@code umlsGroup.getObjIds()} is returned as a singleton group keyed by itself. If the
   *     match fails, the error is logged and all entities end up in singleton groups.
   */
  public static HashMap<Integer, List<Integer>> groupSimilarUMLSConsByConnectedComponent(
      EncodedEntityStructure umlsGroup,
      Set<GenericProperty> preRanAtts,
      int size,
      FormRepository gi,
      float threshold) {
    EncodedAnnotationMapping set = new EncodedAnnotationMapping();
    // The caller-supplied threshold is honoured here; a fixed 0.4f would silently ignore it.
    MatchOperator mop =
        new MatchOperator(
            RegisteredMatcher.TRIGRAM_MATCHER,
            AggregationFunction.MAX,
            preRanAtts,
            preRanAtts,
            threshold);

    ExecutionTree tree = new ExecutionTree();
    tree.addOperator(mop);
    try {
      log.debug(umlsGroup.getObjIds().size());
      set = gi.getMatchManager().matchEncoded(umlsGroup, umlsGroup, tree, null);
      log.debug("matched group");
    } catch (MatchingExecutionException e1) {
      // Best effort: continue with the empty mapping so every entity forms its own group.
      log.error("Self-matching failed; every entity is kept in its own group", e1);
    }

    // Drop self-matches. They are collected first because the mapping must not be modified
    // while its annotations are being iterated.
    HashSet<EntityAnnotation> toRemoveCorrs = new HashSet<EntityAnnotation>();
    for (EntityAnnotation c : set.getAnnotations()) {
      if (c.getSrcId() == c.getTargetId()) {
        toRemoveCorrs.add(c);
      }
    }
    for (EntityAnnotation c : toRemoveCorrs) {
      set.removeAnnotation(c.getSrcId(), c.getTargetId());
    }

    // Min-label propagation. Labels only ever decrease, so the loop reaches a fixed point; the
    // pass limit is kept purely as a safety net.
    HashMap<Integer, Node> nodeMap = new HashMap<Integer, Node>();
    boolean isChange = set.getNumberOfAnnotations() != 0;
    int counter = 0;
    while (isChange && counter < 10000) {
      counter++;
      // Must be reset per pass and only ever raised inside it: deciding on the last visited
      // annotation alone can stop the propagation before all labels have settled.
      isChange = false;
      for (EntityAnnotation cor : set.getAnnotations()) {
        if (cor.getSrcId() == cor.getTargetId()) {
          continue;
        }
        Node n = nodeMap.get(cor.getSrcId());
        if (n == null) {
          n = new Node();
          n.ownId = cor.getSrcId();
          n.minId = n.ownId;
          nodeMap.put(n.ownId, n);
        }

        Node n2 = nodeMap.get(cor.getTargetId());
        if (n2 == null) {
          n2 = new Node();
          n2.ownId = cor.getTargetId();
          n2.minId = n2.ownId;
          nodeMap.put(n2.ownId, n2);
        }

        int min = Math.min(n.minId, n2.minId);
        if (n.minId != min || n2.minId != min) {
          n.minId = min;
          n2.minId = min;
          isChange = true;
        }
      }
    }
    if (isChange) {
      log.warn("Label propagation stopped after " + counter + " passes; groups may be incomplete");
    }

    // Ids without any annotation never enter nodeMap; they become singleton groups below.
    HashSet<Integer> notInGroup = new HashSet<Integer>();
    for (int id : umlsGroup.getObjIds().keySet()) {
      notInGroup.add(id);
    }
    notInGroup.removeAll(nodeMap.keySet());

    HashMap<Integer, List<Integer>> umlsGroups = new HashMap<Integer, List<Integer>>();
    for (Entry<Integer, Node> e : nodeMap.entrySet()) {
      List<Integer> list = umlsGroups.get(e.getValue().minId);
      if (list == null) {
        list = new ArrayList<Integer>();
        umlsGroups.put(e.getValue().minId, list);
      }
      list.add(e.getKey());
    }
    for (Integer nig : notInGroup) {
      List<Integer> list = new ArrayList<Integer>();
      list.add(nig);
      umlsGroups.put(nig, list);
    }
    return umlsGroups;
  }