Example #1
0
  /**
   * Reads an ASCII MeSH descriptor file (e.g. d2015.bin) and fills the supplied maps.
   *
   * <p>The file is read line by line. A line starting with {@code *NEWRECORD} begins a new
   * descriptor; {@code MH = } supplies its main heading, {@code MN = } a tree number (possibly
   * several per record), {@code ENTRY = } an entry term, and {@code UI = } its unique identifier.
   * The {@code UI = } line is treated as the end of the record: at that point the descriptor is
   * stored and its terms are indexed.
   *
   * <p>Before parsing, a synthetic root node and the sixteen top-level categories are inserted,
   * because they are not present in the file. After parsing, one {@link MeshLink} per tree number
   * is created, pointing from the tree number to its parent tree number.
   *
   * @param fileName path of the MeSH descriptor file to read
   * @param meshNodeMap output map; receives the root/category nodes (keyed by their tree number)
   *     and every parsed descriptor (keyed by its UI)
   * @param meshLinkMap output map; receives one link per tree number, keyed by that tree number
   * @param allMeshTermsMap output map from term text to the UI of the descriptor that owns it; when
   *     a term occurs in several descriptors the last one read wins and a notice is printed
   * @throws IOException if the file cannot be opened or read
   */
  public static void readMeshTreeFile(
      String fileName,
      Map<String, MeshNode> meshNodeMap,
      Map<String, MeshLink> meshLinkMap,
      Map<String, String> allMeshTermsMap)
      throws IOException {

    Path path = Paths.get(fileName);
    // I/O failures propagate to the caller (as the signature promises) instead of being printed
    // and swallowed, which used to leave the output maps silently empty or half-filled.
    try (BufferedReader reader = Files.newBufferedReader(path, ENCODING)) {
      // Root nodes are added only once the file has been opened successfully.
      int nextNodeId = addMeshRootNodes(meshNodeMap);

      MeshNode current = new MeshNode();
      String line;
      while ((line = reader.readLine()) != null) {
        if (line.startsWith("*NEWRECORD")) {
          current = new MeshNode();
        } else if (line.startsWith("MH = ")) {
          // The main heading also counts as one of the descriptor's terms.
          current.MH = line.substring(5);
          current.terms.add(current.MH);
        } else if (line.startsWith("MN = ")) {
          current.MNlist.add(line.substring(5));
        } else if (line.startsWith("ENTRY = ")) {
          // Only entries carrying '|'-separated attributes are recorded, and only the text before
          // the first '|'. Entries without a '|' are skipped, as in the original implementation.
          String entry = line.substring(8);
          int bar = entry.indexOf('|');
          if (bar >= 0) {
            current.terms.add(entry.substring(0, bar));
          }
        } else if (line.startsWith("UI = ")) {
          current.UI = line.substring(5);
          // "Female" and "Male" are excluded because those records have no tree numbers.
          // Constant-first equals keeps this safe when a record has no MH line at all.
          if ("Female".equals(current.MH) || "Male".equals(current.MH)) {
            continue;
          }
          current.node_id = String.valueOf(nextNodeId);
          nextNodeId++;
          meshNodeMap.put(current.UI, current);

          // Index every term of the finished record under its UI.
          for (Iterator<String> terms = current.terms.iterator(); terms.hasNext(); ) {
            String term = terms.next();
            if (allMeshTermsMap.containsKey(term)) {
              System.out.println("key exist" + term + " " + current.UI);
            }
            allMeshTermsMap.put(term, current.UI);
          }
        }
      }
    }
    System.out.println("meshNodeSize: " + meshNodeMap.size());
    System.out.println("meshTermsMapSize: " + allMeshTermsMap.size());

    // Build one link per tree number of every node (root nodes included).
    int nextLinkId = 1;
    for (Map.Entry<String, MeshNode> nodeEntry : meshNodeMap.entrySet()) {
      String nodeKey = nodeEntry.getKey();
      for (Iterator<String> numbers = nodeEntry.getValue().MNlist.iterator();
          numbers.hasNext(); ) {
        String treeNumber = numbers.next();
        if (treeNumber.isEmpty()) {
          // An empty "MN = " line has no position in the tree; skip it rather than fail.
          continue;
        }
        meshLinkMap.put(
            treeNumber,
            new MeshLink(
                treeNumber,
                parentTreeNumberOf(treeNumber),
                nodeKey,
                String.valueOf(nextLinkId)));
        nextLinkId++;
      }
    }

    System.out.println("meshLinkSize: " + meshLinkMap.size());
  }

  /**
   * Inserts the synthetic root ("0") and the top-level MeSH categories, keyed by tree number.
   *
   * @return the next free node id (ids 1..17 are consumed by the nodes added here)
   */
  private static int addMeshRootNodes(Map<String, MeshNode> meshNodeMap) {
    // {tree number, heading}; index 0 is the single root under which all categories hang.
    String[][] roots = {
      {"0", "ROOT-SCIENCE"},
      {"A", "Anatomy"},
      {"B", "Organisms"},
      {"C", "Diseases"},
      {"D", "Chemicals and Drugs"},
      {"E", "Analytical, Diagnostic and Therapeutic Techniques and Equipment"},
      {"F", "Psychiatry and Psychology"},
      {"G", "Phenomena and Processes"},
      {"H", "Disciplines and Occupations"},
      {"I", "Anthropology, Education, Sociology and Social Phenomena"},
      {"J", "Technology, Industry, Agriculture"},
      {"K", "Humanities"},
      {"L", "Information Science"},
      {"M", "Named Groups"},
      {"N", "Health Care"},
      {"V", "Publication Characteristics"},
      {"Z", "Geographicals"},
    };
    for (int i = 0; i < roots.length; i++) {
      String treeNumber = roots[i][0];
      // The UI is "U0" followed by the un-padded index ("U00".."U09", "U010".."U016"); this
      // reproduces the identifiers that were previously hard-coded one by one.
      meshNodeMap.put(
          treeNumber, new MeshNode(roots[i][1], treeNumber, "U0" + i, String.valueOf(i + 1)));
    }
    return roots.length + 1;
  }

  /**
   * Derives the parent tree number of a non-empty MeSH tree number.
   *
   * <ul>
   *   <li>"0" (the root) has no parent: returns "".
   *   <li>A single character (main category such as "A") hangs under the root: returns "0".
   *   <li>No dot (sub-category such as "A01"): the parent is the first character ("A").
   *   <li>Otherwise the parent is everything before the last dot ("H01.770.644.053" becomes
   *       "H01.770.644").
   * </ul>
   */
  private static String parentTreeNumberOf(String treeNumber) {
    if (treeNumber.equals("0")) {
      return "";
    }
    if (treeNumber.length() == 1) {
      return "0";
    }
    int lastDot = treeNumber.lastIndexOf('.');
    if (lastDot < 0) {
      return treeNumber.substring(0, 1);
    }
    return treeNumber.substring(0, lastDot);
  }