// read ASCII MeSH Tree file (d2015.bin) as input, and generates meshNodeMap(descriptors) and // meshLinkMap(MN's) as output public static void readMeshTreeFile( String fileName, Map<String, MeshNode> meshNodeMap, Map<String, MeshLink> meshLinkMap, Map<String, String> allMeshTermsMap) throws IOException { Path path = Paths.get(fileName); try (BufferedReader reader = Files.newBufferedReader(path, ENCODING)) { // Map<String, MeshNode> meshNodeMap = new HashMap<String, MeshNode>(); // add a single ROOT, where all sciences meets int cnt_meshNode_id = 1; meshNodeMap.put( "0", new MeshNode("ROOT-SCIENCE", "0", "U00", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; // root-level categories don't exist in the file, so we manually add them meshNodeMap.put("A", new MeshNode("Anatomy", "A", "U01", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put("B", new MeshNode("Organisms", "B", "U02", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put("C", new MeshNode("Diseases", "C", "U03", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "D", new MeshNode("Chemicals and Drugs", "D", "U04", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "E", new MeshNode( "Analytical, Diagnostic and Therapeutic Techniques and Equipment", "E", "U05", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "F", new MeshNode("Psychiatry and Psychology", "F", "U06", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "G", new MeshNode("Phenomena and Processes", "G", "U07", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "H", new MeshNode("Disciplines and Occupations", "H", "U08", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "I", new MeshNode( "Anthropology, Education, Sociology and Social Phenomena", "I", "U09", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "J", new MeshNode( "Technology, Industry, Agriculture", "J", "U010", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "K", new MeshNode("Humanities", "K", "U011", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "L", new MeshNode("Information Science", "L", "U012", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "M", new MeshNode("Named Groups", "M", "U013", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "N", new MeshNode("Health Care", "N", "U014", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "V", new MeshNode( "Publication Characteristics", "V", "U015", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; meshNodeMap.put( "Z", new MeshNode("Geographicals", "Z", "U016", String.valueOf(cnt_meshNode_id))); cnt_meshNode_id++; // general map which stores all mesh terms, and their corresponding meshNodeUI (key:meshTerm, // value:meshDescriptorUI) // Map<String, String> allMeshTermsMap = new HashMap<String, String>(); int cnt_duplicate = 0; MeshNode meshNode = new MeshNode(); String line = null; while ((line = reader.readLine()) != null) { if (line.startsWith("*NEWRECORD")) // create new node meshNode = new MeshNode(); if (line.startsWith("MH = ")) { // store MH(Main Heading), meshNode.MH = line.substring(5); meshNode.terms.add(meshNode.MH); // add MH to the terms. } if (line.startsWith("MN = ")) // MN(MeshTree Number)s, may be more than one for each record meshNode.MNlist.add(line.substring(5)); if (line.startsWith("ENTRY = ")) { // MeSH terms, may be more than one for each record String entry = line.substring(8); if (entry.contains("|")) meshNode.terms.add(entry.substring(0, entry.indexOf("|"))); // else // meshNode.terms.add(entry); } if (line.startsWith( "UI = ")) { // since UniqueIdentifier is the last element for a record, add the // descriptor to the meshNodeMap meshNode.UI = line.substring(5); if (!meshNode.MH.equals("Female") && !meshNode.MH.equals( "Male")) { // exclude these two weird records which don't have MNs meshNode.node_id = String.valueOf(cnt_meshNode_id); cnt_meshNode_id++; meshNodeMap.put(meshNode.UI, meshNode); // now we have meshNode.UI, we will add meshTerms of this node to the GlobalMeshTermsMap for (Iterator<String> iter = meshNode.terms.iterator(); iter.hasNext(); ) { String term = iter.next(); if (allMeshTermsMap.containsKey(term)) { System.out.println("key exist" + term + " " + meshNode.UI); cnt_duplicate++; } allMeshTermsMap.put(term, meshNode.UI); } } } } System.out.println("meshNodeSize: " + meshNodeMap.size()); System.out.println("meshTermsMapSize: " + allMeshTermsMap.size()); // sort the meshNodeMap based on keys // Map<String, MeshNode> sortedMap = new TreeMap<String, MeshNode>(meshNodeMap); // meshNodeMap = sortedMap; // Fill the links Map int cnt_meshLink_id = 1; // Map<String, MeshLink> meshLinkMap = new HashMap<String, MeshLink>(); for (Iterator<Map.Entry<String, MeshNode>> iter = meshNodeMap.entrySet().iterator(); iter.hasNext(); ) { Map.Entry<String, MeshNode> entry = iter.next(); String meshNode_UI = entry.getKey(); for (Iterator<String> iter2 = entry.getValue().MNlist.iterator(); iter2.hasNext(); ) { String self_MN_value = iter2.next(); String parent_MN_value = ""; if (self_MN_value.equals("0")) { // Case 0: if it is the Single ROOT-SCIENCE, it does not have parent, parent_MN_value = ""; } else if (self_MN_value.length() == 1 && !self_MN_value.equals("0")) { // Case 1: if the length of MN=1 and notEquals(0) then it is a main category (A, B, C, // etc.) // it doesn't have a parent, so set its parent to 0 parent_MN_value = "0"; } else if (self_MN_value.length() > 1 && !self_MN_value.contains(".")) { // Case 2: if the length of MN>1 and MN does not contain dot(.), then it is a // subcategory (A01, B02, etc.) // the first letter of the MN = MN of the parent (A01 => A) parent_MN_value = self_MN_value.substring(0, 1); } else { // Case 3: for all other cases, there should be dot(.) within the MN // substring of the MN before the last dot(.) = MN of the parent (H01.770.644.053 => // H01.770.644) parent_MN_value = self_MN_value.substring(0, self_MN_value.lastIndexOf(".")); } meshLinkMap.put( self_MN_value, new MeshLink( self_MN_value, parent_MN_value, meshNode_UI, String.valueOf(cnt_meshLink_id))); cnt_meshLink_id++; } } System.out.println("meshLinkSize: " + meshLinkMap.size()); } catch (Exception e) { e.printStackTrace(); } }