// For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix // (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): // 40000000 40100000 59.0 private static void processRawContactInformation( String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException { // Check if sorted version is available // If not make sorted available. if (!Gpio.exists(fileToRead + ".sorted")) { if (intra) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } else { umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]); if (contact >= minValue) { contactsToCheck.get(numberToBeMatched).setContact(); numberToBeMatched++; } else { numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }
// For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix // (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): // 40000000 40100000 59.0 // To normalize this entry using the KR normalization vector, one would divide 59.0 by the 8001st // line ((40000000/5000)+1=8001) and the 8021st line ((40100000/5000)+1=8021) // of GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.KRnorm. The 8001st // line of the KR norm file is 1.2988778370674694; // The 8021st line of the KR norm file is 1.6080499717941548. So the corresponding KR normalized // entry for the entry above is 59.0/(1.2988778370674694*1.6080499717941548) // or 28.24776973966101. // If the KR normalization vector file is empty or all NaNs, then the KR algorithm didn’t converge // on that particular matrix (likely due to sparsity of the matrix). private static void processNormalizedInterContactInformation( String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException { // ReadIn normalization chr1 TextFile inputNormChr1 = new TextFile( baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList(); inputNormChr1.close(); // System.out.println("Done reading norm factor 1"); // ReadIn normalization chr2 TextFile inputNormChr2 = new TextFile( baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R); ArrayList<String> normFactorLargerChr = inputNormChr2.readAsArrayList(); inputNormChr2.close(); // System.out.println("Done reading norm factor 2"); if (!Gpio.exists(fileToRead + ".sorted")) { umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { if (((posChr1 / getNumericResolution(resolution)) + 1) > normFactorSmallerChr.size()) { System.out.println(baseName); System.out.println("Smaller"); System.out.println((posChr1 / getNumericResolution(resolution) + 1)); System.out.println(normFactorSmallerChr.size()); System.exit(-1); } if (((posChr2 / getNumericResolution(resolution)) + 1) > normFactorLargerChr.size()) { System.out.println(baseName); System.out.println("Larger"); System.out.println((posChr2 / getNumericResolution(resolution)) + 1); System.out.println(normFactorLargerChr.size()); System.exit(-1); } String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1); double factor1 = 1.0; double factor2 = 1.0; if (NumberUtils.isNumber(factor1Base) && NumberUtils.isNumber(factor2Base)) { factor1 = Double.parseDouble(factor1Base); factor2 = Double.parseDouble(factor2Base); } else if (NumberUtils.isNumber(factor1Base)) { factor1 = Double.parseDouble(factor1Base); System.out.println("Error in files."); System.out.println("Base 2 is reset to 1"); } else if (NumberUtils.isNumber(factor2Base)) { factor2 = Double.parseDouble(factor2Base); System.out.println("Error in files."); System.out.println("Base 1 is reset to 1"); } double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2); if (contact >= minValue) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2])); numberToBeMatched++; } else { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }