// For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix // (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): // 40000000 40100000 59.0 private static void processRawContactInformation( String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException { // Check if sorted version is available // If not make sorted available. if (!Gpio.exists(fileToRead + ".sorted")) { if (intra) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } else { umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]); if (contact >= minValue) { contactsToCheck.get(numberToBeMatched).setContact(); numberToBeMatched++; } else { numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }
private static void processNormalizedIntraContactInformation( String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException { // ReadIn normalization chr1 TextFile inputNormChr1 = new TextFile( baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList(); inputNormChr1.close(); // System.out.println("Done reading norm factor 1"); if (!Gpio.exists(fileToRead + ".sorted")) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1); double factor1; double factor2; if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base); factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base); double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2); if (contact >= minValue) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2])); numberToBeMatched++; } else { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else { System.out.println("Error in files."); numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }