/**
 * Converts a tab-separated item-set file into a plain-text binary membership matrix.
 *
 * <p>Every input line is one item set. The output starts with a header row holding one
 * tab-prefixed column name per set (the set id when {@code hasIds} is true, otherwise
 * {@code Complex1}, {@code Complex2}, ...), followed by one row per distinct non-empty item with
 * {@code 1} in the columns of the sets that contain the item and {@code 0} elsewhere.
 *
 * @param infile path of the tab-separated input file
 * @param outfile path of the matrix file to write
 * @param hasIds whether the first column of every line holds the set id
 * @param isActuallyGMT when {@code hasIds} is true, whether the second column must be skipped as
 *     well (items then start at the third column); ignored when {@code hasIds} is false
 * @throws IOException if reading or writing fails
 */
public static void convertGMTFileToPlainTextBinaryNetwork(
    String infile, String outfile, boolean hasIds, boolean isActuallyGMT) throws IOException {
  // The column at which items start is the same for every line.
  int firstItemIndex = hasIds ? (isActuallyGMT ? 2 : 1) : 0;

  Map<String, Set<Integer>> hashSetIndices = new HashMap<String, Set<Integer>>();
  List<String> sets = new ArrayList<String>();
  int nrItemSets = 0;

  TextFile in = new TextFile(infile, TextFile.R);
  try {
    String line;
    while ((line = in.readLine()) != null) {
      String[] split = line.split("\t");
      if (split.length == 0) {
        // String.split drops trailing empty fields, so a line of only tabs yields no fields.
        continue;
      }
      if (hasIds) {
        sets.add(split[0]);
      }
      for (int i = firstItemIndex; i < split.length; i++) {
        Set<Integer> setIndicesThisItem = hashSetIndices.get(split[i]);
        if (setIndicesThisItem == null) {
          setIndicesThisItem = new HashSet<Integer>();
          hashSetIndices.put(split[i], setIndicesThisItem);
        }
        setIndicesThisItem.add(nrItemSets);
      }
      nrItemSets++;
    }
  } finally {
    // Release the reader even when a read fails half-way.
    in.close();
  }

  TextFile out = new TextFile(outfile, TextFile.W);
  try {
    // Header: one column per item set.
    for (int i = 0; i < nrItemSets; i++) {
      if (hasIds) {
        out.write("\t" + sets.get(i));
      } else {
        out.write("\tComplex" + (i + 1));
      }
    }
    out.writeln();

    // Body: one row per item; empty item names (from empty fields) are not written.
    for (Map.Entry<String, Set<Integer>> itemEntry : hashSetIndices.entrySet()) {
      String item = itemEntry.getKey();
      if ("".equals(item)) {
        continue;
      }
      out.write(item);
      Set<Integer> setIndicesThisItem = itemEntry.getValue();
      for (int set = 0; set < nrItemSets; set++) {
        out.write(setIndicesThisItem.contains(set) ? "\t1" : "\t0");
      }
      out.writeln();
    }
  } finally {
    out.close();
  }
}
private static void remapPositionsAndSnps( LinkedHashSet<String> OrderingEst, HashMap<String, String> mappingEst, HashMap<String, String> mappingUmcg, String outFile, String outFile2) { try { TextFile out = new TextFile(outFile, TextFile.W); TextFile out2 = new TextFile(outFile2, TextFile.W); for (String entry : OrderingEst) { // System.out.println(entry); String snpPos = mappingEst.get(entry); String[] t = SPLIT_ON_COLON.split(snpPos); if (mappingUmcg.containsKey(snpPos)) { out.writeln(mappingUmcg.get(snpPos)); out2.writeln(t[0] + "\t" + t[1] + "\t" + mappingUmcg.get(snpPos)); } else { System.out.println("Problem: " + entry); out.writeln(entry); out2.writeln(t[0] + "\t" + t[1] + "\t" + entry); } } out.close(); out2.close(); } catch (IOException ex) { Logger.getLogger(ConverteMappingAndSnpFile.class.getName()).log(Level.SEVERE, null, ex); } }
public static void convertPlainTextDoubleMatrixToPlainTextEdgeList( String matrixfile, String outfile, String infileDelimiter, String outfileDelimiter) throws IOException { TextFile in = new TextFile(matrixfile, TextFile.R); String line = in.readLine(); String[] split = line.split(infileDelimiter); String[] nodes = Arrays.copyOfRange(split, 1, split.length); LOGGER.log(Level.FINE, "{0} nodes in {1}", new Object[] {nodes.length, matrixfile}); TextFile out = new TextFile(outfile, TextFile.W); int lineNr = 1; int nrEdges = 0; while ((line = in.readLine()) != null) { split = line.split(infileDelimiter); if (split.length != nodes.length + 1) { throw new IllegalArgumentException( "The data in file '" + matrixfile + "' are not a matrix. Check line " + (lineNr + 1) + "."); } String node = split[0]; for (int i = 1; i < split.length; i++) { if (lineNr == i) { continue; // skip diagonal } try { double weight = Double.parseDouble(split[i]); if (weight > 0) { out.writeln(node + outfileDelimiter + nodes[i - 1] + outfileDelimiter + weight); nrEdges++; } } catch (NumberFormatException ex) { throw new IllegalArgumentException( "The data in file '" + matrixfile + "' are not numerical (cast to double failed). Check line " + (lineNr + 1) + ", column " + (i + 1) + "."); } } lineNr++; } in.close(); out.close(); LOGGER.log( Level.FINE, "{0} edges from {1} written to ''{2}''", new Object[] {nrEdges, matrixfile, outfile}); }
public static void writeGMTFileBasedOnGeneSetFileAndMappingFileRemovingDuplicateGeneSets( String genesetfile, String mappingfile, String mappingdelimiter, String gmtfile) throws IOException { TextFile in = new TextFile(mappingfile, TextFile.R); String line = in.readLine(); Map<String, String> code2name = new HashMap<String, String>(); while ((line = in.readLine()) != null) { String[] split = line.split(mappingdelimiter); String name = split[2].trim().replace("\"", "") + " (" + split[6].trim() + ")"; code2name.put(split[0].trim().replace("\"", ""), name); } in.close(); System.out.println(code2name.size() + " gene set annotations read"); in = new TextFile(genesetfile, TextFile.R); TextFile out = new TextFile(gmtfile, TextFile.W); Map<String, Integer> usedNames = new HashMap<String, Integer>(); Set<String> usedGeneSets = new HashSet<String>(); while ((line = in.readLine()) != null) { String[] split = line.split("\t"); if (split.length == 0) { continue; } String code = split[0].trim(); String name = code2name.get(code); if (name == null) { LOGGER.log(Level.WARNING, "No annotation for gene set ''{0}''", code); } else { String genes = Arrays.asList(Arrays.copyOfRange(split, 1, split.length)).toString(); System.out.println(genes); if (!usedGeneSets.contains(genes)) { Integer oldNrItems = usedNames.get(name); if (oldNrItems == null) { out.write(code + "\t" + name); for (int i = 1; i < split.length; i++) { out.write("\t" + split[i]); } out.writeln(); usedNames.put(name, split.length - 1); usedGeneSets.add(genes); } else { // if (oldNrItems != split.length - 1) { System.out.println("Sets with different numbers of genes for " + name); // } } } } } in.close(); out.close(); }
/**
 * Writes one tab-separated line per contact: smaller chromosome location, larger chromosome
 * location and contact value.
 *
 * @param contacts contacts to write, in list order
 * @param fileToWrite path of the output file
 * @return a new, always empty list; nothing is ever added to it
 * @throws IOException if writing fails
 */
private static ArrayList<InterChrContact> writeRawInterContactInformation(
    ArrayList<InterChrContact> contacts, String fileToWrite) throws IOException {
  TextFile outWriter = new TextFile(fileToWrite, TextFile.W);
  try {
    for (InterChrContact contact : contacts) {
      outWriter.writeln(
          contact.getChrLocationSmaller()
              + "\t"
              + contact.getChrLocationLarger()
              + "\t"
              + contact.getContactValue());
    }
  } finally {
    // Close the writer even when a write fails.
    outWriter.close();
  }
  // NOTE(review): the result is never populated; kept as-is for callers - confirm whether the
  // written contacts were meant to be returned.
  return new ArrayList<InterChrContact>();
}
@Override public void run() throws IOException { for (int d = 0; d < m_gg.length; d++) { // now test all triples SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader(); int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray(); for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) { String outfile = null; if (perm == 0) { outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt"; } else { outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-PermutationRound-" + perm + ".txt"; m_gg[d].permuteSampleLables(); } TextFile out = new TextFile(outfile, TextFile.W); Iterator<Triple<String, String, String>> it = snpProbeCombos.iterator(); Triple<String, String, String> next = it.next(); ProgressBar pb = new ProgressBar( snpProbeCombos.size(), "Running Mediation Analysis - Permutation " + perm); out.writeln( "SNP\tSNP Chr\tSNP ChrPos\t" + "Alleles\tDirectionAllele\t" + "N\t" + "CisArrayAddress\tCisProbe Chr\tCisProbe ChrPos\t" + "CisGeneName\t" + "TransArrayAddress\tTransProbe Chr\tTransProbe ChrPos\t" + "TransGeneName\t" + "CisTrans-Correlation\t" + "Cis-eQTL-Beta\t" + "Cis-eQTL-SE\t" + "CisTrans-Beta\t" + "CisTrans-SE\t" + "Trans-eQTL-Beta\t" + "Trans-eQTL-SE\t" + "CisTrans-Residual-Correlation\t" + "CisTrans-Residual-Beta\t" + "CisTrans-Residual-SE\t" + "Trans-eQTL-Residual-Beta\t" + "Trans-eQTL-Residual-SE\t" + "Beta-Ratio"); while (next != null) { String snp = next.getLeft(); String cisprobe = next.getMiddle(); String transprobe = next.getRight(); Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp); Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe); Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe); if (snpId == -9 || cisProbeId == null || transProbeId == null) { // out.writeln(snp + "\t" + snpId + "\t" + cisprobe + "\t" + // cisProbeId + "\t" + null + "\t" + transprobe + "\t" + transProbeId + "\t" + null + // "\t" + null + "\t" + null + "\t" + null 
+ "\t" + null + // "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); } else { SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId); snpLoader.loadGenotypes(snpObj); if (snpLoader.hasDosageInformation()) { snpLoader.loadDosage(snpObj); } double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId]; double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId]; int calledGenotypes = 0; for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) { int genotypeId = indWGA[i]; short gt = snpObj.getGenotypes()[genotypeId]; if (genotypeId > -1 && gt > -1) { calledGenotypes++; } } double[] genotypes = new double[calledGenotypes]; double[] cisvals = new double[calledGenotypes]; double[] transvals = new double[calledGenotypes]; calledGenotypes = 0; for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) { int genotypeId = indWGA[i]; short gt = snpObj.getGenotypes()[genotypeId]; if (genotypeId > -1 && gt > -1) { genotypes[calledGenotypes] = snpObj.getDosageValues()[genotypeId]; cisvals[calledGenotypes] = origCisVals[i]; transvals[calledGenotypes] = origTransVals[i]; calledGenotypes++; } } // normalize genotype and cis + trans to get beta's equal to the correlation coefficient genotypes = normalize(genotypes); cisvals = normalize(cisvals); transvals = normalize(transvals); double corrCisTrans = JSci.maths.ArrayMath.correlation(cisvals, transvals); // for code validation double[] cisTransRCs = Regression.getLinearRegressionCoefficients( cisvals, transvals); // returns beta, alpha, se, t double[] snpCisRCs = Regression.getLinearRegressionCoefficients( genotypes, cisvals); // returns beta, alpha, se, t double[] snpTransRCs = Regression.getLinearRegressionCoefficients(genotypes, transvals); // remove correlation between cis and trans probe // double[] resCis = new double[cisvals.length]; double[] resTransVals = new double[cisvals.length]; for (int i = 0; i < resTransVals.length; i++) { // resCis[i] = 
cisvals[i] - snpCisRCs[0] * genotypes[i]; resTransVals[i] = transvals[i] - cisTransRCs[0] * cisvals[i]; } resTransVals = normalize(resTransVals); double[] cisResTransRCs = Regression.getLinearRegressionCoefficients( cisvals, resTransVals); // returns beta, alpha, se, t double[] snpResTransRCs = Regression.getLinearRegressionCoefficients(genotypes, resTransVals); double rescorr = JSci.maths.ArrayMath.correlation(cisvals, resTransVals); // for code validation out.writeln( snp + "\t" + snpObj.getChr() + "\t" + snpObj.getChrPos() + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "/" + BaseAnnot.toString(snpObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "\t" + transvals.length + "\t" + cisprobe + "\t" + m_gg[d].getExpressionData().getChr()[cisProbeId] + "\t" + m_gg[d].getExpressionData().getChrStart()[cisProbeId] + ":" + m_gg[d].getExpressionData().getChrStop()[cisProbeId] + "\t" + m_gg[d].getExpressionData().getAnnotation()[cisProbeId] + "\t" + transprobe + "\t" + m_gg[d].getExpressionData().getChr()[transProbeId] + "\t" + m_gg[d].getExpressionData().getChrStart()[transProbeId] + ":" + m_gg[d].getExpressionData().getChrStop()[transProbeId] + "\t" + m_gg[d].getExpressionData().getAnnotation()[transProbeId] + "\t" + corrCisTrans + "\t" + snpCisRCs[0] + "\t" + snpCisRCs[2] + "\t" + cisTransRCs[0] + "\t" + cisTransRCs[2] + "\t" + snpTransRCs[0] + "\t" + snpTransRCs[2] + "\t" + rescorr + "\t" + cisResTransRCs[0] + "\t" + cisResTransRCs[2] + "\t" + snpResTransRCs[0] + "\t" + snpResTransRCs[2] + "\t" + (snpResTransRCs[0] / snpTransRCs[0])); snpObj.clearGenotypes(); } if (it.hasNext()) { next = it.next(); } else { next = null; } pb.iterate(); } pb.close(); out.close(); } snpLoader.close(); } }
public final void compareOverlapAndZScoreDirectionTwoEQTLFiles( String eQTL, String meQTL, String eQTMFile, String outputFile, boolean matchOnGeneName, double fdrCutt, boolean matchSnpOnPos, boolean splitGeneNames, boolean flipUsingEQTM, boolean topeffect) throws IOException, Exception { System.out.println("Performing comparison of eQTLs and meQTLs"); double filterOnFDR = fdrCutt; // Do we want to use another FDR measure? When set to -1 this is not used at all. HashSet<String> hashExcludeEQTLs = new HashSet< String>(); // We can exclude some eQTLs from the analysis. If requested, put the entire // eQTL string in this HashMap for each eQTL. Does not work in combination // with mathcing based on chr and pos HashSet<String> hashConfineAnalysisToSubsetOfProbes = new HashSet< String>(); // We can confine the analysis to only a subset of probes. If requested put // the probe name in this HapMap HashSet<String> hashTestedSNPsThatPassedQC = null; // We can confine the analysis to only those eQTLs for which the SNP has been // successfully passed QC, otherwise sometimes unfair comparisons are made. 
If // requested, put the SNP name in this HashMap // Load the eQTM File QTLTextFile eQTLsTextFile = new QTLTextFile(eQTMFile, QTLTextFile.R); HashMap<String, ArrayList<EQTL>> eQtmInfo = new HashMap<String, ArrayList<EQTL>>(); for (Iterator<EQTL> eQtlIt = eQTLsTextFile.getEQtlIterator(); eQtlIt.hasNext(); ) { EQTL eQtm = eQtlIt.next(); String eQtmKey = eQtm.getRsName(); if (!eQtm.getAlleleAssessed().equals("C")) { eQtm.setAlleleAssessed("C"); eQtm.setZscore(eQtm.getZscore() * -1); Double[] zscores = eQtm.getDatasetZScores(); Double[] correlation = eQtm.getCorrelations(); for (int i = 0; i < eQtm.getDatasets().length; ++i) { zscores[i] *= -1; correlation[i] *= -1; } eQtm.setDatasetZScores(zscores); eQtm.setCorrelations(correlation); } ArrayList<EQTL> posEqtls = eQtmInfo.get(eQtmKey); if (posEqtls == null) { posEqtls = new ArrayList<EQTL>(1); posEqtls.add(eQtm); eQtmInfo.put(eQtmKey, posEqtls); } else if (!topeffect) { eQtmInfo.get(eQtmKey).add(eQtm); } } System.out.println("eQTMs read in: " + eQtmInfo.size()); // Now load the eQTLs for file 1: THashMap<String, String[]> hashEQTLs = new THashMap<String, String[]>(); THashSet<String> hashUniqueProbes = new THashSet<String>(); THashSet<String> hashUniqueGenes = new THashSet<String>(); TextFile in = new TextFile(eQTL, TextFile.R); in.readLine(); String[] data = in.readLineElemsReturnReference(SPLIT_ON_TAB); if (data.length < 5) { throw new IllegalStateException( "QTL File does not have enough columns. Detected columns: " + data.length + " in file " + in.getFileName()); } while (data != null) { if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) { if (hashConfineAnalysisToSubsetOfProbes.isEmpty() || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) { if (matchOnGeneName) { if (data[16].length() > 1) { if (splitGeneNames) { for (String gene : SEMI_COLON_PATTERN.split(data[16])) { hashEQTLs.put( (matchSnpOnPos ? 
data[2] + ":" + data[3] : data[1]) + "\t" + gene, data); hashUniqueProbes.add(data[4]); hashUniqueGenes.add(gene); } } else { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) { hashEQTLs.put( (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16], data); hashUniqueProbes.add(data[4]); hashUniqueGenes.add(data[16]); // log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" + // data[3] : data[1]) + "\t" + data[16]); } } } } else { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) { hashEQTLs.put( (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4], data); hashUniqueProbes.add(data[4]); hashUniqueGenes.add(data[16]); // log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" + // data[3] : data[1]) + "\t" + data[4]); } } } data = in.readLineElemsReturnReference(SPLIT_ON_TAB); } } in.close(); int nrUniqueProbes = hashUniqueProbes.size(); int nrUniqueGenes = hashUniqueGenes.size(); hashUniqueProbes = null; hashUniqueGenes = null; // Initialize Graphics2D for the Z-Score allelic direction comparison: // int width = 1000; // int height = 1000; // int margin = 100; // int x0 = margin; // int x1 = width - margin; // int y0 = margin; // int y1 = height - margin; ZScorePlot zs = new ZScorePlot(); String zsOutFileName = outputFile + "-ZScoreComparison.pdf"; zs.init(2, new String[] {"eQTLs", "meQTLs"}, true, zsOutFileName); // Variables holding variousStatistics: int nreQTLsIdenticalDirection = 0; int nreQTLsOppositeDirection = 0; HashMap<String, Integer> hashEQTLNrTimesAssessed = new HashMap<String, Integer>(); THashSet<String> hashEQTLs2 = new THashSet<String>(); THashSet<String> hashUniqueProbes2 = new THashSet<String>(); THashSet<String> hashUniqueGenes2 = new THashSet<String>(); THashSet<String> hashUniqueProbesOverlap = new THashSet<String>(); THashSet<String> hashUniqueGenesOverlap = new THashSet<String>(); int counterFile2 = 0; int overlap = 0; ArrayDoubleList vecX = new 
ArrayDoubleList(); ArrayDoubleList vecY = new ArrayDoubleList(); // Vector holding all opposite allelic effects: // LinkedHashSet<String> vecOppositeEQTLs = new LinkedHashSet<String>(); // Now process file 2: in = new TextFile(meQTL, TextFile.R); in.readLine(); int skippedDueToMapping = 0; data = null; TextFile identicalOut = new TextFile(outputFile + "-eQTLsWithIdenticalDirecton.txt.gz", TextFile.W); TextFile disconcordantOut = new TextFile(outputFile + "-OppositeEQTLs.txt", TextFile.W); TextFile log = new TextFile(outputFile + "-eQTL-meQTL-ComparisonLog.txt", TextFile.W); TextFile log2 = new TextFile(outputFile + "-eQTM-missingnessLog.txt", TextFile.W); THashSet<String> identifiersUsed = new THashSet<String>(); while ((data = in.readLineElemsReturnReference(SPLIT_ON_TAB)) != null) { if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) { if (!eQtmInfo.containsKey(data[4])) { skippedDueToMapping++; log2.write( "meQTL probe not present In eQTM file:\t" + data[4] + ", effect statistics: \t" + data[0] + "\t" + data[2] + "\t" + data[3] + "\t" + data[16] + "\n"); continue; } String orgDataFour = data[4]; for (int i = 0; i < eQtmInfo.get(orgDataFour).size(); ++i) { if (topeffect && i > 0) { break; } data[16] = eQtmInfo.get(orgDataFour).get(i).getProbeHUGO(); data[4] = eQtmInfo.get(orgDataFour).get(i).getProbe(); if (flipUsingEQTM) { Double zScoreQTM = eQtmInfo.get(orgDataFour).get(i).getZscore(); if (zScoreQTM < 0) { data[10] = String.valueOf(Double.parseDouble(data[10]) * -1); } } if (hashConfineAnalysisToSubsetOfProbes.isEmpty() || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) { if (matchOnGeneName) { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) { if (data[16].length() > 1) { if (splitGeneNames) { for (String gene : SEMI_COLON_PATTERN.split(data[16])) { hashUniqueProbes2.add(data[4]); hashUniqueGenes2.add(gene); if (!hashEQTLs2.contains( (matchSnpOnPos ? 
data[2] + ":" + data[3] : data[1]) + "\t" + gene)) { hashEQTLs2.add( (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene); counterFile2++; } } } else { hashUniqueProbes2.add(data[4]); hashUniqueGenes2.add(data[16]); if (!hashEQTLs2.contains( (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16])) { hashEQTLs2.add( (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16]); counterFile2++; } } } } } else { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) { // hashEQTLs2.put(data[1] + "\t" + data[4], str); hashUniqueProbes2.add(data[4]); hashUniqueGenes2.add(data[16]); counterFile2++; } } } String[] QTL = null; String identifier = null; if (matchOnGeneName) { if (data.length > 16 && data[16].length() > 1) { if (splitGeneNames) { // NB Plotting and processing of all QTLs here is not okay! for (String gene : SEMI_COLON_PATTERN.split(data[16])) { if (!hashExcludeEQTLs.contains(data[1] + "\t" + gene)) { identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene; if (hashEQTLs.containsKey(identifier)) { QTL = hashEQTLs.get(identifier); } } } } else { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) { identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16]; if (hashEQTLs.containsKey(identifier)) { QTL = hashEQTLs.get(identifier); } } } } } else { if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) { identifier = (matchSnpOnPos ? 
data[2] + ":" + data[3] : data[1]) + "\t" + data[4]; if (hashEQTLs.containsKey(identifier)) { QTL = hashEQTLs.get(identifier); } } } if (QTL == null) { // The eQTL, present in file 2 is not present in file 1: // if (Double.parseDouble(data[0]); < 1E-4) { if (hashTestedSNPsThatPassedQC == null || hashTestedSNPsThatPassedQC.contains(data[1])) { log.write( "eQTL Present In New file But Not In Original File:\t" + identifier + "\t" + data[0] + "\t" + data[2] + "\t" + data[3] + "\t" + data[16] + "\n"); } // } double zScore2 = Double.parseDouble(data[10]); // int posX = 500 + (int) 0; // int posY = 500 - (int) Math.round(zScore2 * 10); zs.draw(null, zScore2, 0, 1); } else { identifiersUsed.add(identifier); String[] eQtlData = QTL; boolean identicalProbe = true; String probe = data[4]; String probeFound = eQtlData[4]; if (!probe.equals(probeFound)) { identicalProbe = false; } hashUniqueProbesOverlap.add(data[4]); hashUniqueGenesOverlap.add(data[16]); if (!hashEQTLNrTimesAssessed.containsKey(identifier)) { hashEQTLNrTimesAssessed.put(identifier, 1); } else { hashEQTLNrTimesAssessed.put(identifier, 1 + hashEQTLNrTimesAssessed.get(identifier)); } String alleles = eQtlData[8]; String alleleAssessed = eQtlData[9]; String correlations[] = (eQtlData[17]).split(";"); double correlation = 0; int numCorr1 = 0; for (int c = 0; c < correlations.length; c++) { try { if (!correlations[c].equals("-")) { correlation += Double.parseDouble(correlations[c]); numCorr1++; } } catch (Exception e) { } } correlation /= (double) numCorr1; // if(numCorr1 == 0){ // System.out.println("Warning: no correlations defined for // eqtl file 1"); // } double zScore = Double.parseDouble(eQtlData[10]); // double pValue = Double.parseDouble(eQtlData[0]); String alleles2 = data[8]; String alleleAssessed2 = data[9]; double zScore2 = Double.parseDouble(data[10]); // double pValue2 = Double.parseDouble(data[0]); String correlations2[] = data[17].split(";"); double correlation2 = 0; boolean alleleflipped = false; 
if (!alleleAssessed.equals(data[9])) { if (data[9].equals(eQtlData[8].split("/")[0])) { alleleflipped = true; } else { // System.out.println("WTF BBQ!"); } } int numCorr2 = 0; for (int c = 0; c < correlations2.length; c++) { try { if (!correlations2[c].equals("-")) { correlation2 += (Double.parseDouble(correlations2[c])); numCorr2++; } } catch (NumberFormatException e) { } } // if(numCorr2 == 0){ // System.out.println("Warning: no correlations defined for // eqtl file 2"); // } correlation2 /= (double) numCorr2; if (alleleflipped) { correlation2 = -correlation2; } boolean sameDirection = false; int nrIdenticalAlleles = 0; if (alleles.length() > 2 && alleles2.length() > 2) { for (int a = 0; a < 3; a++) { for (int b = 0; b < 3; b++) { if (a != 1 && b != 1) { if (alleles.getBytes()[a] == alleles2.getBytes()[b]) { nrIdenticalAlleles++; } } } } } if (nrIdenticalAlleles == 0) { alleles2 = (char) BaseAnnot.getComplement((byte) alleles2.charAt(0)) + "/" + (char) BaseAnnot.getComplement((byte) alleles2.charAt(2)); alleleAssessed2 = BaseAnnot.getComplement(alleleAssessed2); if (alleles.length() > 2 && alleles2.length() > 2) { for (int a = 0; a < 3; a++) { for (int b = 0; b < 3; b++) { if (a != 1 && b != 1) { if (alleles.getBytes()[a] == alleles2.getBytes()[b]) { nrIdenticalAlleles++; } } } } } } if (nrIdenticalAlleles != 2) { log.write( "Error! 
SNPs have incompatible alleles!!:\t" + alleles + "\t" + alleles2 + "\t" + identifier + "\n"); } else { overlap++; if (!alleleAssessed.equals(alleleAssessed2)) { zScore2 = -zScore2; // correlation2 = -correlation2; alleleAssessed2 = alleleAssessed; } // Recode alleles: // if contains T, but no A, take complement // if (alleles.contains("T") && !alleles.contains("A")) { // alleles = BaseAnnot.getComplement(alleles); // alleleAssessed = // BaseAnnot.getComplement(alleleAssessed); // alleleAssessed2 = // BaseAnnot.getComplement(alleleAssessed2); // } if (zScore2 * zScore > 0) { sameDirection = true; } // if(correlation != correlation2 && (numCorr1 > 0 && numCorr2 > // 0)){ // if(Math.abs(correlation - correlation2) > 0.00001){ // System.out.println("Correlations are different: // "+lineno+"\t"+correlation +"\t"+correlation2+"\t"+str); // } // // } zs.draw(zScore, zScore2, 0, 1); if (!sameDirection) { nreQTLsOppositeDirection++; if (matchOnGeneName) { disconcordantOut.append( data[1] + '\t' + data[16] + '\t' + alleles + '\t' + alleleAssessed + '\t' + zScore + '\t' + alleles2 + '\t' + alleleAssessed2 + '\t' + zScore2); } else { disconcordantOut.append( data[1] + '\t' + data[4] + '\t' + alleles + '\t' + alleleAssessed + '\t' + zScore + '\t' + alleles2 + '\t' + alleleAssessed2 + '\t' + zScore2); } // int posX = 500 + (int) Math.round(zScore * 10); // int posY = 500 - (int) Math.round(zScore2 * 10); vecX.add(zScore); vecY.add(zScore2); } else { // write to output identicalOut.writeln( identifier + '\t' + alleles + '\t' + alleleAssessed + '\t' + zScore + '\t' + alleles2 + '\t' + alleleAssessed2 + '\t' + zScore2); nreQTLsIdenticalDirection++; if (alleles.length() > 2 && !alleles.equals("A/T") && !alleles.equals("T/A") && !alleles.equals("C/G") && !alleles.equals("G/C")) { // int posX = 500 + (int) Math.round(zScore * 10); // int posY = 500 - (int) Math.round(zScore2 * 10); vecX.add(zScore); vecY.add(zScore2); } } } } } } } identicalOut.close(); disconcordantOut.close(); 
in.close(); log2.close(); log.write( "\n/// Writing missing QTLs observed in original file but not in the new file ////\n\n"); for (Entry<String, String[]> QTL : hashEQTLs.entrySet()) { if (!identifiersUsed.contains(QTL.getKey())) { // The eQTL, present in file 1 is not present in file 2: // if (Double.parseDouble(QTL.getValue()[0]) < 1E-4) { if (hashTestedSNPsThatPassedQC == null || hashTestedSNPsThatPassedQC.contains(data[1])) { log.write( "eQTL Present In Original file But Not In New File:\t" + QTL.getKey() + "\t" + QTL.getValue()[0] + "\t" + QTL.getValue()[2] + "\t" + QTL.getValue()[3] + "\t" + QTL.getValue()[16] + "\n"); } // } double zScore = Double.parseDouble(QTL.getValue()[10]); // int posX = 500 + (int) 0; // int posY = 500 - (int) Math.round(zScore * 10); zs.draw(zScore, null, 0, 1); } } log.close(); zs.write(zsOutFileName); double[] valsX = vecX.toArray(); double[] valsY = vecY.toArray(); if (valsX.length > 2) { double correlation = JSci.maths.ArrayMath.correlation(valsX, valsY); double r2 = correlation * correlation; cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand(); cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(valsX.length - 2, randomEngine); double pValuePearson = 1; double tValue = correlation / (Math.sqrt((1 - r2) / (double) (valsX.length - 2))); if (tValue < 0) { pValuePearson = tDistColt.cdf(tValue); } else { pValuePearson = tDistColt.cdf(-tValue); } pValuePearson *= 2; System.out.println( "\nCorrelation between the Z-Scores of the overlapping set of eQTLs:\t" + correlation + "\tP-Value:\t" + pValuePearson); } TextFile outSummary = new TextFile(outputFile + "-Summary.txt", TextFile.W); System.out.println(""); System.out.println( "Nr of eQTLs:\t" + hashEQTLs.size() + "\tin file:\t" + eQTL + "\tNrUniqueProbes:\t" + nrUniqueProbes + "\tNrUniqueGenes:\t" + nrUniqueGenes); outSummary.writeln( "Nr of eQTLs:\t" + hashEQTLs.size() + "\tin file:\t" + eQTL + 
"\tNrUniqueProbes:\t" + nrUniqueProbes + "\tNrUniqueGenes:\t" + nrUniqueGenes); System.out.println( "Nr of meQTLs:\t" + counterFile2 + "\tin file:\t" + meQTL + "\tNrUniqueProbes:\t" + hashUniqueProbes2.size() + "\tNrUniqueGenes:\t" + hashUniqueGenes2.size() + " *With eQTM mapping."); outSummary.writeln( "Nr of meQTLs:\t" + counterFile2 + "\tin file:\t" + meQTL + "\tNrUniqueProbes:\t" + hashUniqueProbes2.size() + "\tNrUniqueGenes:\t" + hashUniqueGenes2.size() + " *With eQTM mapping."); System.out.println("Skipped over meQTLs:\t" + skippedDueToMapping); outSummary.writeln("Skipped over meQTLs:\t" + skippedDueToMapping); System.out.println( "Overlap:\t" + overlap + "\tNrUniqueProbesOverlap:\t" + hashUniqueProbesOverlap.size() + "\tNrUniqueGenesOverlap:\t" + hashUniqueGenesOverlap.size()); outSummary.writeln( "Overlap:\t" + overlap + "\tNrUniqueProbesOverlap:\t" + hashUniqueProbesOverlap.size() + "\tNrUniqueGenesOverlap:\t" + hashUniqueGenesOverlap.size()); System.out.println(""); outSummary.writeln(); System.out.println("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection); outSummary.writeln("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection); double proportionOppositeDirection = 100d * (double) nreQTLsOppositeDirection / (double) (nreQTLsOppositeDirection + nreQTLsIdenticalDirection); String proportionOppositeDirectionString = (new java.text.DecimalFormat( "0.00;-0.00", new java.text.DecimalFormatSymbols(java.util.Locale.US))) .format(proportionOppositeDirection); System.out.println( "Nr eQTLs with opposite direction:\t" + nreQTLsOppositeDirection + "\t(" + proportionOppositeDirectionString + "%)"); outSummary.writeln( "Nr eQTLs with opposite direction:\t" + nreQTLsOppositeDirection + "\t(" + proportionOppositeDirectionString + "%)"); outSummary.close(); nrShared = hashUniqueProbesOverlap.size(); nrOpposite = nreQTLsOppositeDirection; }
private static void processNormalizedIntraContactInformation( String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException { // ReadIn normalization chr1 TextFile inputNormChr1 = new TextFile( baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList(); inputNormChr1.close(); // System.out.println("Done reading norm factor 1"); if (!Gpio.exists(fileToRead + ".sorted")) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1); double factor1; double factor2; if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base); factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base); double contact = 
org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2); if (contact >= minValue) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2])); numberToBeMatched++; } else { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else { System.out.println("Error in files."); numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }