public static void convertGMTFileToPlainTextBinaryNetwork(
      String infile, String outfile, boolean hasIds, boolean isActuallyGMT) throws IOException {
    TextFile in = new TextFile(infile, TextFile.R);
    String line = null;
    Map<String, Integer> hashItems = new HashMap<String, Integer>();
    Map<String, Set<Integer>> hashSetIndices = new HashMap<String, Set<Integer>>();
    List<String> sets = new ArrayList<String>();
    int nextIndex = 0;
    int nrItemSets = 0;
    while ((line = in.readLine()) != null) {
      String[] split = line.split("\t");
      if (split.length == 0) {
        continue;
      }
      int firstItemIndex = hasIds ? (isActuallyGMT ? 2 : 1) : 0;
      if (hasIds) {
        sets.add(split[0]);
      }
      for (int i = firstItemIndex; i < split.length; i++) {
        Integer itemIndex = hashItems.get(split[i]);
        if (itemIndex == null) {
          hashItems.put(split[i], nextIndex);
          nextIndex++;
        }
        Set<Integer> setIndicesThisItem = hashSetIndices.get(split[i]);
        if (setIndicesThisItem == null) {
          setIndicesThisItem = new HashSet<Integer>();
          hashSetIndices.put(split[i], setIndicesThisItem);
        }
        setIndicesThisItem.add(nrItemSets);
      }
      nrItemSets++;
    }
    in.close();

    TextFile out = new TextFile(outfile, TextFile.W);
    for (int i = 0; i < nrItemSets; i++) {
      if (hasIds) {
        out.write("\t" + sets.get(i));
      } else {
        out.write("\tComplex" + (i + 1));
      }
    }
    out.writeln();
    for (String item : hashSetIndices.keySet()) {
      if (!"".equals(item)) {
        out.write(item);
        Set<Integer> setIndicesThisItem = hashSetIndices.get(item);
        for (int set = 0; set < nrItemSets; set++) {
          if (setIndicesThisItem.contains(set)) {
            out.write("\t1");
          } else {
            out.write("\t0");
          }
        }
        out.writeln();
      }
    }
    out.close();
  }
  private static void remapPositionsAndSnps(
      LinkedHashSet<String> OrderingEst,
      HashMap<String, String> mappingEst,
      HashMap<String, String> mappingUmcg,
      String outFile,
      String outFile2) {
    try {
      TextFile out = new TextFile(outFile, TextFile.W);
      TextFile out2 = new TextFile(outFile2, TextFile.W);

      for (String entry : OrderingEst) {
        //                System.out.println(entry);
        String snpPos = mappingEst.get(entry);
        String[] t = SPLIT_ON_COLON.split(snpPos);
        if (mappingUmcg.containsKey(snpPos)) {
          out.writeln(mappingUmcg.get(snpPos));
          out2.writeln(t[0] + "\t" + t[1] + "\t" + mappingUmcg.get(snpPos));
        } else {
          System.out.println("Problem: " + entry);
          out.writeln(entry);
          out2.writeln(t[0] + "\t" + t[1] + "\t" + entry);
        }
      }
      out.close();
      out2.close();
    } catch (IOException ex) {
      Logger.getLogger(ConverteMappingAndSnpFile.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
  public static void convertPlainTextDoubleMatrixToPlainTextEdgeList(
      String matrixfile, String outfile, String infileDelimiter, String outfileDelimiter)
      throws IOException {

    TextFile in = new TextFile(matrixfile, TextFile.R);
    String line = in.readLine();
    String[] split = line.split(infileDelimiter);
    String[] nodes = Arrays.copyOfRange(split, 1, split.length);
    LOGGER.log(Level.FINE, "{0} nodes in {1}", new Object[] {nodes.length, matrixfile});

    TextFile out = new TextFile(outfile, TextFile.W);
    int lineNr = 1;
    int nrEdges = 0;
    while ((line = in.readLine()) != null) {
      split = line.split(infileDelimiter);
      if (split.length != nodes.length + 1) {
        throw new IllegalArgumentException(
            "The data in file '"
                + matrixfile
                + "' are not a matrix. Check line "
                + (lineNr + 1)
                + ".");
      }
      String node = split[0];
      for (int i = 1; i < split.length; i++) {
        if (lineNr == i) {
          continue; // skip diagonal
        }
        try {
          double weight = Double.parseDouble(split[i]);
          if (weight > 0) {
            out.writeln(node + outfileDelimiter + nodes[i - 1] + outfileDelimiter + weight);
            nrEdges++;
          }
        } catch (NumberFormatException ex) {
          throw new IllegalArgumentException(
              "The data in file '"
                  + matrixfile
                  + "' are not numerical (cast to double failed). Check line "
                  + (lineNr + 1)
                  + ", column "
                  + (i + 1)
                  + ".");
        }
      }
      lineNr++;
    }
    in.close();
    out.close();
    LOGGER.log(
        Level.FINE,
        "{0} edges from {1} written to ''{2}''",
        new Object[] {nrEdges, matrixfile, outfile});
  }
  public static void writeGMTFileBasedOnGeneSetFileAndMappingFileRemovingDuplicateGeneSets(
      String genesetfile, String mappingfile, String mappingdelimiter, String gmtfile)
      throws IOException {
    TextFile in = new TextFile(mappingfile, TextFile.R);
    String line = in.readLine();
    Map<String, String> code2name = new HashMap<String, String>();
    while ((line = in.readLine()) != null) {
      String[] split = line.split(mappingdelimiter);
      String name = split[2].trim().replace("\"", "") + " (" + split[6].trim() + ")";
      code2name.put(split[0].trim().replace("\"", ""), name);
    }
    in.close();
    System.out.println(code2name.size() + " gene set annotations read");

    in = new TextFile(genesetfile, TextFile.R);
    TextFile out = new TextFile(gmtfile, TextFile.W);
    Map<String, Integer> usedNames = new HashMap<String, Integer>();
    Set<String> usedGeneSets = new HashSet<String>();
    while ((line = in.readLine()) != null) {
      String[] split = line.split("\t");
      if (split.length == 0) {
        continue;
      }
      String code = split[0].trim();
      String name = code2name.get(code);
      if (name == null) {
        LOGGER.log(Level.WARNING, "No annotation for gene set ''{0}''", code);
      } else {
        String genes = Arrays.asList(Arrays.copyOfRange(split, 1, split.length)).toString();
        System.out.println(genes);
        if (!usedGeneSets.contains(genes)) {
          Integer oldNrItems = usedNames.get(name);
          if (oldNrItems == null) {
            out.write(code + "\t" + name);
            for (int i = 1; i < split.length; i++) {
              out.write("\t" + split[i]);
            }
            out.writeln();
            usedNames.put(name, split.length - 1);
            usedGeneSets.add(genes);
          } else {
            //                    if (oldNrItems != split.length - 1) {
            System.out.println("Sets with different numbers of genes for " + name);
            //                    }
          }
        }
      }
    }
    in.close();
    out.close();
  }
  private static ArrayList<InterChrContact> writeRawInterContactInformation(
      ArrayList<InterChrContact> contacts, String fileToWrite) throws IOException {
    ArrayList<InterChrContact> chrContactInfo = new ArrayList<InterChrContact>();

    TextFile outWriter = new TextFile(fileToWrite, TextFile.W);

    String row;

    for (InterChrContact contact : contacts) {
      outWriter.writeln(
          contact.getChrLocationSmaller()
              + "\t"
              + contact.getChrLocationLarger()
              + "\t"
              + contact.getContactValue());
    }
    outWriter.close();
    return chrContactInfo;
  }
  @Override
  public void run() throws IOException {
    for (int d = 0; d < m_gg.length; d++) {
      // now test all triples
      SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader();
      int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray();

      for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) {
        String outfile = null;
        if (perm == 0) {
          outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt";
        } else {
          outfile =
              outDir + m_gg[d].getSettings().name + "_IVAnalysis-PermutationRound-" + perm + ".txt";
          m_gg[d].permuteSampleLables();
        }
        TextFile out = new TextFile(outfile, TextFile.W);
        Iterator<Triple<String, String, String>> it = snpProbeCombos.iterator();
        Triple<String, String, String> next = it.next();
        ProgressBar pb =
            new ProgressBar(
                snpProbeCombos.size(), "Running Mediation Analysis - Permutation " + perm);

        out.writeln(
            "SNP\tSNP Chr\tSNP ChrPos\t"
                + "Alleles\tDirectionAllele\t"
                + "N\t"
                + "CisArrayAddress\tCisProbe Chr\tCisProbe ChrPos\t"
                + "CisGeneName\t"
                + "TransArrayAddress\tTransProbe Chr\tTransProbe ChrPos\t"
                + "TransGeneName\t"
                + "CisTrans-Correlation\t"
                + "Cis-eQTL-Beta\t"
                + "Cis-eQTL-SE\t"
                + "CisTrans-Beta\t"
                + "CisTrans-SE\t"
                + "Trans-eQTL-Beta\t"
                + "Trans-eQTL-SE\t"
                + "CisTrans-Residual-Correlation\t"
                + "CisTrans-Residual-Beta\t"
                + "CisTrans-Residual-SE\t"
                + "Trans-eQTL-Residual-Beta\t"
                + "Trans-eQTL-Residual-SE\t"
                + "Beta-Ratio");

        while (next != null) {
          String snp = next.getLeft();
          String cisprobe = next.getMiddle();
          String transprobe = next.getRight();

          Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp);
          Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe);
          Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe);

          if (snpId == -9 || cisProbeId == null || transProbeId == null) {
            //                        out.writeln(snp + "\t" + snpId + "\t" + cisprobe + "\t" +
            // cisProbeId + "\t" + null + "\t" + transprobe + "\t" + transProbeId + "\t" + null +
            // "\t" + null + "\t" + null + "\t" + null + "\t" + null +
            // "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA");
          } else {

            SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId);
            snpLoader.loadGenotypes(snpObj);
            if (snpLoader.hasDosageInformation()) {
              snpLoader.loadDosage(snpObj);
            }
            double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId];
            double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId];

            int calledGenotypes = 0;
            for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) {
              int genotypeId = indWGA[i];
              short gt = snpObj.getGenotypes()[genotypeId];
              if (genotypeId > -1 && gt > -1) {
                calledGenotypes++;
              }
            }

            double[] genotypes = new double[calledGenotypes];
            double[] cisvals = new double[calledGenotypes];
            double[] transvals = new double[calledGenotypes];

            calledGenotypes = 0;
            for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) {
              int genotypeId = indWGA[i];
              short gt = snpObj.getGenotypes()[genotypeId];
              if (genotypeId > -1 && gt > -1) {
                genotypes[calledGenotypes] = snpObj.getDosageValues()[genotypeId];
                cisvals[calledGenotypes] = origCisVals[i];
                transvals[calledGenotypes] = origTransVals[i];
                calledGenotypes++;
              }
            }

            // normalize genotype and cis + trans to get beta's equal to the correlation coefficient
            genotypes = normalize(genotypes);
            cisvals = normalize(cisvals);
            transvals = normalize(transvals);

            double corrCisTrans =
                JSci.maths.ArrayMath.correlation(cisvals, transvals); // for code validation
            double[] cisTransRCs =
                Regression.getLinearRegressionCoefficients(
                    cisvals, transvals); // returns beta, alpha, se, t
            double[] snpCisRCs =
                Regression.getLinearRegressionCoefficients(
                    genotypes, cisvals); // returns beta, alpha, se, t
            double[] snpTransRCs = Regression.getLinearRegressionCoefficients(genotypes, transvals);

            // remove correlation between cis and trans probe
            //                        double[] resCis = new double[cisvals.length];
            double[] resTransVals = new double[cisvals.length];
            for (int i = 0; i < resTransVals.length; i++) {
              //                            resCis[i] = cisvals[i] - snpCisRCs[0] * genotypes[i];
              resTransVals[i] = transvals[i] - cisTransRCs[0] * cisvals[i];
            }

            resTransVals = normalize(resTransVals);

            double[] cisResTransRCs =
                Regression.getLinearRegressionCoefficients(
                    cisvals, resTransVals); // returns beta, alpha, se, t
            double[] snpResTransRCs =
                Regression.getLinearRegressionCoefficients(genotypes, resTransVals);

            double rescorr =
                JSci.maths.ArrayMath.correlation(cisvals, resTransVals); // for code validation

            out.writeln(
                snp
                    + "\t"
                    + snpObj.getChr()
                    + "\t"
                    + snpObj.getChrPos()
                    + "\t"
                    + BaseAnnot.toString(snpObj.getAlleles()[0])
                    + "/"
                    + BaseAnnot.toString(snpObj.getAlleles()[1])
                    + "\t"
                    + BaseAnnot.toString(snpObj.getAlleles()[0])
                    + "\t"
                    + transvals.length
                    + "\t"
                    + cisprobe
                    + "\t"
                    + m_gg[d].getExpressionData().getChr()[cisProbeId]
                    + "\t"
                    + m_gg[d].getExpressionData().getChrStart()[cisProbeId]
                    + ":"
                    + m_gg[d].getExpressionData().getChrStop()[cisProbeId]
                    + "\t"
                    + m_gg[d].getExpressionData().getAnnotation()[cisProbeId]
                    + "\t"
                    + transprobe
                    + "\t"
                    + m_gg[d].getExpressionData().getChr()[transProbeId]
                    + "\t"
                    + m_gg[d].getExpressionData().getChrStart()[transProbeId]
                    + ":"
                    + m_gg[d].getExpressionData().getChrStop()[transProbeId]
                    + "\t"
                    + m_gg[d].getExpressionData().getAnnotation()[transProbeId]
                    + "\t"
                    + corrCisTrans
                    + "\t"
                    + snpCisRCs[0]
                    + "\t"
                    + snpCisRCs[2]
                    + "\t"
                    + cisTransRCs[0]
                    + "\t"
                    + cisTransRCs[2]
                    + "\t"
                    + snpTransRCs[0]
                    + "\t"
                    + snpTransRCs[2]
                    + "\t"
                    + rescorr
                    + "\t"
                    + cisResTransRCs[0]
                    + "\t"
                    + cisResTransRCs[2]
                    + "\t"
                    + snpResTransRCs[0]
                    + "\t"
                    + snpResTransRCs[2]
                    + "\t"
                    + (snpResTransRCs[0] / snpTransRCs[0]));
            snpObj.clearGenotypes();
          }

          if (it.hasNext()) {
            next = it.next();
          } else {
            next = null;
          }
          pb.iterate();
        }
        pb.close();
        out.close();
      }
      snpLoader.close();
    }
  }
예제 #7
0
  public final void compareOverlapAndZScoreDirectionTwoEQTLFiles(
      String eQTL,
      String meQTL,
      String eQTMFile,
      String outputFile,
      boolean matchOnGeneName,
      double fdrCutt,
      boolean matchSnpOnPos,
      boolean splitGeneNames,
      boolean flipUsingEQTM,
      boolean topeffect)
      throws IOException, Exception {
    System.out.println("Performing comparison of eQTLs and meQTLs");
    double filterOnFDR =
        fdrCutt; // Do we want to use another FDR measure? When set to -1 this is not used at all.

    HashSet<String> hashExcludeEQTLs =
        new HashSet<
            String>(); // We can exclude some eQTLs from the analysis. If requested, put the entire
                       // eQTL string in this HashMap for each eQTL. Does not work in combination
                       // with mathcing based on chr and pos
    HashSet<String> hashConfineAnalysisToSubsetOfProbes =
        new HashSet<
            String>(); // We can confine the analysis to only a subset of probes. If requested put
                       // the probe name in this HapMap
    HashSet<String> hashTestedSNPsThatPassedQC =
        null; // We can confine the analysis to only those eQTLs for which the SNP has been
              // successfully passed QC, otherwise sometimes unfair comparisons are made. If
              // requested, put the SNP name in this HashMap

    // Load the eQTM File
    QTLTextFile eQTLsTextFile = new QTLTextFile(eQTMFile, QTLTextFile.R);

    HashMap<String, ArrayList<EQTL>> eQtmInfo = new HashMap<String, ArrayList<EQTL>>();

    for (Iterator<EQTL> eQtlIt = eQTLsTextFile.getEQtlIterator(); eQtlIt.hasNext(); ) {
      EQTL eQtm = eQtlIt.next();
      String eQtmKey = eQtm.getRsName();

      if (!eQtm.getAlleleAssessed().equals("C")) {
        eQtm.setAlleleAssessed("C");
        eQtm.setZscore(eQtm.getZscore() * -1);

        Double[] zscores = eQtm.getDatasetZScores();
        Double[] correlation = eQtm.getCorrelations();
        for (int i = 0; i < eQtm.getDatasets().length; ++i) {
          zscores[i] *= -1;
          correlation[i] *= -1;
        }
        eQtm.setDatasetZScores(zscores);
        eQtm.setCorrelations(correlation);
      }

      ArrayList<EQTL> posEqtls = eQtmInfo.get(eQtmKey);

      if (posEqtls == null) {
        posEqtls = new ArrayList<EQTL>(1);
        posEqtls.add(eQtm);
        eQtmInfo.put(eQtmKey, posEqtls);
      } else if (!topeffect) {
        eQtmInfo.get(eQtmKey).add(eQtm);
      }
    }

    System.out.println("eQTMs read in: " + eQtmInfo.size());

    // Now load the eQTLs for file 1:
    THashMap<String, String[]> hashEQTLs = new THashMap<String, String[]>();
    THashSet<String> hashUniqueProbes = new THashSet<String>();
    THashSet<String> hashUniqueGenes = new THashSet<String>();

    TextFile in = new TextFile(eQTL, TextFile.R);
    in.readLine();
    String[] data = in.readLineElemsReturnReference(SPLIT_ON_TAB);

    if (data.length < 5) {
      throw new IllegalStateException(
          "QTL File does not have enough columns. Detected columns: "
              + data.length
              + " in file "
              + in.getFileName());
    }

    while (data != null) {
      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
            || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
          if (matchOnGeneName) {
            if (data[16].length() > 1) {

              if (splitGeneNames) {
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene, data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(gene);
                }
              } else {

                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16], data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(data[16]);
                  // log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" +
                  // data[3] : data[1]) + "\t" + data[16]);
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              hashEQTLs.put(
                  (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4], data);
              hashUniqueProbes.add(data[4]);
              hashUniqueGenes.add(data[16]);
              //	log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" +
              // data[3] : data[1]) + "\t" + data[4]);
            }
          }
        }
        data = in.readLineElemsReturnReference(SPLIT_ON_TAB);
      }
    }
    in.close();

    int nrUniqueProbes = hashUniqueProbes.size();
    int nrUniqueGenes = hashUniqueGenes.size();
    hashUniqueProbes = null;
    hashUniqueGenes = null;

    // Initialize Graphics2D for the Z-Score allelic direction comparison:
    //        int width = 1000;
    //        int height = 1000;
    //        int margin = 100;
    //        int x0 = margin;
    //        int x1 = width - margin;
    //        int y0 = margin;
    //        int y1 = height - margin;

    ZScorePlot zs = new ZScorePlot();
    String zsOutFileName = outputFile + "-ZScoreComparison.pdf";
    zs.init(2, new String[] {"eQTLs", "meQTLs"}, true, zsOutFileName);

    // Variables holding variousStatistics:
    int nreQTLsIdenticalDirection = 0;
    int nreQTLsOppositeDirection = 0;
    HashMap<String, Integer> hashEQTLNrTimesAssessed = new HashMap<String, Integer>();

    THashSet<String> hashEQTLs2 = new THashSet<String>();
    THashSet<String> hashUniqueProbes2 = new THashSet<String>();
    THashSet<String> hashUniqueGenes2 = new THashSet<String>();
    THashSet<String> hashUniqueProbesOverlap = new THashSet<String>();
    THashSet<String> hashUniqueGenesOverlap = new THashSet<String>();

    int counterFile2 = 0;
    int overlap = 0;
    ArrayDoubleList vecX = new ArrayDoubleList();
    ArrayDoubleList vecY = new ArrayDoubleList();

    // Vector holding all opposite allelic effects:
    //        LinkedHashSet<String> vecOppositeEQTLs = new LinkedHashSet<String>();

    // Now process file 2:
    in = new TextFile(meQTL, TextFile.R);
    in.readLine();

    int skippedDueToMapping = 0;
    data = null;
    TextFile identicalOut =
        new TextFile(outputFile + "-eQTLsWithIdenticalDirecton.txt.gz", TextFile.W);
    TextFile disconcordantOut = new TextFile(outputFile + "-OppositeEQTLs.txt", TextFile.W);
    TextFile log = new TextFile(outputFile + "-eQTL-meQTL-ComparisonLog.txt", TextFile.W);
    TextFile log2 = new TextFile(outputFile + "-eQTM-missingnessLog.txt", TextFile.W);

    THashSet<String> identifiersUsed = new THashSet<String>();

    while ((data = in.readLineElemsReturnReference(SPLIT_ON_TAB)) != null) {

      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (!eQtmInfo.containsKey(data[4])) {
          skippedDueToMapping++;
          log2.write(
              "meQTL probe not present In eQTM file:\t"
                  + data[4]
                  + ", effect statistics: \t"
                  + data[0]
                  + "\t"
                  + data[2]
                  + "\t"
                  + data[3]
                  + "\t"
                  + data[16]
                  + "\n");
          continue;
        }

        String orgDataFour = data[4];

        for (int i = 0; i < eQtmInfo.get(orgDataFour).size(); ++i) {
          if (topeffect && i > 0) {
            break;
          }
          data[16] = eQtmInfo.get(orgDataFour).get(i).getProbeHUGO();
          data[4] = eQtmInfo.get(orgDataFour).get(i).getProbe();

          if (flipUsingEQTM) {
            Double zScoreQTM = eQtmInfo.get(orgDataFour).get(i).getZscore();
            if (zScoreQTM < 0) {
              data[10] = String.valueOf(Double.parseDouble(data[10]) * -1);
            }
          }

          if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
              || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
            if (matchOnGeneName) {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                if (data[16].length() > 1) {

                  if (splitGeneNames) {
                    for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                      hashUniqueProbes2.add(data[4]);
                      hashUniqueGenes2.add(gene);
                      if (!hashEQTLs2.contains(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene)) {
                        hashEQTLs2.add(
                            (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene);
                        counterFile2++;
                      }
                    }
                  } else {

                    hashUniqueProbes2.add(data[4]);
                    hashUniqueGenes2.add(data[16]);
                    if (!hashEQTLs2.contains(
                        (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16])) {
                      hashEQTLs2.add(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16]);
                      counterFile2++;
                    }
                  }
                }
              }
            } else {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
                // hashEQTLs2.put(data[1] + "\t" + data[4], str);
                hashUniqueProbes2.add(data[4]);
                hashUniqueGenes2.add(data[16]);
                counterFile2++;
              }
            }
          }
          String[] QTL = null;
          String identifier = null;
          if (matchOnGeneName) {

            if (data.length > 16 && data[16].length() > 1) {
              if (splitGeneNames) {
                // NB Plotting and processing of all QTLs here is not okay!
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {
                  if (!hashExcludeEQTLs.contains(data[1] + "\t" + gene)) {
                    identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene;
                    if (hashEQTLs.containsKey(identifier)) {
                      QTL = hashEQTLs.get(identifier);
                    }
                  }
                }
              } else {
                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  identifier =
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16];
                  if (hashEQTLs.containsKey(identifier)) {
                    QTL = hashEQTLs.get(identifier);
                  }
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4];
              if (hashEQTLs.containsKey(identifier)) {
                QTL = hashEQTLs.get(identifier);
              }
            }
          }

          if (QTL == null) {

            // The eQTL, present in file 2 is not present in file 1:
            // if (Double.parseDouble(data[0]); < 1E-4) {
            if (hashTestedSNPsThatPassedQC == null
                || hashTestedSNPsThatPassedQC.contains(data[1])) {
              log.write(
                  "eQTL Present In New file But Not In Original File:\t"
                      + identifier
                      + "\t"
                      + data[0]
                      + "\t"
                      + data[2]
                      + "\t"
                      + data[3]
                      + "\t"
                      + data[16]
                      + "\n");
            }
            // }
            double zScore2 = Double.parseDouble(data[10]);
            //                        int posX = 500 + (int) 0;
            //                        int posY = 500 - (int) Math.round(zScore2 * 10);
            zs.draw(null, zScore2, 0, 1);

          } else {
            identifiersUsed.add(identifier);
            String[] eQtlData = QTL;
            boolean identicalProbe = true;
            String probe = data[4];
            String probeFound = eQtlData[4];
            if (!probe.equals(probeFound)) {
              identicalProbe = false;
            }

            hashUniqueProbesOverlap.add(data[4]);
            hashUniqueGenesOverlap.add(data[16]);
            if (!hashEQTLNrTimesAssessed.containsKey(identifier)) {
              hashEQTLNrTimesAssessed.put(identifier, 1);
            } else {
              hashEQTLNrTimesAssessed.put(identifier, 1 + hashEQTLNrTimesAssessed.get(identifier));
            }
            String alleles = eQtlData[8];
            String alleleAssessed = eQtlData[9];

            String correlations[] = (eQtlData[17]).split(";");
            double correlation = 0;
            int numCorr1 = 0;
            for (int c = 0; c < correlations.length; c++) {
              try {
                if (!correlations[c].equals("-")) {
                  correlation += Double.parseDouble(correlations[c]);
                  numCorr1++;
                }
              } catch (Exception e) {
              }
            }

            correlation /= (double) numCorr1;
            //                       if(numCorr1 == 0){
            //                           System.out.println("Warning: no correlations defined for
            // eqtl file 1");
            //                       }
            double zScore = Double.parseDouble(eQtlData[10]);
            //                        double pValue = Double.parseDouble(eQtlData[0]);
            String alleles2 = data[8];
            String alleleAssessed2 = data[9];
            double zScore2 = Double.parseDouble(data[10]);

            //                        double pValue2 = Double.parseDouble(data[0]);
            String correlations2[] = data[17].split(";");
            double correlation2 = 0;

            boolean alleleflipped = false;
            if (!alleleAssessed.equals(data[9])) {
              if (data[9].equals(eQtlData[8].split("/")[0])) {
                alleleflipped = true;
              } else {
                //                               System.out.println("WTF BBQ!");
              }
            }

            int numCorr2 = 0;
            for (int c = 0; c < correlations2.length; c++) {
              try {
                if (!correlations2[c].equals("-")) {

                  correlation2 += (Double.parseDouble(correlations2[c]));

                  numCorr2++;
                }
              } catch (NumberFormatException e) {
              }
            }
            //                       if(numCorr2 == 0){
            //                           System.out.println("Warning: no correlations defined for
            // eqtl file 2");
            //                       }
            correlation2 /= (double) numCorr2;
            if (alleleflipped) {
              correlation2 = -correlation2;
            }
            boolean sameDirection = false;
            int nrIdenticalAlleles = 0;
            if (alleles.length() > 2 && alleles2.length() > 2) {
              for (int a = 0; a < 3; a++) {
                for (int b = 0; b < 3; b++) {
                  if (a != 1 && b != 1) {
                    if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                      nrIdenticalAlleles++;
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles == 0) {
              alleles2 =
                  (char) BaseAnnot.getComplement((byte) alleles2.charAt(0))
                      + "/"
                      + (char) BaseAnnot.getComplement((byte) alleles2.charAt(2));
              alleleAssessed2 = BaseAnnot.getComplement(alleleAssessed2);
              if (alleles.length() > 2 && alleles2.length() > 2) {
                for (int a = 0; a < 3; a++) {
                  for (int b = 0; b < 3; b++) {
                    if (a != 1 && b != 1) {
                      if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                        nrIdenticalAlleles++;
                      }
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles != 2) {
              log.write(
                  "Error! SNPs have incompatible alleles!!:\t"
                      + alleles
                      + "\t"
                      + alleles2
                      + "\t"
                      + identifier
                      + "\n");
            } else {
              overlap++;
              if (!alleleAssessed.equals(alleleAssessed2)) {
                zScore2 = -zScore2;
                //                           correlation2 = -correlation2;
                alleleAssessed2 = alleleAssessed;
              }

              // Recode alleles:
              // if contains T, but no A, take complement
              //                        if (alleles.contains("T") && !alleles.contains("A")) {
              //                            alleles = BaseAnnot.getComplement(alleles);
              //                            alleleAssessed =
              // BaseAnnot.getComplement(alleleAssessed);
              //                            alleleAssessed2 =
              // BaseAnnot.getComplement(alleleAssessed2);
              //                        }
              if (zScore2 * zScore > 0) {
                sameDirection = true;
              }

              //                       if(correlation != correlation2 && (numCorr1 > 0 && numCorr2 >
              // 0)){
              //                           if(Math.abs(correlation - correlation2) > 0.00001){
              //                               System.out.println("Correlations are different:
              // "+lineno+"\t"+correlation +"\t"+correlation2+"\t"+str);
              //                           }
              //
              //                       }
              zs.draw(zScore, zScore2, 0, 1);
              if (!sameDirection) {
                nreQTLsOppositeDirection++;

                if (matchOnGeneName) {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[16]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);

                } else {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[4]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);
                }

                //                            int posX = 500 + (int) Math.round(zScore * 10);
                //                            int posY = 500 - (int) Math.round(zScore2 * 10);
                vecX.add(zScore);
                vecY.add(zScore2);

              } else {
                // write to output
                identicalOut.writeln(
                    identifier
                        + '\t'
                        + alleles
                        + '\t'
                        + alleleAssessed
                        + '\t'
                        + zScore
                        + '\t'
                        + alleles2
                        + '\t'
                        + alleleAssessed2
                        + '\t'
                        + zScore2);
                nreQTLsIdenticalDirection++;
                if (alleles.length() > 2
                    && !alleles.equals("A/T")
                    && !alleles.equals("T/A")
                    && !alleles.equals("C/G")
                    && !alleles.equals("G/C")) {
                  //                                int posX = 500 + (int) Math.round(zScore * 10);
                  //                                int posY = 500 - (int) Math.round(zScore2 * 10);
                  vecX.add(zScore);
                  vecY.add(zScore2);
                }
              }
            }
          }
        }
      }
    }
    identicalOut.close();
    disconcordantOut.close();
    in.close();
    log2.close();

    log.write(
        "\n/// Writing missing QTLs observed in original file but not in the new file ////\n\n");
    for (Entry<String, String[]> QTL : hashEQTLs.entrySet()) {
      if (!identifiersUsed.contains(QTL.getKey())) {
        // The eQTL, present in file 1 is not present in file 2:

        // if (Double.parseDouble(QTL.getValue()[0]) < 1E-4) {
        if (hashTestedSNPsThatPassedQC == null || hashTestedSNPsThatPassedQC.contains(data[1])) {
          log.write(
              "eQTL Present In Original file But Not In New File:\t"
                  + QTL.getKey()
                  + "\t"
                  + QTL.getValue()[0]
                  + "\t"
                  + QTL.getValue()[2]
                  + "\t"
                  + QTL.getValue()[3]
                  + "\t"
                  + QTL.getValue()[16]
                  + "\n");
        }
        // }
        double zScore = Double.parseDouble(QTL.getValue()[10]);
        //                int posX = 500 + (int) 0;
        //                int posY = 500 - (int) Math.round(zScore * 10);
        zs.draw(zScore, null, 0, 1);
      }
    }

    log.close();
    zs.write(zsOutFileName);

    double[] valsX = vecX.toArray();
    double[] valsY = vecY.toArray();

    if (valsX.length > 2) {
      double correlation = JSci.maths.ArrayMath.correlation(valsX, valsY);
      double r2 = correlation * correlation;

      cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine =
          new cern.jet.random.tdouble.engine.DRand();
      cern.jet.random.tdouble.StudentT tDistColt =
          new cern.jet.random.tdouble.StudentT(valsX.length - 2, randomEngine);
      double pValuePearson = 1;
      double tValue = correlation / (Math.sqrt((1 - r2) / (double) (valsX.length - 2)));
      if (tValue < 0) {
        pValuePearson = tDistColt.cdf(tValue);
      } else {
        pValuePearson = tDistColt.cdf(-tValue);
      }
      pValuePearson *= 2;
      System.out.println(
          "\nCorrelation between the Z-Scores of the overlapping set of eQTLs:\t"
              + correlation
              + "\tP-Value:\t"
              + pValuePearson);
    }

    TextFile outSummary = new TextFile(outputFile + "-Summary.txt", TextFile.W);

    System.out.println("");
    System.out.println(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);
    outSummary.writeln(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);

    System.out.println(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");
    outSummary.writeln(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");

    System.out.println("Skipped over meQTLs:\t" + skippedDueToMapping);
    outSummary.writeln("Skipped over meQTLs:\t" + skippedDueToMapping);

    System.out.println(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());
    outSummary.writeln(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());

    System.out.println("");
    outSummary.writeln();

    System.out.println("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);
    outSummary.writeln("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);

    double proportionOppositeDirection =
        100d
            * (double) nreQTLsOppositeDirection
            / (double) (nreQTLsOppositeDirection + nreQTLsIdenticalDirection);
    String proportionOppositeDirectionString =
        (new java.text.DecimalFormat(
                "0.00;-0.00", new java.text.DecimalFormatSymbols(java.util.Locale.US)))
            .format(proportionOppositeDirection);

    System.out.println(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");
    outSummary.writeln(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");

    outSummary.close();

    nrShared = hashUniqueProbesOverlap.size();
    nrOpposite = nreQTLsOppositeDirection;
  }
  private static void processNormalizedIntraContactInformation(
      String fileToRead,
      String baseName,
      String normMethod,
      String chrSmaller,
      ArrayList<DesiredChrContact> contactsToCheck,
      String resolution,
      double minValue,
      TextFile outWriter)
      throws IOException {

    // ReadIn normalization chr1
    TextFile inputNormChr1 =
        new TextFile(
            baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
    inputNormChr1.close();

    //        System.out.println("Done reading norm factor 1");
    if (!Gpio.exists(fileToRead + ".sorted")) {
      umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(
          fileToRead, fileToRead + ".sorted");
    }

    int numberToBeMatched = 0;

    LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

    try {
      while (it.hasNext()) {
        String[] parts = StringUtils.split(it.nextLine(), '\t');

        int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
        int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

        while (numberToBeMatched < contactsToCheck.size()) {
          if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
            break;
          } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
            if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
              break;
            }
            if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {

              String factor1Base =
                  normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
              String factor2Base =
                  normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

              double factor1;
              double factor2;

              if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) {
                factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base);
                factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base);

                double contact =
                    org.apache.commons.lang.math.NumberUtils.createDouble(parts[2])
                        / (factor1 * factor2);
                if (contact >= minValue) {
                  outWriter.writeln(
                      contactsToCheck.get(numberToBeMatched).getSnpName()
                          + "\t"
                          + contactsToCheck.get(numberToBeMatched).getProbeName()
                          + "\t"
                          + posChr1
                          + "\t"
                          + posChr2
                          + "\tContact\t"
                          + contact
                          + "\t"
                          + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                  numberToBeMatched++;
                } else {
                  outWriter.writeln(
                      contactsToCheck.get(numberToBeMatched).getSnpName()
                          + "\t"
                          + contactsToCheck.get(numberToBeMatched).getProbeName()
                          + "\t"
                          + posChr1
                          + "\t"
                          + posChr2
                          + "\t-\t-\t-");
                  numberToBeMatched++;
                }
              } else {
                System.out.println("Error in files.");
                numberToBeMatched++;
              }
            } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
              outWriter.writeln(
                  contactsToCheck.get(numberToBeMatched).getSnpName()
                      + "\t"
                      + contactsToCheck.get(numberToBeMatched).getProbeName()
                      + "\t"
                      + posChr1
                      + "\t"
                      + posChr2
                      + "\t-\t-\t-");
              numberToBeMatched++;
            }
          } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
            outWriter.writeln(
                contactsToCheck.get(numberToBeMatched).getSnpName()
                    + "\t"
                    + contactsToCheck.get(numberToBeMatched).getProbeName()
                    + "\t"
                    + posChr1
                    + "\t"
                    + posChr2
                    + "\t-\t-\t-");
            numberToBeMatched++;
          }
        }
      }
    } finally {
      LineIterator.closeQuietly(it);
    }
  }