/**
 * Converts a gene-set file (GMT or plain item-set format) into a plain-text binary
 * membership matrix: one column per item set, one row per item, cell value 1 when the
 * item occurs in that set and 0 otherwise.
 *
 * @param infile path of the input file, one item set per line, tab-delimited
 * @param outfile path of the matrix file to write
 * @param hasIds true when the first column of each input line is a set identifier
 * @param isActuallyGMT true when the file is real GMT (id plus description column
 *     preceding the items); only consulted when {@code hasIds} is true
 * @throws IOException when reading the input or writing the output fails
 */
public static void convertGMTFileToPlainTextBinaryNetwork(
      String infile, String outfile, boolean hasIds, boolean isActuallyGMT) throws IOException {
    TextFile in = new TextFile(infile, TextFile.R);
    String line = null;
    // For every item: the indices of the input lines (item sets) it occurs in.
    Map<String, Set<Integer>> hashSetIndices = new HashMap<String, Set<Integer>>();
    List<String> sets = new ArrayList<String>();
    int nrItemSets = 0;
    while ((line = in.readLine()) != null) {
      String[] split = line.split("\t");
      if (split.length == 0) {
        continue;
      }
      // GMT lines are <id> <description> <items...>; id-only lines are <id> <items...>.
      int firstItemIndex = hasIds ? (isActuallyGMT ? 2 : 1) : 0;
      if (hasIds) {
        sets.add(split[0]);
      }
      for (int i = firstItemIndex; i < split.length; i++) {
        Set<Integer> setIndicesThisItem = hashSetIndices.get(split[i]);
        if (setIndicesThisItem == null) {
          setIndicesThisItem = new HashSet<Integer>();
          hashSetIndices.put(split[i], setIndicesThisItem);
        }
        setIndicesThisItem.add(nrItemSets);
      }
      nrItemSets++;
    }
    in.close();

    TextFile out = new TextFile(outfile, TextFile.W);
    // Header row: one column per set, named by its id or by a generated "ComplexN" label.
    for (int i = 0; i < nrItemSets; i++) {
      if (hasIds) {
        out.write("\t" + sets.get(i));
      } else {
        out.write("\tComplex" + (i + 1));
      }
    }
    out.writeln();
    // One row per item (empty item names are skipped): 1 when the item is in the set.
    for (Map.Entry<String, Set<Integer>> entry : hashSetIndices.entrySet()) {
      String item = entry.getKey();
      if (!"".equals(item)) {
        out.write(item);
        Set<Integer> setIndicesThisItem = entry.getValue();
        for (int set = 0; set < nrItemSets; set++) {
          if (setIndicesThisItem.contains(set)) {
            out.write("\t1");
          } else {
            out.write("\t0");
          }
        }
        out.writeln();
      }
    }
    out.close();
  }
  public static void writeGMTFileBasedOnGeneSetFileAndMappingFileRemovingDuplicateGeneSets(
      String genesetfile, String mappingfile, String mappingdelimiter, String gmtfile)
      throws IOException {
    TextFile in = new TextFile(mappingfile, TextFile.R);
    String line = in.readLine();
    Map<String, String> code2name = new HashMap<String, String>();
    while ((line = in.readLine()) != null) {
      String[] split = line.split(mappingdelimiter);
      String name = split[2].trim().replace("\"", "") + " (" + split[6].trim() + ")";
      code2name.put(split[0].trim().replace("\"", ""), name);
    }
    in.close();
    System.out.println(code2name.size() + " gene set annotations read");

    in = new TextFile(genesetfile, TextFile.R);
    TextFile out = new TextFile(gmtfile, TextFile.W);
    Map<String, Integer> usedNames = new HashMap<String, Integer>();
    Set<String> usedGeneSets = new HashSet<String>();
    while ((line = in.readLine()) != null) {
      String[] split = line.split("\t");
      if (split.length == 0) {
        continue;
      }
      String code = split[0].trim();
      String name = code2name.get(code);
      if (name == null) {
        LOGGER.log(Level.WARNING, "No annotation for gene set ''{0}''", code);
      } else {
        String genes = Arrays.asList(Arrays.copyOfRange(split, 1, split.length)).toString();
        System.out.println(genes);
        if (!usedGeneSets.contains(genes)) {
          Integer oldNrItems = usedNames.get(name);
          if (oldNrItems == null) {
            out.write(code + "\t" + name);
            for (int i = 1; i < split.length; i++) {
              out.write("\t" + split[i]);
            }
            out.writeln();
            usedNames.put(name, split.length - 1);
            usedGeneSets.add(genes);
          } else {
            //                    if (oldNrItems != split.length - 1) {
            System.out.println("Sets with different numbers of genes for " + name);
            //                    }
          }
        }
      }
    }
    in.close();
    out.close();
  }
  /**
   * Writes one "&lt;probe&gt;-&lt;snp&gt;\t&lt;hasContact&gt;" line per unique probe/SNP pair to the
   * writer. A pair is reported as true as soon as any of its contacts has contact, and
   * as false only when none of them do.
   *
   * @param contacts the contacts to summarize
   * @param outWriter destination writer (not closed by this method)
   * @throws IOException when writing to {@code outWriter} fails
   */
  private static void printOutContacts(ArrayList<DesiredChrContact> contacts, TextFile outWriter)
      throws IOException {
    HashMap<String, Boolean> textToStore = new HashMap<>();

    for (DesiredChrContact contact : contacts) {
      String pairKey = contact.getProbeName() + "-" + contact.getSnpName();
      // OR the new observation into whatever we stored so far: true is sticky.
      Boolean seenSoFar = textToStore.get(pairKey);
      textToStore.put(pairKey, (seenSoFar != null && seenSoFar) || contact.hasContact());
    }
    for (Entry<String, Boolean> pairEntry : textToStore.entrySet()) {
      outWriter.write(pairEntry.getKey() + "\t" + pairEntry.getValue() + "\n");
    }
  }
  /**
   * Starts parsing a genotype report file, which can be in different formats. This import program
   * can accommodate many formats, and uses buffering to achieve sufficient performance.
   */
  public FinalReportToTriTyper(
      String inputFile,
      String outputDirString,
      boolean isIlluminaFinalReportFile,
      String delimiter,
      String decimalSeparator)
      throws IOException {

    // Check whether we can write to the output directory:
    File outputDir = new File(outputDirString);
    if (!outputDir.isDirectory()) {
      System.out.println("Your output directory does not exist!");
      System.exit(-1);
    }

    // ArrayLists and hashes for determining file size of final report file:
    HashMap<String, Integer> hashInd = new HashMap<String, Integer>();
    ArrayList<String> vecInd = new ArrayList<String>();
    HashMap<String, Integer> hashSNP = new HashMap<String, Integer>();
    ArrayList<String> vecSNP = new ArrayList<String>();

    // First parse file, determine what the amount of unique samples and SNPs is.
    System.out.println("");
    System.out.println(
        "TriTyperImporter V1.0, 2008, Lude Franke, University Medical Centre Utrecht, [email protected]");
    System.out.println("");
    System.out.println("Processing file:\t" + inputFile);
    System.out.println("Inventorizing input file, determining number of unique SNPs and samples:");
    int columnSample = -1;
    int columnSNP = -1;
    int columnAllele1 = -1;
    int columnAllele2 = -1;
    int columnTheta = -1;
    int columnR = -1;
    int columnX = -1;
    int columnY = -1;
    boolean rawDataAvailable = false;

    // Try to open the input file:
    if (!Gpio.canRead(inputFile)) {
      System.out.println("");
      System.out.println("Cannot open file:\t" + inputFile);
      System.out.println("Are you sure it is located at this place?");
      System.exit(-1);
    }
    TextFile in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    String str = null;
    if (isIlluminaFinalReportFile) {
      int countIlluminaFinalReport = 0;
      while ((str = in.readLine()) != null) {
        String[] data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
        countIlluminaFinalReport++;
        if (countIlluminaFinalReport > 100) {
          System.out.println(
              "\nError: You have defined that this file is a final report file, which it does not seem to be as a row with the word [Data] cannot be found!");
          System.exit(-1);
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        String[] data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    String[] data = str.split(delimiter);
    if (data.length <= 1) {
      System.out.println("");
      System.out.println("Error parsing input file! The file cannot be delimited!");
      String delimiterDescription = "tab";
      if (delimiter.equals(" ")) {
        delimiterDescription = "space";
      }
      if (delimiter.equals(",")) {
        delimiterDescription = "comma";
      }
      if (delimiter.equals(";")) {
        delimiterDescription = "semicolon";
      }
      System.out.println("Are you sure it is " + delimiterDescription + " delimited ?");
      System.exit(-1);
    }

    for (int d = 0; d < data.length; d++) {
      String column = data[d].trim().toLowerCase();
      if (column.equals("sample id")) {
        columnSample = d;
      }
      if (column.equals("snp name")) {
        columnSNP = d;
      }
      if (column.contains("allele1")) {
        columnAllele1 = d;
      }
      if (column.contains("allele 1")) {
        columnAllele1 = d;
      }
      if (column.contains("allele2")) {
        columnAllele2 = d;
      }
      if (column.contains("allele 2")) {
        columnAllele2 = d;
      }
      if (column.equals("r")) {
        columnR = d;
      }
      if (column.equals("theta")) {
        columnTheta = d;
      }
    }
    if (columnSample == -1) {
      System.out.println(
          "\nError: Within the header of this file the sample id column (Sample ID) cannot be found!");
      System.exit(-1);
    }
    if (columnAllele1 == -1) {
      System.out.println(
          "\nError: Within the header of this file the allele 1 column (Allele1) cannot be found!");
      System.exit(-1);
    }
    if (columnAllele2 == -1) {
      System.out.println(
          "\nError: Within the header of this file the allele 2 column (Allele2) cannot be found!");
      System.exit(-1);
    }
    if (columnSNP == -1) {
      System.out.println(
          "\nError: Within the header of this file the SNP name column (SNP Name) cannot be found!");
      System.exit(-1);
    }
    rawDataAvailable = true;
    for (int d = 0; d < data.length; d++) {
      String column = data[d].trim().toLowerCase();
      if (column.equals("x")) {
        columnX = d;
      }
      if (column.equals("y")) {
        columnY = d;
      }
    }

    if ((columnR == -1 || columnTheta == -1) && (columnX == -1 || columnY == -1)) {
      System.out.println(
          "Within the header of this file no raw intensity data is present (either R and Theta, or X and Y). Only imputation of triallelic SNPs will be possible");
      rawDataAvailable = false;
    }

    System.out.println("");

    boolean fileAlreadyInventorized = false;
    if ((new File(outputDirString + "Individuals.txt")).exists()
        && (new File(outputDirString + "SNPs.txt")).exists()) {
      fileAlreadyInventorized = true;
    }

    if (!fileAlreadyInventorized) {

      // Start processing this file
      String previousSNP = null;
      String previousInd = null;
      long linesProcessed = 0;
      while ((str = in.readLine()) != null) {
        // System.out.println(str);
        data = str.split(delimiter);

        if (data.length <= 1) {
          System.out.println("\nError parsing input file! The file cannot be delimited!");
          String delimiterDescription = "tab";
          if (delimiter.equals(" ")) {
            delimiterDescription = "space";
          }
          if (delimiter.equals(",")) {
            delimiterDescription = "comma";
          }
          if (delimiter.equals(";")) {
            delimiterDescription = "semicolon";
          }
          System.out.println("Are you sure it is " + delimiterDescription + " delimited ?");
        }
        if (data.length <= columnSNP || data.length <= columnSample) {
          System.out.println(
              "\nError: For record entry "
                  + (linesProcessed + 1)
                  + " the SNP or sample cannot be parsed! Record: "
                  + str);
          System.exit(-1);
        }
        String snp = data[columnSNP];
        String ind = data[columnSample];

        if (!snp.equals(previousSNP) && !hashSNP.containsKey(snp)) {
          hashSNP.put(snp, vecSNP.size());
          vecSNP.add(snp);
        }

        if (!ind.equals(previousInd) && !hashInd.containsKey(ind)) {
          hashInd.put(ind, vecInd.size());
          vecInd.add(ind);
        }

        previousSNP = snp;
        previousInd = ind;

        linesProcessed++;
        if (linesProcessed % 500000 == 0) {
          System.out.println(
              linesProcessed
                  + "\tLines processed. Number of unique SNPs read so far:\t"
                  + vecSNP.size()
                  + "\tNumber of unique Individuals read so far:\t"
                  + vecInd.size());
        }
      }
      System.out.println(
          linesProcessed
              + "\tLines processed. Number of unique SNPs read:\t"
              + vecSNP.size()
              + "\tNumber of unique Individuals read:\t"
              + vecInd.size());
      in.close();

      // Check whether SNPMappings.txt is available. This will improve processing speed considerably
      // in subsequent operations:
      String fileSNPMappings = new String(outputDirString + "SNPMappings.txt");
      if (!Gpio.canRead(fileSNPMappings)) {
        System.out.println(
            "\nNon critical warning: SNPMappings.txt can not be found in the output directory. Data will not be stored in optimized way, which will negatively affect the speed of TriTyper.\n");
      } else {

        System.out.println("\nLoading SNP mappings from file:\t" + fileSNPMappings);
        TextFile inSNP = new TextFile(fileSNPMappings, TextFile.R);
        String str2;
        ArrayList<String> vectorTemp = new ArrayList<String>();
        boolean needsSorting = false;
        while ((str2 = inSNP.readLine()) != null) {
          data = str2.split("\t");
          if (hashSNP.containsKey(data[2])) {
            if (data[1].length() != 9) {
              needsSorting = true;
              while (data[1].length() < 9) {
                data[1] = "0" + data[1];
              }
            }
            vectorTemp.add(data[0] + "\t" + data[1] + "\t" + data[2]);
          }
        }
        inSNP.close();
        if (needsSorting) {
          System.out.println(
              "Sorting SNPs on chromosome and physical position that are present in SNP mappings file:");
          Collections.sort(vectorTemp);
        }

        HashMap<String, Integer> hashSNPMappings = new HashMap<String, Integer>();
        ArrayList<String> vecSNPMappings = new ArrayList<String>();
        for (int snp = 0; snp < vectorTemp.size(); snp++) {
          String snpString = vectorTemp.get(snp);
          hashSNPMappings.put(snpString.split("\t")[2], vecSNPMappings.size());
          vecSNPMappings.add(snpString.split("\t")[2]);
        }
        System.out.println(
            "Number of SNPs with available physical mappings:\t" + vecSNPMappings.size());

        // Now sort the processed SNPs and arrange them, according to what is known:
        boolean[] snpMappingsUsed = new boolean[vecSNPMappings.size()];
        ArrayList vecSNPCopy = new ArrayList();
        for (int snp = 0; snp < vecSNP.size(); snp++) {
          String rsName = vecSNP.get(snp);
          if (hashSNPMappings.containsKey(rsName)) {
            snpMappingsUsed[hashSNPMappings.get(rsName)] = true;
          }
        }
        ArrayList<String> vecSNPNew = new ArrayList<String>();
        HashMap<String, Integer> hashSNPNew = new HashMap<String, Integer>();
        for (int snp = 0; snp < vecSNPMappings.size(); snp++) {
          if (snpMappingsUsed[snp]) {
            String rsName = vecSNPMappings.get(snp);
            hashSNPNew.put(rsName, vecSNPNew.size());
            vecSNPNew.add(rsName);
          }
        }

        // Now add the SNPs for which no mapping is available. These will be imported, but we cannot
        // do anything with them:
        ArrayList<String> snpsWithoutMapping = new ArrayList<String>();
        for (int snp = 0; snp < vecSNP.size(); snp++) {
          String rsName = vecSNP.get(snp);
          if (!hashSNPNew.containsKey(rsName)) {
            hashSNPNew.put(rsName, vecSNPNew.size());
            vecSNPNew.add(rsName);
            snpsWithoutMapping.add(rsName);
          }
        }
        if (snpsWithoutMapping.size() > 0) {
          System.out.println("Non critical warning: No physical mapping is available for SNPs:");
          for (int s = 0; s < snpsWithoutMapping.size(); s++) {
            System.out.println(snpsWithoutMapping.get(s));
          }
          System.out.println("");
        }

        // Replace the SNP hashmap and vector.
        vecSNP.clear();
        hashSNP.clear();
        vecSNP = vecSNPNew;
        hashSNP = hashSNPNew;
      }

      // Write individuals to file:

      System.out.print("Writing individuals to file:\t");
      TextFile outInd = new TextFile(outputDirString + "Individuals.txt", TextFile.W);
      for (int ind = 0; ind < vecInd.size(); ind++) {
        String individual = ((String) vecInd.get(ind));
        outInd.write(individual + "\n");
      }
      outInd.close();
      System.out.println("OK");

      System.out.print("Writing SNPs to file:\t");
      TextFile outSNP = new TextFile(outputDirString + "SNPs.txt", TextFile.W);
      for (int snp = 0; snp < vecSNP.size(); snp++) {
        outSNP.write(((String) vecSNP.get(snp)) + "\n");
      }
      outSNP.close();
      System.out.println("OK");

    } else {

      // Load individuals from file:
      vecInd.clear();
      hashInd.clear();

      TextFile inInd = new TextFile(outputDirString + "Individuals.txt", TextFile.R);
      while ((str = inInd.readLine()) != null) {
        hashInd.put(str, vecInd.size());
        vecInd.add(str);
      }
      inInd.close();

      // Load SNPs from file:
      vecSNP.clear();
      hashSNP.clear();

      TextFile inSNP = new TextFile(outputDirString + "SNPs.txt", TextFile.R);
      while ((str = inSNP.readLine()) != null) {
        hashSNP.put(str, vecSNP.size());
        vecSNP.add(str);
      }
      inSNP.close();
    }

    int nrInds = vecInd.size();
    int nrSNPs = vecSNP.size();

    // We now have inventorized the file and have generated the SNPs.txt and Individuals.txt files.
    // Now try to determine the order of genotypes, so we can chose a buffering technique. If no
    // order can be found we do not buffer, but importing will be extremely slow.
    boolean fileOrderPerSampleAllSNPs = false;
    boolean fileOrderPerSNPAllSamples = false;

    // Try to open the input file:
    in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    str = null;
    if (isIlluminaFinalReportFile) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    data = str.split(delimiter);
    int previousIndID = -1;
    int previousSNPID = -1;
    while ((str = in.readLine()) != null) {
      if (str.indexOf("\"") != -1) {
        str.replaceAll("\"", "");
      }
      if (str.indexOf("\'") != -1) {
        str.replaceAll("\'", "");
      }
      data = str.split(delimiter);
      String snp = data[columnSNP];
      String ind = data[columnSample];
      int snpID = hashSNP.get(snp);
      int indID = hashInd.get(ind);
      if (previousIndID != -1 && previousSNPID != -1) {
        if (snpID == previousSNPID && indID != previousIndID) {
          fileOrderPerSNPAllSamples = true;
          System.out.println(
              "Based on the import file, TriTyper Importer assumes that the order of the file is such that for each SNP all samples are underneath each other in the import file. This assumptions increases importing performance.");
        }
        if (snpID != previousSNPID && indID == previousIndID) {
          fileOrderPerSampleAllSNPs = true;
          System.out.println(
              "Based on the import file, TriTyper Importer assumes that the order of the file is such that for each sample all SNPs are underneath each other in the import file. This assumptions increases importing performance.");
        }
        break;
      }
      previousIndID = indID;
      previousSNPID = snpID;
    }

    System.out.print("Initializing binary data files:\t");
    RandomAccessFile file = new RandomAccessFile(outputDirString + "GenotypeMatrix.dat", "rw");
    RandomAccessFile fileRawData = null;
    if (rawDataAvailable) {
      fileRawData = new RandomAccessFile(outputDirString + "RawDataMatrix.dat", "rw");
    }
    System.out.println("OK");

    // Fill files with zeros:
    long size = (long) vecSNP.size() * (long) vecInd.size();
    long sizeGenotypeMatrix = size * 2;
    long sizeRawDataMatrix = size * 3;

    // Set size of files:
    file.setLength(0);
    if (rawDataAvailable) {
      fileRawData.setLength(0);
    }

    System.out.print("Making binary files zero:\t");
    // Quickly fill using buffers:
    file.seek(0);
    if (rawDataAvailable) {
      fileRawData.seek(0);
    }
    byte[] emptyString = new byte[10000];
    for (int s = 0; s < 10000; s++) {
      emptyString[s] = 0;
    }
    for (long a = 0; a < size / 10000; a++) {
      file.write(emptyString);
      file.write(emptyString);
      if (rawDataAvailable) {
        fileRawData.write(emptyString);
        fileRawData.write(emptyString);
        fileRawData.write(emptyString);
      }
    }

    // Fill rest with bytes:
    long rest = size % 10000;
    for (int a = 0; a < rest; a++) {
      byte emptyByte = 0;
      file.write(emptyByte);
      file.write(emptyByte);
      if (rawDataAvailable) {
        fileRawData.write(emptyByte);
        fileRawData.write(emptyByte);
        fileRawData.write(emptyByte);
      }
    }
    System.out.println("OK");

    System.out.println("Processing input file:");

    // Seek to beginning of file:
    file.seek(0);
    if (rawDataAvailable) {
      fileRawData.seek(0);
    }

    // Try to open the input file:

    in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    str = null;
    if (isIlluminaFinalReportFile) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    data = str.split(delimiter);

    // If the file has such an order that for each sample all SNPs are underneath each other, we use
    // a buffering approach:
    byte[][] bufferAllele1 = null;
    byte[][] bufferAllele2 = null;
    byte[][] bufferR = null;
    byte[][] bufferTheta = null;
    int bufferFirstInd = 0;
    int bufferCurrentPos = 0;
    if (fileOrderPerSampleAllSNPs) {
      bufferAllele1 = new byte[nrSNPs][100];
      bufferAllele2 = new byte[nrSNPs][100];
      bufferR = new byte[nrSNPs][100];
      bufferTheta = new byte[nrSNPs][100];
    }
    if (fileOrderPerSNPAllSamples) {
      bufferAllele1 = new byte[1][nrInds];
      bufferAllele2 = new byte[1][nrInds];
      bufferR = new byte[1][nrInds];
      bufferTheta = new byte[1][nrInds];
    }

    // Start processing this file
    long linesProcessed = 0;
    previousIndID = -1;
    previousSNPID = -1;
    boolean warningGivenOnABGenotypeDefinition = false;
    while ((str = in.readLine()) != null) {

      // Remove quotes, if they exist:
      if (str.indexOf("\"") != -1) {
        str.replaceAll("\"", "");
      }
      if (str.indexOf("\'") != -1) {
        str.replaceAll("\'", "");
      }

      // Get individual values:
      data = str.split(delimiter);
      String snp = data[columnSNP];
      String ind = data[columnSample];
      double r = 0;
      double theta = 0;
      if (rawDataAvailable) {
        if (columnR != -1 && columnTheta != -1) {
          if (data.length <= columnR || data.length <= columnTheta) {
            System.out.println(
                "\nError: For record entry "
                    + (linesProcessed + 1)
                    + " R or Theta values cannot be parsed! Record: "
                    + str);
            System.out.println(
                "Can it be there are some entries in the file that do not have R or Theta value information?");
            System.exit(-1);
          }
          String rString = data[columnR];
          String thetaString = data[columnTheta];
          if (!decimalSeparator.equals(".")) {
            thetaString = thetaString.replaceAll(decimalSeparator, ".");
            rString = rString.replaceAll(decimalSeparator, ".");
          }
          // Parse R value:
          try {
            r = Double.parseDouble(rString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing R value: '"
                    + rString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes R values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          // Parse Theta value:
          try {
            theta = Double.parseDouble(thetaString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing theta value: '"
                    + thetaString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes theta values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
        } else {
          if (data.length <= columnX || data.length <= columnY) {
            System.out.println(
                "\nError: For record entry "
                    + (linesProcessed + 1)
                    + " X or Y intensities cannot be parsed! Record: "
                    + str);
            System.out.println(
                "Can it be there are some entries in the file that do not have X or Y intensity information?");
            System.exit(-1);
          }
          String xString = data[columnX];
          String yString = data[columnY];
          if (!decimalSeparator.equals(".")) {
            xString = xString.replaceAll(decimalSeparator, ".");
            yString = yString.replaceAll(decimalSeparator, ".");
          }
          double x = 0;
          double y = 0;
          try {
            x = Double.parseDouble(xString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing X value: '"
                    + xString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes X values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          try {
            y = Double.parseDouble(yString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing Y value: '"
                    + yString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes Y values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          // r = Math.sqrt(x * x + y * y);
          r = x + y;
          theta = 1;
          if (x > 0) {
            theta = twoDividedByPI * Math.atan2(y, x);
          }
        }
      }
      byte rByte = (byte) (Byte.MIN_VALUE + (Math.min(255d, r * 50d)));
      byte thetaByte = (byte) (Byte.MIN_VALUE + (theta * 200d));

      // Inspect genotype calls, these either should be A, C, G or T, - will become 0:
      byte allele1 = data[columnAllele1].getBytes()[0];
      byte allele2 = data[columnAllele2].getBytes()[0];
      if (allele1 == 45) {
        allele1 = 0;
      }
      if (allele2 == 45) {
        allele2 = 0;
      }
      if (allele1 == 66) {
        allele1 = 67;
        if (!warningGivenOnABGenotypeDefinition) {
          warningGivenOnABGenotypeDefinition = true;
          System.out.println(
              "\n\n\nWarning! The input genotype report file contains alleles that have been coded as B! These will be changed to C, please take this into account!!!\n\n\n");
        }
      }
      if (allele2 == 66) {
        allele2 = 67;
        if (!warningGivenOnABGenotypeDefinition) {
          warningGivenOnABGenotypeDefinition = true;
          System.out.println(
              "\n\n\nWarning! The input genotype report file contains alleles that have been coded as B! These will be changed to C, please take this into account!!!\n\n\n");
        }
      }

      // Write data:
      int snpID = ((Integer) hashSNP.get(snp)).intValue();
      int indID = ((Integer) hashInd.get(ind)).intValue();

      if (fileOrderPerSampleAllSNPs || fileOrderPerSNPAllSamples) {
        if (fileOrderPerSampleAllSNPs) {
          if (indID != previousIndID && previousIndID != -1) {
            bufferCurrentPos++;
          }
          if (bufferCurrentPos == 100) {
            // Flush buffer, hundred samples have just been processed
            System.out.println("100 samples have been processed, flushing buffers:");
            for (int s = 0; s < nrSNPs; s++) {
              file.seek((long) s * (long) nrInds * 2 + (long) bufferFirstInd);
              file.write(bufferAllele1[s]);
              file.seek((long) s * (long) nrInds * 2 + (long) nrInds + (long) bufferFirstInd);
              file.write(bufferAllele2[s]);
              if (rawDataAvailable) {
                fileRawData.seek(
                    (long) s * (long) vecInd.size() * 3
                        + (long) vecInd.size()
                        + (long) bufferFirstInd);
                fileRawData.write(bufferR[s]);
                fileRawData.seek(
                    (long) s * (long) vecInd.size() * 3
                        + (long) 2 * vecInd.size()
                        + (long) bufferFirstInd);
                fileRawData.write(bufferTheta[s]);
              }
            }
            bufferAllele1 = new byte[nrSNPs][100];
            bufferAllele2 = new byte[nrSNPs][100];
            bufferR = new byte[nrSNPs][100];
            bufferTheta = new byte[nrSNPs][100];
            bufferCurrentPos = 0;
            bufferFirstInd = indID;
          }
          bufferAllele1[snpID][bufferCurrentPos] = allele1;
          bufferAllele2[snpID][bufferCurrentPos] = allele2;
          bufferR[snpID][bufferCurrentPos] = rByte;
          bufferTheta[snpID][bufferCurrentPos] = thetaByte;
        } else {
          if (snpID != previousSNPID && previousSNPID != -1) {
            int s = previousSNPID;
            file.seek((long) s * (long) nrInds * 2);
            file.write(bufferAllele1[0]);
            file.seek((long) s * (long) nrInds * 2 + (long) nrInds);
            file.write(bufferAllele2[0]);
            if (rawDataAvailable) {
              fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) vecInd.size());
              fileRawData.write(bufferR[0]);
              fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) 2 * vecInd.size());
              fileRawData.write(bufferTheta[0]);
            }
            bufferAllele1 = new byte[1][nrInds];
            bufferAllele2 = new byte[1][nrInds];
            bufferR = new byte[1][nrInds];
            bufferTheta = new byte[1][nrInds];
          }
          bufferAllele1[0][indID] = allele1;
          bufferAllele2[0][indID] = allele2;
          bufferR[0][indID] = rByte;
          bufferTheta[0][indID] = thetaByte;
        }
      } else {
        file.seek((long) snpID * (long) nrInds * 2 + (long) indID);
        file.write(allele1);
        file.seek((long) snpID * (long) nrInds * 2 + (long) nrInds + (long) indID);
        file.write(allele2);
        if (rawDataAvailable) {
          fileRawData.seek(
              (long) snpID * (long) vecInd.size() * 3 + (long) vecInd.size() + (long) indID);
          fileRawData.write(rByte);
          fileRawData.seek(
              (long) snpID * (long) vecInd.size() * 3 + (long) 2 * vecInd.size() + (long) indID);
          fileRawData.write(thetaByte);
        }
      }

      linesProcessed++;
      if (linesProcessed % 500000 == 0) {
        System.out.println(linesProcessed + "\tLines processed");
      }

      previousIndID = indID;
      previousSNPID = snpID;
    }

    if (fileOrderPerSampleAllSNPs || fileOrderPerSNPAllSamples) {
      if (fileOrderPerSampleAllSNPs) {
        // Flush remaining buffer:
        System.out.println("Flushing remaining buffer (" + (bufferCurrentPos + 1) + " samples):");
        for (int s = 0; s < nrSNPs; s++) {
          byte[] bufferAllele1Subset = new byte[bufferCurrentPos + 1];
          byte[] bufferAllele2Subset = new byte[bufferCurrentPos + 1];
          byte[] bufferRSubset = new byte[bufferCurrentPos + 1];
          byte[] bufferThetaSubset = new byte[bufferCurrentPos + 1];
          for (int i = 0; i <= bufferCurrentPos; i++) {
            bufferAllele1Subset[i] = bufferAllele1[s][i];
            bufferAllele2Subset[i] = bufferAllele2[s][i];
            bufferRSubset[i] = bufferR[s][i];
            bufferThetaSubset[i] = bufferTheta[s][i];
          }
          file.seek((long) s * (long) nrInds * 2 + (long) bufferFirstInd);
          file.write(bufferAllele1Subset);
          file.seek((long) s * (long) nrInds * 2 + (long) nrInds + (long) bufferFirstInd);
          file.write(bufferAllele2Subset);
          if (rawDataAvailable) {
            fileRawData.seek(
                (long) s * (long) vecInd.size() * 3 + (long) vecInd.size() + (long) bufferFirstInd);
            fileRawData.write(bufferRSubset);
            fileRawData.seek(
                (long) s * (long) vecInd.size() * 3
                    + (long) 2 * vecInd.size()
                    + (long) bufferFirstInd);
            fileRawData.write(bufferThetaSubset);
          }
        }
      } else {
        // Flush remaining buffer:
        int s = previousSNPID;
        file.seek((long) s * (long) nrInds * 2);
        file.write(bufferAllele1[0]);
        file.seek((long) s * (long) nrInds * 2 + (long) nrInds);
        file.write(bufferAllele2[0]);
        if (rawDataAvailable) {
          fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) vecInd.size());
          fileRawData.write(bufferR[0]);
          fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) 2 * vecInd.size());
          fileRawData.write(bufferTheta[0]);
        }
      }
    }

    System.out.println(linesProcessed + "\tLines processed");

    // Close files:
    in.close();
    file.close();
    if (rawDataAvailable) {
      fileRawData.close();
    }

    // Output final remarks:
    System.out.println("Import of data has completed successfully!");
    System.out.println("");
    System.out.println(
        "Please ensure you include a valid PhenotypeInformation.txt and SNPMappings.txt in the output directory.");
    System.out.println(
        "These two additional files are required in order for TriTyper to function correctly.");
  }
  // NOTE(review): removed a stray paste/extraction artifact here ("예제 #5" / "0") — it was not
  // valid Java and appears to be a dataset delimiter, not source content.
  /**
   * Compares an eQTL result file against an meQTL result file and reports the overlap and the
   * concordance of allelic Z-score directions between the two.
   *
   * <p>meQTL methylation probes are first translated to expression probes through the supplied eQTM
   * file; every eQTM effect is normalised to assessed allele "C" so that Z-scores are comparable.
   * The method writes several output files next to {@code outputFile}: a Z-score comparison plot
   * (PDF), lists of concordant and discordant QTLs, comparison/missingness logs, and a summary.
   * On completion the fields {@code nrShared} and {@code nrOpposite} are updated.
   *
   * @param eQTL path to the original eQTL file (tab separated, standard QTL column layout)
   * @param meQTL path to the meQTL file to compare against
   * @param eQTMFile path to the eQTM file mapping methylation probes to expression probes
   * @param outputFile prefix for all output files written by this comparison
   * @param matchOnGeneName if true, QTLs are matched on SNP + HUGO gene name (column 16); otherwise
   *     on SNP + probe (column 4)
   * @param fdrCutt FDR threshold applied to both input files (column 18); -1 disables filtering
   * @param matchSnpOnPos if true, SNPs are matched on "chr:pos" instead of SNP identifier
   * @param splitGeneNames if true, semicolon-separated gene name lists are split and each gene is
   *     matched separately
   * @param flipUsingEQTM if true, the meQTL Z-score is flipped when the mapping eQTM effect has a
   *     negative Z-score
   * @param topeffect if true, only the first (strongest) eQTM mapping per methylation probe is used
   * @throws IOException if an input file cannot be read or an output file cannot be written
   * @throws Exception declared for backwards compatibility with existing callers
   */
  public final void compareOverlapAndZScoreDirectionTwoEQTLFiles(
      String eQTL,
      String meQTL,
      String eQTMFile,
      String outputFile,
      boolean matchOnGeneName,
      double fdrCutt,
      boolean matchSnpOnPos,
      boolean splitGeneNames,
      boolean flipUsingEQTM,
      boolean topeffect)
      throws IOException, Exception {
    System.out.println("Performing comparison of eQTLs and meQTLs");
    // Do we want to use another FDR measure? When set to -1 this is not used at all.
    double filterOnFDR = fdrCutt;

    // We can exclude some eQTLs from the analysis. If requested, put the entire eQTL string in
    // this set for each eQTL. Does not work in combination with matching based on chr and pos.
    HashSet<String> hashExcludeEQTLs = new HashSet<String>();
    // We can confine the analysis to only a subset of probes. If requested, put the probe name in
    // this set.
    HashSet<String> hashConfineAnalysisToSubsetOfProbes = new HashSet<String>();
    // We can confine the analysis to only those eQTLs for which the SNP successfully passed QC,
    // otherwise sometimes unfair comparisons are made. If requested, put the SNP name in this set.
    HashSet<String> hashTestedSNPsThatPassedQC = null;

    // Load the eQTM file and normalise every effect to assessed allele "C":
    QTLTextFile eQTLsTextFile = new QTLTextFile(eQTMFile, QTLTextFile.R);

    HashMap<String, ArrayList<EQTL>> eQtmInfo = new HashMap<String, ArrayList<EQTL>>();

    for (Iterator<EQTL> eQtlIt = eQTLsTextFile.getEQtlIterator(); eQtlIt.hasNext(); ) {
      EQTL eQtm = eQtlIt.next();
      String eQtmKey = eQtm.getRsName();

      if (!eQtm.getAlleleAssessed().equals("C")) {
        // Flip the overall and per-dataset statistics so that "C" becomes the assessed allele.
        eQtm.setAlleleAssessed("C");
        eQtm.setZscore(eQtm.getZscore() * -1);

        Double[] zscores = eQtm.getDatasetZScores();
        Double[] correlation = eQtm.getCorrelations();
        for (int i = 0; i < eQtm.getDatasets().length; ++i) {
          zscores[i] *= -1;
          correlation[i] *= -1;
        }
        eQtm.setDatasetZScores(zscores);
        eQtm.setCorrelations(correlation);
      }

      ArrayList<EQTL> posEqtls = eQtmInfo.get(eQtmKey);

      if (posEqtls == null) {
        posEqtls = new ArrayList<EQTL>(1);
        posEqtls.add(eQtm);
        eQtmInfo.put(eQtmKey, posEqtls);
      } else if (!topeffect) {
        // Same list instance that is already in the map; no need for a second lookup.
        posEqtls.add(eQtm);
      }
    }

    System.out.println("eQTMs read in: " + eQtmInfo.size());

    // Now load the eQTLs for file 1:
    THashMap<String, String[]> hashEQTLs = new THashMap<String, String[]>();
    THashSet<String> hashUniqueProbes = new THashSet<String>();
    THashSet<String> hashUniqueGenes = new THashSet<String>();

    TextFile in = new TextFile(eQTL, TextFile.R);
    in.readLine(); // skip header
    String[] data = in.readLineElemsReturnReference(SPLIT_ON_TAB);

    // BUGFIX: guard against a header-only file (data == null previously caused an NPE here).
    // NOTE(review): the loop below reads columns up to index 18, so 5 is only a lower bound.
    if (data != null && data.length < 5) {
      throw new IllegalStateException(
          "QTL File does not have enough columns. Detected columns: "
              + data.length
              + " in file "
              + in.getFileName());
    }

    while (data != null) {
      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
            || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
          if (matchOnGeneName) {
            if (data[16].length() > 1) {

              if (splitGeneNames) {
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene, data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(gene);
                }
              } else {

                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16], data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(data[16]);
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              hashEQTLs.put(
                  (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4], data);
              hashUniqueProbes.add(data[4]);
              hashUniqueGenes.add(data[16]);
            }
          }
        }
      }
      // BUGFIX: advance to the next line unconditionally. Previously this read was inside the
      // FDR-filter branch, so the first line failing the filter caused an infinite loop.
      data = in.readLineElemsReturnReference(SPLIT_ON_TAB);
    }
    in.close();

    int nrUniqueProbes = hashUniqueProbes.size();
    int nrUniqueGenes = hashUniqueGenes.size();
    hashUniqueProbes = null;
    hashUniqueGenes = null;

    // Initialize the Z-Score allelic direction comparison plot:
    ZScorePlot zs = new ZScorePlot();
    String zsOutFileName = outputFile + "-ZScoreComparison.pdf";
    zs.init(2, new String[] {"eQTLs", "meQTLs"}, true, zsOutFileName);

    // Variables holding various statistics:
    int nreQTLsIdenticalDirection = 0;
    int nreQTLsOppositeDirection = 0;
    HashMap<String, Integer> hashEQTLNrTimesAssessed = new HashMap<String, Integer>();

    THashSet<String> hashEQTLs2 = new THashSet<String>();
    THashSet<String> hashUniqueProbes2 = new THashSet<String>();
    THashSet<String> hashUniqueGenes2 = new THashSet<String>();
    THashSet<String> hashUniqueProbesOverlap = new THashSet<String>();
    THashSet<String> hashUniqueGenesOverlap = new THashSet<String>();

    int counterFile2 = 0;
    int overlap = 0;
    ArrayDoubleList vecX = new ArrayDoubleList();
    ArrayDoubleList vecY = new ArrayDoubleList();

    // Now process file 2 (the meQTLs):
    in = new TextFile(meQTL, TextFile.R);
    in.readLine(); // skip header

    int skippedDueToMapping = 0;
    data = null;
    TextFile identicalOut =
        new TextFile(outputFile + "-eQTLsWithIdenticalDirecton.txt.gz", TextFile.W);
    TextFile disconcordantOut = new TextFile(outputFile + "-OppositeEQTLs.txt", TextFile.W);
    TextFile log = new TextFile(outputFile + "-eQTL-meQTL-ComparisonLog.txt", TextFile.W);
    TextFile log2 = new TextFile(outputFile + "-eQTM-missingnessLog.txt", TextFile.W);

    THashSet<String> identifiersUsed = new THashSet<String>();

    while ((data = in.readLineElemsReturnReference(SPLIT_ON_TAB)) != null) {

      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (!eQtmInfo.containsKey(data[4])) {
          // Methylation probe cannot be mapped to an expression probe; log and skip it.
          skippedDueToMapping++;
          log2.write(
              "meQTL probe not present In eQTM file:\t"
                  + data[4]
                  + ", effect statistics: \t"
                  + data[0]
                  + "\t"
                  + data[2]
                  + "\t"
                  + data[3]
                  + "\t"
                  + data[16]
                  + "\n");
          continue;
        }

        String orgDataFour = data[4];
        // Hoisted: the same list was previously looked up from the map on every access below.
        ArrayList<EQTL> qtms = eQtmInfo.get(orgDataFour);

        for (int i = 0; i < qtms.size(); ++i) {
          if (topeffect && i > 0) {
            break;
          }
          // Rewrite gene name and probe through the eQTM mapping:
          data[16] = qtms.get(i).getProbeHUGO();
          data[4] = qtms.get(i).getProbe();

          if (flipUsingEQTM) {
            Double zScoreQTM = qtms.get(i).getZscore();
            if (zScoreQTM < 0) {
              data[10] = String.valueOf(Double.parseDouble(data[10]) * -1);
            }
          }

          if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
              || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
            if (matchOnGeneName) {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                if (data[16].length() > 1) {

                  if (splitGeneNames) {
                    for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                      hashUniqueProbes2.add(data[4]);
                      hashUniqueGenes2.add(gene);
                      if (!hashEQTLs2.contains(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene)) {
                        hashEQTLs2.add(
                            (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene);
                        counterFile2++;
                      }
                    }
                  } else {

                    hashUniqueProbes2.add(data[4]);
                    hashUniqueGenes2.add(data[16]);
                    if (!hashEQTLs2.contains(
                        (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16])) {
                      hashEQTLs2.add(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16]);
                      counterFile2++;
                    }
                  }
                }
              }
            } else {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
                hashUniqueProbes2.add(data[4]);
                hashUniqueGenes2.add(data[16]);
                counterFile2++;
              }
            }
          }

          // Look up the matching QTL from file 1, if any:
          String[] matchedQtl = null;
          String identifier = null;
          if (matchOnGeneName) {

            if (data.length > 16 && data[16].length() > 1) {
              if (splitGeneNames) {
                // NB Plotting and processing of all QTLs here is not okay!
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {
                  if (!hashExcludeEQTLs.contains(data[1] + "\t" + gene)) {
                    identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene;
                    if (hashEQTLs.containsKey(identifier)) {
                      matchedQtl = hashEQTLs.get(identifier);
                    }
                  }
                }
              } else {
                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  identifier =
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16];
                  if (hashEQTLs.containsKey(identifier)) {
                    matchedQtl = hashEQTLs.get(identifier);
                  }
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4];
              if (hashEQTLs.containsKey(identifier)) {
                matchedQtl = hashEQTLs.get(identifier);
              }
            }
          }

          if (matchedQtl == null) {

            // The eQTL, present in file 2, is not present in file 1:
            if (hashTestedSNPsThatPassedQC == null
                || hashTestedSNPsThatPassedQC.contains(data[1])) {
              log.write(
                  "eQTL Present In New file But Not In Original File:\t"
                      + identifier
                      + "\t"
                      + data[0]
                      + "\t"
                      + data[2]
                      + "\t"
                      + data[3]
                      + "\t"
                      + data[16]
                      + "\n");
            }
            double zScore2 = Double.parseDouble(data[10]);
            zs.draw(null, zScore2, 0, 1);

          } else {
            identifiersUsed.add(identifier);
            String[] eQtlData = matchedQtl;

            hashUniqueProbesOverlap.add(data[4]);
            hashUniqueGenesOverlap.add(data[16]);
            if (!hashEQTLNrTimesAssessed.containsKey(identifier)) {
              hashEQTLNrTimesAssessed.put(identifier, 1);
            } else {
              hashEQTLNrTimesAssessed.put(identifier, 1 + hashEQTLNrTimesAssessed.get(identifier));
            }
            String alleles = eQtlData[8];
            String alleleAssessed = eQtlData[9];

            // Average the per-dataset correlations of file 1, skipping missing ("-") entries:
            String correlations[] = (eQtlData[17]).split(";");
            double correlation = 0;
            int numCorr1 = 0;
            for (int c = 0; c < correlations.length; c++) {
              try {
                if (!correlations[c].equals("-")) {
                  correlation += Double.parseDouble(correlations[c]);
                  numCorr1++;
                }
              } catch (NumberFormatException e) {
                // Malformed correlation entries are deliberately skipped (best effort).
              }
            }

            correlation /= (double) numCorr1;
            double zScore = Double.parseDouble(eQtlData[10]);
            String alleles2 = data[8];
            String alleleAssessed2 = data[9];
            double zScore2 = Double.parseDouble(data[10]);

            String correlations2[] = data[17].split(";");
            double correlation2 = 0;

            // If file 2 assessed the other allele of the same pair, its correlation is flipped:
            boolean alleleflipped = false;
            if (!alleleAssessed.equals(data[9])) {
              if (data[9].equals(eQtlData[8].split("/")[0])) {
                alleleflipped = true;
              }
            }

            int numCorr2 = 0;
            for (int c = 0; c < correlations2.length; c++) {
              try {
                if (!correlations2[c].equals("-")) {

                  correlation2 += (Double.parseDouble(correlations2[c]));

                  numCorr2++;
                }
              } catch (NumberFormatException e) {
                // Malformed correlation entries are deliberately skipped (best effort).
              }
            }
            correlation2 /= (double) numCorr2;
            if (alleleflipped) {
              correlation2 = -correlation2;
            }

            // Count how many alleles the two SNP annotations ("X/Y") share:
            boolean sameDirection = false;
            int nrIdenticalAlleles = 0;
            if (alleles.length() > 2 && alleles2.length() > 2) {
              for (int a = 0; a < 3; a++) {
                for (int b = 0; b < 3; b++) {
                  if (a != 1 && b != 1) {
                    if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                      nrIdenticalAlleles++;
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles == 0) {
              // No shared alleles: try the complementary strand for file 2 and recount.
              alleles2 =
                  (char) BaseAnnot.getComplement((byte) alleles2.charAt(0))
                      + "/"
                      + (char) BaseAnnot.getComplement((byte) alleles2.charAt(2));
              alleleAssessed2 = BaseAnnot.getComplement(alleleAssessed2);
              if (alleles.length() > 2 && alleles2.length() > 2) {
                for (int a = 0; a < 3; a++) {
                  for (int b = 0; b < 3; b++) {
                    if (a != 1 && b != 1) {
                      if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                        nrIdenticalAlleles++;
                      }
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles != 2) {
              log.write(
                  "Error! SNPs have incompatible alleles!!:\t"
                      + alleles
                      + "\t"
                      + alleles2
                      + "\t"
                      + identifier
                      + "\n");
            } else {
              overlap++;
              // Align the assessed alleles so the Z-scores are directly comparable:
              if (!alleleAssessed.equals(alleleAssessed2)) {
                zScore2 = -zScore2;
                alleleAssessed2 = alleleAssessed;
              }

              if (zScore2 * zScore > 0) {
                sameDirection = true;
              }

              zs.draw(zScore, zScore2, 0, 1);
              if (!sameDirection) {
                nreQTLsOppositeDirection++;

                // NOTE(review): append() is used here without an explicit line break, while the
                // concordant branch uses writeln(). Confirm TextFile.append semantics — records
                // may end up concatenated on one line.
                if (matchOnGeneName) {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[16]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);

                } else {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[4]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);
                }

                vecX.add(zScore);
                vecY.add(zScore2);

              } else {
                // Concordant direction; write to output.
                identicalOut.writeln(
                    identifier
                        + '\t'
                        + alleles
                        + '\t'
                        + alleleAssessed
                        + '\t'
                        + zScore
                        + '\t'
                        + alleles2
                        + '\t'
                        + alleleAssessed2
                        + '\t'
                        + zScore2);
                nreQTLsIdenticalDirection++;
                // Exclude strand-ambiguous SNPs from the correlation scatter:
                if (alleles.length() > 2
                    && !alleles.equals("A/T")
                    && !alleles.equals("T/A")
                    && !alleles.equals("C/G")
                    && !alleles.equals("G/C")) {
                  vecX.add(zScore);
                  vecY.add(zScore2);
                }
              }
            }
          }
        }
      }
    }
    identicalOut.close();
    disconcordantOut.close();
    in.close();
    log2.close();

    log.write(
        "\n/// Writing missing QTLs observed in original file but not in the new file ////\n\n");
    for (Entry<String, String[]> QTL : hashEQTLs.entrySet()) {
      if (!identifiersUsed.contains(QTL.getKey())) {
        // The eQTL, present in file 1, is not present in file 2:

        // BUGFIX: test the SNP of this QTL. Previously the stale `data` array from the read loop
        // was referenced here, which is always null at this point.
        if (hashTestedSNPsThatPassedQC == null
            || hashTestedSNPsThatPassedQC.contains(QTL.getValue()[1])) {
          log.write(
              "eQTL Present In Original file But Not In New File:\t"
                  + QTL.getKey()
                  + "\t"
                  + QTL.getValue()[0]
                  + "\t"
                  + QTL.getValue()[2]
                  + "\t"
                  + QTL.getValue()[3]
                  + "\t"
                  + QTL.getValue()[16]
                  + "\n");
        }
        double zScore = Double.parseDouble(QTL.getValue()[10]);
        zs.draw(zScore, null, 0, 1);
      }
    }

    log.close();
    zs.write(zsOutFileName);

    // Pearson correlation between the overlapping Z-scores plus a two-sided t-test p-value:
    double[] valsX = vecX.toArray();
    double[] valsY = vecY.toArray();

    if (valsX.length > 2) {
      double correlation = JSci.maths.ArrayMath.correlation(valsX, valsY);
      double r2 = correlation * correlation;

      cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine =
          new cern.jet.random.tdouble.engine.DRand();
      cern.jet.random.tdouble.StudentT tDistColt =
          new cern.jet.random.tdouble.StudentT(valsX.length - 2, randomEngine);
      double pValuePearson = 1;
      double tValue = correlation / (Math.sqrt((1 - r2) / (double) (valsX.length - 2)));
      if (tValue < 0) {
        pValuePearson = tDistColt.cdf(tValue);
      } else {
        pValuePearson = tDistColt.cdf(-tValue);
      }
      pValuePearson *= 2;
      System.out.println(
          "\nCorrelation between the Z-Scores of the overlapping set of eQTLs:\t"
              + correlation
              + "\tP-Value:\t"
              + pValuePearson);
    }

    // Write the summary, mirrored to stdout:
    TextFile outSummary = new TextFile(outputFile + "-Summary.txt", TextFile.W);

    System.out.println("");
    System.out.println(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);
    outSummary.writeln(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);

    System.out.println(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");
    outSummary.writeln(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");

    System.out.println("Skipped over meQTLs:\t" + skippedDueToMapping);
    outSummary.writeln("Skipped over meQTLs:\t" + skippedDueToMapping);

    System.out.println(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());
    outSummary.writeln(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());

    System.out.println("");
    outSummary.writeln();

    System.out.println("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);
    outSummary.writeln("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);

    double proportionOppositeDirection =
        100d
            * (double) nreQTLsOppositeDirection
            / (double) (nreQTLsOppositeDirection + nreQTLsIdenticalDirection);
    String proportionOppositeDirectionString =
        (new java.text.DecimalFormat(
                "0.00;-0.00", new java.text.DecimalFormatSymbols(java.util.Locale.US)))
            .format(proportionOppositeDirection);

    System.out.println(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");
    outSummary.writeln(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");

    outSummary.close();

    nrShared = hashUniqueProbesOverlap.size();
    nrOpposite = nreQTLsOppositeDirection;
  }