/**
   * Reads a tab-separated position file into a two-level lookup.
   *
   * <p>For every line, column 0 is used as the outer key and column 8 as the inner key; the stored
   * value is a {@code Triple} of (column 6, column 3 parsed as int, column 4 parsed as int). A
   * later line with the same outer and inner key replaces the earlier one. The number of lines
   * read is printed to standard output.
   *
   * <p>NOTE(review): the local name suggests a GFF-style layout (seqname, ..., start, end, ...,
   * strand, ..., attributes) - confirm against the files actually passed in.
   *
   * @param fileWithPositions path of the file to read
   * @return map from column 0 to a map from column 8 to (column 6, column 3, column 4)
   * @throws IOException if the file cannot be opened or read
   * @throws ArrayIndexOutOfBoundsException if a line has fewer than nine tab-separated columns
   * @throws NumberFormatException if column 3 or 4 is not an integer
   */
  private static HashMap<String, HashMap<String, Triple<String, Integer, Integer>>>
      readFileWithPositions(String fileWithPositions) throws IOException {
    HashMap<String, HashMap<String, Triple<String, Integer, Integer>>> chromosomalProbeInformation =
        new HashMap<>();
    TextFile gffFileReader = new TextFile(fileWithPositions, TextFile.R);

    int counter = 0;
    try {
      String str;
      while ((str = gffFileReader.readLine()) != null) {
        String[] parts = str.split("\t");

        // Look the per-key map up once and create it lazily on first sight of the key.
        HashMap<String, Triple<String, Integer, Integer>> perChromosome =
            chromosomalProbeInformation.get(parts[0]);
        if (perChromosome == null) {
          perChromosome = new HashMap<String, Triple<String, Integer, Integer>>();
          chromosomalProbeInformation.put(parts[0], perChromosome);
        }

        perChromosome.put(
            parts[8],
            new Triple<String, Integer, Integer>(
                parts[6], Integer.parseInt(parts[3]), Integer.parseInt(parts[4])));
        counter++;
      }
    } finally {
      // Release the file handle even when a malformed line aborts parsing.
      gffFileReader.close();
    }
    System.out.println("Number of entries read in: " + counter);
    return chromosomalProbeInformation;
  }
  /**
   * Writes remapped identifiers for every entry of {@code OrderingEst}, in its iteration order.
   *
   * <p>Each entry is translated through {@code mappingEst} to a colon-separated position string.
   * If {@code mappingUmcg} contains that position, the mapped value is written; otherwise
   * "Problem: &lt;entry&gt;" is printed to standard output and the original entry is written
   * instead. {@code outFile} receives one name per line; {@code outFile2} receives the first two
   * colon-separated fields of the position followed by the name, tab-separated.
   *
   * <p>An {@code IOException} is logged at SEVERE level and not rethrown. Both files are closed
   * even when processing fails part-way.
   *
   * <p>NOTE(review): an entry missing from {@code mappingEst} yields a null position, which
   * presumably makes {@code SPLIT_ON_COLON.split} throw a NullPointerException - confirm that
   * callers guarantee full coverage.
   *
   * @param OrderingEst entries to process, in output order
   * @param mappingEst entry to "a:b" position string
   * @param mappingUmcg position string to replacement name
   * @param outFile path of the names-only output
   * @param outFile2 path of the three-column output
   */
  private static void remapPositionsAndSnps(
      LinkedHashSet<String> OrderingEst,
      HashMap<String, String> mappingEst,
      HashMap<String, String> mappingUmcg,
      String outFile,
      String outFile2) {
    try {
      TextFile out = new TextFile(outFile, TextFile.W);
      try {
        TextFile out2 = new TextFile(outFile2, TextFile.W);
        try {
          for (String entry : OrderingEst) {
            String snpPos = mappingEst.get(entry);
            String[] t = SPLIT_ON_COLON.split(snpPos);

            // Fall back to the original entry when no replacement is known for the position.
            String name;
            if (mappingUmcg.containsKey(snpPos)) {
              name = mappingUmcg.get(snpPos);
            } else {
              System.out.println("Problem: " + entry);
              name = entry;
            }
            out.writeln(name);
            out2.writeln(t[0] + "\t" + t[1] + "\t" + name);
          }
        } finally {
          out2.close();
        }
      } finally {
        out.close();
      }
    } catch (IOException ex) {
      Logger.getLogger(ConverteMappingAndSnpFile.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
  /**
   * Writes every contact as one tab-separated line: smaller location, larger location, contact
   * value.
   *
   * <p>NOTE(review): the returned list is created empty and never filled, so callers always
   * receive an empty list. It is kept only to preserve the existing signature - confirm whether
   * the method was meant to return {@code contacts} or could be {@code void}.
   *
   * @param contacts contacts to write, in list order
   * @param fileToWrite path of the output file
   * @return a new, always-empty list
   * @throws IOException if the file cannot be opened or written
   */
  private static ArrayList<InterChrContact> writeRawInterContactInformation(
      ArrayList<InterChrContact> contacts, String fileToWrite) throws IOException {
    ArrayList<InterChrContact> chrContactInfo = new ArrayList<InterChrContact>();

    TextFile outWriter = new TextFile(fileToWrite, TextFile.W);
    try {
      for (InterChrContact contact : contacts) {
        outWriter.writeln(
            contact.getChrLocationSmaller()
                + "\t"
                + contact.getChrLocationLarger()
                + "\t"
                + contact.getContactValue());
      }
    } finally {
      // Close the file even when a write fails half-way.
      outWriter.close();
    }
    return chrContactInfo;
  }
  /**
   * Scans a tab-separated repeat file and validates the coordinates of matching rows.
   *
   * <p>Rows whose first column matches {@code CHR_PATTERN} have columns 1 and 2 parsed as
   * integers; the parsed values are currently discarded. No boundaries object is built.
   *
   * <p>NOTE(review): this looks like an unfinished stub (the original carried a commented-out
   * {@code return repeatBoundaries;}). Callers must be prepared for the {@code null} result.
   *
   * @param fileLocation path of the repeat file
   * @return always {@code null}
   * @throws IOException if the file cannot be opened or read
   * @throws NumberFormatException if a matching row has a non-integer start or stop position
   * @throws ArrayIndexOutOfBoundsException if a matching row has fewer than three columns
   */
  public GenomicBoundaries<Object> readRepeatData(String fileLocation) throws IOException {
    TextFile repeatTextFile = new TextFile(fileLocation, false);
    try {
      String fileLine;
      while ((fileLine = repeatTextFile.readLine()) != null) {
        String[] fileLineData = TAB_PATTERN.split(fileLine);

        // Filter to exclude chromosome X and other non chromosome specific entries
        // (as far as CHR_PATTERN, defined elsewhere, implements that).
        Matcher matcher = CHR_PATTERN.matcher(fileLineData[0]);
        if (matcher.matches()) {
          // Results are unused for now; the calls are kept so malformed coordinates still fail
          // loudly exactly as before.
          Integer.parseInt(fileLineData[1]);
          Integer.parseInt(fileLineData[2]);
        }
      }
    } finally {
      // Release the file handle even when a malformed row aborts the scan.
      repeatTextFile.close();
    }
    return null;
  }
  /**
   * Converts a plain-text square matrix of doubles into a plain-text edge list.
   *
   * <p>The first line of {@code matrixfile} is a header whose columns 1..n are the node names.
   * Every following line holds a row name followed by n numeric cells. For each cell with a
   * value strictly greater than zero, one line "rowName, columnName, weight" (joined with
   * {@code outfileDelimiter}) is written to {@code outfile}. The cell whose column index equals
   * the 1-based data row number is skipped as the diagonal, which assumes rows appear in the same
   * order as the header columns.
   *
   * <p>Both files are closed even when validation fails.
   *
   * @param matrixfile path of the input matrix
   * @param outfile path of the edge list to write
   * @param infileDelimiter column separator of the input; passed to {@link String#split(String)},
   *     so it is interpreted as a regular expression
   * @param outfileDelimiter separator written between the three output fields
   * @throws IOException if a file cannot be opened, read or written
   * @throws IllegalArgumentException if the file is empty, a row has the wrong number of columns,
   *     or a cell is not numeric
   */
  public static void convertPlainTextDoubleMatrixToPlainTextEdgeList(
      String matrixfile, String outfile, String infileDelimiter, String outfileDelimiter)
      throws IOException {

    int nrEdges = 0;
    TextFile in = new TextFile(matrixfile, TextFile.R);
    try {
      String line = in.readLine();
      if (line == null) {
        // An empty file has no header; report it instead of failing with a NullPointerException.
        throw new IllegalArgumentException(
            "The data in file '" + matrixfile + "' are not a matrix. The file is empty.");
      }
      String[] split = line.split(infileDelimiter);
      String[] nodes = Arrays.copyOfRange(split, 1, split.length);
      LOGGER.log(Level.FINE, "{0} nodes in {1}", new Object[] {nodes.length, matrixfile});

      TextFile out = new TextFile(outfile, TextFile.W);
      try {
        int lineNr = 1;
        while ((line = in.readLine()) != null) {
          split = line.split(infileDelimiter);
          if (split.length != nodes.length + 1) {
            throw new IllegalArgumentException(
                "The data in file '"
                    + matrixfile
                    + "' are not a matrix. Check line "
                    + (lineNr + 1)
                    + ".");
          }
          String node = split[0];
          for (int i = 1; i < split.length; i++) {
            if (lineNr == i) {
              continue; // skip diagonal
            }
            try {
              double weight = Double.parseDouble(split[i]);
              if (weight > 0) {
                out.writeln(node + outfileDelimiter + nodes[i - 1] + outfileDelimiter + weight);
                nrEdges++;
              }
            } catch (NumberFormatException ex) {
              // Keep the original exception as the cause so the offending text is not lost.
              throw new IllegalArgumentException(
                  "The data in file '"
                      + matrixfile
                      + "' are not numerical (cast to double failed). Check line "
                      + (lineNr + 1)
                      + ", column "
                      + (i + 1)
                      + ".",
                  ex);
            }
          }
          lineNr++;
        }
      } finally {
        out.close();
      }
    } finally {
      in.close();
    }
    LOGGER.log(
        Level.FINE,
        "{0} edges from {1} written to ''{2}''",
        new Object[] {nrEdges, matrixfile, outfile});
  }
  /**
   * Collects names for the positions of interest from all BED files in a directory.
   *
   * <p>Every file in {@code file1} whose path ends in {@code .bed} or {@code .bed.gz} is read.
   * The chromosome is taken from the third underscore-separated part of the file name, with
   * ".gz" and ".bed" removed. The first line of each file is skipped. For every remaining line,
   * the key "chromosome:column2" is built; if that key occurs among the values of
   * {@code inputMap}, it is stored together with column 3 of the line.
   *
   * <p>An {@code IOException} while reading prints the message to standard output and terminates
   * the JVM with exit code -1.
   *
   * @param file1 path of the directory containing the BED files
   * @param inputMap map whose values are the "chr:pos" combinations of interest
   * @param initialSize expected number of results, used to presize the returned map
   * @return map from "chr:pos" to column 3 of the matching BED line
   * @throws IllegalArgumentException if {@code file1} is not a directory that can be listed
   * @throws ArrayIndexOutOfBoundsException if a BED file name has fewer than three
   *     underscore-separated parts or a line has fewer than four columns
   */
  private static HashMap<String, String> readSnpMappingsBed(
      String file1, HashMap<String, String> inputMap, int initialSize) {
    HashMap<String, String> snpMapping =
        new HashMap<String, String>((int) Math.ceil(initialSize / 0.75));

    HashSet<String> interestCombinations = new HashSet<>(inputMap.values());

    // listFiles() returns null (not an empty array) when the path is not a listable directory.
    File[] candidates = new File(file1).listFiles();
    if (candidates == null) {
      throw new IllegalArgumentException(
          "Cannot list BED files: '" + file1 + "' is not a readable directory.");
    }

    for (File f : candidates) {
      String file = f.getAbsolutePath();
      if (!file.endsWith(".bed") && !file.endsWith(".bed.gz")) {
        continue;
      }

      String chr = f.getName().split("_")[2].replace(".gz", "").replace(".bed", "");
      try {
        TextFile in = new TextFile(file, TextFile.R);
        try {
          in.readLine(); // skip the first line
          String str;
          while ((str = in.readLine()) != null) {
            String[] parts = SPLIT_ON_TAB.split(str);
            String key = chr + ":" + parts[2];
            if (interestCombinations.contains(key)) {
              snpMapping.put(key, parts[3]);
            }
          }
        } finally {
          // Close the reader even when a malformed line aborts parsing.
          in.close();
        }
      } catch (IOException e) {
        System.out.println(e.getMessage());
        System.exit(-1);
      }
    }
    return snpMapping;
  }
  /**
   * Writes one "probe-snp&lt;TAB&gt;flag" line per distinct probe/SNP pair.
   *
   * <p>The flag is {@code true} when at least one contact for the pair reports
   * {@code hasContact()}, and {@code false} otherwise. Lines are emitted in the iteration order
   * of a {@code HashMap}, so their order is not the input order.
   *
   * @param contacts contacts to summarise
   * @param outWriter already opened writer; it is not closed here
   * @throws IOException if writing fails
   */
  private static void printOutContacts(ArrayList<DesiredChrContact> contacts, TextFile outWriter)
      throws IOException {
    HashMap<String, Boolean> contactByPair = new HashMap<>();

    for (DesiredChrContact contact : contacts) {
      String pair = contact.getProbeName() + "-" + contact.getSnpName();

      if (contact.hasContact()) {
        // A positive contact always wins, also over an earlier negative one.
        contactByPair.put(pair, Boolean.TRUE);
        continue;
      }
      if (!contactByPair.containsKey(pair)) {
        contactByPair.put(pair, Boolean.FALSE);
      }
    }

    for (Entry<String, Boolean> pairAndFlag : contactByPair.entrySet()) {
      String outputLine = pairAndFlag.getKey() + "\t" + pairAndFlag.getValue() + "\n";
      outWriter.write(outputLine);
    }
  }
  /**
   * Runs the SNP / cis-probe / trans-probe analysis for every dataset and permutation round.
   *
   * <p>For each dataset in {@code m_gg}, one output file is written for the real data (round 0)
   * and one per permutation round (sample labels are permuted before each of those). Every file
   * starts with a header line, followed by one line per combination in {@code snpProbeCombos}
   * for which the SNP and both probes are known. For such a combination the dosage values and the
   * two expression vectors are restricted to samples with a called genotype, normalized, and fed
   * into pairwise linear regressions; the trans values are additionally regressed on the cis
   * values and the residuals are analysed the same way.
   *
   * <p>Combinations with an unknown SNP (no id, or id -9) or an unknown probe are skipped
   * silently. Samples without a linked genotype (index -1 in the expression-to-genotype array)
   * are excluded. The output file and the SNP loader are closed even when a round fails.
   *
   * <p>NOTE(review): dosage values are read for every SNP even when
   * {@code hasDosageInformation()} is false - confirm that {@code getDosageValues()} is populated
   * by {@code loadGenotypes} in that case.
   *
   * @throws IOException if an output file cannot be written or genotype data cannot be loaded
   */
  @Override
  public void run() throws IOException {
    for (int d = 0; d < m_gg.length; d++) {
      // now test all triples
      SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader();
      try {
        int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray();

        for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) {
          String outfile;
          if (perm == 0) {
            outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt";
          } else {
            outfile =
                outDir
                    + m_gg[d].getSettings().name
                    + "_IVAnalysis-PermutationRound-"
                    + perm
                    + ".txt";
            m_gg[d].permuteSampleLables();
          }
          TextFile out = new TextFile(outfile, TextFile.W);
          try {
            ProgressBar pb =
                new ProgressBar(
                    snpProbeCombos.size(), "Running Mediation Analysis - Permutation " + perm);

            out.writeln(
                "SNP\tSNP Chr\tSNP ChrPos\t"
                    + "Alleles\tDirectionAllele\t"
                    + "N\t"
                    + "CisArrayAddress\tCisProbe Chr\tCisProbe ChrPos\t"
                    + "CisGeneName\t"
                    + "TransArrayAddress\tTransProbe Chr\tTransProbe ChrPos\t"
                    + "TransGeneName\t"
                    + "CisTrans-Correlation\t"
                    + "Cis-eQTL-Beta\t"
                    + "Cis-eQTL-SE\t"
                    + "CisTrans-Beta\t"
                    + "CisTrans-SE\t"
                    + "Trans-eQTL-Beta\t"
                    + "Trans-eQTL-SE\t"
                    + "CisTrans-Residual-Correlation\t"
                    + "CisTrans-Residual-Beta\t"
                    + "CisTrans-Residual-SE\t"
                    + "Trans-eQTL-Residual-Beta\t"
                    + "Trans-eQTL-Residual-SE\t"
                    + "Beta-Ratio");

            // hasNext() is checked before every next(), so an empty combination list simply
            // produces a header-only file instead of a NoSuchElementException.
            Iterator<Triple<String, String, String>> it = snpProbeCombos.iterator();
            while (it.hasNext()) {
              Triple<String, String, String> next = it.next();
              String snp = next.getLeft();
              String cisprobe = next.getMiddle();
              String transprobe = next.getRight();

              Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp);
              Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe);
              Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe);

              // The null test must precede the comparison with -9: unboxing a null Integer
              // would throw a NullPointerException.
              boolean snpKnown = snpId != null && snpId != -9;
              if (snpKnown && cisProbeId != null && transProbeId != null) {

                SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId);
                snpLoader.loadGenotypes(snpObj);
                if (snpLoader.hasDosageInformation()) {
                  snpLoader.loadDosage(snpObj);
                }
                double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId];
                double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId];

                int nrIndividuals = m_gg[d].getExpressionData().getIndividuals().length;

                // First pass: count samples that are linked to a genotype and have a call.
                // The link is tested before indexing, because an unlinked sample has id -1.
                int calledGenotypes = 0;
                for (int i = 0; i < nrIndividuals; i++) {
                  int genotypeId = indWGA[i];
                  if (genotypeId > -1 && snpObj.getGenotypes()[genotypeId] > -1) {
                    calledGenotypes++;
                  }
                }

                double[] genotypes = new double[calledGenotypes];
                double[] cisvals = new double[calledGenotypes];
                double[] transvals = new double[calledGenotypes];

                // Second pass: copy the values of exactly those samples.
                calledGenotypes = 0;
                for (int i = 0; i < nrIndividuals; i++) {
                  int genotypeId = indWGA[i];
                  if (genotypeId > -1 && snpObj.getGenotypes()[genotypeId] > -1) {
                    genotypes[calledGenotypes] = snpObj.getDosageValues()[genotypeId];
                    cisvals[calledGenotypes] = origCisVals[i];
                    transvals[calledGenotypes] = origTransVals[i];
                    calledGenotypes++;
                  }
                }

                // normalize genotype and cis + trans to get beta's equal to the correlation
                // coefficient
                genotypes = normalize(genotypes);
                cisvals = normalize(cisvals);
                transvals = normalize(transvals);

                double corrCisTrans =
                    JSci.maths.ArrayMath.correlation(cisvals, transvals); // for code validation
                double[] cisTransRCs =
                    Regression.getLinearRegressionCoefficients(
                        cisvals, transvals); // returns beta, alpha, se, t
                double[] snpCisRCs =
                    Regression.getLinearRegressionCoefficients(
                        genotypes, cisvals); // returns beta, alpha, se, t
                double[] snpTransRCs =
                    Regression.getLinearRegressionCoefficients(genotypes, transvals);

                // remove correlation between cis and trans probe
                double[] resTransVals = new double[cisvals.length];
                for (int i = 0; i < resTransVals.length; i++) {
                  resTransVals[i] = transvals[i] - cisTransRCs[0] * cisvals[i];
                }

                resTransVals = normalize(resTransVals);

                double[] cisResTransRCs =
                    Regression.getLinearRegressionCoefficients(
                        cisvals, resTransVals); // returns beta, alpha, se, t
                double[] snpResTransRCs =
                    Regression.getLinearRegressionCoefficients(genotypes, resTransVals);

                double rescorr =
                    JSci.maths.ArrayMath.correlation(cisvals, resTransVals); // for code validation

                out.writeln(
                    snp
                        + "\t"
                        + snpObj.getChr()
                        + "\t"
                        + snpObj.getChrPos()
                        + "\t"
                        + BaseAnnot.toString(snpObj.getAlleles()[0])
                        + "/"
                        + BaseAnnot.toString(snpObj.getAlleles()[1])
                        + "\t"
                        + BaseAnnot.toString(snpObj.getAlleles()[0])
                        + "\t"
                        + transvals.length
                        + "\t"
                        + cisprobe
                        + "\t"
                        + m_gg[d].getExpressionData().getChr()[cisProbeId]
                        + "\t"
                        + m_gg[d].getExpressionData().getChrStart()[cisProbeId]
                        + ":"
                        + m_gg[d].getExpressionData().getChrStop()[cisProbeId]
                        + "\t"
                        + m_gg[d].getExpressionData().getAnnotation()[cisProbeId]
                        + "\t"
                        + transprobe
                        + "\t"
                        + m_gg[d].getExpressionData().getChr()[transProbeId]
                        + "\t"
                        + m_gg[d].getExpressionData().getChrStart()[transProbeId]
                        + ":"
                        + m_gg[d].getExpressionData().getChrStop()[transProbeId]
                        + "\t"
                        + m_gg[d].getExpressionData().getAnnotation()[transProbeId]
                        + "\t"
                        + corrCisTrans
                        + "\t"
                        + snpCisRCs[0]
                        + "\t"
                        + snpCisRCs[2]
                        + "\t"
                        + cisTransRCs[0]
                        + "\t"
                        + cisTransRCs[2]
                        + "\t"
                        + snpTransRCs[0]
                        + "\t"
                        + snpTransRCs[2]
                        + "\t"
                        + rescorr
                        + "\t"
                        + cisResTransRCs[0]
                        + "\t"
                        + cisResTransRCs[2]
                        + "\t"
                        + snpResTransRCs[0]
                        + "\t"
                        + snpResTransRCs[2]
                        + "\t"
                        + (snpResTransRCs[0] / snpTransRCs[0]));
                snpObj.clearGenotypes();
              }

              pb.iterate();
            }
            pb.close();
          } finally {
            // Do not leave a half-written file handle open when a round fails.
            out.close();
          }
        }
      } finally {
        snpLoader.close();
      }
    }
  }
  /**
   * Starts parsing a genotype report file, which can be in different formats. This import program
   * can accommodate many formats, and uses buffering to achieve sufficient performance.
   */
  public FinalReportToTriTyper(
      String inputFile,
      String outputDirString,
      boolean isIlluminaFinalReportFile,
      String delimiter,
      String decimalSeparator)
      throws IOException {

    // Check whether we can write to the output directory:
    File outputDir = new File(outputDirString);
    if (!outputDir.isDirectory()) {
      System.out.println("Your output directory does not exist!");
      System.exit(-1);
    }

    // ArrayLists and hashes for determining file size of final report file:
    HashMap<String, Integer> hashInd = new HashMap<String, Integer>();
    ArrayList<String> vecInd = new ArrayList<String>();
    HashMap<String, Integer> hashSNP = new HashMap<String, Integer>();
    ArrayList<String> vecSNP = new ArrayList<String>();

    // First parse file, determine what the amount of unique samples and SNPs is.
    System.out.println("");
    System.out.println(
        "TriTyperImporter V1.0, 2008, Lude Franke, University Medical Centre Utrecht, [email protected]");
    System.out.println("");
    System.out.println("Processing file:\t" + inputFile);
    System.out.println("Inventorizing input file, determining number of unique SNPs and samples:");
    int columnSample = -1;
    int columnSNP = -1;
    int columnAllele1 = -1;
    int columnAllele2 = -1;
    int columnTheta = -1;
    int columnR = -1;
    int columnX = -1;
    int columnY = -1;
    boolean rawDataAvailable = false;

    // Try to open the input file:
    if (!Gpio.canRead(inputFile)) {
      System.out.println("");
      System.out.println("Cannot open file:\t" + inputFile);
      System.out.println("Are you sure it is located at this place?");
      System.exit(-1);
    }
    TextFile in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    String str = null;
    if (isIlluminaFinalReportFile) {
      int countIlluminaFinalReport = 0;
      while ((str = in.readLine()) != null) {
        String[] data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
        countIlluminaFinalReport++;
        if (countIlluminaFinalReport > 100) {
          System.out.println(
              "\nError: You have defined that this file is a final report file, which it does not seem to be as a row with the word [Data] cannot be found!");
          System.exit(-1);
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        String[] data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    String[] data = str.split(delimiter);
    if (data.length <= 1) {
      System.out.println("");
      System.out.println("Error parsing input file! The file cannot be delimited!");
      String delimiterDescription = "tab";
      if (delimiter.equals(" ")) {
        delimiterDescription = "space";
      }
      if (delimiter.equals(",")) {
        delimiterDescription = "comma";
      }
      if (delimiter.equals(";")) {
        delimiterDescription = "semicolon";
      }
      System.out.println("Are you sure it is " + delimiterDescription + " delimited ?");
      System.exit(-1);
    }

    for (int d = 0; d < data.length; d++) {
      String column = data[d].trim().toLowerCase();
      if (column.equals("sample id")) {
        columnSample = d;
      }
      if (column.equals("snp name")) {
        columnSNP = d;
      }
      if (column.contains("allele1")) {
        columnAllele1 = d;
      }
      if (column.contains("allele 1")) {
        columnAllele1 = d;
      }
      if (column.contains("allele2")) {
        columnAllele2 = d;
      }
      if (column.contains("allele 2")) {
        columnAllele2 = d;
      }
      if (column.equals("r")) {
        columnR = d;
      }
      if (column.equals("theta")) {
        columnTheta = d;
      }
    }
    if (columnSample == -1) {
      System.out.println(
          "\nError: Within the header of this file the sample id column (Sample ID) cannot be found!");
      System.exit(-1);
    }
    if (columnAllele1 == -1) {
      System.out.println(
          "\nError: Within the header of this file the allele 1 column (Allele1) cannot be found!");
      System.exit(-1);
    }
    if (columnAllele2 == -1) {
      System.out.println(
          "\nError: Within the header of this file the allele 2 column (Allele2) cannot be found!");
      System.exit(-1);
    }
    if (columnSNP == -1) {
      System.out.println(
          "\nError: Within the header of this file the SNP name column (SNP Name) cannot be found!");
      System.exit(-1);
    }
    rawDataAvailable = true;
    for (int d = 0; d < data.length; d++) {
      String column = data[d].trim().toLowerCase();
      if (column.equals("x")) {
        columnX = d;
      }
      if (column.equals("y")) {
        columnY = d;
      }
    }

    if ((columnR == -1 || columnTheta == -1) && (columnX == -1 || columnY == -1)) {
      System.out.println(
          "Within the header of this file no raw intensity data is present (either R and Theta, or X and Y). Only imputation of triallelic SNPs will be possible");
      rawDataAvailable = false;
    }

    System.out.println("");

    boolean fileAlreadyInventorized = false;
    if ((new File(outputDirString + "Individuals.txt")).exists()
        && (new File(outputDirString + "SNPs.txt")).exists()) {
      fileAlreadyInventorized = true;
    }

    if (!fileAlreadyInventorized) {

      // Start processing this file
      String previousSNP = null;
      String previousInd = null;
      long linesProcessed = 0;
      while ((str = in.readLine()) != null) {
        // System.out.println(str);
        data = str.split(delimiter);

        if (data.length <= 1) {
          System.out.println("\nError parsing input file! The file cannot be delimited!");
          String delimiterDescription = "tab";
          if (delimiter.equals(" ")) {
            delimiterDescription = "space";
          }
          if (delimiter.equals(",")) {
            delimiterDescription = "comma";
          }
          if (delimiter.equals(";")) {
            delimiterDescription = "semicolon";
          }
          System.out.println("Are you sure it is " + delimiterDescription + " delimited ?");
        }
        if (data.length <= columnSNP || data.length <= columnSample) {
          System.out.println(
              "\nError: For record entry "
                  + (linesProcessed + 1)
                  + " the SNP or sample cannot be parsed! Record: "
                  + str);
          System.exit(-1);
        }
        String snp = data[columnSNP];
        String ind = data[columnSample];

        if (!snp.equals(previousSNP) && !hashSNP.containsKey(snp)) {
          hashSNP.put(snp, vecSNP.size());
          vecSNP.add(snp);
        }

        if (!ind.equals(previousInd) && !hashInd.containsKey(ind)) {
          hashInd.put(ind, vecInd.size());
          vecInd.add(ind);
        }

        previousSNP = snp;
        previousInd = ind;

        linesProcessed++;
        if (linesProcessed % 500000 == 0) {
          System.out.println(
              linesProcessed
                  + "\tLines processed. Number of unique SNPs read so far:\t"
                  + vecSNP.size()
                  + "\tNumber of unique Individuals read so far:\t"
                  + vecInd.size());
        }
      }
      System.out.println(
          linesProcessed
              + "\tLines processed. Number of unique SNPs read:\t"
              + vecSNP.size()
              + "\tNumber of unique Individuals read:\t"
              + vecInd.size());
      in.close();

      // Check whether SNPMappings.txt is available. This will improve processing speed considerably
      // in subsequent operations:
      String fileSNPMappings = new String(outputDirString + "SNPMappings.txt");
      if (!Gpio.canRead(fileSNPMappings)) {
        System.out.println(
            "\nNon critical warning: SNPMappings.txt can not be found in the output directory. Data will not be stored in optimized way, which will negatively affect the speed of TriTyper.\n");
      } else {

        System.out.println("\nLoading SNP mappings from file:\t" + fileSNPMappings);
        TextFile inSNP = new TextFile(fileSNPMappings, TextFile.R);
        String str2;
        ArrayList<String> vectorTemp = new ArrayList<String>();
        boolean needsSorting = false;
        while ((str2 = inSNP.readLine()) != null) {
          data = str2.split("\t");
          if (hashSNP.containsKey(data[2])) {
            if (data[1].length() != 9) {
              needsSorting = true;
              while (data[1].length() < 9) {
                data[1] = "0" + data[1];
              }
            }
            vectorTemp.add(data[0] + "\t" + data[1] + "\t" + data[2]);
          }
        }
        inSNP.close();
        if (needsSorting) {
          System.out.println(
              "Sorting SNPs on chromosome and physical position that are present in SNP mappings file:");
          Collections.sort(vectorTemp);
        }

        HashMap<String, Integer> hashSNPMappings = new HashMap<String, Integer>();
        ArrayList<String> vecSNPMappings = new ArrayList<String>();
        for (int snp = 0; snp < vectorTemp.size(); snp++) {
          String snpString = vectorTemp.get(snp);
          hashSNPMappings.put(snpString.split("\t")[2], vecSNPMappings.size());
          vecSNPMappings.add(snpString.split("\t")[2]);
        }
        System.out.println(
            "Number of SNPs with available physical mappings:\t" + vecSNPMappings.size());

        // Now sort the processed SNPs and arrange them, according to what is known:
        boolean[] snpMappingsUsed = new boolean[vecSNPMappings.size()];
        ArrayList vecSNPCopy = new ArrayList();
        for (int snp = 0; snp < vecSNP.size(); snp++) {
          String rsName = vecSNP.get(snp);
          if (hashSNPMappings.containsKey(rsName)) {
            snpMappingsUsed[hashSNPMappings.get(rsName)] = true;
          }
        }
        ArrayList<String> vecSNPNew = new ArrayList<String>();
        HashMap<String, Integer> hashSNPNew = new HashMap<String, Integer>();
        for (int snp = 0; snp < vecSNPMappings.size(); snp++) {
          if (snpMappingsUsed[snp]) {
            String rsName = vecSNPMappings.get(snp);
            hashSNPNew.put(rsName, vecSNPNew.size());
            vecSNPNew.add(rsName);
          }
        }

        // Now add the SNPs for which no mapping is available. These will be imported, but we cannot
        // do anything with them:
        ArrayList<String> snpsWithoutMapping = new ArrayList<String>();
        for (int snp = 0; snp < vecSNP.size(); snp++) {
          String rsName = vecSNP.get(snp);
          if (!hashSNPNew.containsKey(rsName)) {
            hashSNPNew.put(rsName, vecSNPNew.size());
            vecSNPNew.add(rsName);
            snpsWithoutMapping.add(rsName);
          }
        }
        if (snpsWithoutMapping.size() > 0) {
          System.out.println("Non critical warning: No physical mapping is available for SNPs:");
          for (int s = 0; s < snpsWithoutMapping.size(); s++) {
            System.out.println(snpsWithoutMapping.get(s));
          }
          System.out.println("");
        }

        // Replace the SNP hashmap and vector.
        vecSNP.clear();
        hashSNP.clear();
        vecSNP = vecSNPNew;
        hashSNP = hashSNPNew;
      }

      // Write individuals to file:

      System.out.print("Writing individuals to file:\t");
      TextFile outInd = new TextFile(outputDirString + "Individuals.txt", TextFile.W);
      for (int ind = 0; ind < vecInd.size(); ind++) {
        String individual = ((String) vecInd.get(ind));
        outInd.write(individual + "\n");
      }
      outInd.close();
      System.out.println("OK");

      System.out.print("Writing SNPs to file:\t");
      TextFile outSNP = new TextFile(outputDirString + "SNPs.txt", TextFile.W);
      for (int snp = 0; snp < vecSNP.size(); snp++) {
        outSNP.write(((String) vecSNP.get(snp)) + "\n");
      }
      outSNP.close();
      System.out.println("OK");

    } else {

      // Load individuals from file:
      vecInd.clear();
      hashInd.clear();

      TextFile inInd = new TextFile(outputDirString + "Individuals.txt", TextFile.R);
      while ((str = inInd.readLine()) != null) {
        hashInd.put(str, vecInd.size());
        vecInd.add(str);
      }
      inInd.close();

      // Load SNPs from file:
      vecSNP.clear();
      hashSNP.clear();

      TextFile inSNP = new TextFile(outputDirString + "SNPs.txt", TextFile.R);
      while ((str = inSNP.readLine()) != null) {
        hashSNP.put(str, vecSNP.size());
        vecSNP.add(str);
      }
      inSNP.close();
    }

    int nrInds = vecInd.size();
    int nrSNPs = vecSNP.size();

    // We now have inventorized the file and have generated the SNPs.txt and Individuals.txt files.
    // Now try to determine the order of genotypes, so we can choose a buffering technique. If no
    // order can be found we do not buffer, but importing will be extremely slow.
    boolean fileOrderPerSampleAllSNPs = false;
    boolean fileOrderPerSNPAllSamples = false;

    // Try to open the input file:
    in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    str = null;
    if (isIlluminaFinalReportFile) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    data = str.split(delimiter);
    int previousIndID = -1;
    int previousSNPID = -1;
    while ((str = in.readLine()) != null) {
      if (str.indexOf("\"") != -1) {
        str.replaceAll("\"", "");
      }
      if (str.indexOf("\'") != -1) {
        str.replaceAll("\'", "");
      }
      data = str.split(delimiter);
      String snp = data[columnSNP];
      String ind = data[columnSample];
      int snpID = hashSNP.get(snp);
      int indID = hashInd.get(ind);
      if (previousIndID != -1 && previousSNPID != -1) {
        if (snpID == previousSNPID && indID != previousIndID) {
          fileOrderPerSNPAllSamples = true;
          System.out.println(
              "Based on the import file, TriTyper Importer assumes that the order of the file is such that for each SNP all samples are underneath each other in the import file. This assumptions increases importing performance.");
        }
        if (snpID != previousSNPID && indID == previousIndID) {
          fileOrderPerSampleAllSNPs = true;
          System.out.println(
              "Based on the import file, TriTyper Importer assumes that the order of the file is such that for each sample all SNPs are underneath each other in the import file. This assumptions increases importing performance.");
        }
        break;
      }
      previousIndID = indID;
      previousSNPID = snpID;
    }

    System.out.print("Initializing binary data files:\t");
    RandomAccessFile file = new RandomAccessFile(outputDirString + "GenotypeMatrix.dat", "rw");
    RandomAccessFile fileRawData = null;
    if (rawDataAvailable) {
      fileRawData = new RandomAccessFile(outputDirString + "RawDataMatrix.dat", "rw");
    }
    System.out.println("OK");

    // Fill files with zeros:
    long size = (long) vecSNP.size() * (long) vecInd.size();
    long sizeGenotypeMatrix = size * 2;
    long sizeRawDataMatrix = size * 3;

    // Set size of files:
    file.setLength(0);
    if (rawDataAvailable) {
      fileRawData.setLength(0);
    }

    System.out.print("Making binary files zero:\t");
    // Quickly fill using buffers:
    file.seek(0);
    if (rawDataAvailable) {
      fileRawData.seek(0);
    }
    byte[] emptyString = new byte[10000];
    for (int s = 0; s < 10000; s++) {
      emptyString[s] = 0;
    }
    for (long a = 0; a < size / 10000; a++) {
      file.write(emptyString);
      file.write(emptyString);
      if (rawDataAvailable) {
        fileRawData.write(emptyString);
        fileRawData.write(emptyString);
        fileRawData.write(emptyString);
      }
    }

    // Fill rest with bytes:
    long rest = size % 10000;
    for (int a = 0; a < rest; a++) {
      byte emptyByte = 0;
      file.write(emptyByte);
      file.write(emptyByte);
      if (rawDataAvailable) {
        fileRawData.write(emptyByte);
        fileRawData.write(emptyByte);
        fileRawData.write(emptyByte);
      }
    }
    System.out.println("OK");

    System.out.println("Processing input file:");

    // Seek to beginning of file:
    file.seek(0);
    if (rawDataAvailable) {
      fileRawData.seek(0);
    }

    // Try to open the input file:

    in = new TextFile(inputFile, TextFile.R);

    // If this is an Illumina Final Report file, first process the irrelevant header:
    str = null;
    if (isIlluminaFinalReportFile) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
    }

    // Now parse the column identifiers:
    str = in.readLine();

    // Check whether we actually are dealing with a Final Report file, user might have forgotten to
    // instruct this:
    if (str.toLowerCase().startsWith("[header]")) {
      while ((str = in.readLine()) != null) {
        data = str.split(delimiter);
        if (data[0].trim().equals("[Data]")) {
          break;
        }
      }
      str = in.readLine();
    }

    data = str.split(delimiter);

    // If the file has such an order that for each sample all SNPs are underneath each other, we use
    // a buffering approach:
    byte[][] bufferAllele1 = null;
    byte[][] bufferAllele2 = null;
    byte[][] bufferR = null;
    byte[][] bufferTheta = null;
    int bufferFirstInd = 0;
    int bufferCurrentPos = 0;
    if (fileOrderPerSampleAllSNPs) {
      bufferAllele1 = new byte[nrSNPs][100];
      bufferAllele2 = new byte[nrSNPs][100];
      bufferR = new byte[nrSNPs][100];
      bufferTheta = new byte[nrSNPs][100];
    }
    if (fileOrderPerSNPAllSamples) {
      bufferAllele1 = new byte[1][nrInds];
      bufferAllele2 = new byte[1][nrInds];
      bufferR = new byte[1][nrInds];
      bufferTheta = new byte[1][nrInds];
    }

    // Start processing this file
    long linesProcessed = 0;
    previousIndID = -1;
    previousSNPID = -1;
    boolean warningGivenOnABGenotypeDefinition = false;
    while ((str = in.readLine()) != null) {

      // Remove quotes, if they exist:
      if (str.indexOf("\"") != -1) {
        str.replaceAll("\"", "");
      }
      if (str.indexOf("\'") != -1) {
        str.replaceAll("\'", "");
      }

      // Get individual values:
      data = str.split(delimiter);
      String snp = data[columnSNP];
      String ind = data[columnSample];
      double r = 0;
      double theta = 0;
      if (rawDataAvailable) {
        if (columnR != -1 && columnTheta != -1) {
          if (data.length <= columnR || data.length <= columnTheta) {
            System.out.println(
                "\nError: For record entry "
                    + (linesProcessed + 1)
                    + " R or Theta values cannot be parsed! Record: "
                    + str);
            System.out.println(
                "Can it be there are some entries in the file that do not have R or Theta value information?");
            System.exit(-1);
          }
          String rString = data[columnR];
          String thetaString = data[columnTheta];
          if (!decimalSeparator.equals(".")) {
            thetaString = thetaString.replaceAll(decimalSeparator, ".");
            rString = rString.replaceAll(decimalSeparator, ".");
          }
          // Parse R value:
          try {
            r = Double.parseDouble(rString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing R value: '"
                    + rString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes R values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          // Parse Theta value:
          try {
            theta = Double.parseDouble(thetaString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing theta value: '"
                    + thetaString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes theta values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
        } else {
          if (data.length <= columnX || data.length <= columnY) {
            System.out.println(
                "\nError: For record entry "
                    + (linesProcessed + 1)
                    + " X or Y intensities cannot be parsed! Record: "
                    + str);
            System.out.println(
                "Can it be there are some entries in the file that do not have X or Y intensity information?");
            System.exit(-1);
          }
          String xString = data[columnX];
          String yString = data[columnY];
          if (!decimalSeparator.equals(".")) {
            xString = xString.replaceAll(decimalSeparator, ".");
            yString = yString.replaceAll(decimalSeparator, ".");
          }
          double x = 0;
          double y = 0;
          try {
            x = Double.parseDouble(xString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing X value: '"
                    + xString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes X values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          try {
            y = Double.parseDouble(yString);
          } catch (Exception e) {
            System.out.println(
                "\nError parsing Y value: '"
                    + yString
                    + "'. Are you sure it has been saved in the correct locale?");
            System.out.println(
                "This method assumes Y values have a decimal separator that is a dot.");
            System.out.println(
                "E.g. if you export a final report from within BeadStudio, using a Dutch Windows");
            System.out.println(
                "locale, the eventual final report file uses a comma as decimal separator.");
            System.out.println("In that case use option '-decimalseparatoriscomma'");
            System.exit(-1);
          }
          // r = Math.sqrt(x * x + y * y);
          r = x + y;
          theta = 1;
          if (x > 0) {
            theta = twoDividedByPI * Math.atan2(y, x);
          }
        }
      }
      byte rByte = (byte) (Byte.MIN_VALUE + (Math.min(255d, r * 50d)));
      byte thetaByte = (byte) (Byte.MIN_VALUE + (theta * 200d));

      // Inspect genotype calls, these either should be A, C, G or T, - will become 0:
      byte allele1 = data[columnAllele1].getBytes()[0];
      byte allele2 = data[columnAllele2].getBytes()[0];
      if (allele1 == 45) {
        allele1 = 0;
      }
      if (allele2 == 45) {
        allele2 = 0;
      }
      if (allele1 == 66) {
        allele1 = 67;
        if (!warningGivenOnABGenotypeDefinition) {
          warningGivenOnABGenotypeDefinition = true;
          System.out.println(
              "\n\n\nWarning! The input genotype report file contains alleles that have been coded as B! These will be changed to C, please take this into account!!!\n\n\n");
        }
      }
      if (allele2 == 66) {
        allele2 = 67;
        if (!warningGivenOnABGenotypeDefinition) {
          warningGivenOnABGenotypeDefinition = true;
          System.out.println(
              "\n\n\nWarning! The input genotype report file contains alleles that have been coded as B! These will be changed to C, please take this into account!!!\n\n\n");
        }
      }

      // Write data:
      int snpID = ((Integer) hashSNP.get(snp)).intValue();
      int indID = ((Integer) hashInd.get(ind)).intValue();

      if (fileOrderPerSampleAllSNPs || fileOrderPerSNPAllSamples) {
        if (fileOrderPerSampleAllSNPs) {
          if (indID != previousIndID && previousIndID != -1) {
            bufferCurrentPos++;
          }
          if (bufferCurrentPos == 100) {
            // Flush buffer, hundred samples have just been processed
            System.out.println("100 samples have been processed, flushing buffers:");
            for (int s = 0; s < nrSNPs; s++) {
              file.seek((long) s * (long) nrInds * 2 + (long) bufferFirstInd);
              file.write(bufferAllele1[s]);
              file.seek((long) s * (long) nrInds * 2 + (long) nrInds + (long) bufferFirstInd);
              file.write(bufferAllele2[s]);
              if (rawDataAvailable) {
                fileRawData.seek(
                    (long) s * (long) vecInd.size() * 3
                        + (long) vecInd.size()
                        + (long) bufferFirstInd);
                fileRawData.write(bufferR[s]);
                fileRawData.seek(
                    (long) s * (long) vecInd.size() * 3
                        + (long) 2 * vecInd.size()
                        + (long) bufferFirstInd);
                fileRawData.write(bufferTheta[s]);
              }
            }
            bufferAllele1 = new byte[nrSNPs][100];
            bufferAllele2 = new byte[nrSNPs][100];
            bufferR = new byte[nrSNPs][100];
            bufferTheta = new byte[nrSNPs][100];
            bufferCurrentPos = 0;
            bufferFirstInd = indID;
          }
          bufferAllele1[snpID][bufferCurrentPos] = allele1;
          bufferAllele2[snpID][bufferCurrentPos] = allele2;
          bufferR[snpID][bufferCurrentPos] = rByte;
          bufferTheta[snpID][bufferCurrentPos] = thetaByte;
        } else {
          if (snpID != previousSNPID && previousSNPID != -1) {
            int s = previousSNPID;
            file.seek((long) s * (long) nrInds * 2);
            file.write(bufferAllele1[0]);
            file.seek((long) s * (long) nrInds * 2 + (long) nrInds);
            file.write(bufferAllele2[0]);
            if (rawDataAvailable) {
              fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) vecInd.size());
              fileRawData.write(bufferR[0]);
              fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) 2 * vecInd.size());
              fileRawData.write(bufferTheta[0]);
            }
            bufferAllele1 = new byte[1][nrInds];
            bufferAllele2 = new byte[1][nrInds];
            bufferR = new byte[1][nrInds];
            bufferTheta = new byte[1][nrInds];
          }
          bufferAllele1[0][indID] = allele1;
          bufferAllele2[0][indID] = allele2;
          bufferR[0][indID] = rByte;
          bufferTheta[0][indID] = thetaByte;
        }
      } else {
        file.seek((long) snpID * (long) nrInds * 2 + (long) indID);
        file.write(allele1);
        file.seek((long) snpID * (long) nrInds * 2 + (long) nrInds + (long) indID);
        file.write(allele2);
        if (rawDataAvailable) {
          fileRawData.seek(
              (long) snpID * (long) vecInd.size() * 3 + (long) vecInd.size() + (long) indID);
          fileRawData.write(rByte);
          fileRawData.seek(
              (long) snpID * (long) vecInd.size() * 3 + (long) 2 * vecInd.size() + (long) indID);
          fileRawData.write(thetaByte);
        }
      }

      linesProcessed++;
      if (linesProcessed % 500000 == 0) {
        System.out.println(linesProcessed + "\tLines processed");
      }

      previousIndID = indID;
      previousSNPID = snpID;
    }

    if (fileOrderPerSampleAllSNPs || fileOrderPerSNPAllSamples) {
      if (fileOrderPerSampleAllSNPs) {
        // Flush remaining buffer:
        System.out.println("Flushing remaining buffer (" + (bufferCurrentPos + 1) + " samples):");
        for (int s = 0; s < nrSNPs; s++) {
          byte[] bufferAllele1Subset = new byte[bufferCurrentPos + 1];
          byte[] bufferAllele2Subset = new byte[bufferCurrentPos + 1];
          byte[] bufferRSubset = new byte[bufferCurrentPos + 1];
          byte[] bufferThetaSubset = new byte[bufferCurrentPos + 1];
          for (int i = 0; i <= bufferCurrentPos; i++) {
            bufferAllele1Subset[i] = bufferAllele1[s][i];
            bufferAllele2Subset[i] = bufferAllele2[s][i];
            bufferRSubset[i] = bufferR[s][i];
            bufferThetaSubset[i] = bufferTheta[s][i];
          }
          file.seek((long) s * (long) nrInds * 2 + (long) bufferFirstInd);
          file.write(bufferAllele1Subset);
          file.seek((long) s * (long) nrInds * 2 + (long) nrInds + (long) bufferFirstInd);
          file.write(bufferAllele2Subset);
          if (rawDataAvailable) {
            fileRawData.seek(
                (long) s * (long) vecInd.size() * 3 + (long) vecInd.size() + (long) bufferFirstInd);
            fileRawData.write(bufferRSubset);
            fileRawData.seek(
                (long) s * (long) vecInd.size() * 3
                    + (long) 2 * vecInd.size()
                    + (long) bufferFirstInd);
            fileRawData.write(bufferThetaSubset);
          }
        }
      } else {
        // Flush remaining buffer:
        int s = previousSNPID;
        file.seek((long) s * (long) nrInds * 2);
        file.write(bufferAllele1[0]);
        file.seek((long) s * (long) nrInds * 2 + (long) nrInds);
        file.write(bufferAllele2[0]);
        if (rawDataAvailable) {
          fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) vecInd.size());
          fileRawData.write(bufferR[0]);
          fileRawData.seek((long) s * (long) vecInd.size() * 3 + (long) 2 * vecInd.size());
          fileRawData.write(bufferTheta[0]);
        }
      }
    }

    System.out.println(linesProcessed + "\tLines processed");

    // Close files:
    in.close();
    file.close();
    if (rawDataAvailable) {
      fileRawData.close();
    }

    // Output final remarks:
    System.out.println("Import of data has completed successfully!");
    System.out.println("");
    System.out.println(
        "Please ensure you include a valid PhenotypeInformation.txt and SNPMappings.txt in the output directory.");
    System.out.println(
        "These two additional files are required in order for TriTyper to function correctly.");
  }
// Example no. 10
// 0
  public final void compareOverlapAndZScoreDirectionTwoEQTLFiles(
      String eQTL,
      String meQTL,
      String eQTMFile,
      String outputFile,
      boolean matchOnGeneName,
      double fdrCutt,
      boolean matchSnpOnPos,
      boolean splitGeneNames,
      boolean flipUsingEQTM,
      boolean topeffect)
      throws IOException, Exception {
    System.out.println("Performing comparison of eQTLs and meQTLs");
    double filterOnFDR =
        fdrCutt; // Do we want to use another FDR measure? When set to -1 this is not used at all.

    HashSet<String> hashExcludeEQTLs =
        new HashSet<
            String>(); // We can exclude some eQTLs from the analysis. If requested, put the entire
                       // eQTL string in this HashSet for each eQTL. Does not work in combination
                       // with matching based on chr and pos
    HashSet<String> hashConfineAnalysisToSubsetOfProbes =
        new HashSet<
            String>(); // We can confine the analysis to only a subset of probes. If requested put
                       // the probe name in this HashSet
    HashSet<String> hashTestedSNPsThatPassedQC =
        null; // We can confine the analysis to only those eQTLs for which the SNP has been
              // successfully passed QC, otherwise sometimes unfair comparisons are made. If
              // requested, put the SNP name in this HashSet

    // Load the eQTM File
    QTLTextFile eQTLsTextFile = new QTLTextFile(eQTMFile, QTLTextFile.R);

    HashMap<String, ArrayList<EQTL>> eQtmInfo = new HashMap<String, ArrayList<EQTL>>();

    for (Iterator<EQTL> eQtlIt = eQTLsTextFile.getEQtlIterator(); eQtlIt.hasNext(); ) {
      EQTL eQtm = eQtlIt.next();
      String eQtmKey = eQtm.getRsName();

      if (!eQtm.getAlleleAssessed().equals("C")) {
        eQtm.setAlleleAssessed("C");
        eQtm.setZscore(eQtm.getZscore() * -1);

        Double[] zscores = eQtm.getDatasetZScores();
        Double[] correlation = eQtm.getCorrelations();
        for (int i = 0; i < eQtm.getDatasets().length; ++i) {
          zscores[i] *= -1;
          correlation[i] *= -1;
        }
        eQtm.setDatasetZScores(zscores);
        eQtm.setCorrelations(correlation);
      }

      ArrayList<EQTL> posEqtls = eQtmInfo.get(eQtmKey);

      if (posEqtls == null) {
        posEqtls = new ArrayList<EQTL>(1);
        posEqtls.add(eQtm);
        eQtmInfo.put(eQtmKey, posEqtls);
      } else if (!topeffect) {
        eQtmInfo.get(eQtmKey).add(eQtm);
      }
    }

    System.out.println("eQTMs read in: " + eQtmInfo.size());

    // Now load the eQTLs for file 1:
    THashMap<String, String[]> hashEQTLs = new THashMap<String, String[]>();
    THashSet<String> hashUniqueProbes = new THashSet<String>();
    THashSet<String> hashUniqueGenes = new THashSet<String>();

    TextFile in = new TextFile(eQTL, TextFile.R);
    in.readLine();
    String[] data = in.readLineElemsReturnReference(SPLIT_ON_TAB);

    if (data.length < 5) {
      throw new IllegalStateException(
          "QTL File does not have enough columns. Detected columns: "
              + data.length
              + " in file "
              + in.getFileName());
    }

    while (data != null) {
      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
            || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
          if (matchOnGeneName) {
            if (data[16].length() > 1) {

              if (splitGeneNames) {
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene, data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(gene);
                }
              } else {

                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  hashEQTLs.put(
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16], data);
                  hashUniqueProbes.add(data[4]);
                  hashUniqueGenes.add(data[16]);
                  // log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" +
                  // data[3] : data[1]) + "\t" + data[16]);
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              hashEQTLs.put(
                  (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4], data);
              hashUniqueProbes.add(data[4]);
              hashUniqueGenes.add(data[16]);
              //	log.write("Added eQTL from original file " + (matchSnpOnPos ? data[2] + ":" +
              // data[3] : data[1]) + "\t" + data[4]);
            }
          }
        }
        data = in.readLineElemsReturnReference(SPLIT_ON_TAB);
      }
    }
    in.close();

    int nrUniqueProbes = hashUniqueProbes.size();
    int nrUniqueGenes = hashUniqueGenes.size();
    hashUniqueProbes = null;
    hashUniqueGenes = null;

    // Initialize Graphics2D for the Z-Score allelic direction comparison:
    //        int width = 1000;
    //        int height = 1000;
    //        int margin = 100;
    //        int x0 = margin;
    //        int x1 = width - margin;
    //        int y0 = margin;
    //        int y1 = height - margin;

    ZScorePlot zs = new ZScorePlot();
    String zsOutFileName = outputFile + "-ZScoreComparison.pdf";
    zs.init(2, new String[] {"eQTLs", "meQTLs"}, true, zsOutFileName);

    // Variables holding various statistics:
    int nreQTLsIdenticalDirection = 0;
    int nreQTLsOppositeDirection = 0;
    HashMap<String, Integer> hashEQTLNrTimesAssessed = new HashMap<String, Integer>();

    THashSet<String> hashEQTLs2 = new THashSet<String>();
    THashSet<String> hashUniqueProbes2 = new THashSet<String>();
    THashSet<String> hashUniqueGenes2 = new THashSet<String>();
    THashSet<String> hashUniqueProbesOverlap = new THashSet<String>();
    THashSet<String> hashUniqueGenesOverlap = new THashSet<String>();

    int counterFile2 = 0;
    int overlap = 0;
    ArrayDoubleList vecX = new ArrayDoubleList();
    ArrayDoubleList vecY = new ArrayDoubleList();

    // Vector holding all opposite allelic effects:
    //        LinkedHashSet<String> vecOppositeEQTLs = new LinkedHashSet<String>();

    // Now process file 2:
    in = new TextFile(meQTL, TextFile.R);
    in.readLine();

    int skippedDueToMapping = 0;
    data = null;
    TextFile identicalOut =
        new TextFile(outputFile + "-eQTLsWithIdenticalDirecton.txt.gz", TextFile.W);
    TextFile disconcordantOut = new TextFile(outputFile + "-OppositeEQTLs.txt", TextFile.W);
    TextFile log = new TextFile(outputFile + "-eQTL-meQTL-ComparisonLog.txt", TextFile.W);
    TextFile log2 = new TextFile(outputFile + "-eQTM-missingnessLog.txt", TextFile.W);

    THashSet<String> identifiersUsed = new THashSet<String>();

    while ((data = in.readLineElemsReturnReference(SPLIT_ON_TAB)) != null) {

      if (filterOnFDR == -1 || Double.parseDouble(data[18]) <= filterOnFDR) {
        if (!eQtmInfo.containsKey(data[4])) {
          skippedDueToMapping++;
          log2.write(
              "meQTL probe not present In eQTM file:\t"
                  + data[4]
                  + ", effect statistics: \t"
                  + data[0]
                  + "\t"
                  + data[2]
                  + "\t"
                  + data[3]
                  + "\t"
                  + data[16]
                  + "\n");
          continue;
        }

        String orgDataFour = data[4];

        for (int i = 0; i < eQtmInfo.get(orgDataFour).size(); ++i) {
          if (topeffect && i > 0) {
            break;
          }
          data[16] = eQtmInfo.get(orgDataFour).get(i).getProbeHUGO();
          data[4] = eQtmInfo.get(orgDataFour).get(i).getProbe();

          if (flipUsingEQTM) {
            Double zScoreQTM = eQtmInfo.get(orgDataFour).get(i).getZscore();
            if (zScoreQTM < 0) {
              data[10] = String.valueOf(Double.parseDouble(data[10]) * -1);
            }
          }

          if (hashConfineAnalysisToSubsetOfProbes.isEmpty()
              || hashConfineAnalysisToSubsetOfProbes.contains(data[4])) {
            if (matchOnGeneName) {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                if (data[16].length() > 1) {

                  if (splitGeneNames) {
                    for (String gene : SEMI_COLON_PATTERN.split(data[16])) {

                      hashUniqueProbes2.add(data[4]);
                      hashUniqueGenes2.add(gene);
                      if (!hashEQTLs2.contains(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene)) {
                        hashEQTLs2.add(
                            (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene);
                        counterFile2++;
                      }
                    }
                  } else {

                    hashUniqueProbes2.add(data[4]);
                    hashUniqueGenes2.add(data[16]);
                    if (!hashEQTLs2.contains(
                        (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16])) {
                      hashEQTLs2.add(
                          (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16]);
                      counterFile2++;
                    }
                  }
                }
              }
            } else {
              if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
                // hashEQTLs2.put(data[1] + "\t" + data[4], str);
                hashUniqueProbes2.add(data[4]);
                hashUniqueGenes2.add(data[16]);
                counterFile2++;
              }
            }
          }
          String[] QTL = null;
          String identifier = null;
          if (matchOnGeneName) {

            if (data.length > 16 && data[16].length() > 1) {
              if (splitGeneNames) {
                // NB Plotting and processing of all QTLs here is not okay!
                for (String gene : SEMI_COLON_PATTERN.split(data[16])) {
                  if (!hashExcludeEQTLs.contains(data[1] + "\t" + gene)) {
                    identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + gene;
                    if (hashEQTLs.containsKey(identifier)) {
                      QTL = hashEQTLs.get(identifier);
                    }
                  }
                }
              } else {
                if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[16])) {
                  identifier =
                      (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[16];
                  if (hashEQTLs.containsKey(identifier)) {
                    QTL = hashEQTLs.get(identifier);
                  }
                }
              }
            }
          } else {
            if (!hashExcludeEQTLs.contains(data[1] + "\t" + data[4])) {
              identifier = (matchSnpOnPos ? data[2] + ":" + data[3] : data[1]) + "\t" + data[4];
              if (hashEQTLs.containsKey(identifier)) {
                QTL = hashEQTLs.get(identifier);
              }
            }
          }

          if (QTL == null) {

            // The eQTL, present in file 2 is not present in file 1:
            // if (Double.parseDouble(data[0]); < 1E-4) {
            if (hashTestedSNPsThatPassedQC == null
                || hashTestedSNPsThatPassedQC.contains(data[1])) {
              log.write(
                  "eQTL Present In New file But Not In Original File:\t"
                      + identifier
                      + "\t"
                      + data[0]
                      + "\t"
                      + data[2]
                      + "\t"
                      + data[3]
                      + "\t"
                      + data[16]
                      + "\n");
            }
            // }
            double zScore2 = Double.parseDouble(data[10]);
            //                        int posX = 500 + (int) 0;
            //                        int posY = 500 - (int) Math.round(zScore2 * 10);
            zs.draw(null, zScore2, 0, 1);

          } else {
            identifiersUsed.add(identifier);
            String[] eQtlData = QTL;
            boolean identicalProbe = true;
            String probe = data[4];
            String probeFound = eQtlData[4];
            if (!probe.equals(probeFound)) {
              identicalProbe = false;
            }

            hashUniqueProbesOverlap.add(data[4]);
            hashUniqueGenesOverlap.add(data[16]);
            if (!hashEQTLNrTimesAssessed.containsKey(identifier)) {
              hashEQTLNrTimesAssessed.put(identifier, 1);
            } else {
              hashEQTLNrTimesAssessed.put(identifier, 1 + hashEQTLNrTimesAssessed.get(identifier));
            }
            String alleles = eQtlData[8];
            String alleleAssessed = eQtlData[9];

            String correlations[] = (eQtlData[17]).split(";");
            double correlation = 0;
            int numCorr1 = 0;
            for (int c = 0; c < correlations.length; c++) {
              try {
                if (!correlations[c].equals("-")) {
                  correlation += Double.parseDouble(correlations[c]);
                  numCorr1++;
                }
              } catch (Exception e) {
              }
            }

            correlation /= (double) numCorr1;
            //                       if(numCorr1 == 0){
            //                           System.out.println("Warning: no correlations defined for
            // eqtl file 1");
            //                       }
            double zScore = Double.parseDouble(eQtlData[10]);
            //                        double pValue = Double.parseDouble(eQtlData[0]);
            String alleles2 = data[8];
            String alleleAssessed2 = data[9];
            double zScore2 = Double.parseDouble(data[10]);

            //                        double pValue2 = Double.parseDouble(data[0]);
            String correlations2[] = data[17].split(";");
            double correlation2 = 0;

            boolean alleleflipped = false;
            if (!alleleAssessed.equals(data[9])) {
              if (data[9].equals(eQtlData[8].split("/")[0])) {
                alleleflipped = true;
              } else {
                //                               System.out.println("WTF BBQ!");
              }
            }

            int numCorr2 = 0;
            for (int c = 0; c < correlations2.length; c++) {
              try {
                if (!correlations2[c].equals("-")) {

                  correlation2 += (Double.parseDouble(correlations2[c]));

                  numCorr2++;
                }
              } catch (NumberFormatException e) {
              }
            }
            //                       if(numCorr2 == 0){
            //                           System.out.println("Warning: no correlations defined for
            // eqtl file 2");
            //                       }
            correlation2 /= (double) numCorr2;
            if (alleleflipped) {
              correlation2 = -correlation2;
            }
            boolean sameDirection = false;
            int nrIdenticalAlleles = 0;
            if (alleles.length() > 2 && alleles2.length() > 2) {
              for (int a = 0; a < 3; a++) {
                for (int b = 0; b < 3; b++) {
                  if (a != 1 && b != 1) {
                    if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                      nrIdenticalAlleles++;
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles == 0) {
              alleles2 =
                  (char) BaseAnnot.getComplement((byte) alleles2.charAt(0))
                      + "/"
                      + (char) BaseAnnot.getComplement((byte) alleles2.charAt(2));
              alleleAssessed2 = BaseAnnot.getComplement(alleleAssessed2);
              if (alleles.length() > 2 && alleles2.length() > 2) {
                for (int a = 0; a < 3; a++) {
                  for (int b = 0; b < 3; b++) {
                    if (a != 1 && b != 1) {
                      if (alleles.getBytes()[a] == alleles2.getBytes()[b]) {
                        nrIdenticalAlleles++;
                      }
                    }
                  }
                }
              }
            }

            if (nrIdenticalAlleles != 2) {
              log.write(
                  "Error! SNPs have incompatible alleles!!:\t"
                      + alleles
                      + "\t"
                      + alleles2
                      + "\t"
                      + identifier
                      + "\n");
            } else {
              overlap++;
              if (!alleleAssessed.equals(alleleAssessed2)) {
                zScore2 = -zScore2;
                //                           correlation2 = -correlation2;
                alleleAssessed2 = alleleAssessed;
              }

              // Recode alleles:
              // if contains T, but no A, take complement
              //                        if (alleles.contains("T") && !alleles.contains("A")) {
              //                            alleles = BaseAnnot.getComplement(alleles);
              //                            alleleAssessed =
              // BaseAnnot.getComplement(alleleAssessed);
              //                            alleleAssessed2 =
              // BaseAnnot.getComplement(alleleAssessed2);
              //                        }
              if (zScore2 * zScore > 0) {
                sameDirection = true;
              }

              //                       if(correlation != correlation2 && (numCorr1 > 0 && numCorr2 >
              // 0)){
              //                           if(Math.abs(correlation - correlation2) > 0.00001){
              //                               System.out.println("Correlations are different:
              // "+lineno+"\t"+correlation +"\t"+correlation2+"\t"+str);
              //                           }
              //
              //                       }
              zs.draw(zScore, zScore2, 0, 1);
              if (!sameDirection) {
                nreQTLsOppositeDirection++;

                if (matchOnGeneName) {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[16]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);

                } else {
                  disconcordantOut.append(
                      data[1]
                          + '\t'
                          + data[4]
                          + '\t'
                          + alleles
                          + '\t'
                          + alleleAssessed
                          + '\t'
                          + zScore
                          + '\t'
                          + alleles2
                          + '\t'
                          + alleleAssessed2
                          + '\t'
                          + zScore2);
                }

                //                            int posX = 500 + (int) Math.round(zScore * 10);
                //                            int posY = 500 - (int) Math.round(zScore2 * 10);
                vecX.add(zScore);
                vecY.add(zScore2);

              } else {
                // write to output
                identicalOut.writeln(
                    identifier
                        + '\t'
                        + alleles
                        + '\t'
                        + alleleAssessed
                        + '\t'
                        + zScore
                        + '\t'
                        + alleles2
                        + '\t'
                        + alleleAssessed2
                        + '\t'
                        + zScore2);
                nreQTLsIdenticalDirection++;
                if (alleles.length() > 2
                    && !alleles.equals("A/T")
                    && !alleles.equals("T/A")
                    && !alleles.equals("C/G")
                    && !alleles.equals("G/C")) {
                  //                                int posX = 500 + (int) Math.round(zScore * 10);
                  //                                int posY = 500 - (int) Math.round(zScore2 * 10);
                  vecX.add(zScore);
                  vecY.add(zScore2);
                }
              }
            }
          }
        }
      }
    }
    identicalOut.close();
    disconcordantOut.close();
    in.close();
    log2.close();

    log.write(
        "\n/// Writing missing QTLs observed in original file but not in the new file ////\n\n");
    for (Entry<String, String[]> QTL : hashEQTLs.entrySet()) {
      if (!identifiersUsed.contains(QTL.getKey())) {
        // The eQTL, present in file 1 is not present in file 2:

        // if (Double.parseDouble(QTL.getValue()[0]) < 1E-4) {
        if (hashTestedSNPsThatPassedQC == null || hashTestedSNPsThatPassedQC.contains(data[1])) {
          log.write(
              "eQTL Present In Original file But Not In New File:\t"
                  + QTL.getKey()
                  + "\t"
                  + QTL.getValue()[0]
                  + "\t"
                  + QTL.getValue()[2]
                  + "\t"
                  + QTL.getValue()[3]
                  + "\t"
                  + QTL.getValue()[16]
                  + "\n");
        }
        // }
        double zScore = Double.parseDouble(QTL.getValue()[10]);
        //                int posX = 500 + (int) 0;
        //                int posY = 500 - (int) Math.round(zScore * 10);
        zs.draw(zScore, null, 0, 1);
      }
    }

    log.close();
    zs.write(zsOutFileName);

    double[] valsX = vecX.toArray();
    double[] valsY = vecY.toArray();

    if (valsX.length > 2) {
      double correlation = JSci.maths.ArrayMath.correlation(valsX, valsY);
      double r2 = correlation * correlation;

      cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine =
          new cern.jet.random.tdouble.engine.DRand();
      cern.jet.random.tdouble.StudentT tDistColt =
          new cern.jet.random.tdouble.StudentT(valsX.length - 2, randomEngine);
      double pValuePearson = 1;
      double tValue = correlation / (Math.sqrt((1 - r2) / (double) (valsX.length - 2)));
      if (tValue < 0) {
        pValuePearson = tDistColt.cdf(tValue);
      } else {
        pValuePearson = tDistColt.cdf(-tValue);
      }
      pValuePearson *= 2;
      System.out.println(
          "\nCorrelation between the Z-Scores of the overlapping set of eQTLs:\t"
              + correlation
              + "\tP-Value:\t"
              + pValuePearson);
    }

    TextFile outSummary = new TextFile(outputFile + "-Summary.txt", TextFile.W);

    System.out.println("");
    System.out.println(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);
    outSummary.writeln(
        "Nr of eQTLs:\t"
            + hashEQTLs.size()
            + "\tin file:\t"
            + eQTL
            + "\tNrUniqueProbes:\t"
            + nrUniqueProbes
            + "\tNrUniqueGenes:\t"
            + nrUniqueGenes);

    System.out.println(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");
    outSummary.writeln(
        "Nr of meQTLs:\t"
            + counterFile2
            + "\tin file:\t"
            + meQTL
            + "\tNrUniqueProbes:\t"
            + hashUniqueProbes2.size()
            + "\tNrUniqueGenes:\t"
            + hashUniqueGenes2.size()
            + " *With eQTM mapping.");

    System.out.println("Skipped over meQTLs:\t" + skippedDueToMapping);
    outSummary.writeln("Skipped over meQTLs:\t" + skippedDueToMapping);

    System.out.println(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());
    outSummary.writeln(
        "Overlap:\t"
            + overlap
            + "\tNrUniqueProbesOverlap:\t"
            + hashUniqueProbesOverlap.size()
            + "\tNrUniqueGenesOverlap:\t"
            + hashUniqueGenesOverlap.size());

    System.out.println("");
    outSummary.writeln();

    System.out.println("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);
    outSummary.writeln("Nr eQTLs with identical direction:\t" + nreQTLsIdenticalDirection);

    double proportionOppositeDirection =
        100d
            * (double) nreQTLsOppositeDirection
            / (double) (nreQTLsOppositeDirection + nreQTLsIdenticalDirection);
    String proportionOppositeDirectionString =
        (new java.text.DecimalFormat(
                "0.00;-0.00", new java.text.DecimalFormatSymbols(java.util.Locale.US)))
            .format(proportionOppositeDirection);

    System.out.println(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");
    outSummary.writeln(
        "Nr eQTLs with opposite direction:\t"
            + nreQTLsOppositeDirection
            + "\t("
            + proportionOppositeDirectionString
            + "%)");

    outSummary.close();

    nrShared = hashUniqueProbesOverlap.size();
    nrOpposite = nreQTLsOppositeDirection;
  }
  /**
   * Processes the requested contacts of every chromosome combination against the matching
   * {@code .RAWobserved} file and hands the results to {@code printOutContacts}.
   *
   * <p>The contacts come from {@code readInQtlTransformBlocks}; each map key is split on
   * {@code "-"} into two chromosome names. When both names are equal the intra-chromosomal file
   * layout is used, otherwise the inter-chromosomal one. The contact list of each combination is
   * sorted with {@link Collections#sort} before it is processed.
   *
   * @param in forwarded to {@code readInQtlTransformBlocks}
   * @param inProxies forwarded to {@code readInQtlTransformBlocks}
   * @param folderHighC root folder under which the contact files are looked up
   * @param resolution resolution label used in directory and file names
   * @param qualityCutOff suffix of the {@code MAPQG} directory name
   * @param normMethod currently unused; only raw contact information is processed
   * @param minValue forwarded to {@code processRawContactInformation}
   * @param alternativePermutation forwarded to {@code readInQtlTransformBlocks}
   * @param permutationFile forwarded to {@code readInQtlTransformBlocks}
   * @param probeMap forwarded to {@code readInQtlTransformBlocks}
   * @param snpMap forwarded to {@code readInQtlTransformBlocks}
   * @param out path of the file that {@code printOutContacts} writes to
   * @throws IOException if reading the input or writing the output fails
   */
  private static void addAnnotationToQTLOutput(
      String in,
      String inProxies,
      String folderHighC,
      String resolution,
      String qualityCutOff,
      String normMethod,
      double minValue,
      boolean alternativePermutation,
      boolean permutationFile,
      String probeMap,
      String snpMap,
      String out)
      throws IOException {

    HashMap<String, ArrayList<DesiredChrContact>> contactsPerChrPair =
        readInQtlTransformBlocks(
            in, inProxies, probeMap, snpMap, permutationFile, resolution, alternativePermutation);

    ProgressBar progress =
        new ProgressBar(
            contactsPerChrPair.size(),
            "Checking for contacts for: " + contactsPerChrPair.size() + " Chromosome combinations");

    TextFile writer = new TextFile(out, TextFile.W);

    for (Entry<String, ArrayList<DesiredChrContact>> chrPair : contactsPerChrPair.entrySet()) {
      ArrayList<DesiredChrContact> contacts = chrPair.getValue();
      Collections.sort(contacts);

      String[] chrNames = chrPair.getKey().split("-");
      String firstChr = chrNames[0];
      String secondChr = chrNames[1];

      // Same chromosome on both sides means the intra-chromosomal directory layout applies.
      boolean sameChromosome = firstChr.equals(secondChr);

      String directory;
      String rawContactFile;
      if (sameChromosome) {
        directory =
            folderHighC
                + "\\GM12878_combined_intrachromosomal\\"
                + resolution
                + "_resolution_intrachromosomal\\chr"
                + firstChr
                + "\\MAPQG"
                + qualityCutOff;
        rawContactFile = directory + "\\chr" + firstChr + "_" + resolution + ".RAWobserved";
      } else {
        directory =
            folderHighC
                + "\\GM12878_combined_interchromosomal\\"
                + resolution
                + "_resolution_interchromosomal\\chr"
                + firstChr
                + "_chr"
                + secondChr
                + "\\MAPQG"
                + qualityCutOff;
        rawContactFile =
            directory + "\\chr" + firstChr + "_" + secondChr + "_" + resolution + ".RAWobserved";
      }

      // Only the raw (non-normalized) contact values are used; normMethod is ignored.
      processRawContactInformation(rawContactFile, minValue, contacts, sameChromosome);
      printOutContacts(contacts, writer);
      progress.iterate();
    }

    progress.close();
    writer.close();
  }
  /**
   * Scans the sorted intra-chromosomal contact file once and writes, for each requested contact
   * that the scan reaches or passes, either its normalized contact value or a placeholder line.
   *
   * <p>The normalization factors are read from the file named after {@code baseName}, {@code
   * chrSmaller}, {@code resolution} and {@code normMethod}. If {@code fileToRead + ".sorted"} does
   * not exist yet it is created from {@code fileToRead} first. A matching raw value (third column)
   * is divided by the product of the two factors; when the result is at least {@code minValue} a
   * {@code Contact} line is written, otherwise a {@code -} placeholder line.
   *
   * <p>The single forward pass assumes {@code contactsToCheck} is ordered by smaller location and
   * then larger location, like the sorted file -- TODO confirm against the caller. Contacts that
   * are still unmatched when the file ends are not written at all.
   *
   * @param fileToRead path of the unsorted raw contact file
   * @param baseName directory containing the normalization file
   * @param normMethod extension of the normalization file
   * @param chrSmaller chromosome name used in the normalization file name
   * @param contactsToCheck contacts to look up
   * @param resolution resolution label, also converted with {@code getNumericResolution}
   * @param minValue minimum normalized value for a contact to be reported as present
   * @param outWriter destination of the result lines
   * @throws IOException if any of the files cannot be read or written
   */
  private static void processNormalizedIntraContactInformation(
      String fileToRead,
      String baseName,
      String normMethod,
      String chrSmaller,
      ArrayList<DesiredChrContact> contactsToCheck,
      String resolution,
      double minValue,
      TextFile outWriter)
      throws IOException {

    // Read the normalization factors; close the file even when reading fails.
    TextFile inputNormChr1 =
        new TextFile(
            baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorSmallerChr;
    try {
      normFactorSmallerChr = inputNormChr1.readAsArrayList();
    } finally {
      inputNormChr1.close();
    }

    if (!Gpio.exists(fileToRead + ".sorted")) {
      umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(
          fileToRead, fileToRead + ".sorted");
    }

    int numberToBeMatched = 0;

    LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

    try {
      // Stop as soon as every requested contact has been handled.
      while (numberToBeMatched < contactsToCheck.size() && it.hasNext()) {
        String[] parts = StringUtils.split(it.nextLine(), '\t');

        int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
        int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

        while (numberToBeMatched < contactsToCheck.size()) {
          DesiredChrContact wanted = contactsToCheck.get(numberToBeMatched);

          boolean sameFirstPosition = posChr1 == wanted.getChrLocationSmaller();
          if (posChr1 < wanted.getChrLocationSmaller()
              || (sameFirstPosition && posChr2 < wanted.getChrLocationLarger())) {
            // The file has not reached this contact yet: read the next line.
            break;
          }

          String linePrefix =
              wanted.getSnpName() + "\t" + wanted.getProbeName() + "\t" + posChr1 + "\t" + posChr2;

          if (sameFirstPosition && posChr2 == wanted.getChrLocationLarger()) {
            // NOTE(review): the "+ 1" assumes the factor list is offset by one entry relative to
            // the bin number; if readAsArrayList() returns the lines 0-based without a header this
            // is off by one -- confirm against the normalization file format.
            String factor1Base =
                normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
            String factor2Base =
                normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

            // isNumber (unlike StringUtils.isNumeric) accepts decimal values; entries such as
            // "NaN" are still rejected and reported as an error.
            if (org.apache.commons.lang.math.NumberUtils.isNumber(factor1Base)
                && org.apache.commons.lang.math.NumberUtils.isNumber(factor2Base)) {
              double factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base);
              double factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base);
              double rawContact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]);

              double contact = rawContact / (factor1 * factor2);
              if (contact >= minValue) {
                outWriter.writeln(linePrefix + "\tContact\t" + contact + "\t" + rawContact);
              } else {
                outWriter.writeln(linePrefix + "\t-\t-\t-");
              }
            } else {
              System.out.println("Error in files.");
            }
          } else {
            // The file has already moved past this contact, so it is not present.
            outWriter.writeln(linePrefix + "\t-\t-\t-");
          }
          numberToBeMatched++;
        }
      }
    } finally {
      LineIterator.closeQuietly(it);
    }
  }
  /**
   * Writes a GMT-style file from a gene set file and a code-to-name mapping file, skipping gene
   * sets whose gene list was already written and names that were already used.
   *
   * <p>The first line of the mapping file is skipped. For every further line, column 0 (quotes
   * removed) is the code, and the name is built from column 2 (quotes removed) followed by column
   * 6 in parentheses. Each gene set line is tab-separated with the code in the first column and
   * the genes in the remaining columns; a set is written as code, name and genes.
   *
   * @param genesetfile path of the tab-separated gene set file
   * @param mappingfile path of the mapping file
   * @param mappingdelimiter regular expression used to split mapping lines
   * @param gmtfile path of the output file
   * @throws IOException if reading or writing fails
   */
  public static void writeGMTFileBasedOnGeneSetFileAndMappingFileRemovingDuplicateGeneSets(
      String genesetfile, String mappingfile, String mappingdelimiter, String gmtfile)
      throws IOException {
    Map<String, String> code2name = new HashMap<String, String>();

    TextFile mappingReader = new TextFile(mappingfile, TextFile.R);
    mappingReader.readLine(); // skip the first line of the mapping file
    for (String row = mappingReader.readLine(); row != null; row = mappingReader.readLine()) {
      String[] fields = row.split(mappingdelimiter);
      String label = fields[2].trim().replace("\"", "") + " (" + fields[6].trim() + ")";
      String key = fields[0].trim().replace("\"", "");
      code2name.put(key, label);
    }
    mappingReader.close();
    System.out.println(code2name.size() + " gene set annotations read");

    TextFile geneSetReader = new TextFile(genesetfile, TextFile.R);
    TextFile gmtWriter = new TextFile(gmtfile, TextFile.W);
    Map<String, Integer> usedNames = new HashMap<String, Integer>();
    Set<String> usedGeneSets = new HashSet<String>();

    for (String row = geneSetReader.readLine(); row != null; row = geneSetReader.readLine()) {
      String[] fields = row.split("\t");
      if (fields.length == 0) {
        continue;
      }

      String code = fields[0].trim();
      String name = code2name.get(code);
      if (name == null) {
        LOGGER.log(Level.WARNING, "No annotation for gene set ''{0}''", code);
        continue;
      }

      // The textual form of the gene list serves as the identity of the gene set.
      String genes = Arrays.toString(Arrays.copyOfRange(fields, 1, fields.length));
      System.out.println(genes);
      if (usedGeneSets.contains(genes)) {
        continue;
      }

      if (usedNames.containsKey(name)) {
        System.out.println("Sets with different numbers of genes for " + name);
        continue;
      }

      gmtWriter.write(code + "\t" + name);
      for (int col = 1; col < fields.length; col++) {
        gmtWriter.write("\t" + fields[col]);
      }
      gmtWriter.writeln();
      usedNames.put(name, fields.length - 1);
      usedGeneSets.add(genes);
    }

    geneSetReader.close();
    gmtWriter.close();
  }
  /**
   * Converts a tab-separated set file into a tab-separated 0/1 membership matrix with one row per
   * item and one column per set.
   *
   * <p>Every input line is one set. With {@code hasIds} the first column is the set name and the
   * items start at column 1, or at column 2 when {@code isActuallyGMT} is also set; without
   * {@code hasIds} all columns are items and the sets are named {@code Complex1}, {@code
   * Complex2}, and so on. Empty item names are left out of the output.
   *
   * @param infile path of the input file
   * @param outfile path of the matrix file to write
   * @param hasIds whether the first column holds the set name
   * @param isActuallyGMT whether the second column must be skipped as well (only with ids)
   * @throws IOException if reading or writing fails
   */
  public static void convertGMTFileToPlainTextBinaryNetwork(
      String infile, String outfile, boolean hasIds, boolean isActuallyGMT) throws IOException {
    // Index of the first column that holds an item; the same for every line.
    final int firstItemColumn;
    if (!hasIds) {
      firstItemColumn = 0;
    } else if (isActuallyGMT) {
      firstItemColumn = 2;
    } else {
      firstItemColumn = 1;
    }

    Map<String, Set<Integer>> setsPerItem = new HashMap<String, Set<Integer>>();
    List<String> setNames = new ArrayList<String>();
    int setCount = 0;

    TextFile reader = new TextFile(infile, TextFile.R);
    for (String row = reader.readLine(); row != null; row = reader.readLine()) {
      String[] fields = row.split("\t");
      if (fields.length == 0) {
        continue;
      }
      if (hasIds) {
        setNames.add(fields[0]);
      }
      for (int col = firstItemColumn; col < fields.length; col++) {
        Set<Integer> memberOf = setsPerItem.get(fields[col]);
        if (memberOf == null) {
          memberOf = new HashSet<Integer>();
          setsPerItem.put(fields[col], memberOf);
        }
        memberOf.add(setCount);
      }
      setCount++;
    }
    reader.close();

    TextFile writer = new TextFile(outfile, TextFile.W);

    // Header row: an empty first cell followed by one column per set.
    StringBuilder headerRow = new StringBuilder();
    for (int set = 0; set < setCount; set++) {
      headerRow.append("\t");
      if (hasIds) {
        headerRow.append(setNames.get(set));
      } else {
        headerRow.append("Complex").append(set + 1);
      }
    }
    writer.write(headerRow.toString());
    writer.writeln();

    // One row per item with a 1 for every set that contains it.
    for (Map.Entry<String, Set<Integer>> itemEntry : setsPerItem.entrySet()) {
      String item = itemEntry.getKey();
      if ("".equals(item)) {
        continue;
      }
      Set<Integer> memberOf = itemEntry.getValue();
      StringBuilder matrixRow = new StringBuilder(item);
      for (int set = 0; set < setCount; set++) {
        matrixRow.append(memberOf.contains(set) ? "\t1" : "\t0");
      }
      writer.write(matrixRow.toString());
      writer.writeln();
    }
    writer.close();
  }
  /**
   * Loads a tab-separated probe annotation file for the requested platforms.
   *
   * <p>The header row names the columns; every header cell that occurs in {@code
   * platformsToInclude} marks a platform column (a repeated platform name is reported and only
   * its first column is used). Each data row describes one meta-trait: column 0 is its name,
   * column 2 the chromosome, column 3 the position (a single number or {@code start-end}), column
   * 4 the annotation, and columns 5 and up the platform-specific probe identifiers. Positions
   * that cannot be parsed are stored as {@code -1}.
   *
   * @param probeAnnotationFile the annotation file to read
   * @param platformsToInclude header names of the platforms to load
   * @throws IOException if the file cannot be read
   */
  public MetaQTL4TraitAnnotation(File probeAnnotationFile, Set<String> platformsToInclude)
      throws IOException {
    TextFile tf = new TextFile(probeAnnotationFile, TextFile.R);
    try {
      // NOTE(review): nrLines is never used. The call is kept because countLines() presumably
      // scans and reopens the file -- confirm it has no required side effect before removing it.
      int nrLines = tf.countLines();
      String[] header = tf.readLineElems(TextFile.tab);
      boolean[] colsToInclude = new boolean[header.length];
      int nrPlatforms = 0;
      HashSet<String> visitedPlatforms = new HashSet<String>();
      for (int i = 0; i < header.length; i++) {
        if (platformsToInclude.contains(header[i])) {
          if (!visitedPlatforms.contains(header[i])) {
            colsToInclude[i] = true;
            visitedPlatforms.add(header[i]);
            nrPlatforms++;
          } else {
            System.err.println(
                "ERROR: your probe annotation file contains duplicate platform identifiers!");
          }
        }
      }

      metatraits = new MetaQTL4MetaTraitTreeSet();
      metaTraitNameToObj = new HashMap<String, MetaQTL4MetaTrait>();
      platformToId = new HashMap<String, Integer>();
      platforms = new String[nrPlatforms];
      traitHashPerPlatform = new ArrayList<HashMap<String, MetaQTL4MetaTrait>>();

      // Assign consecutive platform ids in header order.
      int platformNr = 0;
      for (int i = 0; i < header.length; i++) {
        if (colsToInclude[i]) {
          platforms[platformNr] = header[i];
          platformToId.put(header[i], platformNr);
          HashMap<String, MetaQTL4MetaTrait> probeToId = new HashMap<String, MetaQTL4MetaTrait>();
          traitHashPerPlatform.add(probeToId);
          platformNr++;
        }
      }

      for (String platform : platformsToInclude) {
        if (!visitedPlatforms.contains(platform)) {
          System.err.println("WARNING: no annotation will be loaded for platform: " + platform);
        }
      }

      int probeCounter = 0;
      for (String[] elems : tf.readLineElemsIterable(TextFile.tab)) {

        // Decode with the same charset that was used to encode, so the text survives unchanged
        // regardless of the platform default charset.
        String metaTraitName = new String(elems[0].getBytes("UTF-8"), "UTF-8").intern();
        String chr = new String(elems[2].getBytes("UTF-8"), "UTF-8").intern();
        String chrpos = elems[3];
        String[] chrposElems = chrpos.split("-");
        int chrstartpos = -1;
        int chrendpos = -1;
        if (chrposElems.length >= 1) {
          try {
            chrstartpos = Integer.parseInt(chrposElems[0]);
          } catch (NumberFormatException ignored) {
            // Unparseable start position: keep -1.
          }
          try {
            chrendpos = Integer.parseInt(chrposElems[chrposElems.length - 1]);
          } catch (NumberFormatException ignored) {
            // Unparseable end position: keep -1.
          }
        }

        String hugo = new String(elems[4].getBytes("UTF-8"), "UTF-8").intern();
        String[] platformIds = new String[nrPlatforms];
        // int metaTraitId, String metaTraitName, String chr, int chrStart, int chrEnd, String
        // annotation, String[] platformIds
        MetaQTL4MetaTrait metaTraitObj =
            new MetaQTL4MetaTrait(
                probeCounter, metaTraitName, chr, chrstartpos, chrendpos, hugo, platformIds);

        // Walk the header columns so that platformIdx always matches the id assigned above; the
        // index must advance per included column instead of being reset for every column.
        int platformIdx = 0;
        for (int col = 0; col < header.length; col++) {
          if (!colsToInclude[col]) {
            continue;
          }
          // Probe identifiers live in columns 5 and up; rows shorter than the header simply
          // leave the missing platforms unset.
          if (col >= 5 && col < elems.length) {
            platformIds[platformIdx] = new String(elems[col].getBytes("UTF-8"), "UTF-8").intern();
            HashMap<String, MetaQTL4MetaTrait> probeToId = traitHashPerPlatform.get(platformIdx);
            probeToId.put(elems[col], metaTraitObj);
          }
          platformIdx++;
        }
        probeCounter++;
        metatraits.add(metaTraitObj);
        metaTraitNameToObj.put(metaTraitName, metaTraitObj);
      }
      System.out.println(
          tf.getFileName()
              + " has annotation for "
              + visitedPlatforms.size()
              + " platforms and "
              + metatraits.size()
              + " traits.");
    } finally {
      tf.close();
    }
  }