Пример #1
0
  /**
   * Loads the chromosome list for a genome ID or from a chromosome sizes file.
   *
   * <p>The argument is first tried as a genome ID: a class path resource named
   * {@code <idOrFile>.chrom.sizes} is looked up relative to {@code HiCTools}. If no such resource
   * exists, the argument is treated as a file path.
   *
   * <p>Every line consisting of exactly two tab-separated fields (name, length) becomes one
   * chromosome, indexed from 1 in file order. Any other line is reported on standard output and
   * skipped. Index 0 of the returned list holds the "All" pseudo-chromosome, whose length is the
   * summed genome length expressed in kilobases.
   *
   * @param idOrFile Genome ID or file name where chromosome lengths written
   * @return Chromosome lengths, with the whole-genome pseudo-chromosome at index 0
   * @throws IOException if neither a matching resource nor a file is found, or reading fails
   * @throws NumberFormatException if a length field is not a valid integer
   */
  public static List<Chromosome> loadChromosomes(String idOrFile) throws IOException {

    InputStream is = null;

    try {
      // Note: to get this to work, had to edit Intellij settings
      // so that "?*.sizes" are considered sources to be copied to class path
      is = HiCTools.class.getResourceAsStream(idOrFile + ".chrom.sizes");

      if (is == null) {
        // Not an ID, see if it's a file
        File file = new File(idOrFile);
        if (file.exists()) {
          is = new FileInputStream(file);
        } else {
          throw new FileNotFoundException("Could not find chromosome sizes file for: " + idOrFile);
        }
      }

      List<Chromosome> chromosomes = new ArrayList<Chromosome>();
      chromosomes.add(0, null); // Index 0 reserved for "whole genome" pseudo-chromosome

      Pattern pattern = Pattern.compile("\t");
      BufferedReader reader = new BufferedReader(new InputStreamReader(is));
      String nextLine;
      long genomeLength = 0;
      int idx = 1;

      while ((nextLine = reader.readLine()) != null) {
        String[] tokens = pattern.split(nextLine);
        if (tokens.length == 2) {
          String name = tokens[0];
          int length;
          try {
            // Tolerate stray whitespace around the length field.
            length = Integer.parseInt(tokens[1].trim());
          } catch (NumberFormatException e) {
            // Same exception type as before, but now naming the offending line.
            NumberFormatException detailed =
                new NumberFormatException("Invalid chromosome length in line: " + nextLine);
            detailed.initCause(e);
            throw detailed;
          }
          genomeLength += length;
          chromosomes.add(idx, new Chromosome(idx, name, length));
          idx++;
        } else {
          System.out.println("Skipping " + nextLine);
        }
      }

      // Add the "pseudo-chromosome" All, representing the whole genome.  Units are in kilo-bases
      chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000)));

      return chromosomes;
    } finally {
      // Closing the underlying stream also releases the reader built on top of it.
      if (is != null) is.close();
    }
  }
Пример #2
0
  /**
   * Command line entry point. After option parsing, the first remaining argument selects the
   * command: {@code sort}, {@code pairsToBin}, {@code binToPairs}, {@code printmatrix},
   * {@code eigenvector} or {@code pre}.
   *
   * @param argv raw command line arguments (options plus command and its operands)
   * @throws IOException if reading or writing any of the involved files fails
   * @throws CmdLineParser.UnknownOptionException if an unrecognised option is supplied
   * @throws CmdLineParser.IllegalOptionValueException if an option value is invalid
   */
  public static void main(String[] argv)
      throws IOException, CmdLineParser.UnknownOptionException,
          CmdLineParser.IllegalOptionValueException {

    if (argv.length < 4) {
      System.out.println("Usage: hictools pre <options> <inputFile> <outputFile> <genomeID>");
      System.out.println("  <options>: -d only calculate intra chromosome (diagonal) [false]");
      System.out.println(
          "           : -o calculate densities (observed/expected), write to file [false]");
      System.out.println("           : -t <int> only write cells with count above threshold t [0]");
      System.out.println(
          "           : -c <chromosome ID> only calculate map on specific chromosome");
      System.exit(0);
    }

    Globals.setHeadless(true);

    CommandLineParser parser = new CommandLineParser();
    parser.parse(argv);
    String[] args = parser.getRemainingArgs();

    if (args[0].equals("sort")) {
      AlignmentsSorter.sort(args[1], args[2], null);
    } else if (args[0].equals("pairsToBin")) {
      String ifile = args[1];
      String ofile = args[2];
      String genomeId = args[3];
      List<Chromosome> chromosomes = loadChromosomes(genomeId);
      AsciiToBinConverter.convert(ifile, ofile, chromosomes);
    } else if (args[0].equals("binToPairs")) {
      String ifile = args[1];
      String ofile = args[2];
      AsciiToBinConverter.convertBack(ifile, ofile);
    } else if (args[0].equals("printmatrix")) {
      // The command name plus five operands are read below, so six arguments are required.
      if (args.length < 6) {
        System.err.println(
            "Usage: hictools printmatrix <observed/oe/pearson> hicFile chr1 chr2 binsize");
        System.exit(-1);
      }
      String type = args[1];
      String file = args[2];
      String chr1 = args[3];
      String chr2 = args[4];
      String binSizeSt = args[5];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }

      dumpMatrix(file, chr1, chr2, binSize, type);

    } else if (args[0].equals("eigenvector")) {
      if (args.length < 4) {
        System.err.println("Usage: hictools eigenvector hicFile chr binsize");
        // Stop here; continuing would index past the end of args.
        System.exit(-1);
      }
      String file = args[1];
      String chr = args[2];
      String binSizeSt = args[3];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }
      calculateEigenvector(file, chr, binSize);
    } else if (args[0].equals("pre")) {
      // Explicit length check instead of catching ArrayIndexOutOfBoundsException.
      if (args.length < 4) {
        System.err.println("No genome ID given");
        System.exit(0);
      }
      String genomeId = args[3];

      // loadChromosomes already stores the "All" pseudo-chromosome (genome length in
      // kilobases) at index 0. Re-summing the list here would also count that entry's
      // size and inflate the total, so the list is used as returned.
      List<Chromosome> chromosomes = loadChromosomes(genomeId);

      // The input argument may name several files separated by commas.
      String[] tokens = args[1].split(",");
      List<String> files = new ArrayList<String>(tokens.length);

      for (String f : tokens) {
        files.add(f);
      }

      Preprocessor preprocessor = new Preprocessor(new File(args[2]), chromosomes);

      preprocessor.setIncludedChromosomes(parser.getChromosomeOption());
      preprocessor.setCountThreshold(parser.getCountThresholdOption());
      preprocessor.setNumberOfThreads(parser.getThreadedOption());
      preprocessor.setDiagonalsOnly(parser.getDiagonalsOption());
      preprocessor.setLoadDensities(parser.getDensitiesOption());
      preprocessor.preprocess(files);
    }
  }