/** * Load chromosomes from given ID or file name. * * @param idOrFile Genome ID or file name where chromosome lengths written * @return Chromosome lengths * @throws IOException if chromosome length file not found */ public static List<Chromosome> loadChromosomes(String idOrFile) throws IOException { InputStream is = null; try { // Note: to get this to work, had to edit Intellij settings // so that "?*.sizes" are considered sources to be copied to class path is = HiCTools.class.getResourceAsStream(idOrFile + ".chrom.sizes"); if (is == null) { // Not an ID, see if its a file File file = new File(idOrFile); if (file.exists()) { is = new FileInputStream(file); } else { throw new FileNotFoundException("Could not find chromosome sizes file for: " + idOrFile); } } List<Chromosome> chromosomes = new ArrayList(); chromosomes.add(0, null); // Index 0 reserved for "whole genome" psuedo-chromosome Pattern pattern = Pattern.compile("\t"); BufferedReader reader = new BufferedReader(new InputStreamReader(is)); String nextLine; long genomeLength = 0; int idx = 1; while ((nextLine = reader.readLine()) != null) { String[] tokens = pattern.split(nextLine); if (tokens.length == 2) { String name = tokens[0]; int length = Integer.parseInt(tokens[1]); genomeLength += length; chromosomes.add(idx, new Chromosome(idx, name, length)); idx++; } else { System.out.println("Skipping " + nextLine); } } // Add the "psuedo-chromosome" All, representing the whole genome. Units are in kilo-bases chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000))); return chromosomes; } finally { if (is != null) is.close(); } }
public static void main(String[] argv) throws IOException, CmdLineParser.UnknownOptionException, CmdLineParser.IllegalOptionValueException { if (argv.length < 4) { System.out.println("Usage: hictools pre <options> <inputFile> <outputFile> <genomeID>"); System.out.println(" <options>: -d only calculate intra chromosome (diagonal) [false]"); System.out.println( " : -o calculate densities (observed/expected), write to file [false]"); System.out.println(" : -t <int> only write cells with count above threshold t [0]"); System.out.println( " : -c <chromosome ID> only calculate map on specific chromosome"); System.exit(0); } Globals.setHeadless(true); CommandLineParser parser = new CommandLineParser(); parser.parse(argv); String[] args = parser.getRemainingArgs(); if (args[0].equals("sort")) { AlignmentsSorter.sort(args[1], args[2], null); } else if (args[0].equals("pairsToBin")) { String ifile = args[1]; String ofile = args[2]; String genomeId = args[3]; List<Chromosome> chromosomes = loadChromosomes(genomeId); AsciiToBinConverter.convert(ifile, ofile, chromosomes); } else if (args[0].equals("binToPairs")) { String ifile = args[1]; String ofile = args[2]; AsciiToBinConverter.convertBack(ifile, ofile); } else if (args[0].equals("printmatrix")) { if (args.length < 5) { System.err.println( "Usage: hictools printmatrix <observed/oe/pearson> hicFile chr1 chr2 binsize"); System.exit(-1); } String type = args[1]; String file = args[2]; String chr1 = args[3]; String chr2 = args[4]; String binSizeSt = args[5]; int binSize = 0; try { binSize = Integer.parseInt(binSizeSt); } catch (NumberFormatException e) { System.err.println("Integer expected. Found: " + binSizeSt); System.exit(-1); } dumpMatrix(file, chr1, chr2, binSize, type); } else if (args[0].equals("eigenvector")) { if (args.length < 4) { System.err.println("Usage: hictools eigenvector hicFile chr binsize"); } String file = args[1]; String chr = args[2]; String binSizeSt = args[3]; int binSize = 0; try { binSize = Integer.parseInt(binSizeSt); } catch (NumberFormatException e) { System.err.println("Integer expected. Found: " + binSizeSt); System.exit(-1); } calculateEigenvector(file, chr, binSize); } else if (args[0].equals("pre")) { String genomeId = ""; try { genomeId = args[3]; } catch (ArrayIndexOutOfBoundsException e) { System.err.println("No genome ID given"); System.exit(0); } List<Chromosome> chromosomes = loadChromosomes(genomeId); long genomeLength = 0; for (Chromosome c : chromosomes) { if (c != null) genomeLength += c.getSize(); } chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000))); String[] tokens = args[1].split(","); List<String> files = new ArrayList<String>(tokens.length); for (String f : tokens) { files.add(f); } Preprocessor preprocessor = new Preprocessor(new File(args[2]), chromosomes); preprocessor.setIncludedChromosomes(parser.getChromosomeOption()); preprocessor.setCountThreshold(parser.getCountThresholdOption()); preprocessor.setNumberOfThreads(parser.getThreadedOption()); preprocessor.setDiagonalsOnly(parser.getDiagonalsOption()); preprocessor.setLoadDensities(parser.getDensitiesOption()); preprocessor.preprocess(files); } }