예제 #1
0
  /**
   * Runs a {@code Preprocessor} over an input file, or over every file of an input directory or
   * ".list" file, and writes the result to {@code ofile}.
   *
   * <p>When {@code isGCT(typeString)} is true the input is first converted to an intermediate
   * ".igv" file in a temporary directory; that intermediate file is removed again before this
   * method returns.
   *
   * @param typeString type of the input; checked with {@code validateIsTilable} unless the input
   *     name ends with ".affective.csv"
   * @param ifile path of the input file, directory, or ".list" file
   * @param ofile path of the output file
   * @param probeFile optional probe file, forwarded to the GCT conversion; may be null or blank
   * @param genomeId identifier handed to {@code loadGenome}
   * @param maxZoomValue maximum zoom value handed to the preprocessor
   * @param windowFunctions window functions handed to the preprocessor
   * @param tmpDirName optional existing directory for the intermediate file; when null or empty a
   *     per-user directory below {@code java.io.tmpdir} is used
   * @param maxRecords forwarded to the GCT conversion
   * @throws IOException if reading the input or writing the output fails; a partially written
   *     output file is deleted before the exception propagates
   * @throws PreprocessingException if the genome cannot be loaded or the temporary directory is
   *     unusable
   */
  public void toTDF(
      String typeString,
      String ifile,
      String ofile,
      String probeFile,
      String genomeId,
      int maxZoomValue,
      Collection<WindowFunction> windowFunctions,
      String tmpDirName,
      int maxRecords)
      throws IOException, PreprocessingException {

    if (!ifile.endsWith(".affective.csv")) {
      validateIsTilable(typeString);
    }

    System.out.println("toTDF.  File = " + ifile);
    System.out.println("Max zoom = " + maxZoomValue);
    if (probeFile != null && probeFile.trim().length() > 0) {
      System.out.println("Probe file = " + probeFile);
    }
    System.out.print("Window functions: ");
    for (WindowFunction wf : windowFunctions) {
      System.out.print(wf.toString() + " ");
    }
    System.out.println();

    boolean gctInput = isGCT(typeString);
    Genome genome = loadGenome(genomeId, gctInput);
    if (genome == null) {
      throw new PreprocessingException("Genome could not be loaded: " + genomeId);
    }
    File inputFileOrDir = new File(ifile);

    // Estimate the total number of lines to be parsed, for progress updates
    int nLines = estimateLineCount(inputFileOrDir);

    // TODO -- move this block of code out of here, this should be done before calling this method
    // Convert gct files to igv format first
    File deleteme = null;
    if (gctInput) {
      File tmpDir;
      if (tmpDirName != null && tmpDirName.length() > 0) {
        tmpDir = new File(tmpDirName);
        if (!tmpDir.exists() || !tmpDir.isDirectory()) {
          throw new PreprocessingException(
              "Specified tmp directory does not exist or is not directory: " + tmpDirName);
        }
      } else {
        tmpDir = new File(System.getProperty("java.io.tmpdir"), System.getProperty("user.name"));
        // mkdirs() also creates missing parents; the second isDirectory() check tolerates the
        // directory being created concurrently between the first check and mkdirs().
        if (!tmpDir.isDirectory() && !tmpDir.mkdirs() && !tmpDir.isDirectory()) {
          throw new PreprocessingException(
              "Could not create tmp directory: " + tmpDir.getAbsolutePath());
        }
      }

      String baseName = (new File(ifile)).getName();
      File igvFile = new File(tmpDir, baseName + ".igv");
      // Safety net in case the conversion below throws before the finally block can clean up.
      igvFile.deleteOnExit();
      doGCTtoIGV(typeString, ifile, igvFile, probeFile, maxRecords, tmpDirName, genome);

      inputFileOrDir = igvFile;
      deleteme = igvFile;
      typeString = ".igv";
    }

    // Convert to tdf
    File outputFile = new File(ofile);
    try {
      Preprocessor p = new Preprocessor(outputFile, genome, windowFunctions, nLines, null);
      if (inputFileOrDir.isDirectory() || inputFileOrDir.getName().endsWith(".list")) {
        List<File> files = getFilesFromDirOrList(inputFileOrDir);
        for (File f : files) {
          p.preprocess(f, maxZoomValue, typeString);
        }
      } else {
        p.preprocess(inputFileOrDir, maxZoomValue, typeString);
      }
      p.finish();
    } catch (IOException e) {
      // Delete output file as it is probably corrupt, then report the failure to the caller
      // instead of returning as if the conversion had succeeded.
      if (outputFile.exists()) {
        outputFile.delete();
      }
      throw e;
    } finally {
      if (deleteme != null && deleteme.exists()) {
        deleteme.delete();
      }
      System.out.flush();
    }
  }
예제 #2
0
  /**
   * Command line entry point. After option parsing, the first remaining argument selects the
   * command: {@code sort}, {@code pairsToBin}, {@code binToPairs}, {@code printmatrix},
   * {@code eigenvector} or {@code pre}. Any other command is silently ignored.
   *
   * <p>Invalid invocations print a message and terminate the JVM via {@code System.exit}.
   *
   * @param argv raw command line arguments; fewer than four prints the usage text and exits
   * @throws IOException if the selected command fails on I/O
   * @throws CmdLineParser.UnknownOptionException if an unknown option is supplied
   * @throws CmdLineParser.IllegalOptionValueException if an option value is invalid
   */
  public static void main(String[] argv)
      throws IOException, CmdLineParser.UnknownOptionException,
          CmdLineParser.IllegalOptionValueException {

    if (argv.length < 4) {
      System.out.println("Usage: hictools pre <options> <inputFile> <outputFile> <genomeID>");
      System.out.println("  <options>: -d only calculate intra chromosome (diagonal) [false]");
      System.out.println(
          "           : -o calculate densities (observed/expected), write to file [false]");
      System.out.println("           : -t <int> only write cells with count above threshold t [0]");
      System.out.println(
          "           : -c <chromosome ID> only calculate map on specific chromosome");
      System.exit(0);
    }

    Globals.setHeadless(true);

    CommandLineParser parser = new CommandLineParser();
    parser.parse(argv);
    String[] args = parser.getRemainingArgs();

    if (args[0].equals("sort")) {
      AlignmentsSorter.sort(args[1], args[2], null);
    } else if (args[0].equals("pairsToBin")) {
      String ifile = args[1];
      String ofile = args[2];
      String genomeId = args[3];
      List<Chromosome> chromosomes = loadChromosomes(genomeId);
      AsciiToBinConverter.convert(ifile, ofile, chromosomes);
    } else if (args[0].equals("binToPairs")) {
      String ifile = args[1];
      String ofile = args[2];
      AsciiToBinConverter.convertBack(ifile, ofile);
    } else if (args[0].equals("printmatrix")) {
      // The command name plus five operands are read below (args[0]..args[5]).
      if (args.length < 6) {
        System.err.println(
            "Usage: hictools printmatrix <observed/oe/pearson> hicFile chr1 chr2 binsize");
        System.exit(-1);
      }
      String type = args[1];
      String file = args[2];
      String chr1 = args[3];
      String chr2 = args[4];
      String binSizeSt = args[5];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }

      dumpMatrix(file, chr1, chr2, binSize, type);

    } else if (args[0].equals("eigenvector")) {
      if (args.length < 4) {
        System.err.println("Usage: hictools eigenvector hicFile chr binsize");
        // Stop here; continuing would index past the end of args.
        System.exit(-1);
      }
      String file = args[1];
      String chr = args[2];
      String binSizeSt = args[3];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }
      calculateEigenvector(file, chr, binSize);
    } else if (args[0].equals("pre")) {
      // Options consumed by the parser can leave fewer than four remaining arguments even
      // though argv itself passed the length check above.
      if (args.length < 4) {
        System.err.println("No genome ID given");
        // NOTE(review): exit status 0 is kept from the original although this is an error path.
        System.exit(0);
      }
      String genomeId = args[3];
      List<Chromosome> chromosomes = loadChromosomes(genomeId);

      long genomeLength = 0;
      for (Chromosome c : chromosomes) {
        if (c != null) {
          genomeLength += c.getSize();
        }
      }
      // Slot 0 is replaced by a synthetic "All" chromosome whose size is the total length / 1000.
      chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000)));

      // args[1] is a comma separated list of input files.
      String[] tokens = args[1].split(",");
      List<String> files = new ArrayList<String>(tokens.length);
      for (String f : tokens) {
        files.add(f);
      }

      Preprocessor preprocessor = new Preprocessor(new File(args[2]), chromosomes);

      preprocessor.setIncludedChromosomes(parser.getChromosomeOption());
      preprocessor.setCountThreshold(parser.getCountThresholdOption());
      preprocessor.setNumberOfThreads(parser.getThreadedOption());
      preprocessor.setDiagonalsOnly(parser.getDiagonalsOption());
      preprocessor.setLoadDensities(parser.getDensitiesOption());
      preprocessor.preprocess(files);
    }
  }