/**
   * Count, by random sampling, how many reads hit each marker type.
   *
   * <p>Draws {@code numReads} random reads via {@code RandMarker.rand(readLen)}, queries the
   * predictor for every marker each read hits, and increments one counter per type / sub-type
   * label. Within a single read, a given label is counted at most once; type and sub-type labels
   * share the same "already counted" set.
   *
   * @param readLen value handed to {@code RandMarker.rand} for every read
   * @param numReads number of random reads to generate
   * @return number of reads that hit each marker type / sub-type
   */
  CountByType randomSampling(int readLen, int numReads) {
    CountByType hitsByType = new CountByType();
    RandMarker readGenerator = new RandMarker(snpEffectPredictor.getGenome());

    for (int readNum = 0; readNum < numReads; readNum++) {
      // Generate one random read and find everything it overlaps
      Marker randomRead = readGenerator.rand(readLen);
      Markers hits = snpEffectPredictor.queryDeep(randomRead);

      // Labels already counted for this read (each label counts once per read)
      HashSet<String> counted = new HashSet<String>();
      for (Marker hit : hits) {
        String type = markerTypes.getType(hit);
        String subType = markerTypes.getSubType(hit);

        // 'add' returns true only when the label was not in the set yet
        if (counted.add(type)) {
          hitsByType.inc(type);
        }

        if (subType != null && counted.add(subType)) {
          hitsByType.inc(subType);
        }
      }
    }

    return hitsByType;
  }
  /**
   * Build a genome from a GFF3 file and compare the result to the 'expected' result.
   *
   * <p>The factory's random generator is always seeded with the same value so that test cases
   * produce consistent results. The textual result is printed when {@code verbose} is set or when
   * it differs from the expected text; the comparison ignores spaces (via {@code Gpr.noSpaces}).
   *
   * @param genome genome name used to create the {@code Config}
   * @param gff3File GFF3 file handed to the factory
   * @param resultFile file holding the expected output; {@code null} means "expect empty output"
   * @param readSeqs forwarded to the factory's {@code setReadSequences}
   * @param createRandSequences forwarded to the factory's {@code setCreateRandSequences}
   * @return the predictor created by the factory
   */
  public SnpEffectPredictor buildAndCompare(
      String genome,
      String gff3File,
      String resultFile,
      boolean readSeqs,
      boolean createRandSequences) {
    // Expected output (empty when no result file is given)
    String expectedResult = "";
    if (resultFile != null) {
      expectedResult = Gpr.readFile(resultFile).trim();
    }

    // Configure the GFF3 factory
    Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
    SnpEffPredictorFactoryGff3 factory = new SnpEffPredictorFactoryGff3(config);
    factory.setVerbose(verbose);
    factory.setFileName(gff3File);
    factory.setReadSequences(readSeqs);
    factory.setCreateRandSequences(createRandSequences);
    // Fixed seed: test cases need the same random sequences on every run
    factory.setRandom(new Random(20140410));

    // Build
    SnpEffectPredictor sep = factory.create();

    // Compare against the expectation, showing the actual output on mismatch (or if verbose)
    String result = show(sep.getGenome()).trim();
    if (verbose || !Gpr.noSpaces(expectedResult).equals(Gpr.noSpaces(result))) {
      System.out.println("Result:\n----------\n" + result + "\n----------\n");
    }
    String expectedCompact = Gpr.noSpaces(expectedResult);
    String resultCompact = Gpr.noSpaces(result);
    Assert.assertEquals(expectedCompact, resultCompact);

    return sep;
  }
  /**
   * Count bases covered for each marker type.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Collect all markers: the predictor's markers, every gene plus the gene's own markers,
   *       and every chromosome.
   *   <li>Accumulate raw per-type (and per-sub-type) totals in {@code rawCountMarkers} and
   *       {@code rawCountBases}.
   *   <li>For each non-chromosome type whose {@code countMarkers} entry is still zero, call
   *       {@code countBases(type, chromosome, markers)} on every chromosome.
   *   <li>Add chromosome totals directly from each chromosome's size.
   * </ol>
   */
  public void countBases() {
    // ---
    // Collect every marker into one collection
    // ---
    Markers allMarkers = new Markers();
    allMarkers.add(snpEffectPredictor.getMarkers());
    for (Gene g : snpEffectPredictor.getGenome().getGenes()) {
      allMarkers.add(g);
      allMarkers.add(g.markers());
    }

    for (Chromosome chromo : snpEffectPredictor.getGenome()) {
      allMarkers.add(chromo);
    }

    // ---
    // Raw totals: one marker and 'size' bases per type (and per sub-type, when there is one)
    // ---
    for (Marker marker : allMarkers) {
      String type = markerTypes.getType(marker);
      String subType = markerTypes.getSubType(marker);

      rawCountMarkers.inc(type);
      rawCountBases.inc(type, marker.size());

      if (subType == null) {
        continue; // No sub-type to count
      }
      rawCountMarkers.inc(subType);
      rawCountBases.inc(subType, marker.size());
    }

    // ---
    // Per-type base counts (overlap and join), delegated to countBases(type, chr, markers)
    // ---
    String chromosomeType = Chromosome.class.getSimpleName();
    for (String type : rawCountMarkers.keysSorted()) {
      if (type.equals(chromosomeType)) {
        continue; // Chromosomes are handled below (it's faster)
      }

      if (verbose) {
        System.err.print(type + ":");
      }

      // Only process types that have no count yet
      if (countMarkers.get(type) == 0) {
        for (Chromosome chromo : snpEffectPredictor.getGenome()) {
          countBases(type, chromo, allMarkers);
        }
      }

      if (verbose) {
        System.err.println("");
      }
    }

    // Chromosomes: the base count is simply the chromosome's size
    for (Chromosome chromo : snpEffectPredictor.getGenome()) {
      countBases.inc(chromosomeType, chromo.size());
      countMarkers.inc(chromosomeType);
    }
  }
  // Example no. 4
  // 0
  /**
   * Build the 'paeru.PA14' genome from the muccA GFF file, compare it against the expected output,
   * and check that creating splice sites for the first transcript of the first gene yields none.
   */
  public void testCase_05_PaeruPA14muccA() {
    SnpEffectPredictor sep =
        buildAndCompare(
            "paeru.PA14", "tests/paeru.PA14.muccA.gff", "tests/paeru.PA14.muccA.txt", true, false);

    // First transcript of the first gene in the genome
    Transcript firstTranscript = sep.getGenome().getGenes().iterator().next().iterator().next();

    // Make sure no splice site is added
    int numSpliceSites =
        firstTranscript.createSpliceSites(SpliceSite.CORE_SPLICE_SITE_SIZE, 0, 0, 0).size();
    Assert.assertEquals(0, numSpliceSites);
  }
  /**
   * Run main analysis.
   *
   * <p>Reads the configuration, loads the predictor database, indexes every transcript by ID in
   * {@code trById}, parses each {@code .xml} / {@code .xml.gz} file found directly inside
   * {@code xmlDirName}, then runs the sequence conservation analysis and saves the database.
   *
   * @return always {@code true}
   * @throws IllegalStateException if the entries of {@code xmlDirName} cannot be listed (the path
   *     is not a directory, or an I/O error occurred)
   */
  @Override
  public boolean run() {
    // Initialize
    readConfig(); // Read config file

    if (verbose) {
      Timer.showStdErr(
          "Reading database for genome version '"
              + genomeVer
              + "' from file '"
              + config.getFileSnpEffectPredictor()
              + "' (this might take a while)");
    }
    SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();
    genome = config.getGenome();
    if (verbose) {
      Timer.showStdErr("done");
    }

    // Build transcript map
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
      for (Transcript tr : gene) {
        trById.put(tr.getId(), tr);
      }
    }

    // Parse all XML files in directory.
    // File.list() returns null (not an empty array) when the path is not a directory or an I/O
    // error occurs; fail with a clear message instead of a bare NullPointerException.
    String[] files = new File(xmlDirName).list();
    if (files == null) {
      throw new IllegalStateException(
          "Cannot list XML files in '"
              + xmlDirName
              + "': not a directory, or an I/O error occurred");
    }
    for (String xmlFileName : files) {
      if (xmlFileName.endsWith(".xml.gz") || xmlFileName.endsWith(".xml")) {
        String path = xmlDirName + "/" + xmlFileName;
        parse(path);
      }
    }

    // Show stats
    if (verbose) {
      Timer.showStdErr(
          "Proteing sequences:" //
              + "\n\tMatch       : "
              + proteinOk.size() //
              + "\n\tDifferences : "
              + proteinDifferences.size() //
              + "\n\tAA errros   : "
              + aaErrors //
          );
    }

    analyzeSequenceConservation();

    // Save database
    save();

    if (verbose) {
      Timer.showStdErr("Done!");
    }
    return true;
  }
  // Example no. 6
  // 0
  /**
   * Build a genome from a GFF3 file and compare the result to the 'expected' result.
   *
   * <p>The textual result is always printed to standard output; the comparison ignores spaces
   * (via {@code Gpr.noSpaces}).
   *
   * @param genome genome name used to create the {@code Config}
   * @param gff3File GFF3 file handed to the factory
   * @param resultFile file holding the expected output; {@code null} means "expect empty output"
   * @param readSeqs forwarded to the factory's {@code setReadSequences}
   * @param createRandSequences forwarded to the factory's {@code setCreateRandSequences}
   * @return the predictor created by the factory
   */
  public SnpEffectPredictor buildAndCompare(
      String genome,
      String gff3File,
      String resultFile,
      boolean readSeqs,
      boolean createRandSequences) {
    // Expected output (empty when no result file is given)
    String expectedResult = "";
    if (resultFile != null) {
      expectedResult = Gpr.readFile(resultFile).trim();
    }

    // Configure the GFF3 factory and build
    Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
    SnpEffPredictorFactoryGff3 factory = new SnpEffPredictorFactoryGff3(config);
    factory.setFileName(gff3File);
    factory.setReadSequences(readSeqs);
    factory.setCreateRandSequences(createRandSequences);
    SnpEffectPredictor sep = factory.create();

    // Show the actual output, then compare it with the expectation
    String result = show(sep.getGenome()).trim();
    System.out.println("Result:\n----------\n" + result + "\n----------\n");
    String expectedCompact = Gpr.noSpaces(expectedResult);
    String resultCompact = Gpr.noSpaces(result);
    Assert.assertEquals(expectedCompact, resultCompact);

    return sep;
  }