예제 #1
0
 /**
  * Builds a linear Tribble index for {@code file} and writes it to {@code idxFile}.
  *
  * <p>If the feature file is malformed, a message asking the user to sort the file is shown and
  * {@code null} is returned instead of an index.
  *
  * @return the index produced by {@code IndexFactory.createLinearIndex}, or {@code null} when the
  *     file was rejected as malformed
  * @throws DataLoadException if no codec can be found for the file
  */
 @Override
 protected Index doInBackground() throws Exception {
   final int linearBinSize = IgvTools.LINEAR_BIN_SIZE;
   final FeatureCodec featureCodec =
       CodecFactory.getCodec(
           file.getAbsolutePath(), GenomeManager.getInstance().getCurrentGenome());

   // Without a codec there is nothing that can be indexed.
   if (featureCodec == null) {
     throw new DataLoadException("Unknown File Type", file.getAbsolutePath());
   }

   try {
     final Index linearIndex = IndexFactory.createLinearIndex(file, featureCodec, linearBinSize);
     // Only persist an index that was actually produced.
     if (linearIndex != null) {
       IgvTools.writeTribbleIndex(linearIndex, idxFile.getAbsolutePath());
     }
     return linearIndex;
   } catch (TribbleException.MalformedFeatureFile malformed) {
     // Report the problem to the user rather than propagating it.
     final String html =
         "<html>Files must be sorted by start position prior to indexing.<br>"
             + malformed.getMessage()
             + "<br><br>Note: igvtools can be used to sort the file, select \"File > Run igvtools...\".";
     MessageUtils.showMessage(html);
     return null;
   }
 }
예제 #2
0
 /**
  * Returns a parser suited to the resource's file type, or {@code null} if none applies.
  *
  * <p>A parser is only created when {@code CodecFactory.getCodec} yields an
  * {@code AsciiFeatureCodec}. As noted by the original author, the file type is currently
  * determined from the file name, which is fragile.
  *
  * @param locator the resource to find a parser for
  * @param genome passed to the codec lookup and to the created parser
  * @return a {@code FeatureCodecParser} wrapping the codec, or {@code null} when there is no codec
  *     or the codec is not an {@code AsciiFeatureCodec}
  */
 public static FeatureParser getInstanceFor(ResourceLocator locator, Genome genome) {
   final FeatureCodec candidate = CodecFactory.getCodec(locator, genome);
   // instanceof is false for null, so this also covers the "no codec" case.
   if (!(candidate instanceof AsciiFeatureCodec)) {
     return null;
   }
   return new FeatureCodecParser((AsciiFeatureCodec) candidate, genome);
 }
예제 #3
0
  /**
   * Reports whether an index resource exists alongside the resource described by {@code locator}.
   *
   * <p>The index is looked for at the resource path with {@code .tbi} appended when the URL path
   * ends with "gz", and {@code .idx} otherwise. For remote URLs carrying a query string, the
   * extension is inserted before the query string.
   *
   * @param locator the resource whose index is sought
   * @param genome passed to the codec lookup
   * @return {@code false} if there is no codec for the resource; otherwise whether the index
   *     resource exists
   */
  public static boolean isIndexed(ResourceLocator locator, Genome genome) {
    final String resourcePath = locator.getPath();
    final String urlPath = locator.getURLPath();

    // Checking for the index is expensive over HTTP, so bail out first if the format has no
    // codec and therefore cannot be indexed.
    if (!CodecFactory.hasCodec(locator, genome)) {
      return false;
    }

    final String suffix;
    if (urlPath.endsWith("gz")) {
      suffix = ".tbi";
    } else {
      suffix = ".idx";
    }

    if (HttpUtils.isRemoteURL(resourcePath)) {
      // Keep any query string at the end: <path><suffix>?<query>
      final String[] pathAndQuery = resourcePath.split("\\?", 2);
      if (pathAndQuery.length == 2) {
        return FileUtils.resourceExists(
            String.format("%s%s?%s", pathAndQuery[0], suffix, pathAndQuery[1]));
      }
    }
    return FileUtils.resourceExists(resourcePath + suffix);
  }
예제 #4
0
 /**
  * Looks up the codec for {@code path} after passing it through {@code getStrippedFilename}.
  *
  * @param path path of the file a codec is wanted for
  * @param genome passed to the codec lookup
  * @return whatever {@code CodecFactory.getCodec} returns for the stripped name
  */
 private static FeatureCodec getCodec(String path, Genome genome) {
   return CodecFactory.getCodec(getStrippedFilename(path), genome);
 }
예제 #5
0
  /**
   * Loads the resource described by {@code locator} and returns the tracks created for it.
   *
   * <p>Switches on various attributes of the locator (mainly the type string and whether a
   * database URL is set) to call the appropriate loading method. The dispatch is a single
   * first-match-wins if/else chain, so the order of the tests matters wherever one suffix is also
   * the tail of another (for example {@code .rnai.gct} is tested before {@code .gct}).
   *
   * <p>After loading, every track in the result list receives the locator's feature-info URL,
   * the properties parsed from its track line, its color and its sample id, whenever those are
   * non-null.
   *
   * @param locator describes the resource; its path, type string, DB URL, track line, feature-info
   *     URL, color and sample id are read here
   * @param genome passed through to the loaders that accept one
   * @return the list of tracks created for this locator; it stays empty when the selected loader
   *     is not handed the list or when no branch matches
   * @throws DataLoadException for any exception raised inside the try block; only the message of
   *     the original exception is carried over
   */
  public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException {

    // Evaluated outside the try block, so an exception thrown here propagates as-is instead of
    // being converted to a DataLoadException.
    final String path = locator.getPath().trim();
    log.info("Loading resource, path " + path);
    try {
      String typeString = locator.getTypeString();

      // Only a message is shown for ".tbi"; there is no return, so the dispatch chain below
      // still runs for this locator.
      if (typeString.endsWith(".tbi")) {
        MessageUtils.showMessage(
            "<html><b>Error:</b>File type '.tbi' is not recognized.  If this is a 'tabix' index <br>"
                + " load the associated gzipped file, which should have an extension of '.gz'");
      }

      // This list will hold all new tracks created for this locator
      List<Track> newTracks = new ArrayList<Track>();

      String dbUrl = locator.getDBUrl();
      // Looked up eagerly, but only consulted if none of the branches ahead of it in the chain
      // match.
      LoadHandler handler = getTrackLoaderHandler(typeString);
      if (dbUrl != null) {
        // A database URL takes precedence over every type-string test.
        this.loadFromDatabase(locator, newTracks, genome);
      } else if (typeString.endsWith(".dbxml")) {
        loadFromDBProfile(locator, newTracks);
      } else if (typeString.endsWith(".gmt")) {
        loadGMT(locator);
      } else if (typeString.equals("das")) {
        loadDASResource(locator, newTracks);
      } else if (typeString.endsWith(".vcf.list")) {
        // Tested ahead of the generic ".list" branch further down.
        loadVCFListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".trio")) {
        loadTrioData(locator);
      } else if (typeString.endsWith("varlist")) {
        VariantListManager.loadVariants(locator);
      } else if (typeString.endsWith("samplepathmap")) {
        VariantListManager.loadSamplePathMap(locator);
      } else if (typeString.endsWith(".rnai.gct")) {
        // Must come before ".gct": a ".rnai.gct" type string also ends with ".gct".
        loadRnaiGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gct")
          || typeString.endsWith("res")
          || typeString.endsWith("tab")) {
        // Note: "res" and "tab" are matched without a leading dot.
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) {
        loadGbkFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cn")
          || typeString.endsWith(".xcn")
          || typeString.endsWith(".snp")
          || typeString.endsWith(".igv")
          || typeString.endsWith(".loh")) {
        loadIGVFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cbs")
          || typeString.endsWith(".seg")
          || typeString.endsWith("glad")
          || typeString.endsWith("birdseye_canary_calls")
          || typeString.endsWith(".seg.zip")) {
        loadSegFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gistic")) {
        loadGisticFile(locator, newTracks);
      } else if (typeString.endsWith(".gs")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome);
      } else if (typeString.endsWith(".riger")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome);
      } else if (typeString.endsWith(".hp")) {
        loadRNAiHPScoreFile(locator);
      } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) {
        loadSyntentyMapping(locator, newTracks);
      } else if (typeString.endsWith(".sam")
          || typeString.endsWith(".bam")
          || typeString.endsWith(".cram")
          || typeString.endsWith(".sam.list")
          || typeString.endsWith(".bam.list")
          || typeString.endsWith(".aligned")
          || typeString.endsWith(".sai")
          || typeString.endsWith(".bai")
          || typeString.equals("alist")
          || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) {
        // ".sam.list" / ".bam.list" are claimed here, ahead of the generic ".list" branch.
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".wig")
          || typeString.endsWith(".bedgraph")
          || typeString.endsWith(".bdg")
          || typeString.endsWith("cpg.txt")
          || typeString.endsWith(".expr")) {
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith("fpkm_tracking")
          || typeString.endsWith("gene_exp.diff")
          || typeString.endsWith("cds_exp.diff")) {
        loadCufflinksFile(locator, newTracks, genome);
      } else if (typeString.contains(".dranger")) {
        loadDRangerFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) {
        // Must come before the plain ".ibf" / ".tdf" branch, which would otherwise match too.
        loadEwigIBFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bw")
          || typeString.endsWith(".bb")
          || typeString.endsWith(".bigwig")
          || typeString.endsWith(".bigbed")) {
        loadBWFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) {
        loadTDFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".counts")) {
        loadGobyCountsArchive(locator, newTracks, genome);
      } else if (WiggleParser.isWiggle(locator)) {
        // Second route to the wig loader, decided by WiggleParser instead of the suffix tests
        // above.
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf")) {
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf.dict")) {
        // Same loader as ".maf".
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.contains(".peak.bin")) {
        loadPeakTrack(locator, newTracks, genome);
      } else if (typeString.endsWith("mage-tab")
          || ExpressionFileParser.parsableMAGE_TAB(locator)) {
        // The parsableMAGE_TAB check only runs when the suffix test fails (short-circuit ||).
        locator.setDescription("MAGE_TAB");
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bp")) {
        loadBasePairFile(locator, newTracks, genome);
      } else if (GWASParser.isGWASFile(typeString)) {
        loadGWASFile(locator, newTracks, genome);
      } else if (GobyAlignmentQueryReader.supportsFileType(path)) {
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".list")) {
        // This should be deprecated
        loadListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".smap")) {
        loadSMAPFile(locator, newTracks, genome);
      } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) {
        // Generic Tribble path: taken when a codec exists and forceNotTribble does not veto it.
        loadTribbleFile(locator, newTracks, genome);
      } else if (handler != null) {
        // Custom loader specified
        log.info(String.format("Loading %s with %s", path, handler));
        handler.load(path, newTracks);
      } else if (AttributeManager.isSampleInfoFile(locator)) {
        // This might be a sample information file.
        AttributeManager.getInstance().loadSampleInfo(locator);
      } else {
        // Nothing matched: tell the user, then continue with an empty track list.
        MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension");
      }

      // Track line
      // Parsed once here; the resulting properties object is applied to every new track below.
      TrackProperties tp = null;
      String trackLine = locator.getTrackLine();
      if (trackLine != null) {
        tp = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, tp);
      }

      // Copy locator-level attributes onto each new track; null values are skipped.
      for (Track track : newTracks) {

        if (locator.getFeatureInfoURL() != null) {
          track.setUrl(locator.getFeatureInfoURL());
        }
        if (tp != null) {
          track.setProperties(tp);
        }
        if (locator.getColor() != null) {
          track.setColor(locator.getColor());
        }
        if (locator.getSampleId() != null) {
          track.setSampleId(locator.getSampleId());
        }
      }

      return newTracks;
    } catch (Exception e) {
      // Exception classes listed in NOLogExceptions are rethrown without being logged.
      if (!NOLogExceptions.contains(e.getClass())) {
        log.error(e.getMessage(), e);
      }
      // Only the message is carried over; the original exception is not attached as a cause.
      throw new DataLoadException(e.getMessage());
    }
  }
예제 #6
0
  /**
   * Create an index for an alignment or feature file. The output index will have the same base
   * name as the input file, although it may be in a different directory. An appropriate index
   * extension (.sai, .fai or .idx) will be appended.
   *
   * <p>Gzipped and BAM inputs are rejected up front. SAM and FASTA inputs are handled by their
   * dedicated indexers; everything else goes through the Tribble codec for the file.
   *
   * @param ifile path of the file to index
   * @param typeString type string of the input, matched by suffix to choose the indexer and the
   *     index extension
   * @param outputDir directory for the index file, or {@code null} to use the input file's parent
   * @param indexType passed through to {@code createTribbleIndex}
   * @param binSize passed through to {@code createTribbleIndex}
   * @return the absolute path chosen for the index file
   * @throws IOException declared by the original signature
   * @throws PreprocessingException if {@code typeString} ends with "gz" or "bam"
   * @throws DataLoadException if no codec is found for {@code ifile} on the Tribble path
   */
  public String doIndex(
      String ifile, String typeString, String outputDir, int indexType, int binSize)
      throws IOException {
    File inputFile = new File(ifile);

    if (outputDir == null) {
      outputDir = inputFile.getParent();
    }
    String outputFileName = (new File(outputDir, inputFile.getName())).getAbsolutePath();

    if (typeString.endsWith("gz")) {
      System.out.println("Cannot index a gzipped file");
      throw new PreprocessingException("Cannot index a gzipped file");
    }

    if (typeString.endsWith("bam")) {
      String msg =
          "Cannot index a BAM file. Use the samtools package for sorting and indexing BAM files.";
      System.out.println(msg);
      throw new PreprocessingException(msg);
    }

    // We have different naming conventions for different index files. BAM inputs were rejected
    // above, so only SAM, FASTA and Tribble naming remain; the former ".bai" branch and the
    // "not bam" test could never take effect and have been removed.
    final boolean isSam = typeString.endsWith("sam");
    if (isSam && !outputFileName.endsWith(".sai")) {
      outputFileName += ".sai";
    } else if ((typeString.endsWith("fa") || typeString.endsWith("fasta"))
        && !outputFileName.endsWith(".fai")) {
      outputFileName += ".fai";
    } else if (!isSam && !outputFileName.endsWith(".idx")) {
      outputFileName += ".idx";
    }

    File outputFile = new File(outputFileName);

    // Sam/FASTA files are special
    // NOTE(review): the naming above matches FASTA by suffix ("fa"/"fasta") while the test below
    // requires an exact ".fa"/".fasta" -- confirm which is intended.
    try {
      if (isSam) {
        AlignmentIndexer indexer = AlignmentIndexer.getInstance(inputFile, null, null);
        indexer.createSamIndex(outputFile);
        return outputFileName;
      } else if (typeString.equals(".fa") || typeString.equals(".fasta")) {
        FastaUtils.createIndexFile(inputFile.getAbsolutePath(), outputFileName);
        return outputFileName;
      }
    } catch (Exception e) {
      // NOTE(review): after a failure here, execution falls through to the Tribble path below
      // instead of reporting the error to the caller -- confirm this is intended.
      e.printStackTrace();
      // Delete output file as it is probably corrupt
      if (outputFile.exists()) {
        outputFile.delete();
      }
    }

    Genome genome = null; // <= don't do chromosome conversion
    FeatureCodec codec = CodecFactory.getCodec(ifile, genome);
    if (codec == null) {
      throw new DataLoadException("Unknown File Type", ifile);
    }

    try {
      createTribbleIndex(ifile, outputFile, indexType, binSize, codec);
    } catch (TribbleException.MalformedFeatureFile e) {
      // Reported to the user; the method still returns the intended output file name.
      MessageUtils.showMessage(
          "<html>Files must be sorted by start position prior to indexing.<br>"
              + e.getMessage()
              + "<br><br>Note: igvtools can be used to sort the file, select \"File > Run igvtools...\".");
    }
    System.out.flush();
    return outputFileName;
  }
예제 #7
0
 /**
  * Reports whether a codec is available for the given resource.
  *
  * @param locator the resource to check
  * @return {@code true} if {@code CodecFactory.getCodec} returns a non-null codec when called
  *     with a {@code null} genome
  */
 public static boolean canParse(ResourceLocator locator) {
   if (CodecFactory.getCodec(locator, null) == null) {
     return false;
   }
   return true;
 }