/**
 * Builds a linear Tribble index for {@code file} and, when one is produced, writes it to the
 * path of {@code idxFile}.
 *
 * <p>If Tribble reports a malformed feature file, the user is shown a message explaining that
 * the file must be sorted before indexing, and {@code null} is returned.
 *
 * @return the index returned by {@code IndexFactory.createLinearIndex}, or {@code null} when
 *     indexing was abandoned because of a malformed feature file
 * @throws DataLoadException if no codec can be found for the file
 */
@Override
protected Index doInBackground() throws Exception {
  final FeatureCodec featureCodec =
      CodecFactory.getCodec(
          file.getAbsolutePath(), GenomeManager.getInstance().getCurrentGenome());

  // Without a codec the file type is unknown and cannot be indexed.
  if (featureCodec == null) {
    throw new DataLoadException("Unknown File Type", file.getAbsolutePath());
  }

  try {
    final Index created =
        IndexFactory.createLinearIndex(file, featureCodec, IgvTools.LINEAR_BIN_SIZE);
    if (created != null) {
      IgvTools.writeTribbleIndex(created, idxFile.getAbsolutePath());
    }
    return created;
  } catch (TribbleException.MalformedFeatureFile malformed) {
    final String html =
        "<html>Files must be sorted by start position prior to indexing.<br>"
            + malformed.getMessage()
            + "<br><br>Note: igvtools can be used to sort the file, select \"File > Run igvtools...\".";
    MessageUtils.showMessage(html);
    return null;
  }
}
/**
 * Returns a parser appropriate for the resource's file type.
 *
 * <p>The codec is looked up through {@code CodecFactory}. Per the original author's note, the
 * file type is currently derived from the file name, which is acknowledged to be fragile.
 *
 * @param locator the resource to be parsed
 * @param genome the genome handed to the codec lookup and to the created parser
 * @return a {@code FeatureCodecParser} wrapping the codec when it is an
 *     {@code AsciiFeatureCodec}; {@code null} when there is no codec or it is of another kind
 */
public static FeatureParser getInstanceFor(ResourceLocator locator, Genome genome) {
  final FeatureCodec candidate = CodecFactory.getCodec(locator, genome);

  // instanceof evaluates to false for null, so this also covers the "no codec" case.
  if (!(candidate instanceof AsciiFeatureCodec)) {
    return null;
  }
  return new FeatureCodecParser((AsciiFeatureCodec) candidate, genome);
}
public static boolean isIndexed(ResourceLocator locator, Genome genome) { // Checking for the index is expensive over HTTP. First see if this is an indexable format by // fetching the codec String fullPath = locator.getPath(); String pathNoQuery = locator.getURLPath(); if (!CodecFactory.hasCodec(locator, genome)) { return false; } String indexExtension = pathNoQuery.endsWith("gz") ? ".tbi" : ".idx"; String indexPath = fullPath + indexExtension; if (HttpUtils.isRemoteURL(fullPath)) { // Handle query string, if it exists String[] toks = fullPath.split("\\?", 2); if (toks.length == 2) { indexPath = String.format("%s%s?%s", toks[0], indexExtension, toks[1]); } } return FileUtils.resourceExists(indexPath); }
/**
 * Looks up the codec for a path after passing it through {@code getStrippedFilename}.
 *
 * @param path the path of the resource
 * @param genome the genome handed to the codec lookup
 * @return whatever {@code CodecFactory.getCodec} returns for the stripped name
 */
private static FeatureCodec getCodec(String path, Genome genome) {
  return CodecFactory.getCodec(getStrippedFilename(path), genome);
}
/**
 * Switches on various attributes of locator (mainly the locator type string and whether a
 * database URL is set) to call the appropriate loading method.
 *
 * <p>The branches below are evaluated in order and the first match wins. Several tests are
 * suffix checks where one suffix is contained in another (for example {@code .rnai.gct} vs.
 * {@code .gct}, {@code .ewig.tdf} vs. {@code .tdf}, {@code .vcf.list} / {@code .sam.list} /
 * {@code .bam.list} vs. {@code .list}), so the relative order of the branches matters.
 *
 * <p>After loading, properties carried by the locator (feature info URL, track line, color,
 * sample id) are applied to every track that was added to the result list.
 *
 * @param locator the resource to load
 * @param genome the genome passed through to the individual loaders
 * @return the tracks collected for this locator; loaders that are not handed the list (e.g.
 *     {@code loadGMT}) contribute nothing to it
 * @throws DataLoadException if any exception is raised while loading; only the message of the
 *     original exception is carried over
 */
public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException {
  final String path = locator.getPath().trim();
  log.info("Loading resource, path " + path);
  try {
    String typeString = locator.getTypeString();
    // A tabix index was selected instead of its data file: tell the user. Note that this only
    // shows a message; execution continues into the dispatch chain below.
    if (typeString.endsWith(".tbi")) {
      MessageUtils.showMessage(
          "<html><b>Error:</b>File type '.tbi' is not recognized. If this is a 'tabix' index <br>"
              + " load the associated gzipped file, which should have an extension of '.gz'");
    }
    // This list will hold all new tracks created for this locator
    List<Track> newTracks = new ArrayList<Track>();
    String dbUrl = locator.getDBUrl();
    // Looked up eagerly, but only consulted near the end of the chain when no built-in
    // branch has matched.
    LoadHandler handler = getTrackLoaderHandler(typeString);
    if (dbUrl != null) {
      // A database URL takes precedence over any type-string based dispatch.
      this.loadFromDatabase(locator, newTracks, genome);
    } else if (typeString.endsWith(".dbxml")) {
      loadFromDBProfile(locator, newTracks);
    } else if (typeString.endsWith(".gmt")) {
      loadGMT(locator);
    } else if (typeString.equals("das")) {
      loadDASResource(locator, newTracks);
    } else if (typeString.endsWith(".vcf.list")) {
      loadVCFListFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".trio")) {
      loadTrioData(locator);
    } else if (typeString.endsWith("varlist")) {
      VariantListManager.loadVariants(locator);
    } else if (typeString.endsWith("samplepathmap")) {
      VariantListManager.loadSamplePathMap(locator);
    } else if (typeString.endsWith(".rnai.gct")) {
      // Must precede the generic ".gct" test, which would also match this suffix.
      loadRnaiGctFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".gct")
        || typeString.endsWith("res")
        || typeString.endsWith("tab")) {
      loadGctFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) {
      loadGbkFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".cn")
        || typeString.endsWith(".xcn")
        || typeString.endsWith(".snp")
        || typeString.endsWith(".igv")
        || typeString.endsWith(".loh")) {
      loadIGVFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".cbs")
        || typeString.endsWith(".seg")
        || typeString.endsWith("glad")
        || typeString.endsWith("birdseye_canary_calls")
        || typeString.endsWith(".seg.zip")) {
      loadSegFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".gistic")) {
      loadGisticFile(locator, newTracks);
    } else if (typeString.endsWith(".gs")) {
      loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome);
    } else if (typeString.endsWith(".riger")) {
      loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome);
    } else if (typeString.endsWith(".hp")) {
      loadRNAiHPScoreFile(locator);
    } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) {
      loadSyntentyMapping(locator, newTracks);
    } else if (typeString.endsWith(".sam")
        || typeString.endsWith(".bam")
        || typeString.endsWith(".cram")
        || typeString.endsWith(".sam.list")
        || typeString.endsWith(".bam.list")
        || typeString.endsWith(".aligned")
        || typeString.endsWith(".sai")
        || typeString.endsWith(".bai")
        || typeString.equals("alist")
        || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) {
      loadAlignmentsTrack(locator, newTracks, genome);
    } else if (typeString.endsWith(".wig")
        || typeString.endsWith(".bedgraph")
        || typeString.endsWith(".bdg")
        || typeString.endsWith("cpg.txt")
        || typeString.endsWith(".expr")) {
      loadWigFile(locator, newTracks, genome);
    } else if (typeString.endsWith("fpkm_tracking")
        || typeString.endsWith("gene_exp.diff")
        || typeString.endsWith("cds_exp.diff")) {
      loadCufflinksFile(locator, newTracks, genome);
    } else if (typeString.contains(".dranger")) {
      loadDRangerFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) {
      // Must precede the generic ".ibf"/".tdf" test further down.
      loadEwigIBFFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".bw")
        || typeString.endsWith(".bb")
        || typeString.endsWith(".bigwig")
        || typeString.endsWith(".bigbed")) {
      loadBWFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) {
      loadTDFFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".counts")) {
      loadGobyCountsArchive(locator, newTracks, genome);
    } else if (WiggleParser.isWiggle(locator)) {
      // Wiggle data that was not recognised by extension above.
      loadWigFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".maf")) {
      loadMultipleAlignmentTrack(locator, newTracks, genome);
    } else if (typeString.endsWith(".maf.dict")) {
      // Same loader as ".maf".
      loadMultipleAlignmentTrack(locator, newTracks, genome);
    } else if (typeString.contains(".peak.bin")) {
      loadPeakTrack(locator, newTracks, genome);
    } else if (typeString.endsWith("mage-tab") || ExpressionFileParser.parsableMAGE_TAB(locator)) {
      // Tag the locator as MAGE_TAB before handing it to the GCT loader.
      locator.setDescription("MAGE_TAB");
      loadGctFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".bp")) {
      loadBasePairFile(locator, newTracks, genome);
    } else if (GWASParser.isGWASFile(typeString)) {
      loadGWASFile(locator, newTracks, genome);
    } else if (GobyAlignmentQueryReader.supportsFileType(path)) {
      loadAlignmentsTrack(locator, newTracks, genome);
    } else if (typeString.endsWith(".list")) {
      // This should be deprecated
      loadListFile(locator, newTracks, genome);
    } else if (typeString.endsWith(".smap")) {
      loadSMAPFile(locator, newTracks, genome);
    } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) {
      // Generic Tribble path: any format with a codec, unless explicitly excluded.
      loadTribbleFile(locator, newTracks, genome);
    } else if (handler != null) {
      // Custom loader specified
      log.info(String.format("Loading %s with %s", path, handler));
      handler.load(path, newTracks);
    } else if (AttributeManager.isSampleInfoFile(locator)) {
      // This might be a sample information file.
      AttributeManager.getInstance().loadSampleInfo(locator);
    } else {
      // Nothing matched: report to the user; the (empty) track list is still returned.
      MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension");
    }
    // Track line: parsed once here and applied to each new track below.
    TrackProperties tp = null;
    String trackLine = locator.getTrackLine();
    if (trackLine != null) {
      tp = new TrackProperties();
      ParsingUtils.parseTrackLine(trackLine, tp);
    }
    // Copy locator-level settings onto every track created for this locator.
    for (Track track : newTracks) {
      if (locator.getFeatureInfoURL() != null) {
        track.setUrl(locator.getFeatureInfoURL());
      }
      if (tp != null) {
        track.setProperties(tp);
      }
      if (locator.getColor() != null) {
        track.setColor(locator.getColor());
      }
      if (locator.getSampleId() != null) {
        track.setSampleId(locator.getSampleId());
      }
    }
    return newTracks;
  } catch (Exception e) {
    // Exception classes listed in NOLogExceptions are rethrown without being logged.
    if (!NOLogExceptions.contains(e.getClass())) {
      log.error(e.getMessage(), e);
    }
    // NOTE(review): only the message is propagated; the original exception is not attached
    // as a cause.
    throw new DataLoadException(e.getMessage());
  }
}
/** * Create an index for an alignment or feature file The output index will have the same base name * is the input file, although it may be in a different directory. An appropriate index extension * (.sai, .idx, etc.) will be appended. * * @param ifile * @param typeString * @param outputDir * @param indexType * @param binSize * @throws IOException */ public String doIndex( String ifile, String typeString, String outputDir, int indexType, int binSize) throws IOException { File inputFile = new File(ifile); if (outputDir == null) { outputDir = inputFile.getParent(); } String outputFileName = (new File(outputDir, inputFile.getName())).getAbsolutePath(); if (typeString.endsWith("gz")) { System.out.println("Cannot index a gzipped file"); throw new PreprocessingException("Cannot index a gzipped file"); } if (typeString.endsWith("bam")) { String msg = "Cannot index a BAM file. Use the samtools package for sorting and indexing BAM files."; System.out.println(msg); throw new PreprocessingException(msg); } // We have different naming conventions for different index files if (typeString.endsWith("sam") && !outputFileName.endsWith(".sai")) { outputFileName += ".sai"; } else if (typeString.endsWith("bam") && !outputFileName.endsWith(".bai")) { outputFileName += ".bai"; } else if (typeString.endsWith("fa") && !outputFileName.endsWith(".fai")) { outputFileName += ".fai"; } else if (typeString.endsWith("fasta") && !outputFileName.endsWith(".fai")) { outputFileName += ".fai"; } else if (!typeString.endsWith("sam") && !typeString.endsWith("bam") && !outputFileName.endsWith(".idx")) { outputFileName += ".idx"; } File outputFile = new File(outputFileName); // Sam/FASTA files are special try { if (typeString.endsWith("sam")) { AlignmentIndexer indexer = AlignmentIndexer.getInstance(inputFile, null, null); indexer.createSamIndex(outputFile); return outputFileName; } else if (typeString.equals(".fa") || typeString.equals(".fasta")) { 
FastaUtils.createIndexFile(inputFile.getAbsolutePath(), outputFileName); return outputFileName; } } catch (Exception e) { e.printStackTrace(); // Delete output file as it is probably corrupt if (outputFile.exists()) { outputFile.delete(); } } Genome genome = null; // <= don't do chromosome conversion FeatureCodec codec = CodecFactory.getCodec(ifile, genome); if (codec != null) { try { createTribbleIndex(ifile, outputFile, indexType, binSize, codec); } catch (TribbleException.MalformedFeatureFile e) { StringBuffer buf = new StringBuffer(); buf.append("<html>Files must be sorted by start position prior to indexing.<br>"); buf.append(e.getMessage()); buf.append( "<br><br>Note: igvtools can be used to sort the file, select \"File > Run igvtools...\"."); MessageUtils.showMessage(buf.toString()); } } else { throw new DataLoadException("Unknown File Type", ifile); } System.out.flush(); return outputFileName; }
/**
 * Reports whether a codec is available for the given resource.
 *
 * @param locator the resource to check
 * @return {@code true} if {@code CodecFactory.getCodec} returns a codec for the locator when
 *     no genome is supplied, {@code false} otherwise
 */
public static boolean canParse(ResourceLocator locator) {
  if (CodecFactory.getCodec(locator, null) == null) {
    return false;
  }
  return true;
}