/** * Note: This is an exact copy of the method in ExpressionFileParser. Refactor to merge these two * parsers, or share a common base class. * * @param comment * @param dataset */ private void parseDirective(String comment, IGVDataset dataset) { String tmp = comment.substring(1, comment.length()); if (tmp.startsWith("track")) { ParsingUtils.parseTrackLine(tmp, dataset.getTrackProperties()); } else if (tmp.startsWith("columns")) { parseColumnLine(tmp); } else { String[] tokens = tmp.split("="); if (tokens.length != 2) { return; } String key = tokens[0].trim().toLowerCase(); if (key.equals("name")) { dataset.setName(tokens[1].trim()); } else if (key.equals("type")) { try { dataset.setTrackType(TrackType.valueOf(tokens[1].trim().toUpperCase())); } catch (Exception exception) { // Ignore } } else if (key.equals("coords")) { startBase = Integer.parseInt(tokens[1].trim()); } } }
public void loadTDFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) { log.debug("Loading TDF file " + locator.getPath()); TDFReader reader = TDFReader.getReader(locator); TrackType type = reader.getTrackType(); TrackProperties props = null; String trackLine = reader.getTrackLine(); if (trackLine != null && trackLine.length() > 0) { props = new TrackProperties(); ParsingUtils.parseTrackLine(trackLine, props); } // In case of conflict between the resource locator display name and the track properties name, // use the resource locator String name = locator.getName(); if (name != null && props != null) { props.setName(name); } if (name == null) { name = props == null ? locator.getTrackName() : props.getName(); } int trackNumber = 0; String path = locator.getPath(); boolean multiTrack = reader.getTrackNames().length > 1; for (String heading : reader.getTrackNames()) { String trackId = multiTrack ? path + "_" + heading : path; String trackName = multiTrack ? heading : name; final DataSource dataSource = locator.getPath().endsWith(".counts") ? new GobyCountArchiveDataSource(locator) : new TDFDataSource(reader, trackNumber, heading, genome); DataSourceTrack track = new DataSourceTrack(locator, trackId, trackName, dataSource); String displayName = (name == null || multiTrack) ? heading : name; track.setName(displayName); track.setTrackType(type); if (props != null) { track.setProperties(props); } newTracks.add(track); trackNumber++; } }
/**
 * Creates a single {@code EWigTrack} for the resource and appends it to {@code newTracks}.
 *
 * <p>If the TDF reader for the resource's path supplies a non-empty track line, the
 * properties parsed from it are applied to the track before its name is set.
 *
 * @param locator   the resource to load
 * @param newTracks list that receives the created track
 * @param genome    genome handed to the {@code EWigTrack}
 */
private void loadEwigIBFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    final TDFReader reader = TDFReader.getReader(locator.getPath());
    final String trackLine = reader.getTrackLine();
    final boolean hasTrackLine = trackLine != null && !trackLine.isEmpty();

    TrackProperties props = null;
    if (hasTrackLine) {
        props = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, props);
    }

    final EWigTrack track = new EWigTrack(locator, genome);
    if (props != null) {
        track.setProperties(props);
    }
    // The name is assigned after the properties have been applied.
    track.setName(locator.getTrackName());

    newTracks.add(track);
}
/** * Load all features in this file. * * @param reader * @param maxLines * @return */ public List<org.broad.tribble.Feature> loadFeatures(BufferedReader reader, int maxLines) { List<org.broad.tribble.Feature> features = new ArrayList<org.broad.tribble.Feature>(); String nextLine = null; int nLines = 0; try { while ((nextLine = reader.readLine()) != null) { nextLine = nextLine.trim(); if (nextLine.length() == 0) continue; nLines++; if ((maxLines > 0) && (nLines > maxLines)) { break; } try { if (nextLine.startsWith("#")) { if (nextLine.startsWith("#type")) { String[] tokens = Globals.equalPattern.split(nextLine); if (tokens.length > 1) { try { // TODO: type is not currently used, is there any reason to keep this? TrackType type = TrackType.valueOf(tokens[1]); } catch (Exception e) { log.error("Error converting track type: " + tokens[1]); } } } else if (nextLine.startsWith("#track")) { TrackProperties tp = new TrackProperties(); ParsingUtils.parseTrackLine(nextLine, tp); setTrackProperties(tp); if (tp.isGffTags()) { gffTags = true; } } else if (nextLine.startsWith("#coords")) { try { String[] tokens = Globals.equalPattern.split(nextLine); startBase = Integer.parseInt(tokens[1]); } catch (Exception e) { log.error("Error parsing coords line: " + nextLine, e); } } else if (nextLine.startsWith("#gffTags")) { gffTags = true; } } else { Feature feature = parseLine(nextLine); if (feature != null) { features.add(feature); } } } catch (NumberFormatException e) { // Expected condition -- for example comments. don't log as it slows down // the parsing and is not useful information. } } } catch (java.io.EOFException e) { // This exception is due to a known bug with java zip library. Not // in general a real error, and nothing we can do about it in any // event. return features; } catch (Exception e) { if (nextLine != null && nLines != 0) { throw new ParserException(e.getMessage(), e, nLines, nextLine); } else { throw new RuntimeException(e); } } // TODO -- why is this test here? 
This will break igvtools processing of expression files // if (IGV.hasInstance() || Globals.isTesting()) { FeatureDB.addFeatures(features); // } return features; }
/** * Switches on various attributes of locator (mainly locator path extension and whether the * locator is indexed) to call the appropriate loading method. * * @param locator * @param genome * @return */ public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException { final String path = locator.getPath().trim(); log.info("Loading resource, path " + path); try { String typeString = locator.getTypeString(); if (typeString.endsWith(".tbi")) { MessageUtils.showMessage( "<html><b>Error:</b>File type '.tbi' is not recognized. If this is a 'tabix' index <br>" + " load the associated gzipped file, which should have an extension of '.gz'"); } // This list will hold all new tracks created for this locator List<Track> newTracks = new ArrayList<Track>(); String dbUrl = locator.getDBUrl(); LoadHandler handler = getTrackLoaderHandler(typeString); if (dbUrl != null) { this.loadFromDatabase(locator, newTracks, genome); } else if (typeString.endsWith(".dbxml")) { loadFromDBProfile(locator, newTracks); } else if (typeString.endsWith(".gmt")) { loadGMT(locator); } else if (typeString.equals("das")) { loadDASResource(locator, newTracks); } else if (typeString.endsWith(".vcf.list")) { loadVCFListFile(locator, newTracks, genome); } else if (typeString.endsWith(".trio")) { loadTrioData(locator); } else if (typeString.endsWith("varlist")) { VariantListManager.loadVariants(locator); } else if (typeString.endsWith("samplepathmap")) { VariantListManager.loadSamplePathMap(locator); } else if (typeString.endsWith(".rnai.gct")) { loadRnaiGctFile(locator, newTracks, genome); } else if (typeString.endsWith(".gct") || typeString.endsWith("res") || typeString.endsWith("tab")) { loadGctFile(locator, newTracks, genome); } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) { loadGbkFile(locator, newTracks, genome); } else if (typeString.endsWith(".cn") || typeString.endsWith(".xcn") || typeString.endsWith(".snp") || typeString.endsWith(".igv") || 
typeString.endsWith(".loh")) { loadIGVFile(locator, newTracks, genome); } else if (typeString.endsWith(".cbs") || typeString.endsWith(".seg") || typeString.endsWith("glad") || typeString.endsWith("birdseye_canary_calls") || typeString.endsWith(".seg.zip")) { loadSegFile(locator, newTracks, genome); } else if (typeString.endsWith(".gistic")) { loadGisticFile(locator, newTracks); } else if (typeString.endsWith(".gs")) { loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome); } else if (typeString.endsWith(".riger")) { loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome); } else if (typeString.endsWith(".hp")) { loadRNAiHPScoreFile(locator); } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) { loadSyntentyMapping(locator, newTracks); } else if (typeString.endsWith(".sam") || typeString.endsWith(".bam") || typeString.endsWith(".cram") || typeString.endsWith(".sam.list") || typeString.endsWith(".bam.list") || typeString.endsWith(".aligned") || typeString.endsWith(".sai") || typeString.endsWith(".bai") || typeString.equals("alist") || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) { loadAlignmentsTrack(locator, newTracks, genome); } else if (typeString.endsWith(".wig") || typeString.endsWith(".bedgraph") || typeString.endsWith(".bdg") || typeString.endsWith("cpg.txt") || typeString.endsWith(".expr")) { loadWigFile(locator, newTracks, genome); } else if (typeString.endsWith("fpkm_tracking") || typeString.endsWith("gene_exp.diff") || typeString.endsWith("cds_exp.diff")) { loadCufflinksFile(locator, newTracks, genome); } else if (typeString.contains(".dranger")) { loadDRangerFile(locator, newTracks, genome); } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) { loadEwigIBFFile(locator, newTracks, genome); } else if (typeString.endsWith(".bw") || typeString.endsWith(".bb") || typeString.endsWith(".bigwig") || typeString.endsWith(".bigbed")) { 
loadBWFile(locator, newTracks, genome); } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) { loadTDFFile(locator, newTracks, genome); } else if (typeString.endsWith(".counts")) { loadGobyCountsArchive(locator, newTracks, genome); } else if (WiggleParser.isWiggle(locator)) { loadWigFile(locator, newTracks, genome); } else if (typeString.endsWith(".maf")) { loadMultipleAlignmentTrack(locator, newTracks, genome); } else if (typeString.endsWith(".maf.dict")) { loadMultipleAlignmentTrack(locator, newTracks, genome); } else if (typeString.contains(".peak.bin")) { loadPeakTrack(locator, newTracks, genome); } else if (typeString.endsWith("mage-tab") || ExpressionFileParser.parsableMAGE_TAB(locator)) { locator.setDescription("MAGE_TAB"); loadGctFile(locator, newTracks, genome); } else if (typeString.endsWith(".bp")) { loadBasePairFile(locator, newTracks, genome); } else if (GWASParser.isGWASFile(typeString)) { loadGWASFile(locator, newTracks, genome); } else if (GobyAlignmentQueryReader.supportsFileType(path)) { loadAlignmentsTrack(locator, newTracks, genome); } else if (typeString.endsWith(".list")) { // This should be deprecated loadListFile(locator, newTracks, genome); } else if (typeString.endsWith(".smap")) { loadSMAPFile(locator, newTracks, genome); } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) { loadTribbleFile(locator, newTracks, genome); } else if (handler != null) { // Custom loader specified log.info(String.format("Loading %s with %s", path, handler)); handler.load(path, newTracks); } else if (AttributeManager.isSampleInfoFile(locator)) { // This might be a sample information file. 
AttributeManager.getInstance().loadSampleInfo(locator); } else { MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension"); } // Track line TrackProperties tp = null; String trackLine = locator.getTrackLine(); if (trackLine != null) { tp = new TrackProperties(); ParsingUtils.parseTrackLine(trackLine, tp); } for (Track track : newTracks) { if (locator.getFeatureInfoURL() != null) { track.setUrl(locator.getFeatureInfoURL()); } if (tp != null) { track.setProperties(tp); } if (locator.getColor() != null) { track.setColor(locator.getColor()); } if (locator.getSampleId() != null) { track.setSampleId(locator.getSampleId()); } } return newTracks; } catch (Exception e) { if (!NOLogExceptions.contains(e.getClass())) { log.error(e.getMessage(), e); } throw new DataLoadException(e.getMessage()); } }