/**
 * Estimate the number of lines in the given file, or all files in the given directory, or all
 * files referenced in a ".list" file.
 *
 * @param file a file or directory.
 * @return the estimated number of lines
 */
private int estimateLineCount(File file) {
    int nLines = 0;
    if (file.isDirectory() || file.getName().endsWith(".list")) {
        List<File> files = getFilesFromDirOrList(file);
        for (File f : files) {
            if (!f.isDirectory()) {
                nLines += ParsingUtils.estimateLineCount(f.getAbsolutePath());
            }
        }
    } else {
        nLines = ParsingUtils.estimateLineCount(file.getAbsolutePath());
    }
    return nLines;
}
public List<Feature> getFeatures(final String chr) {
    List<Feature> features = featureCache.get(chr);
    if (features == null) {
        final String filename = fileMap.getProperty(chr);
        if (filename != null) {
            BufferedReader reader = null;
            String path = rootDir + "/" + filename;
            try {
                log.info("Loading " + path);
                // Load features here
                ResourceLocator loc = new ResourceLocator(path);
                FeatureParser fp = AbstractFeatureParser.getInstanceFor(loc, genome);
                reader = ParsingUtils.openBufferedReader(loc);
                features = fp.loadFeatures(reader, genome);
                featureCache.put(chr, features);
            } catch (IOException ex) {
                MessageUtils.showMessage("Error loading file: " + path + " (" + ex.toString() + ")");
                log.info("Error loading feature file: " + filename, ex);
            } finally {
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        // Ignore errors closing the reader
                    }
                }
            }
        }
    }
    return featureCache.get(chr);
}
/**
 * Note: This is an exact copy of the method in ExpressionFileParser. Refactor to merge these two
 * parsers, or share a common base class.
 *
 * @param comment
 * @param dataset
 */
private void parseDirective(String comment, IGVDataset dataset) {
    String tmp = comment.substring(1, comment.length());
    if (tmp.startsWith("track")) {
        ParsingUtils.parseTrackLine(tmp, dataset.getTrackProperties());
    } else if (tmp.startsWith("columns")) {
        parseColumnLine(tmp);
    } else {
        String[] tokens = tmp.split("=");
        if (tokens.length != 2) {
            return;
        }
        String key = tokens[0].trim().toLowerCase();
        if (key.equals("name")) {
            dataset.setName(tokens[1].trim());
        } else if (key.equals("type")) {
            try {
                dataset.setTrackType(TrackType.valueOf(tokens[1].trim().toUpperCase()));
            } catch (Exception exception) {
                // Ignore
            }
        } else if (key.equals("coords")) {
            startBase = Integer.parseInt(tokens[1].trim());
        }
    }
}
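// Illustrative sketch (not from the source): the directives this method handles are
// "#"-prefixed header lines such as the following (names and values hypothetical):
//
//   #track name=Sample1
//   #name=MyDataset
//   #type=COPY_NUMBER
//   #coords=1
//
// A header scan might feed each such line to parseDirective before data rows begin:
//
//   String nextLine = reader.readLine();
//   while (nextLine.startsWith("#") || nextLine.trim().length() == 0) {
//       if (nextLine.length() > 0) {
//           parseDirective(nextLine, dataset);
//       }
//       nextLine = reader.readLine();
//   }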
static void calculateEigenvector(String file, String chr, int binsize) throws IOException {
    if (!file.endsWith("hic")) {
        System.err.println("Only 'hic' files are supported");
        System.exit(-1);
    }

    // Load the expected density function, if it exists.
    Map<Integer, DensityFunction> zoomToDensityMap = null;
    String densityFile = file + ".densities";
    if (FileUtils.resourceExists(densityFile)) {
        InputStream is = null;
        try {
            is = ParsingUtils.openInputStream(densityFile);
            zoomToDensityMap = DensityUtil.readDensities(is);
        } finally {
            if (is != null) is.close();
        }
    } else {
        System.err.println("Densities file doesn't exist");
        System.exit(-1);
    }

    SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file);
    Dataset dataset = (new DatasetReader(ss)).read();

    Chromosome[] tmp = dataset.getChromosomes();
    Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>();
    for (Chromosome c : tmp) {
        chromosomeMap.put(c.getName(), c);
    }
    if (!chromosomeMap.containsKey(chr)) {
        System.err.println("Unknown chromosome: " + chr);
        System.exit(-1);
    }

    int zoomIdx = 0;
    boolean found = false;
    for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) {
        if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) {
            found = true;
            break;
        }
    }
    if (!found) {
        System.err.println("Unknown bin size: " + binsize);
        System.exit(-1);
    }

    Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr), chromosomeMap.get(chr));
    MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx);
    final DensityFunction df = zoomToDensityMap.get(zd.getZoom());
    double[] eigenvector = zd.computeEigenvector(df, 0);
    for (double ev : eigenvector) System.out.print(ev + " ");
    System.out.println();
}
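// A minimal usage sketch. The file name, chromosome, and bin size are hypothetical; it assumes
// "data.hic" and its sibling "data.hic.densities" exist, that the chromosome name matches the
// file's own naming, and that 1000000 is one of the configured HiCGlobals.zoomBinSizes.
static void exampleEigenvector() throws IOException {
    calculateEigenvector("data.hic", "1", 1000000);
}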
private void processFrames(Element element) {
    NodeList elements = element.getChildNodes();
    if (elements.getLength() > 0) {
        Map<String, ReferenceFrame> frames = new HashMap();
        for (ReferenceFrame f : FrameManager.getFrames()) {
            frames.put(f.getName(), f);
        }
        List<ReferenceFrame> reorderedFrames = new ArrayList();
        for (int i = 0; i < elements.getLength(); i++) {
            Node childNode = elements.item(i);
            if (childNode.getNodeName().equalsIgnoreCase(SessionElement.FRAME.getText())) {
                String frameName = getAttribute((Element) childNode, SessionAttribute.NAME.getText());
                ReferenceFrame f = frames.get(frameName);
                if (f != null) {
                    reorderedFrames.add(f);
                    try {
                        String chr = getAttribute((Element) childNode, SessionAttribute.CHR.getText());
                        final String startString =
                                getAttribute((Element) childNode, SessionAttribute.START.getText())
                                        .replace(",", "");
                        final String endString =
                                getAttribute((Element) childNode, SessionAttribute.END.getText())
                                        .replace(",", "");
                        int start = ParsingUtils.parseInt(startString);
                        int end = ParsingUtils.parseInt(endString);
                        org.broad.igv.feature.Locus locus = new Locus(chr, start, end);
                        f.jumpTo(locus);
                    } catch (NumberFormatException e) {
                        // Malformed start/end attribute; leave the frame at its current locus
                        e.printStackTrace();
                    }
                }
            }
        }
        if (reorderedFrames.size() > 0) {
            FrameManager.setFrames(reorderedFrames);
        }
    }
    IGV.getInstance().resetFrames();
}
public FeatureDirSource(ResourceLocator locator, Genome genome) throws IOException {
    this.genome = genome;
    featureCache = new LRUCache(this, 3);
    rootLocator = locator;
    setRootDir(locator.getPath());

    fileMap = new Properties();
    InputStream propStream = ParsingUtils.openInputStreamGZ(locator);
    fileMap.load(propStream);
    propStream.close();
}
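// Illustrative sketch: the properties file loaded above maps chromosome names to per-chromosome
// feature files, resolved relative to the root directory (file names hypothetical):
//
//   chr1=chr1.bed
//   chr2=chr2.bed
//   chrX=chrX.bed
//
// A hypothetical construction; getFeatures("chr1") would then lazily load and cache that
// chromosome's features through the LRU cache:
//
//   ResourceLocator locator = new ResourceLocator("/data/features/index.properties");
//   FeatureDirSource source = new FeatureDirSource(locator, genome);
//   List<Feature> features = source.getFeatures("chr1");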
public void loadTDFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    log.debug("Loading TDF file " + locator.getPath());
    TDFReader reader = TDFReader.getReader(locator);
    TrackType type = reader.getTrackType();

    TrackProperties props = null;
    String trackLine = reader.getTrackLine();
    if (trackLine != null && trackLine.length() > 0) {
        props = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, props);
    }

    // In case of conflict between the resource locator display name and the track properties
    // name, use the resource locator.
    String name = locator.getName();
    if (name != null && props != null) {
        props.setName(name);
    }

    if (name == null) {
        name = props == null ? locator.getTrackName() : props.getName();
    }

    int trackNumber = 0;
    String path = locator.getPath();
    boolean multiTrack = reader.getTrackNames().length > 1;

    for (String heading : reader.getTrackNames()) {

        String trackId = multiTrack ? path + "_" + heading : path;
        String trackName = multiTrack ? heading : name;
        final DataSource dataSource = locator.getPath().endsWith(".counts")
                ? new GobyCountArchiveDataSource(locator)
                : new TDFDataSource(reader, trackNumber, heading, genome);
        DataSourceTrack track = new DataSourceTrack(locator, trackId, trackName, dataSource);

        String displayName = (name == null || multiTrack) ? heading : name;
        track.setName(displayName);
        track.setTrackType(type);
        if (props != null) {
            track.setProperties(props);
        }
        newTracks.add(track);
        trackNumber++;
    }
}
public FastaIndexedSequence(String path) throws IOException {

    this.path = path;
    contentLength = ParsingUtils.getContentLength(path);
    String indexPath = path + ".fai";

    // The check below is not useful if the files have been copied or moved, which is always
    // the case for our hosted genomes. It causes lots of spurious warnings.
    // if (ParsingUtils.getLastModified(path) > ParsingUtils.getLastModified(indexPath)) {
    //     log.warn("Index file for " + path + " is older than the file it indexes");
    // }

    index = new FastaIndex(indexPath);
    chromoNamesList = new ArrayList<String>(index.getSequenceNames());
}
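// A minimal usage sketch (paths hypothetical): a samtools-style ".fai" index must sit beside
// the fasta, here "/genomes/hg19.fasta.fai".
//
//   FastaIndexedSequence sequence = new FastaIndexedSequence("/genomes/hg19.fasta");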
private void loadEwigIBFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    TDFReader reader = TDFReader.getReader(locator.getPath());
    TrackProperties props = null;
    String trackLine = reader.getTrackLine();
    if (trackLine != null && trackLine.length() > 0) {
        props = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, props);
    }

    EWigTrack track = new EWigTrack(locator, genome);
    if (props != null) {
        track.setProperties(props);
    }
    track.setName(locator.getTrackName());
    newTracks.add(track);
}
/**
 * Parse a limited number of lines in this file and return a list of features found.
 *
 * @param locator
 * @param maxLines
 * @return the list of features parsed
 */
public List<org.broad.tribble.Feature> loadFeatures(ResourceLocator locator, int maxLines) {

    BufferedReader reader = null;
    try {
        reader = ParsingUtils.openBufferedReader(locator);
        return loadFeatures(reader, maxLines);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // Ignore errors closing the reader
            }
        }
    }
}
public static boolean parsableMAGE_TAB(ResourceLocator file) throws IOException {
    AsciiLineReader reader = null;
    try {
        reader = ParsingUtils.openAsciiReader(file);
        String nextLine = null;

        // Skip the first row
        reader.readLine();

        // Check the second row for MAGE-TAB identifiers
        if ((nextLine = reader.readLine()) != null
                && (nextLine.contains("Reporter REF")
                        || nextLine.contains("Composite Element REF")
                        || nextLine.contains("Term Source REF")
                        || nextLine.contains("CompositeElement REF")
                        || nextLine.contains("TermSource REF")
                        || nextLine.contains("Coordinates REF"))) {

            int count = 0;
            // Check if this MAGE-TAB data matrix can be parsed by this class
            while ((nextLine = reader.readLine()) != null && count < 5) {
                nextLine = nextLine.trim();
                if (nextLine.startsWith("SNP_A") || nextLine.startsWith("CN_")) {
                    return true;
                }
                count++;
            }
            return false;
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return false;
}
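// Illustrative only: a MAGE-TAB data matrix accepted by this check carries one of the REF
// identifiers in its second row and early data rows keyed by SNP_A or CN_ probes
// (tab-separated; all names and values hypothetical):
//
//   Hybridization REF    Sample1    Sample2
//   Reporter REF         value      value
//   SNP_A-1780270        0.123      -0.456
//   CN_497981            1.850      2.010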
/**
 * Load features from this reader, stopping after maxLines lines if maxLines > 0.
 *
 * @param reader
 * @param maxLines
 * @return the list of features parsed
 */
public List<org.broad.tribble.Feature> loadFeatures(BufferedReader reader, int maxLines) {

    List<org.broad.tribble.Feature> features = new ArrayList<org.broad.tribble.Feature>();
    String nextLine = null;
    int nLines = 0;
    try {
        while ((nextLine = reader.readLine()) != null) {
            nextLine = nextLine.trim();
            if (nextLine.length() == 0) continue;
            nLines++;
            if ((maxLines > 0) && (nLines > maxLines)) {
                break;
            }
            try {
                if (nextLine.startsWith("#")) {
                    if (nextLine.startsWith("#type")) {
                        String[] tokens = Globals.equalPattern.split(nextLine);
                        if (tokens.length > 1) {
                            try {
                                // TODO: type is not currently used, is there any reason to keep this?
                                TrackType type = TrackType.valueOf(tokens[1]);
                            } catch (Exception e) {
                                log.error("Error converting track type: " + tokens[1]);
                            }
                        }
                    } else if (nextLine.startsWith("#track")) {
                        TrackProperties tp = new TrackProperties();
                        ParsingUtils.parseTrackLine(nextLine, tp);
                        setTrackProperties(tp);
                        if (tp.isGffTags()) {
                            gffTags = true;
                        }
                    } else if (nextLine.startsWith("#coords")) {
                        try {
                            String[] tokens = Globals.equalPattern.split(nextLine);
                            startBase = Integer.parseInt(tokens[1]);
                        } catch (Exception e) {
                            log.error("Error parsing coords line: " + nextLine, e);
                        }
                    } else if (nextLine.startsWith("#gffTags")) {
                        gffTags = true;
                    }
                } else {
                    Feature feature = parseLine(nextLine);
                    if (feature != null) {
                        features.add(feature);
                    }
                }
            } catch (NumberFormatException e) {
                // Expected condition -- for example comments. Don't log, as it slows down
                // the parsing and is not useful information.
            }
        }
    } catch (java.io.EOFException e) {
        // This exception is due to a known bug with the java zip library. Not in general a
        // real error, and nothing we can do about it in any event.
        return features;
    } catch (Exception e) {
        if (nextLine != null && nLines != 0) {
            throw new ParserException(e.getMessage(), e, nLines, nextLine);
        } else {
            throw new RuntimeException(e);
        }
    }

    // TODO -- why is this test here? This will break igvtools processing of expression files
    // if (IGV.hasInstance() || Globals.isTesting()) {
    FeatureDB.addFeatures(features);
    // }
    return features;
}
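// Illustrative only: a feature file header exercising each directive handled above
// (track name and coordinate base hypothetical), followed by a data row:
//
//   #type=COPY_NUMBER
//   #track name=MyTrack
//   #coords=1
//   #gffTags
//   chr1    1000    2000    featureA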
static void dumpMatrix(String file, String chr1, String chr2, int binsize, String type)
        throws IOException {

    if (!file.endsWith("hic")) {
        System.err.println("Only 'hic' files are supported");
        System.exit(-1);
    }

    // Load the expected density function, if it exists.
    Map<Integer, DensityFunction> zoomToDensityMap = null;
    if (type.equals("oe") || type.equals("pearson")) {
        String densityFile = file + ".densities";
        if (FileUtils.resourceExists(densityFile)) {
            InputStream is = null;
            try {
                is = ParsingUtils.openInputStream(densityFile);
                zoomToDensityMap = DensityUtil.readDensities(is);
            } finally {
                if (is != null) is.close();
            }
        } else {
            System.err.println("Densities file doesn't exist, cannot calculate O/E or Pearson's");
            System.exit(-1);
        }
    }

    SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file);
    Dataset dataset = (new DatasetReader(ss)).read();

    Chromosome[] tmp = dataset.getChromosomes();
    Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>();
    for (Chromosome c : tmp) {
        chromosomeMap.put(c.getName(), c);
    }

    if (!chromosomeMap.containsKey(chr1)) {
        System.err.println("Unknown chromosome: " + chr1);
        System.exit(-1);
    } else if (!chromosomeMap.containsKey(chr2)) {
        System.err.println("Unknown chromosome: " + chr2);
        System.exit(-1);
    }

    if (type.equals("oe") || type.equals("pearson")) {
        if (!chr1.equals(chr2)) {
            System.err.println("Chromosome " + chr1 + " not equal to Chromosome " + chr2);
            System.err.println("Currently only intrachromosomal O/E and Pearson's are supported.");
            System.exit(-1);
        }
    }

    int zoomIdx = 0;
    boolean found = false;
    for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) {
        if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) {
            found = true;
            break;
        }
    }
    if (!found) {
        System.err.println("Unknown bin size: " + binsize);
        // Exit as in calculateEigenvector; otherwise zoomIdx would index past the zoom table
        System.exit(-1);
    }

    Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr1), chromosomeMap.get(chr2));
    MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx);
    if (type.equals("oe") || type.equals("pearson")) {
        final DensityFunction df = zoomToDensityMap.get(zd.getZoom());
        if (df == null) {
            System.err.println("Densities not calculated to this resolution.");
            System.exit(-1);
        }
        zd.dumpOE(df, type.equals("oe"));
    } else {
        zd.dump();
    }
}
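// A minimal usage sketch (file name and bin size hypothetical): "oe" and "pearson" require the
// sibling "data.hic.densities" file and an intrachromosomal region; any other type dumps the
// observed matrix.
static void exampleDump() throws IOException {
    dumpMatrix("data.hic", "1", "1", 1000000, "oe");
}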
private void processRootNode(Session session, Node node, HashMap additionalInformation) {

    if ((node == null) || (session == null)) {
        MessageUtils.showMessage("Invalid session file: root node not found");
        return;
    }

    String nodeName = node.getNodeName();
    if (!(nodeName.equalsIgnoreCase(SessionElement.GLOBAL.getText())
            || nodeName.equalsIgnoreCase(SessionElement.SESSION.getText()))) {
        MessageUtils.showMessage(
                "Session files must begin with a \"Global\" or \"Session\" element. Found: " + nodeName);
    }

    process(session, node, additionalInformation);

    Element element = (Element) node;

    // Load the genome, which can be an ID, or a path or URL to a .genome or indexed fasta file.
    String genomeId = getAttribute(element, SessionAttribute.GENOME.getText());
    if (genomeId != null && genomeId.length() > 0) {
        if (genomeId.equals(GenomeManager.getInstance().getGenomeId())) {
            // We don't have to reload the genome, but the gene track for the current genome
            // should be restored.
            Genome genome = GenomeManager.getInstance().getCurrentGenome();
            IGV.getInstance().setGenomeTracks(genome.getGeneTrack());
        } else {
            // Selecting a genome will actually "reset" the session, so we have to save the
            // path and restore it.
            String sessionPath = session.getPath();
            if (IGV.getInstance().getGenomeIds().contains(genomeId)) {
                IGV.getInstance().selectGenomeFromList(genomeId);
            } else {
                String genomePath = genomeId;
                if (!ParsingUtils.pathExists(genomePath)) {
                    genomePath = FileUtils.getAbsolutePath(genomeId, session.getPath());
                }
                if (ParsingUtils.pathExists(genomePath)) {
                    try {
                        IGV.getInstance().loadGenome(genomePath, null);
                    } catch (IOException e) {
                        throw new RuntimeException("Error loading genome: " + genomeId);
                    }
                } else {
                    MessageUtils.showMessage("Warning: Could not locate genome: " + genomeId);
                }
            }
            session.setPath(sessionPath);
        }
    }

    session.setLocus(getAttribute(element, SessionAttribute.LOCUS.getText()));
    session.setGroupTracksBy(getAttribute(element, SessionAttribute.GROUP_TRACKS_BY.getText()));

    String removeEmptyTracks = getAttribute(element, "removeEmptyTracks");
    if (removeEmptyTracks != null) {
        try {
            Boolean b = Boolean.parseBoolean(removeEmptyTracks);
            session.setRemoveEmptyPanels(b);
        } catch (Exception e) {
            log.error("Error parsing removeEmptyTracks string: " + removeEmptyTracks, e);
        }
    }

    String versionString = getAttribute(element, SessionAttribute.VERSION.getText());
    try {
        version = Integer.parseInt(versionString);
    } catch (NumberFormatException e) {
        log.error("Non integer version number in session file: " + versionString);
    }
    session.setVersion(version);

    NodeList elements = element.getChildNodes();
    process(session, elements, additionalInformation);

    // ReferenceFrame.getInstance().invalidateLocationScale();
}
/**
 * Switches on various attributes of locator (mainly locator path extension and whether the
 * locator is indexed) to call the appropriate loading method.
 *
 * @param locator
 * @param genome
 * @return the list of tracks created for this locator
 */
public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException {

    final String path = locator.getPath().trim();
    log.info("Loading resource, path " + path);
    try {
        String typeString = locator.getTypeString();

        if (typeString.endsWith(".tbi")) {
            MessageUtils.showMessage(
                    "<html><b>Error:</b>File type '.tbi' is not recognized. If this is a 'tabix' index <br>"
                            + " load the associated gzipped file, which should have an extension of '.gz'");
        }

        // This list will hold all new tracks created for this locator
        List<Track> newTracks = new ArrayList<Track>();

        String dbUrl = locator.getDBUrl();
        LoadHandler handler = getTrackLoaderHandler(typeString);
        if (dbUrl != null) {
            this.loadFromDatabase(locator, newTracks, genome);
        } else if (typeString.endsWith(".dbxml")) {
            loadFromDBProfile(locator, newTracks);
        } else if (typeString.endsWith(".gmt")) {
            loadGMT(locator);
        } else if (typeString.equals("das")) {
            loadDASResource(locator, newTracks);
        } else if (typeString.endsWith(".vcf.list")) {
            loadVCFListFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".trio")) {
            loadTrioData(locator);
        } else if (typeString.endsWith("varlist")) {
            VariantListManager.loadVariants(locator);
        } else if (typeString.endsWith("samplepathmap")) {
            VariantListManager.loadSamplePathMap(locator);
        } else if (typeString.endsWith(".rnai.gct")) {
            loadRnaiGctFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".gct") || typeString.endsWith("res")
                || typeString.endsWith("tab")) {
            loadGctFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) {
            loadGbkFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".cn") || typeString.endsWith(".xcn")
                || typeString.endsWith(".snp") || typeString.endsWith(".igv")
                || typeString.endsWith(".loh")) {
            loadIGVFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".cbs") || typeString.endsWith(".seg")
                || typeString.endsWith("glad") || typeString.endsWith("birdseye_canary_calls")
                || typeString.endsWith(".seg.zip")) {
            loadSegFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".gistic")) {
            loadGisticFile(locator, newTracks);
        } else if (typeString.endsWith(".gs")) {
            loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome);
        } else if (typeString.endsWith(".riger")) {
            loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome);
        } else if (typeString.endsWith(".hp")) {
            loadRNAiHPScoreFile(locator);
        } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) {
            loadSyntentyMapping(locator, newTracks);
        } else if (typeString.endsWith(".sam") || typeString.endsWith(".bam")
                || typeString.endsWith(".cram") || typeString.endsWith(".sam.list")
                || typeString.endsWith(".bam.list") || typeString.endsWith(".aligned")
                || typeString.endsWith(".sai") || typeString.endsWith(".bai")
                || typeString.equals("alist") || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) {
            loadAlignmentsTrack(locator, newTracks, genome);
        } else if (typeString.endsWith(".wig") || typeString.endsWith(".bedgraph")
                || typeString.endsWith(".bdg") || typeString.endsWith("cpg.txt")
                || typeString.endsWith(".expr")) {
            loadWigFile(locator, newTracks, genome);
        } else if (typeString.endsWith("fpkm_tracking") || typeString.endsWith("gene_exp.diff")
                || typeString.endsWith("cds_exp.diff")) {
            loadCufflinksFile(locator, newTracks, genome);
        } else if (typeString.contains(".dranger")) {
            loadDRangerFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) {
            loadEwigIBFFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".bw") || typeString.endsWith(".bb")
                || typeString.endsWith(".bigwig") || typeString.endsWith(".bigbed")) {
            loadBWFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) {
            loadTDFFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".counts")) {
            loadGobyCountsArchive(locator, newTracks, genome);
        } else if (WiggleParser.isWiggle(locator)) {
            loadWigFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".maf")) {
            loadMultipleAlignmentTrack(locator, newTracks, genome);
        } else if (typeString.endsWith(".maf.dict")) {
            loadMultipleAlignmentTrack(locator, newTracks, genome);
        } else if (typeString.contains(".peak.bin")) {
            loadPeakTrack(locator, newTracks, genome);
        } else if (typeString.endsWith("mage-tab") || ExpressionFileParser.parsableMAGE_TAB(locator)) {
            locator.setDescription("MAGE_TAB");
            loadGctFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".bp")) {
            loadBasePairFile(locator, newTracks, genome);
        } else if (GWASParser.isGWASFile(typeString)) {
            loadGWASFile(locator, newTracks, genome);
        } else if (GobyAlignmentQueryReader.supportsFileType(path)) {
            loadAlignmentsTrack(locator, newTracks, genome);
        } else if (typeString.endsWith(".list")) {
            // This should be deprecated
            loadListFile(locator, newTracks, genome);
        } else if (typeString.endsWith(".smap")) {
            loadSMAPFile(locator, newTracks, genome);
        } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) {
            loadTribbleFile(locator, newTracks, genome);
        } else if (handler != null) {
            // Custom loader specified
            log.info(String.format("Loading %s with %s", path, handler));
            handler.load(path, newTracks);
        } else if (AttributeManager.isSampleInfoFile(locator)) {
            // This might be a sample information file.
            AttributeManager.getInstance().loadSampleInfo(locator);
        } else {
            MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension");
        }

        // Track line
        TrackProperties tp = null;
        String trackLine = locator.getTrackLine();
        if (trackLine != null) {
            tp = new TrackProperties();
            ParsingUtils.parseTrackLine(trackLine, tp);
        }

        for (Track track : newTracks) {
            if (locator.getFeatureInfoURL() != null) {
                track.setUrl(locator.getFeatureInfoURL());
            }
            if (tp != null) {
                track.setProperties(tp);
            }
            if (locator.getColor() != null) {
                track.setColor(locator.getColor());
            }
            if (locator.getSampleId() != null) {
                track.setSampleId(locator.getSampleId());
            }
        }
        return newTracks;
    } catch (Exception e) {
        if (!NOLogExceptions.contains(e.getClass())) {
            log.error(e.getMessage(), e);
        }
        throw new DataLoadException(e.getMessage());
    }
}
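// A minimal usage sketch, assuming this method lives on IGV's TrackLoader and a current genome
// is loaded (the file name is hypothetical; its extension selects the loader branch above):
//
//   Genome genome = GenomeManager.getInstance().getCurrentGenome();
//   List<Track> tracks = new TrackLoader().load(new ResourceLocator("sample.seg"), genome);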
public AsciiPairIterator(String path, Map<String, Integer> chromosomeOrdinals) throws IOException {
    this.reader = org.broad.igv.util.ParsingUtils.openBufferedReader(path);
    this.chromosomeOrdinals = chromosomeOrdinals;
    advance();
}
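// A minimal usage sketch: the ordinals map assigns each chromosome name an index for the pairs
// file (path and names hypothetical):
//
//   Map<String, Integer> chromosomeOrdinals = new HashMap<String, Integer>();
//   chromosomeOrdinals.put("1", 0);
//   chromosomeOrdinals.put("2", 1);
//   AsciiPairIterator pairs = new AsciiPairIterator("/data/contacts.txt", chromosomeOrdinals);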
/**
 * Scan the datafile for chromosome breaks.
 *
 * @param dataset
 * @return summaries for each chromosome found, in file order
 */
public List<ChromosomeSummary> scan(IGVDataset dataset) {

    int estLineCount = ParsingUtils.estimateLineCount(dataResourceLocator.getPath());
    Map<String, Integer> longestFeatureMap = new HashMap();

    float dataMin = 0;
    float dataMax = 0;

    InputStream is = null;
    AsciiLineReader reader = null;
    String nextLine = null;
    ChromosomeSummary chrSummary = null;
    List<ChromosomeSummary> chrSummaries = new ArrayList();
    String[] headings = null;
    WholeGenomeData wgData = null;
    int nRows = 0;

    int headerRows = 0;
    int count = 0;

    boolean logNormalized;
    try {
        int skipColumns = hasCalls ? 2 : 1;

        is = ParsingUtils.openInputStreamGZ(dataResourceLocator);
        reader = new AsciiLineReader(is);

        // Infer datatype from extension. This can be overridden in the comment section.
        if (isCopyNumberFileExt(dataResourceLocator.getPath())) {
            dataset.setTrackType(TrackType.COPY_NUMBER);
            dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
        } else if (isLOHFileExt(dataResourceLocator.getPath())) {
            dataset.setTrackType(TrackType.LOH);
            dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
        } else {
            dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
        }

        // Parse comments and directives, if any
        nextLine = reader.readLine();
        while (nextLine.startsWith("#") || (nextLine.trim().length() == 0)) {
            headerRows++;
            if (nextLine.length() > 0) {
                parseDirective(nextLine, dataset);
            }
            nextLine = reader.readLine();
        }

        if (chrColumn < 0) {
            setColumnDefaults();
        }

        // Parse column headings
        String[] data = nextLine.trim().split("\t");

        // Set last data column
        if (lastDataColumn < 0) {
            lastDataColumn = data.length - 1;
        }

        headings = getHeadings(data, skipColumns);
        dataset.setDataHeadings(headings);

        // Infer if the data is logNormalized by looking for negative data values.
        // Assume it is not until proven otherwise.
        logNormalized = false;

        wgData = new WholeGenomeData(headings);
        int chrRowCount = 0;

        // Status-bar update interval, in rows
        int updateCount = 5000;
        long lastPosition = 0;
        while ((nextLine = reader.readLine()) != null) {

            if (igv != null && ++count % updateCount == 0) {
                igv.setStatusBarMessage("Loaded: " + count + " / " + estLineCount + " (est)");
            }

            String[] tokens = Globals.tabPattern.split(nextLine, -1);
            int nTokens = tokens.length;
            if (nTokens > 0) {
                String thisChr = genome.getChromosomeAlias(tokens[chrColumn]);
                if (chrSummary == null || !thisChr.equals(chrSummary.getName())) {
                    // Update whole genome and previous chromosome summary, unless this is
                    // the first chromosome
                    if (chrSummary != null) {
                        updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
                        chrSummary.setNDataPoints(nRows);
                    }

                    // Start the next chromosome
                    chrSummary = new ChromosomeSummary(thisChr, lastPosition);
                    chrSummaries.add(chrSummary);
                    nRows = 0;
                    wgData = new WholeGenomeData(headings);
                    chrRowCount = 0;
                }
                lastPosition = reader.getPosition();

                int location = -1;
                try {
                    location = ParsingUtils.parseInt(tokens[startColumn]) - startBase;
                } catch (NumberFormatException numberFormatException) {
                    log.error("Column " + tokens[startColumn] + " is not a number");
                    throw new ParserException(
                            "Column " + (startColumn + 1) + " must contain an integer value."
                                    + " Found: " + tokens[startColumn],
                            count + headerRows, nextLine);
                }

                int length = 1;
                if (hasEndLocations) {
                    try {
                        length = ParsingUtils.parseInt(tokens[endColumn].trim()) - location + 1;
                    } catch (NumberFormatException numberFormatException) {
                        log.error("Column " + tokens[endColumn] + " is not a number");
                        throw new ParserException(
                                "Column " + (endColumn + 1) + " must contain an integer value."
                                        + " Found: " + tokens[endColumn],
                                count + headerRows, nextLine);
                    }
                }

                updateLongestFeature(longestFeatureMap, thisChr, length);

                if (wgData.locations.size() > 0
                        && wgData.locations.get(wgData.locations.size() - 1) > location) {
                    throw new ParserException(
                            "File is not sorted, .igv and .cn files must be sorted by start position."
                                    + " Use igvtools (File > Run igvtools..) to sort the file.",
                            count + headerRows);
                }
                wgData.locations.add(location);

                for (int idx = 0; idx < headings.length; idx++) {
                    int i = firstDataColumn + idx * skipColumns;
                    float copyNo = i < tokens.length ? readFloat(tokens[i]) : Float.NaN;
                    if (!Float.isNaN(copyNo)) {
                        dataMin = Math.min(dataMin, copyNo);
                        dataMax = Math.max(dataMax, copyNo);
                    }
                    if (copyNo < 0) {
                        logNormalized = true;
                    }
                    String heading = headings[idx];
                    wgData.data.get(heading).add(copyNo);
                }
                nRows++;
            }
            chrRowCount++;
        }

        dataset.setLongestFeatureMap(longestFeatureMap);

    } catch (ParserException pe) {
        throw pe;
    } catch (FileNotFoundException e) {
        log.error("File not found: " + dataResourceLocator);
        throw new RuntimeException(e);
    } catch (Exception e) {
        log.error("Exception when loading: " + dataResourceLocator.getPath(), e);
        if (nextLine != null && (count + headerRows != 0)) {
            throw new ParserException(e.getMessage(), e, count + headerRows, nextLine);
        } else {
            throw new RuntimeException(e);
        }
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                log.error("Error closing IGVDataset stream", e);
            }
        }
    }

    // Update last chromosome
    if (chrSummary != null) {
        updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
        chrSummary.setNDataPoints(nRows);
    }

    dataset.setLogNormalized(logNormalized);
    dataset.setDataMin(dataMin);
    dataset.setDataMax(dataMax);

    return chrSummaries;
}
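// Illustrative only: a minimal sorted ".igv"-style file as scanned above (tab-separated;
// probe names and values hypothetical). Data columns follow chromosome/start/end/probe:
//
//   #type=COPY_NUMBER
//   chromosome    start    end     probe    Sample1    Sample2
//   chr1          1000     1200    p1       0.25       -0.10
//   chr1          5000     5300    p2       1.75       0.30
//   chr2          800      950     p3       -0.60      0.85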
/**
 * Load data for a single chromosome.
 *
 * @param chrSummary
 * @param dataHeaders
 * @return the data for the chromosome described by chrSummary
 */
public ChromosomeData loadChromosomeData(ChromosomeSummary chrSummary, String[] dataHeaders) {

    try {
        int skipColumns = hasCalls ? 2 : 1;

        // Get an estimate of the number of snps (rows). THIS IS ONLY AN ESTIMATE.
        int nRowsEst = chrSummary.getNDataPts();

        SeekableStream is = IGVSeekableStreamFactory.getStreamFor(dataResourceLocator.getPath());
        is.seek(chrSummary.getStartPosition());
        AsciiLineReader reader = new AsciiLineReader(is);

        // Create containers to hold data
        IntArrayList startLocations = new IntArrayList(nRowsEst);
        IntArrayList endLocations = (hasEndLocations ? new IntArrayList(nRowsEst) : null);
        List<String> probes = new ArrayList(nRowsEst);

        Map<String, FloatArrayList> dataMap = new HashMap();
        for (String h : dataHeaders) {
            dataMap.put(h, new FloatArrayList(nRowsEst));
        }

        // Begin loop through rows
        String chromosome = chrSummary.getName();
        boolean chromosomeStarted = false;
        String nextLine = reader.readLine();

        while ((nextLine != null) && (nextLine.trim().length() > 0)) {

            if (!nextLine.startsWith("#")) {
                try {
                    String[] tokens = Globals.tabPattern.split(nextLine, -1);
                    String thisChromosome = genome.getChromosomeAlias(tokens[chrColumn].trim());
                    if (thisChromosome.equals(chromosome)) {
                        chromosomeStarted = true;

                        // The probe. A new string is created to prevent holding on to the
                        // entire row through a substring reference.
                        String probe = new String(tokens[probeColumn]);
                        probes.add(probe);

                        int start = ParsingUtils.parseInt(tokens[startColumn].trim()) - startBase;
                        if (hasEndLocations) {
                            endLocations.add(ParsingUtils.parseInt(tokens[endColumn].trim()));
                        }
                        startLocations.add(start);

                        for (int idx = 0; idx < dataHeaders.length; idx++) {
                            int i = firstDataColumn + idx * skipColumns;
                            float copyNo = i <= lastDataColumn ? readFloat(tokens[i]) : Float.NaN;
                            String heading = dataHeaders[idx];
                            dataMap.get(heading).add(copyNo);
                        }

                    } else if (chromosomeStarted) {
                        break;
                    }
                } catch (NumberFormatException numberFormatException) {
                    // Skip line
                    log.info("Skipping line (NumberFormatException) " + nextLine);
                }
            }

            nextLine = reader.readLine();
        }

        // Loop complete
        ChromosomeData cd = new ChromosomeData(chrSummary.getName());
        cd.setProbes(probes.toArray(new String[] {}));
        cd.setStartLocations(startLocations.toArray());
        if (hasEndLocations) {
            cd.setEndLocations(endLocations.toArray());
        }

        for (String h : dataHeaders) {
            cd.setData(h, dataMap.get(h).toArray());
        }

        return cd;

    } catch (IOException ex) {
        log.error("Error parsing cn file", ex);
        throw new RuntimeException("Error parsing cn file", ex);
    }
}
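// A hypothetical end-to-end flow, assuming scan() and loadChromosomeData(...) are called on the
// same parser instance, and that getDataHeadings() is an accessor mirroring the
// setDataHeadings(...) call made during the scan (both names illustrative, not confirmed):
//
//   List<ChromosomeSummary> summaries = parser.scan(dataset);
//   for (ChromosomeSummary summary : summaries) {
//       ChromosomeData cd = parser.loadChromosomeData(summary, dataset.getDataHeadings());
//   }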