예제 #1
0
  /**
   * Estimate the number of lines in the given file, or in all files within the given directory, or
   * in all files referenced by a ".list" file.
   *
   * @param file a file, a directory, or a ".list" file of paths
   * @return the estimated total line count across all referenced files
   */
  private int estimateLineCount(File file) {

    // Plain file: estimate it directly.
    if (!file.isDirectory() && !file.getName().endsWith(".list")) {
      return ParsingUtils.estimateLineCount(file.getAbsolutePath());
    }

    // Directory or ".list" file: sum estimates over every contained regular file.
    int total = 0;
    for (File child : getFilesFromDirOrList(file)) {
      if (!child.isDirectory()) {
        total += ParsingUtils.estimateLineCount(child.getAbsolutePath());
      }
    }
    return total;
  }
예제 #2
0
  /**
   * Return the features for the given chromosome, loading and caching them on first access.
   *
   * <p>The chromosome-to-filename mapping comes from {@code fileMap}; files are resolved relative
   * to {@code rootDir}. Load failures are reported to the user and logged, and result in a null
   * return for that chromosome.
   *
   * @param chr chromosome name (a key of {@code fileMap})
   * @return the cached or freshly loaded feature list, or null if no file is mapped for this
   *     chromosome or loading failed
   */
  public List<Feature> getFeatures(final String chr) {

    List<Feature> features = featureCache.get(chr);
    if (features == null) {
      final String filename = fileMap.getProperty(chr);
      if (filename != null) {
        BufferedReader reader = null;
        String path = rootDir + "/" + filename;
        try {
          log.info("Loading " + path);
          // Load features here
          ResourceLocator loc = new ResourceLocator(path);

          FeatureParser fp = AbstractFeatureParser.getInstanceFor(loc, genome);
          reader = ParsingUtils.openBufferedReader(loc);
          features = fp.loadFeatures(reader, genome);
          featureCache.put(chr, features);
        } catch (IOException ex) {
          MessageUtils.showMessage("Error loading file: " + path + " (" + ex.toString() + ")");
          log.info("Error loading feature file: " + filename, ex);
        } finally {
          if (reader != null) {
            try {
              reader.close();
            } catch (IOException ignored) {
              // A failure to close after the read completed is non-fatal; ignore.
            }
          }
        }
      }
    }
    // Return the local reference rather than re-querying the cache: the original double lookup
    // could return null if the LRU cache evicted the entry immediately after a successful load.
    return features;
  }
예제 #3
0
  /**
   * Parse a "#" directive line: a track line, a columns line, or a {@code key=value} pair
   * ("name", "type", or "coords").
   *
   * <p>Note: This is an exact copy of the method in ExpressionFileParser. Refactor to merge these
   * two parsers, or share a common base class.
   *
   * @param comment the full directive line, including the leading '#'
   * @param dataset the dataset whose properties are updated from the directive
   */
  private void parseDirective(String comment, IGVDataset dataset) {

    // Strip the leading '#'; substring(1) is equivalent to substring(1, length()).
    String tmp = comment.substring(1);
    if (tmp.startsWith("track")) {
      ParsingUtils.parseTrackLine(tmp, dataset.getTrackProperties());

    } else if (tmp.startsWith("columns")) {
      parseColumnLine(tmp);
    } else {
      String[] tokens = tmp.split("=");
      if (tokens.length != 2) {
        return; // not a key=value directive; ignore silently
      }

      String key = tokens[0].trim().toLowerCase();
      if (key.equals("name")) {
        dataset.setName(tokens[1].trim());
      } else if (key.equals("type")) {

        try {
          dataset.setTrackType(TrackType.valueOf(tokens[1].trim().toUpperCase()));
        } catch (Exception ignored) {

          // Unknown track types are ignored; the dataset keeps its default type.
        }
      } else if (key.equals("coords")) {

        // Coordinate convention for start positions (e.g. "#coords=1").
        startBase = Integer.parseInt(tokens[1].trim());
      }
    }
  }
예제 #4
0
  /**
   * Compute the eigenvector for one chromosome of a "hic" file at the given bin size and print
   * its entries, space separated, to stdout.
   *
   * <p>Exits the process (System.exit(-1)) on unsupported input: non-hic file, missing
   * "&lt;file&gt;.densities" companion file, unknown chromosome, or unknown bin size.
   *
   * @param file path to the hic file; a "&lt;file&gt;.densities" file must exist alongside it
   * @param chr chromosome name; must be present in the dataset
   * @param binsize bin size; must match an entry of HiCGlobals.zoomBinSizes
   * @throws IOException if the hic or densities file cannot be read
   */
  static void calculateEigenvector(String file, String chr, int binsize) throws IOException {
    if (!file.endsWith("hic")) {
      System.err.println("Only 'hic' files are supported");
      System.exit(-1);
    }
    // Load the expected density function; it is required for the eigenvector computation below.
    Map<Integer, DensityFunction> zoomToDensityMap = null;
    String densityFile = file + ".densities";
    if (FileUtils.resourceExists(densityFile)) {
      InputStream is = null;
      try {
        is = ParsingUtils.openInputStream(densityFile);
        zoomToDensityMap = DensityUtil.readDensities(is);

      } finally {
        if (is != null) is.close();
      }
    } else {
      System.err.println("Densities file doesn't exist");
      System.exit(-1);
    }

    SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file);
    Dataset dataset = (new DatasetReader(ss)).read();
    Chromosome[] tmp = dataset.getChromosomes();

    // Index chromosomes by name for lookup.
    Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>();
    for (Chromosome c : tmp) {
      chromosomeMap.put(c.getName(), c);
    }

    if (!chromosomeMap.containsKey(chr)) {
      System.err.println("Unknown chromosome: " + chr);
      System.exit(-1);
    }
    // Map the requested bin size to its zoom index.
    int zoomIdx = 0;
    boolean found = false;
    for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) {
      if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) {
        found = true;
        break;
      }
    }

    if (!found) {
      System.err.println("Unknown bin size: " + binsize);
      System.exit(-1);
    }

    // Intrachromosomal observed matrix at the selected zoom; 0 selects the first eigenvector.
    Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr), chromosomeMap.get(chr));
    MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx);
    final DensityFunction df = zoomToDensityMap.get(zd.getZoom());
    double[] eigenvector = zd.computeEigenvector(df, 0);
    for (double ev : eigenvector) System.out.print(ev + " ");
    System.out.println();
  }
예제 #5
0
  /**
   * Restore reference frames from a session "Frames" element: matches each child frame element to
   * an existing frame by name, restores its locus, and reorders the frame list to match the
   * session.
   *
   * @param element the session element whose children are frame elements
   */
  private void processFrames(Element element) {
    NodeList elements = element.getChildNodes();
    if (elements.getLength() > 0) {
      // Index the currently known frames by name so session entries can be matched up.
      Map<String, ReferenceFrame> frames = new HashMap<String, ReferenceFrame>();
      for (ReferenceFrame f : FrameManager.getFrames()) {
        frames.put(f.getName(), f);
      }
      List<ReferenceFrame> reorderedFrames = new ArrayList<ReferenceFrame>();

      for (int i = 0; i < elements.getLength(); i++) {
        Node childNode = elements.item(i);
        if (childNode.getNodeName().equalsIgnoreCase(SessionElement.FRAME.getText())) {
          String frameName = getAttribute((Element) childNode, SessionAttribute.NAME.getText());

          ReferenceFrame f = frames.get(frameName);
          if (f != null) {
            reorderedFrames.add(f);
            try {
              String chr = getAttribute((Element) childNode, SessionAttribute.CHR.getText());
              // Positions may carry thousands separators; strip commas before parsing.
              final String startString =
                  getAttribute((Element) childNode, SessionAttribute.START.getText())
                      .replace(",", "");
              final String endString =
                  getAttribute((Element) childNode, SessionAttribute.END.getText())
                      .replace(",", "");
              int start = ParsingUtils.parseInt(startString);
              int end = ParsingUtils.parseInt(endString);
              org.broad.igv.feature.Locus locus = new Locus(chr, start, end);
              f.jumpTo(locus);
            } catch (NumberFormatException e) {
              // Malformed coordinates: keep the frame but leave its position unchanged.
              e.printStackTrace();
            }
          }
        }
      }
      if (reorderedFrames.size() > 0) {
        FrameManager.setFrames(reorderedFrames);
      }
    }
    IGV.getInstance().resetFrames();
  }
예제 #6
0
  /**
   * Create a feature source backed by a directory of per-chromosome feature files, described by a
   * properties file mapping chromosome names to file names.
   *
   * @param locator locator of the properties file; its parent directory becomes the root dir
   * @param genome the current genome, used when parsing feature files
   * @throws IOException if the properties file cannot be opened or read
   */
  public FeatureDirSource(ResourceLocator locator, Genome genome) throws IOException {
    this.genome = genome;
    featureCache = new LRUCache(this, 3);
    rootLocator = locator;
    setRootDir(locator.getPath());

    fileMap = new Properties();
    // Ensure the stream is closed even if Properties.load throws (the original leaked it).
    InputStream propStream = ParsingUtils.openInputStreamGZ(locator);
    try {
      fileMap.load(propStream);
    } finally {
      propStream.close();
    }
  }
예제 #7
0
  /**
   * Load a TDF file and append one DataSourceTrack per data series it contains to
   * {@code newTracks}.
   *
   * @param locator locator for the TDF resource (or a Goby ".counts" archive)
   * @param newTracks list that receives the newly created tracks
   * @param genome the current genome, passed through to the created data sources
   */
  public void loadTDFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    log.debug("Loading TDF file " + locator.getPath());
    TDFReader reader = TDFReader.getReader(locator);
    TrackType type = reader.getTrackType();

    // Parse the embedded track line, if any, into track properties.
    TrackProperties props = null;
    String trackLine = reader.getTrackLine();
    if (trackLine != null && trackLine.length() > 0) {
      props = new TrackProperties();
      ParsingUtils.parseTrackLine(trackLine, props);
    }

    // In case of conflict between the resource locator display name and the track properties name,
    // use the resource locator
    String name = locator.getName();
    if (name != null && props != null) {
      props.setName(name);
    }

    if (name == null) {
      name = props == null ? locator.getTrackName() : props.getName();
    }

    int trackNumber = 0;
    String path = locator.getPath();
    boolean multiTrack = reader.getTrackNames().length > 1;

    // Create one track per named data series; ids and display names are disambiguated with the
    // series heading when the file holds multiple tracks.
    for (String heading : reader.getTrackNames()) {

      String trackId = multiTrack ? path + "_" + heading : path;
      String trackName = multiTrack ? heading : name;
      // ".counts" paths are Goby count archives; everything else is read through the TDF reader.
      final DataSource dataSource =
          locator.getPath().endsWith(".counts")
              ? new GobyCountArchiveDataSource(locator)
              : new TDFDataSource(reader, trackNumber, heading, genome);
      DataSourceTrack track = new DataSourceTrack(locator, trackId, trackName, dataSource);

      String displayName = (name == null || multiTrack) ? heading : name;
      track.setName(displayName);
      track.setTrackType(type);
      if (props != null) {
        track.setProperties(props);
      }
      newTracks.add(track);
      trackNumber++;
    }
  }
예제 #8
0
  /**
   * Open an indexed fasta sequence. A companion ".fai" index file must exist at
   * {@code path + ".fai"}.
   *
   * @param path path (or URL) of the fasta file
   * @throws IOException if the content length or the index cannot be read
   */
  public FastaIndexedSequence(String path) throws IOException {

    this.path = path;
    contentLength = ParsingUtils.getContentLength(path);

    String indexPath = path + ".fai";

    // The check below is not useful if the files have been copied or moved, which is always the
    // case for our hosted
    // genomes.   It causes lots of spurious warnings
    //        if(ParsingUtils.getLastModified(path) > ParsingUtils.getLastModified(indexPath)){
    //            log.warn("Index file for " + path + " is older than the file it indexes");
    //        }

    index = new FastaIndex(indexPath);
    chromoNamesList = new ArrayList<String>(index.getSequenceNames());
  }
예제 #9
0
  /**
   * Load an ".ewig.tdf" / ".ewig.ibf" resource and append the resulting EWigTrack to
   * {@code newTracks}, applying any embedded track-line properties.
   *
   * @param locator locator for the ewig resource
   * @param newTracks list that receives the new track
   * @param genome the current genome, passed to the track
   */
  private void loadEwigIBFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    TDFReader tdfReader = TDFReader.getReader(locator.getPath());

    // Parse the embedded track line, if present, into track properties.
    TrackProperties trackProps = null;
    String line = tdfReader.getTrackLine();
    if (line != null && line.length() > 0) {
      trackProps = new TrackProperties();
      ParsingUtils.parseTrackLine(line, trackProps);
    }

    EWigTrack ewigTrack = new EWigTrack(locator, genome);
    if (trackProps != null) {
      ewigTrack.setProperties(trackProps);
    }
    ewigTrack.setName(locator.getTrackName());
    newTracks.add(ewigTrack);
  }
예제 #10
0
  /**
   * Parse a limited number of lines in this file and return a list of features found.
   *
   * @param locator locator of the file to parse
   * @param maxLines maximum number of non-blank lines to parse; a value &lt;= 0 means no limit
   * @return the features found in the parsed lines
   * @throws RuntimeException wrapping any IOException raised while opening the file
   */
  public List<org.broad.tribble.Feature> loadFeatures(ResourceLocator locator, int maxLines) {

    BufferedReader reader = null;
    try {
      reader = ParsingUtils.openBufferedReader(locator);
      return loadFeatures(reader, maxLines);
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException ignored) {
          // A close failure after parsing has completed does not affect the result; ignore.
        }
      }
    }
  }
예제 #11
0
  /**
   * Heuristically test whether the given resource looks like a MAGE-TAB data matrix this parser
   * can handle: the second row must contain a known MAGE-TAB reference-column identifier, and one
   * of the first few data rows must start with a recognized probe prefix.
   *
   * @param file the resource to inspect
   * @return true if the file appears to be a parsable MAGE-TAB data matrix
   * @throws IOException if the resource cannot be read
   */
  public static boolean parsableMAGE_TAB(ResourceLocator file) throws IOException {
    AsciiLineReader reader = null;
    try {
      reader = ParsingUtils.openAsciiReader(file);

      // The first row is a header; skip it.
      reader.readLine();

      // The second row must carry one of the known MAGE-TAB reference-column identifiers.
      String secondRow = reader.readLine();
      boolean hasMageTabHeader =
          secondRow != null
              && (secondRow.contains("Reporter REF")
                  || secondRow.contains("Composite Element REF")
                  || secondRow.contains("Term Source REF")
                  || secondRow.contains("CompositeElement REF")
                  || secondRow.contains("TermSource REF")
                  || secondRow.contains("Coordinates REF"));
      if (!hasMageTabHeader) {
        return false;
      }

      // Sample up to five data rows looking for probe identifiers this class understands.
      for (int sampled = 0; sampled < 5; sampled++) {
        String dataRow = reader.readLine();
        if (dataRow == null) {
          break;
        }
        dataRow = dataRow.trim();
        if (dataRow.startsWith("SNP_A") || dataRow.startsWith("CN_")) {
          return true;
        }
      }
      return false;
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
예제 #12
0
  /**
   * Load features from the reader, stopping after {@code maxLines} non-blank lines.
   *
   * @param reader source of feature lines; not closed by this method
   * @param maxLines maximum number of non-blank lines to process; a value &lt;= 0 means no limit
   * @return the features parsed (also registered with FeatureDB)
   */
  public List<org.broad.tribble.Feature> loadFeatures(BufferedReader reader, int maxLines) {

    List<org.broad.tribble.Feature> features = new ArrayList<org.broad.tribble.Feature>();
    String nextLine = null;

    int nLines = 0;
    try {
      while ((nextLine = reader.readLine()) != null) {
        nextLine = nextLine.trim();
        if (nextLine.length() == 0) continue; // blank lines are skipped and not counted
        nLines++;
        if ((maxLines > 0) && (nLines > maxLines)) {
          break;
        }

        try {
          if (nextLine.startsWith("#")) {
            // Directive lines: "#type", "#track", "#coords", "#gffTags".
            if (nextLine.startsWith("#type")) {
              String[] tokens = Globals.equalPattern.split(nextLine);
              if (tokens.length > 1) {
                try {
                  // TODO: type is not currently used, is there any reason to keep this?
                  TrackType type = TrackType.valueOf(tokens[1]);
                } catch (Exception e) {
                  log.error("Error converting track type: " + tokens[1]);
                }
              }
            } else if (nextLine.startsWith("#track")) {
              TrackProperties tp = new TrackProperties();
              ParsingUtils.parseTrackLine(nextLine, tp);
              setTrackProperties(tp);
              if (tp.isGffTags()) {
                gffTags = true;
              }
            } else if (nextLine.startsWith("#coords")) {
              // Coordinate convention (e.g. "#coords=1"); sets the start-base offset.
              try {
                String[] tokens = Globals.equalPattern.split(nextLine);
                startBase = Integer.parseInt(tokens[1]);
              } catch (Exception e) {
                log.error("Error parsing coords line: " + nextLine, e);
              }

            } else if (nextLine.startsWith("#gffTags")) {
              gffTags = true;
            }
          } else {
            // Data line: null results from parseLine are silently dropped.
            Feature feature = parseLine(nextLine);
            if (feature != null) {
              features.add(feature);
            }
          }

        } catch (NumberFormatException e) {

          // Expected condition -- for example comments.  don't log as it slows down
          // the parsing and is not useful information.
        }
      }
    } catch (java.io.EOFException e) {

      // This exception is due to a known bug with java zip library.  Not
      // in general a real error, and nothing we can do about it in any
      // event.
      return features;
    } catch (Exception e) {
      // Wrap with line context when we know where the failure happened; otherwise rethrow raw.
      if (nextLine != null && nLines != 0) {
        throw new ParserException(e.getMessage(), e, nLines, nextLine);
      } else {
        throw new RuntimeException(e);
      }
    }

    // TODO -- why is this test here?  This will break igvtools processing of expression files
    // if (IGV.hasInstance() || Globals.isTesting()) {
    FeatureDB.addFeatures(features);
    // }
    return features;
  }
예제 #13
0
  /**
   * Dump the contact matrix for a chromosome pair at the given bin size to stdout.
   *
   * <p>For type "oe" or "pearson" the expected-density companion file ("&lt;file&gt;.densities")
   * must exist and chr1 must equal chr2 (only intrachromosomal O/E and Pearson's are supported).
   * Exits the process (System.exit(-1)) on any unsupported input.
   *
   * @param file path to the hic file
   * @param chr1 first chromosome name; must be present in the dataset
   * @param chr2 second chromosome name; must be present in the dataset
   * @param binsize bin size; must match an entry of HiCGlobals.zoomBinSizes
   * @param type "oe", "pearson", or anything else for the raw observed matrix
   * @throws IOException if the hic or densities file cannot be read
   */
  static void dumpMatrix(String file, String chr1, String chr2, int binsize, String type)
      throws IOException {

    if (!file.endsWith("hic")) {
      System.err.println("Only 'hic' files are supported");
      System.exit(-1);
    }
    // Load the expected density function, if it exists; required for O/E and Pearson's.
    Map<Integer, DensityFunction> zoomToDensityMap = null;
    if (type.equals("oe") || type.equals("pearson")) {
      String densityFile = file + ".densities";
      if (FileUtils.resourceExists(densityFile)) {
        InputStream is = null;
        try {
          is = ParsingUtils.openInputStream(densityFile);
          zoomToDensityMap = DensityUtil.readDensities(is);

        } finally {
          if (is != null) is.close();
        }
      } else {
        System.err.println("Densities file doesn't exist, cannot calculate O/E or Pearson's");
        System.exit(-1);
      }
    }

    SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file);
    Dataset dataset = (new DatasetReader(ss)).read();
    Chromosome[] tmp = dataset.getChromosomes();

    // Index chromosomes by name for lookup.
    Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>();
    for (Chromosome c : tmp) {
      chromosomeMap.put(c.getName(), c);
    }

    if (!chromosomeMap.containsKey(chr1)) {
      System.err.println("Unknown chromosome: " + chr1);
      System.exit(-1);
    } else if (!chromosomeMap.containsKey(chr2)) {
      System.err.println("Unknown chromosome: " + chr2);
      System.exit(-1);
    }
    if (type.equals("oe") || type.equals("pearson")) {
      if (!chr1.equals(chr2)) {
        System.err.println("Chromosome " + chr1 + " not equal to Chromosome " + chr2);
        System.err.println("Currently only intrachromosomal O/E and Pearson's are supported.");
        System.exit(-1);
      }
    }

    // Map the requested bin size to its zoom index.
    int zoomIdx = 0;
    boolean found = false;
    for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) {
      if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) {
        found = true;
        break;
      }
    }

    if (!found) {
      System.err.println("Unknown bin size: " + binsize);
      // Exit on unknown bin size, matching calculateEigenvector's behavior; proceeding would use
      // an out-of-range zoom index (zoomIdx == zoomBinSizes.length).
      System.exit(-1);
    }

    Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr1), chromosomeMap.get(chr2));
    MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx);
    if (type.equals("oe") || type.equals("pearson")) {
      final DensityFunction df = zoomToDensityMap.get(zd.getZoom());
      if (df == null) {
        System.err.println("Densities not calculated to this resolution.");
        System.exit(-1);
      }
      zd.dumpOE(df, type.equals("oe"));
    } else zd.dump();
  }
예제 #14
0
  /**
   * Process the root node of a session document: validate the element name, restore the genome,
   * apply session-level attributes (locus, grouping, version, etc.), then process child elements.
   *
   * @param session the session being populated
   * @param node the document root node; must be a "Global" or "Session" element
   * @param additionalInformation accumulator passed through to child-element processing
   */
  private void processRootNode(Session session, Node node, HashMap additionalInformation) {

    if ((node == null) || (session == null)) {
      MessageUtils.showMessage("Invalid session file: root node not found");
      return;
    }

    String nodeName = node.getNodeName();
    if (!(nodeName.equalsIgnoreCase(SessionElement.GLOBAL.getText())
        || nodeName.equalsIgnoreCase(SessionElement.SESSION.getText()))) {
      MessageUtils.showMessage(
          "Session files must begin with a \"Global\" or \"Session\" element.  Found: " + nodeName);
    }
    process(session, node, additionalInformation);

    Element element = (Element) node;

    // Load the genome, which can be an ID, or a path or URL to a .genome or indexed fasta file.
    String genomeId = getAttribute(element, SessionAttribute.GENOME.getText());
    if (genomeId != null && genomeId.length() > 0) {
      if (genomeId.equals(GenomeManager.getInstance().getGenomeId())) {
        // We don't have to reload the genome, but the gene track for the current genome should be
        // restored.
        Genome genome = GenomeManager.getInstance().getCurrentGenome();
        IGV.getInstance().setGenomeTracks(genome.getGeneTrack());
      } else {
        // Selecting a genome will actually "reset" the session so we have to
        // save the path and restore it.
        String sessionPath = session.getPath();
        if (IGV.getInstance().getGenomeIds().contains(genomeId)) {
          IGV.getInstance().selectGenomeFromList(genomeId);
        } else {
          // Not a known genome ID; treat it as a path, resolving relative to the session file.
          String genomePath = genomeId;
          if (!ParsingUtils.pathExists(genomePath)) {
            genomePath = FileUtils.getAbsolutePath(genomeId, session.getPath());
          }
          if (ParsingUtils.pathExists(genomePath)) {
            try {
              IGV.getInstance().loadGenome(genomePath, null);
            } catch (IOException e) {
              throw new RuntimeException("Error loading genome: " + genomeId);
            }
          } else {
            MessageUtils.showMessage("Warning: Could not locate genome: " + genomeId);
          }
        }
        session.setPath(sessionPath);
      }
    }

    session.setLocus(getAttribute(element, SessionAttribute.LOCUS.getText()));
    session.setGroupTracksBy(getAttribute(element, SessionAttribute.GROUP_TRACKS_BY.getText()));

    String removeEmptyTracks = getAttribute(element, "removeEmptyTracks");
    if (removeEmptyTracks != null) {
      try {
        Boolean b = Boolean.parseBoolean(removeEmptyTracks);
        session.setRemoveEmptyPanels(b);
      } catch (Exception e) {
        log.error("Error parsing removeEmptyTracks string: " + removeEmptyTracks, e);
      }
    }

    // Session file format version; non-numeric values are logged and the prior value is kept.
    String versionString = getAttribute(element, SessionAttribute.VERSION.getText());
    try {
      version = Integer.parseInt(versionString);
    } catch (NumberFormatException e) {
      log.error("Non integer version number in session file: " + versionString);
    }
    session.setVersion(version);

    NodeList elements = element.getChildNodes();
    process(session, elements, additionalInformation);

    // ReferenceFrame.getInstance().invalidateLocationScale();
  }
예제 #15
0
  /**
   * Switches on various attributes of locator (mainly locator path extension and whether the
   * locator is indexed) to call the appropriate loading method.
   *
   * <p>The dispatch order below is significant: more specific extensions (e.g. ".rnai.gct",
   * ".ewig.tdf") are tested before their more general suffixes (".gct", ".tdf").
   *
   * @param locator the resource to load
   * @param genome the current genome
   * @return the list of tracks created for this locator (possibly empty)
   * @throws DataLoadException if loading fails; the original exception is logged unless excluded
   */
  public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException {

    final String path = locator.getPath().trim();
    log.info("Loading resource, path " + path);
    try {
      String typeString = locator.getTypeString();

      // A ".tbi" is a tabix index, not a loadable resource in itself; warn the user.
      if (typeString.endsWith(".tbi")) {
        MessageUtils.showMessage(
            "<html><b>Error:</b>File type '.tbi' is not recognized.  If this is a 'tabix' index <br>"
                + " load the associated gzipped file, which should have an extension of '.gz'");
      }

      // This list will hold all new tracks created for this locator
      List<Track> newTracks = new ArrayList<Track>();

      String dbUrl = locator.getDBUrl();
      LoadHandler handler = getTrackLoaderHandler(typeString);
      if (dbUrl != null) {
        this.loadFromDatabase(locator, newTracks, genome);
      } else if (typeString.endsWith(".dbxml")) {
        loadFromDBProfile(locator, newTracks);
      } else if (typeString.endsWith(".gmt")) {
        loadGMT(locator);
      } else if (typeString.equals("das")) {
        loadDASResource(locator, newTracks);
      } else if (typeString.endsWith(".vcf.list")) {
        loadVCFListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".trio")) {
        loadTrioData(locator);
      } else if (typeString.endsWith("varlist")) {
        VariantListManager.loadVariants(locator);
      } else if (typeString.endsWith("samplepathmap")) {
        VariantListManager.loadSamplePathMap(locator);
      } else if (typeString.endsWith(".rnai.gct")) {
        loadRnaiGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gct")
          || typeString.endsWith("res")
          || typeString.endsWith("tab")) {
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) {
        loadGbkFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cn")
          || typeString.endsWith(".xcn")
          || typeString.endsWith(".snp")
          || typeString.endsWith(".igv")
          || typeString.endsWith(".loh")) {
        loadIGVFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cbs")
          || typeString.endsWith(".seg")
          || typeString.endsWith("glad")
          || typeString.endsWith("birdseye_canary_calls")
          || typeString.endsWith(".seg.zip")) {
        loadSegFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gistic")) {
        loadGisticFile(locator, newTracks);
      } else if (typeString.endsWith(".gs")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome);
      } else if (typeString.endsWith(".riger")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome);
      } else if (typeString.endsWith(".hp")) {
        loadRNAiHPScoreFile(locator);
      } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) {
        loadSyntentyMapping(locator, newTracks);
      } else if (typeString.endsWith(".sam")
          || typeString.endsWith(".bam")
          || typeString.endsWith(".cram")
          || typeString.endsWith(".sam.list")
          || typeString.endsWith(".bam.list")
          || typeString.endsWith(".aligned")
          || typeString.endsWith(".sai")
          || typeString.endsWith(".bai")
          || typeString.equals("alist")
          || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) {
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".wig")
          || typeString.endsWith(".bedgraph")
          || typeString.endsWith(".bdg")
          || typeString.endsWith("cpg.txt")
          || typeString.endsWith(".expr")) {
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith("fpkm_tracking")
          || typeString.endsWith("gene_exp.diff")
          || typeString.endsWith("cds_exp.diff")) {
        loadCufflinksFile(locator, newTracks, genome);
      } else if (typeString.contains(".dranger")) {
        loadDRangerFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) {
        loadEwigIBFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bw")
          || typeString.endsWith(".bb")
          || typeString.endsWith(".bigwig")
          || typeString.endsWith(".bigbed")) {
        loadBWFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) {
        loadTDFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".counts")) {
        loadGobyCountsArchive(locator, newTracks, genome);
      } else if (WiggleParser.isWiggle(locator)) {
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf")) {
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf.dict")) {
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.contains(".peak.bin")) {
        loadPeakTrack(locator, newTracks, genome);
      } else if (typeString.endsWith("mage-tab")
          || ExpressionFileParser.parsableMAGE_TAB(locator)) {
        locator.setDescription("MAGE_TAB");
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bp")) {
        loadBasePairFile(locator, newTracks, genome);
      } else if (GWASParser.isGWASFile(typeString)) {
        loadGWASFile(locator, newTracks, genome);
      } else if (GobyAlignmentQueryReader.supportsFileType(path)) {
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".list")) {
        // This should be deprecated
        loadListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".smap")) {
        loadSMAPFile(locator, newTracks, genome);
      } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) {
        loadTribbleFile(locator, newTracks, genome);
      } else if (handler != null) {
        // Custom loader specified
        log.info(String.format("Loading %s with %s", path, handler));
        handler.load(path, newTracks);
      } else if (AttributeManager.isSampleInfoFile(locator)) {
        // This might be a sample information file.
        AttributeManager.getInstance().loadSampleInfo(locator);
      } else {
        MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension");
      }

      // Track line
      TrackProperties tp = null;
      String trackLine = locator.getTrackLine();
      if (trackLine != null) {
        tp = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, tp);
      }

      // Apply locator-level settings (info URL, track line, color, sample id) to every new track.
      for (Track track : newTracks) {

        if (locator.getFeatureInfoURL() != null) {
          track.setUrl(locator.getFeatureInfoURL());
        }
        if (tp != null) {
          track.setProperties(tp);
        }
        if (locator.getColor() != null) {
          track.setColor(locator.getColor());
        }
        if (locator.getSampleId() != null) {
          track.setSampleId(locator.getSampleId());
        }
      }

      return newTracks;
    } catch (Exception e) {
      if (!NOLogExceptions.contains(e.getClass())) {
        log.error(e.getMessage(), e);
      }
      throw new DataLoadException(e.getMessage());
    }
  }
 /**
  * Open an iterator over the records of the given text file of pairs.
  *
  * @param path path of the pairs file
  * @param chromosomeOrdinals presumably a map from chromosome name to its ordinal index — used
  *     elsewhere in this class; verify against callers
  * @throws IOException if the file cannot be opened or the first record cannot be read
  */
 public AsciiPairIterator(String path, Map<String, Integer> chromosomeOrdinals)
     throws IOException {
   this.reader = org.broad.igv.util.ParsingUtils.openBufferedReader(path);
   this.chromosomeOrdinals = chromosomeOrdinals;
   // Read ahead to the first element (advance() is defined elsewhere in this class).
   advance();
 }
예제 #17
0
  /**
   * Scan the datafile for chromosome breaks.
   *
   * @param dataset
   * @return
   */
  public List<ChromosomeSummary> scan(IGVDataset dataset) {

    int estLineCount = ParsingUtils.estimateLineCount(dataResourceLocator.getPath());
    Map<String, Integer> longestFeatureMap = new HashMap();

    float dataMin = 0;
    float dataMax = 0;

    InputStream is = null;
    AsciiLineReader reader = null;
    String nextLine = null;
    ChromosomeSummary chrSummary = null;
    List<ChromosomeSummary> chrSummaries = new ArrayList();
    String[] headings = null;
    WholeGenomeData wgData = null;
    int nRows = 0;

    int headerRows = 0;
    int count = 0;

    boolean logNormalized;
    try {

      int skipColumns = hasCalls ? 2 : 1;

      // BufferedReader reader = ParsingUtils.openBufferedReader(dataResourceLocator);
      is = ParsingUtils.openInputStreamGZ(dataResourceLocator);
      reader = new AsciiLineReader(is);

      // Infer datatype from extension.  This can be overriden in the
      // comment section
      if (isCopyNumberFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.COPY_NUMBER);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else if (isLOHFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.LOH);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else {
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      }

      // Parse comments and directives, if any
      nextLine = reader.readLine();
      while (nextLine.startsWith("#") || (nextLine.trim().length() == 0)) {
        headerRows++;

        if (nextLine.length() > 0) {
          parseDirective(nextLine, dataset);
        }
        nextLine = reader.readLine();
      }

      if (chrColumn < 0) {
        setColumnDefaults();
      }

      // Parse column headings
      String[] data = nextLine.trim().split("\t");

      // Set last data column
      if (lastDataColumn < 0) {
        lastDataColumn = data.length - 1;
      }

      headings = getHeadings(data, skipColumns);

      dataset.setDataHeadings(headings);

      // Infer if the data is logNormalized by looking for negative data values.
      // Assume it is not until proven otherwise
      logNormalized = false;

      wgData = new WholeGenomeData(headings);

      int chrRowCount = 0;

      // Update
      int updateCount = 5000;
      long lastPosition = 0;
      while ((nextLine = reader.readLine()) != null) {

        if (igv != null && ++count % updateCount == 0) {
          igv.setStatusBarMessage("Loaded: " + count + " / " + estLineCount + " (est)");
        }
        // Distance since last sample

        String[] tokens = Globals.tabPattern.split(nextLine, -1);
        int nTokens = tokens.length;
        if (nTokens > 0) {
          String thisChr = genome.getChromosomeAlias(tokens[chrColumn]);
          if (chrSummary == null || !thisChr.equals(chrSummary.getName())) {
            // Update whole genome and previous chromosome summary, unless this is
            // the first chromosome
            if (chrSummary != null) {
              updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
              chrSummary.setNDataPoints(nRows);
            }

            // Shart the next chromosome
            chrSummary = new ChromosomeSummary(thisChr, lastPosition);
            chrSummaries.add(chrSummary);
            nRows = 0;
            wgData = new WholeGenomeData(headings);
            chrRowCount = 0;
          }
          lastPosition = reader.getPosition();

          int location = -1;
          try {
            location = ParsingUtils.parseInt(tokens[startColumn]) - startBase;

          } catch (NumberFormatException numberFormatException) {
            log.error("Column " + tokens[startColumn] + " is not a number");
            throw new ParserException(
                "Column "
                    + (startColumn + 1)
                    + " must contain an integer value."
                    + " Found: "
                    + tokens[startColumn],
                count + headerRows,
                nextLine);
          }

          int length = 1;
          if (hasEndLocations) {
            try {
              length = ParsingUtils.parseInt(tokens[endColumn].trim()) - location + 1;

            } catch (NumberFormatException numberFormatException) {
              log.error("Column " + tokens[endColumn] + " is not a number");
              throw new ParserException(
                  "Column "
                      + (endColumn + 1)
                      + " must contain an integer value."
                      + " Found: "
                      + tokens[endColumn],
                  count + headerRows,
                  nextLine);
            }
          }

          updateLongestFeature(longestFeatureMap, thisChr, length);

          if (wgData.locations.size() > 0
              && wgData.locations.get(wgData.locations.size() - 1) > location) {
            throw new ParserException(
                "File is not sorted, .igv and .cn files must be sorted by start position."
                    + " Use igvtools (File > Run igvtools..) to sort the file.",
                count + headerRows);
          }

          wgData.locations.add(location);

          for (int idx = 0; idx < headings.length; idx++) {
            int i = firstDataColumn + idx * skipColumns;

            float copyNo = i < tokens.length ? readFloat(tokens[i]) : Float.NaN;

            if (!Float.isNaN(copyNo)) {
              dataMin = Math.min(dataMin, copyNo);
              dataMax = Math.max(dataMax, copyNo);
            }
            if (copyNo < 0) {
              logNormalized = true;
            }
            String heading = headings[idx];
            wgData.data.get(heading).add(copyNo);
          }

          nRows++;
        }
        chrRowCount++;
      }

      dataset.setLongestFeatureMap(longestFeatureMap);

    } catch (ParserException pe) {
      throw pe;
    } catch (FileNotFoundException e) {
      // DialogUtils.showError("SNP file not found: " + dataSource.getCopyNoFile());
      log.error("File not found: " + dataResourceLocator);
      throw new RuntimeException(e);
    } catch (Exception e) {
      log.error("Exception when loading: " + dataResourceLocator.getPath(), e);
      if (nextLine != null && (count + headerRows != 0)) {
        throw new ParserException(e.getMessage(), e, count + headerRows, nextLine);
      } else {
        throw new RuntimeException(e);
      }
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("Error closing IGVDataset stream", e);
        }
      }
    }

    // Update last chromosome
    if (chrSummary != null) {
      updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
      chrSummary.setNDataPoints(nRows);
    }

    dataset.setLogNormalized(logNormalized);
    dataset.setDataMin(dataMin);
    dataset.setDataMax(dataMax);

    return chrSummaries;
  }
예제 #18
0
  /**
   * Load data for a single chromosome, seeking directly to the chromosome's start offset
   * recorded in {@code chrSummary} and reading rows until the chromosome name changes or
   * the file ends.
   *
   * @param chrSummary  summary holding the chromosome name, file start position, and an
   *                    estimated row count used to presize containers
   * @param dataHeaders column headings identifying the data (sample) columns to read
   * @return the parsed probe names, locations, and per-heading data values
   * @throws RuntimeException if an I/O error occurs while reading the underlying file
   */
  public ChromosomeData loadChromosomeData(ChromosomeSummary chrSummary, String[] dataHeaders) {

    SeekableStream is = null;
    try {
      // Data columns are interleaved with "call" columns when hasCalls is set
      int skipColumns = hasCalls ? 2 : 1;

      // Get an estimate of the number of snps (rows).  THIS IS ONLY AN ESTIMATE,
      // used to presize the containers below.
      int nRowsEst = chrSummary.getNDataPts();

      is = IGVSeekableStreamFactory.getStreamFor(dataResourceLocator.getPath());
      is.seek(chrSummary.getStartPosition());
      AsciiLineReader reader = new AsciiLineReader(is);

      // Create containers to hold data
      IntArrayList startLocations = new IntArrayList(nRowsEst);
      IntArrayList endLocations = (hasEndLocations ? new IntArrayList(nRowsEst) : null);
      List<String> probes = new ArrayList<>(nRowsEst);

      Map<String, FloatArrayList> dataMap = new HashMap<>();
      for (String h : dataHeaders) {
        dataMap.put(h, new FloatArrayList(nRowsEst));
      }

      // Begin loop through rows
      String chromosome = chrSummary.getName();
      boolean chromosomeStarted = false;
      String nextLine = reader.readLine();

      while ((nextLine != null) && (nextLine.trim().length() > 0)) {

        if (!nextLine.startsWith("#")) {
          try {
            String[] tokens = Globals.tabPattern.split(nextLine, -1);
            String thisChromosome = genome.getChromosomeAlias(tokens[chrColumn].trim());
            if (thisChromosome.equals(chromosome)) {
              chromosomeStarted = true;

              // The probe.  A new string is created to prevent holding on to the entire row
              // through a substring reference
              String probe = new String(tokens[probeColumn]);
              probes.add(probe);

              int start = ParsingUtils.parseInt(tokens[startColumn].trim()) - startBase;
              if (hasEndLocations) {
                endLocations.add(ParsingUtils.parseInt(tokens[endColumn].trim()));
              }

              startLocations.add(start);

              for (int idx = 0; idx < dataHeaders.length; idx++) {
                int i = firstDataColumn + idx * skipColumns;
                // Rows may be ragged; columns beyond lastDataColumn are recorded as NaN
                float copyNo = i <= lastDataColumn ? readFloat(tokens[i]) : Float.NaN;
                String heading = dataHeaders[idx];
                dataMap.get(heading).add(copyNo);
              }

            } else if (chromosomeStarted) {
              // Rows are grouped by chromosome; a different name after starting means done
              break;
            }

          } catch (NumberFormatException numberFormatException) {

            // Skip line -- best-effort parsing, consistent with the rest of this parser
            log.info("Skipping line (NumberFormatException) " + nextLine);
          }
        }

        nextLine = reader.readLine();
      }

      // Loop complete -- package the results
      ChromosomeData cd = new ChromosomeData(chrSummary.getName());
      cd.setProbes(probes.toArray(new String[0]));
      cd.setStartLocations(startLocations.toArray());
      if (hasEndLocations) {
        cd.setEndLocations(endLocations.toArray());
      }

      for (String h : dataHeaders) {
        cd.setData(h, dataMap.get(h).toArray());
      }

      return cd;

    } catch (IOException ex) {
      log.error("Error parsing cn file", ex);
      throw new RuntimeException("Error parsing cn file", ex);
    } finally {
      // Always release the underlying stream; it was previously leaked on every call
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("Error closing stream for " + dataResourceLocator.getPath(), e);
        }
      }
    }
  }