예제 #1
0
  /**
   * Parses one directive line and applies it to the dataset or to this parser's state.
   *
   * <p>The first character of {@code comment} is dropped (presumably the leading {@code #}; the
   * caller is not visible from here) and the remainder is handled as follows:
   *
   * <ul>
   *   <li>starts with {@code track}: handed to {@code ParsingUtils.parseTrackLine} together with
   *       the dataset's track properties;
   *   <li>starts with {@code columns}: handed to {@code parseColumnLine};
   *   <li>otherwise: split on {@code =}. Anything that does not yield exactly two tokens is
   *       ignored. The key is matched case-insensitively: {@code name} sets the dataset name,
   *       {@code type} sets the track type (values that cannot be applied are ignored), and
   *       {@code coords} sets {@code startBase}.
   * </ul>
   *
   * <p>Case conversion is done with {@link java.util.Locale#ROOT} so that matching does not depend
   * on the JVM default locale (with a Turkish default locale, for example, a lowercase {@code i}
   * upper-cases to a dotted capital and the enum lookup would fail).
   *
   * <p>Note: this logic was copied from ExpressionFileParser. Refactor to merge these two parsers,
   * or share a common base class.
   *
   * @param comment the raw directive line, including its leading marker character; must not be
   *     empty
   * @param dataset the dataset that receives the name, track type and track properties
   * @throws NumberFormatException if a {@code coords} value is not a valid integer
   */
  private void parseDirective(String comment, IGVDataset dataset) {

    // Strip the leading marker character.
    String directive = comment.substring(1);

    if (directive.startsWith("track")) {
      ParsingUtils.parseTrackLine(directive, dataset.getTrackProperties());
      return;
    }
    if (directive.startsWith("columns")) {
      parseColumnLine(directive);
      return;
    }

    // Everything else is expected to be a single key=value pair.
    String[] tokens = directive.split("=");
    if (tokens.length != 2) {
      return;
    }

    String key = tokens[0].trim().toLowerCase(java.util.Locale.ROOT);
    String value = tokens[1].trim();

    if (key.equals("name")) {
      dataset.setName(value);
    } else if (key.equals("type")) {
      try {
        dataset.setTrackType(TrackType.valueOf(value.toUpperCase(java.util.Locale.ROOT)));
      } catch (Exception ignored) {
        // Best effort: a type that cannot be resolved or applied leaves the dataset unchanged.
      }
    } else if (key.equals("coords")) {
      startBase = Integer.parseInt(value);
    }
  }
예제 #2
0
  /**
   * Creates one {@code DataSourceTrack} for every track name reported by the TDF reader and
   * appends each of them to {@code newTracks}.
   *
   * <p>If the file carries a track line, it is parsed into a {@code TrackProperties} instance that
   * is applied to every created track. When the file reports more than one track name, each track
   * is identified by {@code path + "_" + heading} and named after its heading; otherwise the track
   * id is the path and the name is resolved from the locator, the track properties, or the
   * locator's track name, in that order of preference.
   *
   * @param locator the resource being loaded
   * @param newTracks receives the created tracks
   * @param genome passed through to the TDF data source
   */
  public void loadTDFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    log.debug("Loading TDF file " + locator.getPath());
    final TDFReader tdfReader = TDFReader.getReader(locator);
    final TrackType trackType = tdfReader.getTrackType();

    // Parse the track line embedded in the file, if there is one.
    final String embeddedTrackLine = tdfReader.getTrackLine();
    final TrackProperties trackProps;
    if (embeddedTrackLine == null || embeddedTrackLine.length() == 0) {
      trackProps = null;
    } else {
      trackProps = new TrackProperties();
      ParsingUtils.parseTrackLine(embeddedTrackLine, trackProps);
    }

    // The locator's display name wins over any name in the track properties. Without a locator
    // name, fall back to the properties' name, or to the locator's track name if there are no
    // properties.
    String name = locator.getName();
    if (name != null) {
      if (trackProps != null) {
        trackProps.setName(name);
      }
    } else if (trackProps != null) {
      name = trackProps.getName();
    } else {
      name = locator.getTrackName();
    }

    int trackNumber = 0;
    final String path = locator.getPath();
    final boolean multiTrack = tdfReader.getTrackNames().length > 1;

    for (String heading : tdfReader.getTrackNames()) {

      final String trackId;
      final String trackName;
      if (multiTrack) {
        trackId = path + "_" + heading;
        trackName = heading;
      } else {
        trackId = path;
        trackName = name;
      }

      final DataSource dataSource;
      if (locator.getPath().endsWith(".counts")) {
        dataSource = new GobyCountArchiveDataSource(locator);
      } else {
        dataSource = new TDFDataSource(tdfReader, trackNumber, heading, genome);
      }

      DataSourceTrack track = new DataSourceTrack(locator, trackId, trackName, dataSource);

      // The heading is also used as display name when no other name could be resolved.
      if (multiTrack || name == null) {
        track.setName(heading);
      } else {
        track.setName(name);
      }
      track.setTrackType(trackType);
      if (trackProps != null) {
        track.setProperties(trackProps);
      }

      newTracks.add(track);
      trackNumber++;
    }
  }
예제 #3
0
  /**
   * Creates a single {@code EWigTrack} for the locator and appends it to {@code newTracks}.
   *
   * <p>A TDF reader is obtained for the locator's path solely to read the file's track line; when
   * that line is non-empty it is parsed and the resulting properties are applied to the track. The
   * track is named after the locator's track name.
   *
   * @param locator the resource being loaded
   * @param newTracks receives the created track
   * @param genome passed to the track constructor
   */
  private void loadEwigIBFFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    final String embeddedTrackLine = TDFReader.getReader(locator.getPath()).getTrackLine();

    TrackProperties trackProps = null;
    if (embeddedTrackLine != null && !embeddedTrackLine.isEmpty()) {
      trackProps = new TrackProperties();
      ParsingUtils.parseTrackLine(embeddedTrackLine, trackProps);
    }

    final EWigTrack ewigTrack = new EWigTrack(locator, genome);
    if (trackProps != null) {
      ewigTrack.setProperties(trackProps);
    }
    ewigTrack.setName(locator.getTrackName());

    newTracks.add(ewigTrack);
  }
예제 #4
0
  /**
   * Reads features from {@code reader} line by line and returns them.
   *
   * <p>Each line is trimmed and blank lines are skipped. Lines starting with {@code #} are treated
   * as directives: {@code #type} (value is only validated), {@code #track} (parsed into track
   * properties and stored via {@code setTrackProperties}), {@code #coords} (sets {@code
   * startBase}) and {@code #gffTags} (sets {@code gffTags}). Every other line is handed to {@code
   * parseLine}; non-null results are collected. A {@code NumberFormatException} raised while
   * handling a single line causes that line to be skipped silently.
   *
   * <p>On normal completion the collected features are also registered with {@code FeatureDB}. If
   * an {@code EOFException} is raised while reading, the features collected so far are returned
   * without being registered.
   *
   * @param reader source of the lines to parse
   * @param maxLines when positive, reading stops once more than this many non-blank lines
   *     (directive lines included) have been seen; zero or negative means no limit
   * @return the features parsed from the input
   * @throws ParserException if any other exception occurs after at least one non-blank line has
   *     been read; carries the line number and line text
   * @throws RuntimeException wrapping the cause if the failure occurs before any line was counted
   */
  public List<org.broad.tribble.Feature> loadFeatures(BufferedReader reader, int maxLines) {

    final List<org.broad.tribble.Feature> features = new ArrayList<org.broad.tribble.Feature>();
    String nextLine = null;
    int nLines = 0;

    try {
      for (nextLine = reader.readLine(); nextLine != null; nextLine = reader.readLine()) {
        nextLine = nextLine.trim();
        if (nextLine.length() == 0) {
          continue;
        }

        nLines++;
        if (maxLines > 0 && nLines > maxLines) {
          break;
        }

        try {
          if (!nextLine.startsWith("#")) {
            // Ordinary data line.
            final Feature parsed = parseLine(nextLine);
            if (parsed != null) {
              features.add(parsed);
            }
          } else if (nextLine.startsWith("#type")) {
            final String[] typeTokens = Globals.equalPattern.split(nextLine);
            if (typeTokens.length > 1) {
              try {
                // TODO: the resolved type is not used; the lookup only reports unknown values.
                // Is there any reason to keep this?
                TrackType.valueOf(typeTokens[1]);
              } catch (Exception e) {
                log.error("Error converting track type: " + typeTokens[1]);
              }
            }
          } else if (nextLine.startsWith("#track")) {
            final TrackProperties trackProps = new TrackProperties();
            ParsingUtils.parseTrackLine(nextLine, trackProps);
            setTrackProperties(trackProps);
            if (trackProps.isGffTags()) {
              gffTags = true;
            }
          } else if (nextLine.startsWith("#coords")) {
            try {
              final String[] coordTokens = Globals.equalPattern.split(nextLine);
              startBase = Integer.parseInt(coordTokens[1]);
            } catch (Exception e) {
              log.error("Error parsing coords line: " + nextLine, e);
            }
          } else if (nextLine.startsWith("#gffTags")) {
            gffTags = true;
          }
        } catch (NumberFormatException ignored) {
          // Expected for some lines (comments, for example). Deliberately not logged: logging
          // slows down parsing and the information is not useful.
        }
      }
    } catch (java.io.EOFException e) {
      // Caused by a known bug in the Java zip library. Not in general a real error, and there is
      // nothing to be done about it, so return what has been read so far.
      return features;
    } catch (Exception e) {
      if (nextLine == null || nLines == 0) {
        throw new RuntimeException(e);
      }
      throw new ParserException(e.getMessage(), e, nLines, nextLine);
    }

    // TODO -- this registration used to be guarded by (IGV.hasInstance() || Globals.isTesting()).
    // Why was that test here? It would break igvtools processing of expression files.
    FeatureDB.addFeatures(features);
    return features;
  }
예제 #5
0
  /**
   * Switches on various attributes of locator (mainly the locator's type string, and whether it
   * carries a database URL) to call the appropriate loading method, then applies locator-level
   * settings to every track that was created.
   *
   * <p>The dispatch is a single if/else-if chain, so the first matching condition wins and the
   * order of the tests is significant: more specific suffixes are tested before the more general
   * ones they end with (for example {@code .rnai.gct} before {@code .gct}, {@code .vcf.list},
   * {@code .sam.list} and {@code .bam.list} before {@code .list}, {@code .ewig.tdf} before {@code
   * .tdf}). Some branches load data without producing tracks; in that case the returned list is
   * empty. An unrecognized type results in a message to the user rather than an exception.
   *
   * <p>After dispatch, the locator's track line (if any) is parsed once and its properties, along
   * with the locator's feature-info URL, color and sample id when present, are applied to each new
   * track.
   *
   * @param locator describes the resource to load
   * @param genome passed through to the loaders that take a genome
   * @return the tracks created for this locator; possibly empty
   * @throws DataLoadException if any exception is raised while loading; only the original
   *     exception's message is carried over
   */
  public List<Track> load(ResourceLocator locator, Genome genome) throws DataLoadException {

    final String path = locator.getPath().trim();
    log.info("Loading resource, path " + path);
    try {
      String typeString = locator.getTypeString();

      // Tabix index files cannot be loaded directly. This only informs the user; it does not
      // return, so the dispatch chain below still runs for the same type string.
      if (typeString.endsWith(".tbi")) {
        MessageUtils.showMessage(
            "<html><b>Error:</b>File type '.tbi' is not recognized.  If this is a 'tabix' index <br>"
                + " load the associated gzipped file, which should have an extension of '.gz'");
      }

      // This list will hold all new tracks created for this locator
      List<Track> newTracks = new ArrayList<Track>();

      String dbUrl = locator.getDBUrl();
      // Looked up here but only consulted near the end of the chain, after all built-in types and
      // the Tribble codec check have been tried.
      LoadHandler handler = getTrackLoaderHandler(typeString);
      if (dbUrl != null) {
        // A database URL takes precedence over any file-type test.
        this.loadFromDatabase(locator, newTracks, genome);
      } else if (typeString.endsWith(".dbxml")) {
        loadFromDBProfile(locator, newTracks);
      } else if (typeString.endsWith(".gmt")) {
        loadGMT(locator);
      } else if (typeString.equals("das")) {
        loadDASResource(locator, newTracks);
      } else if (typeString.endsWith(".vcf.list")) {
        loadVCFListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".trio")) {
        loadTrioData(locator);
      } else if (typeString.endsWith("varlist")) {
        VariantListManager.loadVariants(locator);
      } else if (typeString.endsWith("samplepathmap")) {
        VariantListManager.loadSamplePathMap(locator);
      } else if (typeString.endsWith(".rnai.gct")) {
        loadRnaiGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gct")
          || typeString.endsWith("res")
          || typeString.endsWith("tab")) {
        // NOTE(review): a type string ending in "mage-tab" also ends in "tab", so it is handled
        // here and never reaches the "mage-tab" test further down (which additionally sets the
        // MAGE_TAB description on the locator). Confirm whether that is intended.
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gbk") || typeString.endsWith(".gb")) {
        loadGbkFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cn")
          || typeString.endsWith(".xcn")
          || typeString.endsWith(".snp")
          || typeString.endsWith(".igv")
          || typeString.endsWith(".loh")) {
        loadIGVFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".cbs")
          || typeString.endsWith(".seg")
          || typeString.endsWith("glad")
          || typeString.endsWith("birdseye_canary_calls")
          || typeString.endsWith(".seg.zip")) {
        loadSegFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".gistic")) {
        loadGisticFile(locator, newTracks);
      } else if (typeString.endsWith(".gs")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.GENE_SCORE, genome);
      } else if (typeString.endsWith(".riger")) {
        loadRNAiGeneScoreFile(locator, newTracks, RNAIGeneScoreParser.Type.POOLED, genome);
      } else if (typeString.endsWith(".hp")) {
        loadRNAiHPScoreFile(locator);
      } else if (typeString.contains(".tabblastn") || typeString.endsWith(".orthologs")) {
        loadSyntentyMapping(locator, newTracks);
      } else if (typeString.endsWith(".sam")
          || typeString.endsWith(".bam")
          || typeString.endsWith(".cram")
          || typeString.endsWith(".sam.list")
          || typeString.endsWith(".bam.list")
          || typeString.endsWith(".aligned")
          || typeString.endsWith(".sai")
          || typeString.endsWith(".bai")
          || typeString.equals("alist")
          || typeString.equals(Ga4ghAPIHelper.RESOURCE_TYPE)) {
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".wig")
          || typeString.endsWith(".bedgraph")
          || typeString.endsWith(".bdg")
          || typeString.endsWith("cpg.txt")
          || typeString.endsWith(".expr")) {
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith("fpkm_tracking")
          || typeString.endsWith("gene_exp.diff")
          || typeString.endsWith("cds_exp.diff")) {
        loadCufflinksFile(locator, newTracks, genome);
      } else if (typeString.contains(".dranger")) {
        loadDRangerFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ewig.tdf") || (typeString.endsWith(".ewig.ibf"))) {
        // Must be tested before the plain .ibf/.tdf branch below.
        loadEwigIBFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bw")
          || typeString.endsWith(".bb")
          || typeString.endsWith(".bigwig")
          || typeString.endsWith(".bigbed")) {
        loadBWFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".ibf") || typeString.endsWith(".tdf")) {
        loadTDFFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".counts")) {
        loadGobyCountsArchive(locator, newTracks, genome);
      } else if (WiggleParser.isWiggle(locator)) {
        // Wiggle content detected by the parser rather than by one of the suffixes tested above.
        loadWigFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf")) {
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".maf.dict")) {
        // Same loader as the .maf branch above.
        loadMultipleAlignmentTrack(locator, newTracks, genome);
      } else if (typeString.contains(".peak.bin")) {
        loadPeakTrack(locator, newTracks, genome);
      } else if (typeString.endsWith("mage-tab")
          || ExpressionFileParser.parsableMAGE_TAB(locator)) {
        locator.setDescription("MAGE_TAB");
        loadGctFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".bp")) {
        loadBasePairFile(locator, newTracks, genome);
      } else if (GWASParser.isGWASFile(typeString)) {
        loadGWASFile(locator, newTracks, genome);
      } else if (GobyAlignmentQueryReader.supportsFileType(path)) {
        loadAlignmentsTrack(locator, newTracks, genome);
      } else if (typeString.endsWith(".list")) {
        // This should be deprecated
        loadListFile(locator, newTracks, genome);
      } else if (typeString.endsWith(".smap")) {
        loadSMAPFile(locator, newTracks, genome);
      } else if (CodecFactory.hasCodec(locator, genome) && !forceNotTribble(typeString)) {
        loadTribbleFile(locator, newTracks, genome);
      } else if (handler != null) {
        // Custom loader specified
        log.info(String.format("Loading %s with %s", path, handler));
        handler.load(path, newTracks);
      } else if (AttributeManager.isSampleInfoFile(locator)) {
        // This might be a sample information file.
        AttributeManager.getInstance().loadSampleInfo(locator);
      } else {
        // Nothing matched: tell the user, but still fall through and return the (empty) list.
        MessageUtils.showMessage("<html>Unknown file type: " + path + "<br>Check file extension");
      }

      // Track line supplied on the locator itself; parsed once and shared by all new tracks.
      TrackProperties tp = null;
      String trackLine = locator.getTrackLine();
      if (trackLine != null) {
        tp = new TrackProperties();
        ParsingUtils.parseTrackLine(trackLine, tp);
      }

      // Apply locator-level settings to every track created above. Each setting is applied only
      // when the locator actually provides it.
      for (Track track : newTracks) {

        if (locator.getFeatureInfoURL() != null) {
          track.setUrl(locator.getFeatureInfoURL());
        }
        if (tp != null) {
          track.setProperties(tp);
        }
        if (locator.getColor() != null) {
          track.setColor(locator.getColor());
        }
        if (locator.getSampleId() != null) {
          track.setSampleId(locator.getSampleId());
        }
      }

      return newTracks;
    } catch (Exception e) {
      // Exception classes listed in NOLogExceptions are not logged here.
      if (!NOLogExceptions.contains(e.getClass())) {
        log.error(e.getMessage(), e);
      }
      // NOTE(review): only the message is propagated; the original exception is not chained as
      // the cause, so its stack trace is available only through the log entry above.
      throw new DataLoadException(e.getMessage());
    }
  }