Exemplo n.º 1
0
  /**
   * Parses a single header directive line and applies it to the dataset or to this parser.
   *
   * <p>The first character of {@code comment} is discarded (the line is presumably expected to
   * start with {@code #}; this method does not verify that) and the remainder is handled as
   * follows:
   *
   * <ul>
   *   <li>starts with {@code track}: passed to {@code ParsingUtils.parseTrackLine} together with
   *       the dataset's track properties;
   *   <li>starts with {@code columns}: passed to {@code parseColumnLine};
   *   <li>otherwise treated as {@code key=value}. Recognized keys (case-insensitive) are
   *       {@code name} (dataset name), {@code type} (track type, matched against the
   *       {@code TrackType} constant names, unknown values are ignored) and {@code coords}
   *       (stored in {@code startBase}). Lines that do not split into exactly two parts on
   *       {@code =} are ignored.
   * </ul>
   *
   * <p>Case conversion uses {@code Locale.ROOT} so that matching does not depend on the JVM's
   * default locale.
   *
   * <p>Note (from the original author): this method was copied from ExpressionFileParser. Refactor
   * to merge these two parsers, or share a common base class.
   *
   * @param comment the raw directive line, including its leading marker character
   * @param dataset the dataset whose name, track type or track properties may be updated
   * @throws NumberFormatException if the value of a {@code coords} directive is not an integer
   */
  private void parseDirective(String comment, IGVDataset dataset) {

    // An empty line carries no directive; returning here also avoids substring(1) failing on "".
    if (comment.length() == 0) {
      return;
    }

    // Drop the leading marker character.
    String directive = comment.substring(1);

    if (directive.startsWith("track")) {
      ParsingUtils.parseTrackLine(directive, dataset.getTrackProperties());
      return;
    }
    if (directive.startsWith("columns")) {
      parseColumnLine(directive);
      return;
    }

    // Anything else must look like exactly one "key=value" pair.
    String[] tokens = directive.split("=");
    if (tokens.length != 2) {
      return;
    }

    // Locale.ROOT: default-locale case mapping (e.g. Turkish dotless i) must not affect matching.
    String key = tokens[0].trim().toLowerCase(java.util.Locale.ROOT);
    String value = tokens[1].trim();

    if (key.equals("name")) {
      dataset.setName(value);
    } else if (key.equals("type")) {
      try {
        dataset.setTrackType(TrackType.valueOf(value.toUpperCase(java.util.Locale.ROOT)));
      } catch (Exception ignored) {
        // Best effort: an unrecognized type leaves the dataset's track type unchanged.
      }
    } else if (key.equals("coords")) {
      startBase = Integer.parseInt(value);
    }
  }
Exemplo n.º 2
0
  /**
   * Adds the values collected for one chromosome to the dataset's genome summary.
   *
   * <p>Does nothing unless the genome's home chromosome equals {@code Globals.CHR_ALL}, and
   * nothing if {@code wgData} holds no locations. The dataset's {@code GenomeSummaryData} is
   * created (from {@code genome} and {@code headings}) and attached to the dataset on first use.
   *
   * @param currentChromosome name of the chromosome the collected data belongs to
   * @param dataset dataset whose genome summary is updated
   * @param headings data headings, used only when a new genome summary has to be created
   * @param wgData locations and per-heading values accumulated for {@code currentChromosome}
   */
  private void updateWholeGenome(
      String currentChromosome,
      IGVDataset dataset,
      String[] headings,
      IGVDatasetParser.WholeGenomeData wgData) {

    if (!genome.getHomeChromosome().equals(Globals.CHR_ALL)) {
      return;
    }

    int[] locations = wgData.locations.toArray();
    if (locations.length == 0) {
      return;
    }

    // Snapshot each heading's values as a plain array. Explicit type arguments replace the
    // original raw HashMap, removing the unchecked conversion.
    Map<String, float[]> valuesByHeading = new HashMap<String, float[]>(wgData.data.size());
    for (String heading : wgData.headings) {
      valuesByHeading.put(heading, wgData.data.get(heading).toArray());
    }

    GenomeSummaryData genomeSummary = dataset.getGenomeSummary();
    if (genomeSummary == null) {
      genomeSummary = new GenomeSummaryData(genome, headings);
      dataset.setGenomeSummary(genomeSummary);
    }
    genomeSummary.addData(currentChromosome, locations, valuesByHeading);
  }
Exemplo n.º 3
0
  /**
   * Creates an {@code IGVDataset} for the given locator and appends one {@code DataSourceTrack}
   * per track name in that dataset to {@code newTracks}.
   *
   * <p>For local resources nothing is loaded when {@code checkSize(locator)} returns false. Each
   * track gets the dataset's type and track properties; tracks of an
   * {@code ALLELE_FREQUENCY} dataset additionally use {@code PointsRenderer} and a height of 40.
   *
   * @param locator location of the IGV data file
   * @param newTracks list that receives the created tracks
   * @param genome genome passed to the dataset and to each data source
   */
  private void loadIGVFile(ResourceLocator locator, List<Track> newTracks, Genome genome) {

    // Local files have to pass the size check before anything is loaded.
    if (locator.isLocal() && !checkSize(locator)) {
      return;
    }

    String datasetName = locator.getTrackName();
    IGVDataset dataset = new IGVDataset(locator, genome);
    dataset.setName(datasetName);

    TrackProperties sharedProperties = dataset.getTrackProperties();
    String idPrefix = locator.getPath();
    boolean isAlleleFrequency = dataset.getType() == TrackType.ALLELE_FREQUENCY;

    for (String trackName : dataset.getTrackNames()) {
      DatasetDataSource source = new DatasetDataSource(trackName, dataset, genome);
      DataSourceTrack track =
          new DataSourceTrack(locator, idPrefix + "_" + trackName, trackName, source);

      track.setTrackType(dataset.getType());
      track.setProperties(sharedProperties);

      if (isAlleleFrequency) {
        track.setRendererClass(PointsRenderer.class);
        track.setHeight(40);
      }

      newTracks.add(track);
    }
  }
Exemplo n.º 4
0
  /**
   * Scans the data file for chromosome breaks.
   *
   * <p>The file at {@code dataResourceLocator} is read once, top to bottom:
   *
   * <ol>
   *   <li>Leading lines that start with {@code #} or are blank are counted as header rows; the
   *       non-empty ones are handed to {@code parseDirective}.
   *   <li>The next line is split on tabs and used as the column-heading line.
   *   <li>Every following line is a data row. A new {@code ChromosomeSummary} is started whenever
   *       the (aliased) chromosome name differs from the current one; whole-genome data for the
   *       finished chromosome is flushed via {@code updateWholeGenome}.
   * </ol>
   *
   * <p>Side effects on {@code dataset}: track type and windowing function (inferred from the file
   * extension), data headings, longest-feature map, log-normalized flag (true if any negative
   * value was seen) and data min/max. Min and max both start at 0, so the reported range always
   * includes 0.
   *
   * @param dataset the dataset to populate while scanning
   * @return one summary per run of consecutive rows sharing a chromosome, in file order
   * @throws ParserException if the file ends before a column-heading line is found, a start/end
   *     column is not an integer, rows within a chromosome are not sorted by start position, or
   *     another error occurs after at least one line has been read
   * @throws RuntimeException if the file is not found, or an error occurs before any line has
   *     been read
   */
  public List<ChromosomeSummary> scan(IGVDataset dataset) {

    int estLineCount = ParsingUtils.estimateLineCount(dataResourceLocator.getPath());
    Map<String, Integer> longestFeatureMap = new HashMap<String, Integer>();

    float dataMin = 0;
    float dataMax = 0;

    InputStream is = null;
    AsciiLineReader reader = null;
    String nextLine = null;
    ChromosomeSummary chrSummary = null;
    List<ChromosomeSummary> chrSummaries = new ArrayList<ChromosomeSummary>();
    String[] headings = null;
    WholeGenomeData wgData = null;
    int nRows = 0;

    int headerRows = 0;
    int count = 0;

    // Infer if the data is logNormalized by looking for negative data values.
    // Assume it is not until proven otherwise
    boolean logNormalized = false;
    try {

      // With calls, every data column is followed by a call column that is skipped.
      int skipColumns = hasCalls ? 2 : 1;

      is = ParsingUtils.openInputStreamGZ(dataResourceLocator);
      reader = new AsciiLineReader(is);

      // Infer datatype from extension.  This can be overridden in the
      // comment section
      if (isCopyNumberFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.COPY_NUMBER);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else if (isLOHFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.LOH);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else {
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      }

      // Parse comments and directives, if any. The null check stops the loop at end of file
      // instead of failing with a NullPointerException on an empty or comment-only file.
      nextLine = reader.readLine();
      while (nextLine != null && (nextLine.startsWith("#") || (nextLine.trim().length() == 0))) {
        headerRows++;

        if (nextLine.length() > 0) {
          parseDirective(nextLine, dataset);
        }
        nextLine = reader.readLine();
      }

      if (nextLine == null) {
        throw new ParserException(
            "Unexpected end of file: no column heading line found.", headerRows);
      }

      if (chrColumn < 0) {
        setColumnDefaults();
      }

      // Parse column headings
      String[] data = nextLine.trim().split("\t");

      // Set last data column
      if (lastDataColumn < 0) {
        lastDataColumn = data.length - 1;
      }

      headings = getHeadings(data, skipColumns);

      dataset.setDataHeadings(headings);

      wgData = new WholeGenomeData(headings);

      // Status bar refresh interval, in data lines
      int updateCount = 5000;
      // Reader position recorded after the previous data row; 0 for the first chromosome.
      long lastPosition = 0;
      while ((nextLine = reader.readLine()) != null) {

        // Count every data line whether or not a UI is attached, so that the line numbers
        // reported in ParserExceptions do not depend on igv being non-null.
        count++;
        if (igv != null && count % updateCount == 0) {
          igv.setStatusBarMessage("Loaded: " + count + " / " + estLineCount + " (est)");
        }

        String[] tokens = Globals.tabPattern.split(nextLine, -1);
        int nTokens = tokens.length;
        if (nTokens > 0) {
          String thisChr = genome.getChromosomeAlias(tokens[chrColumn]);
          if (chrSummary == null || !thisChr.equals(chrSummary.getName())) {
            // Update whole genome and previous chromosome summary, unless this is
            // the first chromosome
            if (chrSummary != null) {
              updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
              chrSummary.setNDataPoints(nRows);
            }

            // Start the next chromosome
            chrSummary = new ChromosomeSummary(thisChr, lastPosition);
            chrSummaries.add(chrSummary);
            nRows = 0;
            wgData = new WholeGenomeData(headings);
          }
          lastPosition = reader.getPosition();

          int location = -1;
          try {
            location = ParsingUtils.parseInt(tokens[startColumn]) - startBase;

          } catch (NumberFormatException numberFormatException) {
            log.error("Column " + tokens[startColumn] + " is not a number");
            throw new ParserException(
                "Column "
                    + (startColumn + 1)
                    + " must contain an integer value."
                    + " Found: "
                    + tokens[startColumn],
                count + headerRows,
                nextLine);
          }

          int length = 1;
          if (hasEndLocations) {
            try {
              length = ParsingUtils.parseInt(tokens[endColumn].trim()) - location + 1;

            } catch (NumberFormatException numberFormatException) {
              log.error("Column " + tokens[endColumn] + " is not a number");
              throw new ParserException(
                  "Column "
                      + (endColumn + 1)
                      + " must contain an integer value."
                      + " Found: "
                      + tokens[endColumn],
                  count + headerRows,
                  nextLine);
            }
          }

          updateLongestFeature(longestFeatureMap, thisChr, length);

          // Within a chromosome, start positions must be non-decreasing.
          if (wgData.locations.size() > 0
              && wgData.locations.get(wgData.locations.size() - 1) > location) {
            throw new ParserException(
                "File is not sorted, .igv and .cn files must be sorted by start position."
                    + " Use igvtools (File > Run igvtools..) to sort the file.",
                count + headerRows);
          }

          wgData.locations.add(location);

          for (int idx = 0; idx < headings.length; idx++) {
            int i = firstDataColumn + idx * skipColumns;

            // Rows with fewer columns than headings are padded with NaN.
            float copyNo = i < tokens.length ? readFloat(tokens[i]) : Float.NaN;

            if (!Float.isNaN(copyNo)) {
              dataMin = Math.min(dataMin, copyNo);
              dataMax = Math.max(dataMax, copyNo);
            }
            if (copyNo < 0) {
              logNormalized = true;
            }
            String heading = headings[idx];
            wgData.data.get(heading).add(copyNo);
          }

          nRows++;
        }
      }

      dataset.setLongestFeatureMap(longestFeatureMap);

    } catch (ParserException pe) {
      throw pe;
    } catch (FileNotFoundException e) {
      log.error("File not found: " + dataResourceLocator);
      throw new RuntimeException(e);
    } catch (Exception e) {
      log.error("Exception when loading: " + dataResourceLocator.getPath(), e);
      // Attach line context only when at least one line has actually been read.
      if (nextLine != null && (count + headerRows != 0)) {
        throw new ParserException(e.getMessage(), e, count + headerRows, nextLine);
      } else {
        throw new RuntimeException(e);
      }
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("Error closing IGVDataset stream", e);
        }
      }
    }

    // Update last chromosome
    if (chrSummary != null) {
      updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
      chrSummary.setNDataPoints(nRows);
    }

    dataset.setLogNormalized(logNormalized);
    dataset.setDataMin(dataMin);
    dataset.setDataMax(dataMax);

    return chrSummaries;
  }