Java ParsingUtils.estimateLineCount 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: org.broad.igv.util

클래스/타입: ParsingUtils

메소드/함수: estimateLineCount

hotexamples.com에서의 예제들: 2

Java ParsingUtils.estimateLineCount - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 org.broad.igv.util.ParsingUtils.estimateLineCount에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

parseTrackLine(5)

parseInt(3)

openBufferedReader(3)

estimateLineCount(2)

openInputStream(2)

openInputStreamGZ(2)

getContentLength(1)

openAsciiReader(1)

pathExists(1)

예제 #1

파일 보기

파일: IgvTools.java 프로젝트: IntersectAustralia/IGV

  /**
   * Estimates the number of lines in the given input.
   *
   * <p>If {@code file} is a directory, or its name ends with ".list", the input is expanded via
   * {@code getFilesFromDirOrList} and the per-file estimates of every non-directory entry are
   * summed. Otherwise the estimate for the single file is returned directly. All estimates are
   * delegated to {@code ParsingUtils.estimateLineCount}.
   *
   * @param file a file, a directory, or a ".list" file
   * @return the estimated total number of lines across all files considered
   */
  private int estimateLineCount(File file) {

    boolean expandsToManyFiles = file.isDirectory() || file.getName().endsWith(".list");
    if (!expandsToManyFiles) {
      // Plain file: a single estimate is all that is needed.
      return ParsingUtils.estimateLineCount(file.getAbsolutePath());
    }

    int total = 0;
    List<File> members = getFilesFromDirOrList(file);
    for (File member : members) {
      // Directories among the expanded entries are skipped, not descended into.
      if (member.isDirectory()) {
        continue;
      }
      total += ParsingUtils.estimateLineCount(member.getAbsolutePath());
    }
    return total;
  }

예제 #2

파일 보기

파일: IGVDatasetParser.java 프로젝트: jmuilu/IGV

  /**
   * Scans the data file for chromosome breaks and collects whole-genome summary data.
   *
   * <p>The file is read in a single pass:
   *
   * <ol>
   *   <li>Leading lines that start with "#" or are blank are counted as header rows; non-empty
   *       ones are passed to {@code parseDirective}.
   *   <li>The first remaining line is split on tabs and interpreted as the column headings.
   *   <li>Every following line is treated as a data row. A new {@link ChromosomeSummary} is
   *       started whenever the (aliased) chromosome name differs from that of the current
   *       summary, so one summary is produced per run of consecutive rows with the same
   *       chromosome.
   * </ol>
   *
   * <p>As side effects, the dataset's track type / windowing function, data headings, longest
   * feature map, log-normalized flag and data min/max are set. Note that the min/max start at 0,
   * so the reported range always includes 0.
   *
   * @param dataset the dataset to populate with headings and summary statistics
   * @return the chromosome summaries, in the order the chromosomes were encountered in the file
   * @throws ParserException if the file has no column-heading line, a start/end column is not an
   *     integer, the rows of a chromosome are not sorted by start position, or any other error
   *     occurs after at least one line has been consumed
   * @throws RuntimeException if the file cannot be found, or an error occurs before any line has
   *     been consumed
   */
  public List<ChromosomeSummary> scan(IGVDataset dataset) {

    int estLineCount = ParsingUtils.estimateLineCount(dataResourceLocator.getPath());
    Map<String, Integer> longestFeatureMap = new HashMap<String, Integer>();

    float dataMin = 0;
    float dataMax = 0;

    InputStream is = null;
    AsciiLineReader reader = null;
    String nextLine = null;
    ChromosomeSummary chrSummary = null;
    List<ChromosomeSummary> chrSummaries = new ArrayList<ChromosomeSummary>();
    String[] headings = null;
    WholeGenomeData wgData = null;
    int nRows = 0;

    int headerRows = 0;
    int count = 0;

    boolean logNormalized;
    try {

      // Each sample occupies two columns when call columns are present, otherwise one.
      int skipColumns = hasCalls ? 2 : 1;

      is = ParsingUtils.openInputStreamGZ(dataResourceLocator);
      reader = new AsciiLineReader(is);

      // Infer datatype from extension.  This can be overridden in the
      // comment section
      if (isCopyNumberFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.COPY_NUMBER);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else if (isLOHFileExt(dataResourceLocator.getPath())) {
        dataset.setTrackType(TrackType.LOH);
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      } else {
        dataset.getTrackProperties().setWindowingFunction(WindowFunction.mean);
      }

      // Parse comments and directives, if any. The null check guards against files that are
      // empty or end before a heading line is reached.
      nextLine = reader.readLine();
      while (nextLine != null && (nextLine.startsWith("#") || (nextLine.trim().length() == 0))) {
        headerRows++;

        if (nextLine.length() > 0) {
          parseDirective(nextLine, dataset);
        }
        nextLine = reader.readLine();
      }

      if (nextLine == null) {
        // Fail with a descriptive parse error instead of a NullPointerException further down.
        throw new ParserException(
            "No column headings found: file is empty or contains only comment lines.",
            headerRows);
      }

      if (chrColumn < 0) {
        setColumnDefaults();
      }

      // Parse column headings
      String[] data = nextLine.trim().split("\t");

      // Set last data column
      if (lastDataColumn < 0) {
        lastDataColumn = data.length - 1;
      }

      headings = getHeadings(data, skipColumns);

      dataset.setDataHeadings(headings);

      // Infer if the data is logNormalized by looking for negative data values.
      // Assume it is not until proven otherwise
      logNormalized = false;

      wgData = new WholeGenomeData(headings);

      // Number of rows between status bar updates
      int updateCount = 5000;
      // Reader position recorded after the previous row was read (0 before the first row);
      // passed to the ChromosomeSummary of the chromosome that starts on the current row.
      long lastPosition = 0;
      while ((nextLine = reader.readLine()) != null) {

        // Count every data row regardless of whether a UI is attached: the count is also used
        // for the line numbers reported in ParserExceptions below.
        count++;
        if (igv != null && count % updateCount == 0) {
          igv.setStatusBarMessage("Loaded: " + count + " / " + estLineCount + " (est)");
        }

        String[] tokens = Globals.tabPattern.split(nextLine, -1);
        int nTokens = tokens.length;
        if (nTokens > 0) {
          String thisChr = genome.getChromosomeAlias(tokens[chrColumn]);
          if (chrSummary == null || !thisChr.equals(chrSummary.getName())) {
            // Update whole genome and previous chromosome summary, unless this is
            // the first chromosome
            if (chrSummary != null) {
              updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
              chrSummary.setNDataPoints(nRows);
            }

            // Start the next chromosome
            chrSummary = new ChromosomeSummary(thisChr, lastPosition);
            chrSummaries.add(chrSummary);
            nRows = 0;
            wgData = new WholeGenomeData(headings);
          }
          lastPosition = reader.getPosition();

          int location = -1;
          try {
            location = ParsingUtils.parseInt(tokens[startColumn]) - startBase;

          } catch (NumberFormatException numberFormatException) {
            log.error("Column " + tokens[startColumn] + " is not a number");
            throw new ParserException(
                "Column "
                    + (startColumn + 1)
                    + " must contain an integer value."
                    + " Found: "
                    + tokens[startColumn],
                count + headerRows,
                nextLine);
          }

          // Feature length defaults to 1 when the file carries no end locations.
          int length = 1;
          if (hasEndLocations) {
            try {
              length = ParsingUtils.parseInt(tokens[endColumn].trim()) - location + 1;

            } catch (NumberFormatException numberFormatException) {
              log.error("Column " + tokens[endColumn] + " is not a number");
              throw new ParserException(
                  "Column "
                      + (endColumn + 1)
                      + " must contain an integer value."
                      + " Found: "
                      + tokens[endColumn],
                  count + headerRows,
                  nextLine);
            }
          }

          updateLongestFeature(longestFeatureMap, thisChr, length);

          // wgData is reset at each chromosome change, so this only compares against the
          // previous row of the same chromosome.
          if (wgData.locations.size() > 0
              && wgData.locations.get(wgData.locations.size() - 1) > location) {
            throw new ParserException(
                "File is not sorted, .igv and .cn files must be sorted by start position."
                    + " Use igvtools (File > Run igvtools..) to sort the file.",
                count + headerRows);
          }

          wgData.locations.add(location);

          for (int idx = 0; idx < headings.length; idx++) {
            int i = firstDataColumn + idx * skipColumns;

            // Rows with fewer columns than headings are padded with NaN.
            float copyNo = i < tokens.length ? readFloat(tokens[i]) : Float.NaN;

            if (!Float.isNaN(copyNo)) {
              dataMin = Math.min(dataMin, copyNo);
              dataMax = Math.max(dataMax, copyNo);
            }
            if (copyNo < 0) {
              logNormalized = true;
            }
            String heading = headings[idx];
            wgData.data.get(heading).add(copyNo);
          }

          nRows++;
        }
      }

      dataset.setLongestFeatureMap(longestFeatureMap);

    } catch (ParserException pe) {
      // Already carries line information; pass through untouched.
      throw pe;
    } catch (FileNotFoundException e) {
      log.error("File not found: " + dataResourceLocator);
      throw new RuntimeException(e);
    } catch (Exception e) {
      log.error("Exception when loading: " + dataResourceLocator.getPath(), e);
      // Attach line context when at least one line has been consumed.
      if (nextLine != null && (count + headerRows != 0)) {
        throw new ParserException(e.getMessage(), e, count + headerRows, nextLine);
      } else {
        throw new RuntimeException(e);
      }
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("Error closing IGVDataset stream", e);
        }
      }
    }

    // Update last chromosome
    if (chrSummary != null) {
      updateWholeGenome(chrSummary.getName(), dataset, headings, wgData);
      chrSummary.setNDataPoints(nRows);
    }

    dataset.setLogNormalized(logNormalized);
    dataset.setDataMin(dataMin);
    dataset.setDataMax(dataMax);

    return chrSummaries;
  }