Exemplo n.º 1
0
  /**
   * Parses a GFF file and writes its contents to standard output as tab-separated lines.
   *
   * <p>The input file is {@code args[0]} when an argument is supplied; otherwise a hard-coded
   * legacy default path is used. Two groups of lines are written, in this order:
   *
   * <ol>
   *   <li>one line per entry of {@code parser.geneFeatures} (see {@link #formatGeneLine});
   *   <li>one line per entry of {@code parser.otherRecords} whose feature is not
   *       {@code "intron"} (see {@link #formatRecordLine}).
   * </ol>
   *
   * <p>Processing is best-effort: an entry that cannot be formatted is reported on standard
   * error and skipped, and the remaining entries are still written. An {@link IOException}
   * raised while setting up or parsing is reported on standard error and ends the run.
   *
   * @param args optional; {@code args[0]} is the path of the GFF file to convert
   */
  public static void main(String[] args) {
    // NOTE(review): the fallback is a developer-specific Windows path kept only for backward
    // compatibility; callers should pass the input file explicitly.
    File inputFile =
        args.length > 0
            ? new File(args[0])
            : new File("C:\\Documents and Settings\\tdanford\\Desktop\\sacCer1.gff");

    try {
      SGDGFFParser parser = new SGDGFFParser();
      BinCalculator bincalc = new BinCalculator();
      parser.parseInputFile(inputFile);

      for (String geneKey : parser.geneFeatures.keySet()) {
        try {
          System.out.println(formatGeneLine(parser.geneFeatures.get(geneKey)));
        } catch (Exception e) {
          // Deliberately broad: one malformed gene must not abort the whole export.
          System.err.println("Skipping gene feature " + geneKey + ": " + e);
          e.printStackTrace();
        }
      }

      for (GFFRecord record : parser.otherRecords) {
        if (record.getFeature().equals("intron")) {
          continue;
        }

        try {
          System.out.println(formatRecordLine(record, bincalc));
        } catch (Exception e) {
          // Deliberately broad: one malformed record must not abort the whole export.
          System.err.println(
              "Skipping " + record.getFeature() + " record on " + record.getSeqName() + ": " + e);
          e.printStackTrace();
        }
      }
    } catch (IOException e) {
      System.err.println("Could not process " + inputFile + ": " + e);
      e.printStackTrace();
    }
  }

  /**
   * Builds the output line for one gene.
   *
   * <p>Columns, each followed by a tab: id, chromosome (prefixed with {@code "chr"}), strand,
   * gene start, gene end, smallest CDS start, largest CDS end, number of CDS segments,
   * comma-separated sorted starts, comma-separated sorted ends. The line therefore ends with a
   * tab, leaving a final empty column (no protein ID is written). When the gene has no CDS
   * records, the gene's own start and end are used as the single segment.
   *
   * @param gf the gene together with its CDS records
   * @return the formatted line, without a line terminator
   * @throws Exception if any of the project helpers used here fails; declared broadly because
   *     their checked exceptions are not visible from this method, and the caller skips the gene
   */
  private static String formatGeneLine(GeneFeatures gf) throws Exception {
    List<Integer> starts = new ArrayList<Integer>();
    List<Integer> ends = new ArrayList<Integer>();
    for (GFFRecord cds : gf.cds) {
      starts.add(cds.getStart());
      ends.add(cds.getEnd());
    }
    if (starts.isEmpty()) {
      starts.add(gf.gene.getStart());
      ends.add(gf.gene.getEnd());
    }

    // Starts and ends are sorted independently of each other.
    Collections.sort(starts);
    Collections.sort(ends);

    String strand = strandSymbol(gf.gene);
    /* dumb that we have to put this back on, but that's the UCSC standard format */
    String chrom = "chr" + Genome.fixYeastChrom(gf.gene.getSeqName());

    StringBuilder line = new StringBuilder();
    line.append(gf.id).append('\t');
    line.append(chrom).append('\t'); // do we need to get rid of roman numerals here ?
    line.append(strand).append('\t');
    line.append(gf.gene.getStart()).append('\t');
    line.append(gf.gene.getEnd()).append('\t');
    line.append(starts.get(0)).append('\t');
    line.append(ends.get(ends.size() - 1)).append('\t');
    line.append(starts.size()).append('\t');
    appendCommaSeparated(line, starts);
    line.append('\t');
    appendCommaSeparated(line, ends);
    line.append('\t');
    // no protein ID
    return line.toString();
  }

  /**
   * Builds the output line for one non-gene record.
   *
   * <p>Tab-separated columns: bin, chromosome (prefixed with {@code "chr"}), start, end, name,
   * score, strand, feature. The name is the first {@code "Name"} attribute value if there is
   * one, otherwise the first {@code "ID"} attribute value, otherwise the empty string.
   *
   * @param record the record to format
   * @param bincalc calculator used to derive the bin column from the record's start and end
   * @return the formatted line, without a line terminator
   * @throws Exception if any of the project helpers used here fails; declared broadly because
   *     their checked exceptions are not visible from this method, and the caller skips the
   *     record
   */
  private static String formatRecordLine(GFFRecord record, BinCalculator bincalc)
      throws Exception {
    StringBuilder line = new StringBuilder();
    line.append(bincalc.getBinFromRange(record.getStart(), record.getEnd())).append('\t');

    String strand = strandSymbol(record);
    /* dumb that we have to put this back on, but that's the UCSC standard format */
    String chrom = "chr" + Genome.fixYeastChrom(record.getSeqName());

    Map<String, List<String>> attrs = decodeAttrMap(record);
    String name = "";
    if (attrs != null) {
      if (attrs.containsKey("Name") && !attrs.get("Name").isEmpty()) {
        name = attrs.get("Name").get(0);
      } else if (attrs.containsKey("ID") && !attrs.get("ID").isEmpty()) {
        name = attrs.get("ID").get(0);
      }
    }

    line.append(chrom).append('\t');
    line.append(record.getStart()).append('\t').append(record.getEnd()).append('\t');
    line.append(name).append('\t');
    line.append(record.getScore()).append('\t');
    line.append(strand).append('\t');
    line.append(record.getFeature());
    return line.toString();
  }

  /**
   * Maps a record's strand to its one-character column value.
   *
   * @param record the record whose strand is inspected
   * @return {@code "-"} for {@code StrandedFeature.NEGATIVE}, {@code "+"} for
   *     {@code StrandedFeature.POSITIVE}, and the empty string for anything else
   */
  private static String strandSymbol(GFFRecord record) {
    if (record.getStrand() == StrandedFeature.NEGATIVE) {
      return "-";
    }
    if (record.getStrand() == StrandedFeature.POSITIVE) {
      return "+";
    }
    return "";
  }

  /**
   * Appends the values to {@code out} separated by single commas, with no leading or trailing
   * comma. Appends nothing when the list is empty.
   *
   * @param out the builder to append to
   * @param values the values to write, in list order
   */
  private static void appendCommaSeparated(StringBuilder out, List<Integer> values) {
    for (int i = 0; i < values.size(); i++) {
      if (i > 0) {
        out.append(',');
      }
      out.append(values.get(i));
    }
  }