/**
   * Works out the location to export for a feature, joining the parts of a split UTR.
   *
   * <p>Features whose key is neither {@code 5'UTR} nor {@code 3'UTR} keep their own location and
   * {@code qualifiers} is not touched. For a UTR, the sibling UTRs of the same transcript are
   * looked up on the Chado gene model; when there is more than one, their total ranges are
   * combined into a single location. Only the UTR whose total range starts at the lowest start of
   * all siblings yields the joined location; for the others {@code null} is returned. The UTR
   * also receives a {@code locus_tag} qualifier (the transcript name, or the gene's unique name
   * when the gene has exactly one transcript) and loses its {@code ID} qualifier.
   *
   * @param feature the feature being converted; cast to {@code GFFStreamFeature} for UTR keys
   * @param key the (already mapped) key of the feature
   * @param qualifiers the qualifiers to export; modified in place for UTR keys
   * @return the location to use, or {@code null} if this UTR is not the lowest-starting part of a
   *     multi-part UTR
   */
  private Location joinUtrs(Feature feature, Key key, QualifierVector qualifiers) {
    final Location ownLocation = feature.getLocation();

    final boolean fivePrime = key.getKeyString().equals("5'UTR");
    if (!fivePrime && !key.getKeyString().equals("3'UTR")) {
      return ownLocation;
    }

    final ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene();
    String tagValue = chadoGene.getTranscriptFromName(GeneUtils.getUniqueName(feature));
    final List<Feature> siblingUtrs =
        fivePrime
            ? chadoGene.get5UtrOfTranscript(tagValue)
            : chadoGene.get3UtrOfTranscript(tagValue);

    Location joined = ownLocation;
    if (siblingUtrs.size() > 1) {
      int lowestStart = Integer.MAX_VALUE;
      final RangeVector parts = new RangeVector();
      for (Feature sibling : siblingUtrs) {
        final Range span = sibling.getLocation().getTotalRange();
        lowestStart = Math.min(lowestStart, span.getStart());
        parts.add(span);
      }

      // only the part that starts first carries the joined location
      if (feature.getLocation().getTotalRange().getStart() != lowestStart) {
        return null;
      }

      joined = new Location(parts, feature.getLocation().isComplement());
    }

    // a gene with a single transcript is tagged with the gene name instead
    if (chadoGene.getTranscripts().size() == 1) {
      tagValue = chadoGene.getGeneUniqueName();
    }
    qualifiers.setQualifier(new Qualifier("locus_tag", tagValue));
    qualifiers.removeQualifierByName("ID");
    return joined;
  }
  /**
   * Adds a qualifier to a list of qualifiers, merging it into a same-named qualifier if present.
   *
   * <p>When {@code qualifiers} holds no qualifier with the new qualifier's name, {@code
   * newQualifier} itself is appended. Otherwise each value of {@code newQualifier} that the
   * existing qualifier does not already contain is added to the existing qualifier; a {@code
   * newQualifier} without values leaves the list unchanged.
   *
   * @param qualifiers the list to update in place
   * @param newQualifier the qualifier to add or merge; must not be {@code null}
   */
  private void addNewQualifier(QualifierVector qualifiers, Qualifier newQualifier) {
    final Qualifier existing = qualifiers.getQualifierByName(newQualifier.getName());
    if (existing == null) {
      qualifiers.addElement(newQualifier);
      return;
    }

    final StringVector newValues = newQualifier.getValues();
    if (newValues == null) {
      return; // nothing to merge
    }

    // getValues() can be null for a qualifier without values, so the existing qualifier's values
    // get the same null check as the new qualifier's instead of being dereferenced blindly
    final StringVector values = existing.getValues();
    for (int j = 0; j < newValues.size(); j++) {
      final String newValue = (String) newValues.get(j);
      if (values == null || !values.contains(newValue)) {
        existing.addValue(newValue);
      }
    }
  }
  /**
   * Maps a database (SO) key to its EMBL key using {@code DATABASE_MAP_KEYS}.
   *
   * <p>The first row whose first column equals the key string wins. If that row carries an extra
   * qualifier in its third column, the qualifier is merged into {@code qualifiers}; when the
   * entry information does not regard it as valid for the mapped key, a matching {@code
   * QualifierInfo} is registered first ({@code NO_VALUE} if the qualifier has no values, {@code
   * QUOTED_TEXT} otherwise).
   *
   * @param key the key to translate
   * @param qualifiers receives the extra qualifier of the matching row, if any
   * @return the mapped key, or {@code key} itself when no row matches
   */
  private Key map(final Key key, final QualifierVector qualifiers) {
    if (DATABASE_MAP_KEYS == null) {
      initDatabaseMappings();
    }

    for (int row = 0; row < DATABASE_MAP_KEYS.length; row++) {
      if (!key.getKeyString().equals(DATABASE_MAP_KEYS[row][0])) {
        continue;
      }

      final Key emblKey = new Key((String) DATABASE_MAP_KEYS[row][1]);
      if (DATABASE_MAP_KEYS[row][2] == null) {
        return emblKey;
      }

      final Qualifier extra = (Qualifier) DATABASE_MAP_KEYS[row][2];
      final boolean alreadyValid =
          getEntryInformation().isValidQualifier(emblKey, extra.getName());
      if (!alreadyValid) {
        try {
          final boolean valueless = extra.getValues() == null || extra.getValues().size() == 0;
          final int valueType = valueless ? QualifierInfo.NO_VALUE : QualifierInfo.QUOTED_TEXT;
          getEntryInformation()
              .addQualifierInfo(new QualifierInfo(extra.getName(), valueType, null, null, false));
        } catch (QualifierInfoException ignored) {
          // a failed registration is ignored; the qualifier is still added below
        }
      }

      qualifiers.addQualifierValues(extra);
      return emblKey;
    }

    return key;
  }
  // Example no. 4 (0)
  /**
   * Parses EMBL feature qualifiers from a stream into a QualifierVector. The stream is expected
   * to hold qualifiers in this form:
   *
   * <PRE>  /name1=value1/name2="value2"/name3=[value3]  </PRE>
   *
   * A name that is not followed by {@code '='} is read as a qualifier without a value.
   *
   * @param in_stream the qualifiers are read from this stream
   * @param entry_information passed to {@code StreamQualifier.makeStreamQualifier} when building
   *     a qualifier that has a value
   * @exception IOException thrown if there is a problem reading the qualifiers, such as end of
   *     file.
   * @exception QualifierParseException Thrown if the format of the value String is not appropriate
   *     for a Qualifier with the given name. Each qualifier has a specific format for the value
   *     part which depends on the name, for example the value part of /codon_start qualifier must
   *     be a number: 1, 2 or 3.
   * @return A Vector containing one Qualifier object for each name/value pair read from the stream.
   */
  public static QualifierVector readQualifiers(
      final Reader in_stream, final EntryInformation entry_information)
      throws QualifierParseException, IOException {

    final QualifierVector parsed = new QualifierVector();
    final BufferedReader reader = new BufferedReader(in_stream);

    // keep going until no further qualifier name can be read
    for (String name = StreamQualifier.readName(reader);
        name != null;
        name = StreamQualifier.readName(reader)) {
      // remember this position so that a character other than '=' can be pushed back
      reader.mark(1);
      final int peeked = reader.read();

      String value = null;
      if (peeked == '=') {
        value = StreamQualifier.readValue(reader);
      } else if (peeked != -1) {
        // no value for this qualifier: un-read the character just consumed
        reader.reset();
      }

      final Qualifier parsedQualifier =
          value == null
              ? new Qualifier(name)
              : StreamQualifier.makeStreamQualifier(name, value, entry_information);

      parsed.addQualifierValues(parsedQualifier);
    }

    return parsed;
  }
  /**
   * Renames every qualifier called {@code oldName} to {@code newName}, in place.
   *
   * <p>The list is rebuilt in a scratch vector: qualifiers with a different name are carried over
   * unchanged, while each qualifier named {@code oldName} is replaced by a new qualifier with the
   * same values that is added through {@code addQualifierValues}. The original list is then
   * emptied and refilled from the scratch vector.
   *
   * @param qualifiers the list to modify
   * @param oldName the qualifier name to replace
   * @param newName the name to use instead
   */
  private void changeQualifierName(
      QualifierVector qualifiers, final String oldName, final String newName) {
    final QualifierVector rebuilt = new QualifierVector();

    for (int pos = 0; pos < qualifiers.size(); pos++) {
      final Qualifier current = (Qualifier) qualifiers.elementAt(pos);
      if (current.getName().equals(oldName)) {
        rebuilt.addQualifierValues(new Qualifier(newName, current.getValues()));
      } else {
        rebuilt.addElement(current);
      }
    }

    qualifiers.removeAllElements();
    for (int pos = 0; pos < rebuilt.size(); pos++) {
      qualifiers.addElement(rebuilt.elementAt(pos));
    }
  }
 /**
  * Merges a transcript's qualifiers into {@code qualifiers} and, for genes with more than one
  * transcript, adds the transcript's {@code ID} plus an {@code other_transcript} value for every
  * other transcript of the gene.
  *
  * <p>The transcript's own {@code ID} is skipped by the merge (it is combined as a non-gene
  * feature) and is only added back explicitly in the multi-transcript case. A transcript without
  * an {@code ID} qualifier is tolerated: the ID is simply not added.
  *
  * @param qualifiers the qualifiers being built for export; modified in place
  * @param transcript the transcript whose qualifiers are merged
  * @param ntranscripts ignored on input; the count is always re-read from {@code chadoGene}
  * @param chadoGene the gene model that owns the transcript
  * @return the number of transcripts of {@code chadoGene}
  */
 private int handleTranscripts(
     QualifierVector qualifiers,
     Feature transcript,
     int ntranscripts,
     ChadoCanonicalGene chadoGene) {
   final QualifierVector transcriptQualifiers = transcript.getQualifiers().copy();
   combineQualifiers(qualifiers, transcriptQualifiers, false);

   final List<Feature> transcripts = chadoGene.getTranscripts();
   ntranscripts = transcripts.size();
   if (ntranscripts <= 1) {
     return ntranscripts;
   }

   // getQualifierByName returns null when the qualifier is absent; addNewQualifier would
   // dereference it, so check here instead of relying on a caller catching the NPE
   final Qualifier idQualifier = transcriptQualifiers.getQualifierByName("ID");
   if (idQualifier != null) {
     addNewQualifier(qualifiers, idQualifier);
   }

   // the name of this transcript does not change while scanning its siblings
   final String ownName = GeneUtils.getUniqueName(transcript);
   for (int i = 0; i < ntranscripts; i++) {
     final String otherName = GeneUtils.getUniqueName(transcripts.get(i));
     if (!otherName.equals(ownName)) {
       addNewQualifier(qualifiers, new Qualifier("other_transcript", otherName));
     }
   }
   return ntranscripts;
 }
  /**
   * Replaces the {@code stop_codon_redefined_as_selenocysteine} SO qualifier with the EMBL {@code
   * transl_except} qualifier.
   *
   * <p>Does nothing unless the feature's key is {@code DatabaseDocument.EXONMODEL}. Otherwise the
   * SO qualifier is removed, the first stop codon in the feature's translation is located, its
   * base offset is mapped through the feature's segments to a sequence position, and a {@code
   * transl_except} qualifier of the form {@code (pos:START..END,aa:Sec)} (wrapped in {@code
   * complement(...)} for reverse-strand features) is added.
   *
   * @param qualifiers the qualifiers being built for export; modified in place
   * @param feature the stream feature; its user data is cast to the Artemis {@code Feature}
   */
  private void handleSelenocysteine(QualifierVector qualifiers, Feature feature) {
    if (!feature.getKey().getKeyString().equals(DatabaseDocument.EXONMODEL)) return;
    qualifiers.removeQualifierByName("stop_codon_redefined_as_selenocysteine");

    final uk.ac.sanger.artemis.Feature f = (uk.ac.sanger.artemis.Feature) feature.getUserData();

    // base offset of the first stop codon within the spliced feature
    // NOTE(review): stays 0 when the translation contains no stop codon, in which case the
    // qualifier ends up pointing at the first codon - confirm this is intended
    int translatedBasePosition = 0;
    final String aa = f.getTranslation().toString();
    for (int i = 0; i < aa.length(); i++) {
      if (AminoAcidSequence.isStopCodon(aa.charAt(i))) {
        translatedBasePosition = i * 3;
        break;
      }
    }

    final FeatureSegmentVector segments = f.getSegments();
    int nbases = 0; // bases covered by the segments before the current one
    int sequenceloc = 0;
    for (int i = 0; i < segments.size(); i++) {
      final int seglen = segments.elementAt(i).getBases().length();
      if (nbases + seglen > translatedBasePosition) {
        sequenceloc =
            segments.elementAt(i).getStart().getPosition() + (translatedBasePosition - nbases);

        if (!f.isForwardFeature()) {
          final Bases bases = f.getStrand().getBases();
          sequenceloc = bases.getComplementPosition(sequenceloc);
        }

        // Stop at the first segment containing the codon: the condition above also holds for
        // every later segment, which would overwrite the position with a wrong one.
        break;
      }
      nbases += seglen;
    }

    final String pos;
    if (f.isForwardFeature()) pos = sequenceloc + ".." + (sequenceloc + 2);
    else pos = "complement(" + (sequenceloc - 2) + ".." + sequenceloc + ")";

    qualifiers.add(new Qualifier("transl_except", "(pos:" + pos + ",aa:Sec)"));
  }
  /**
   * Merges {@code newQualifiers} into {@code qualifiers}, normalising some of them on the way.
   *
   * <ul>
   *   <li>{@code ID} is skipped unless {@code isGene} is true.
   *   <li>{@code GO} values are passed through {@code GoBox.getEvidenceCodeGoTextFromText}.
   *   <li>{@code product} values lose a leading {@code term=}, anything from {@code ;db_xref=} or
   *       {@code ;evidence=} onwards, and a trailing {@code ;}.
   *   <li>{@code orthologous_to} / {@code paralogous_to}: empty values are dropped, and for each
   *       {@code word:link=word} match in a value a separate qualifier is appended with the
   *       {@code link=} text removed and everything from the value's first {@code ;} appended.
   * </ul>
   *
   * All other qualifiers, and the rewritten {@code GO} / {@code product} ones, are merged with
   * {@code addNewQualifier}.
   *
   * @param qualifiers the target list; modified in place
   * @param newQualifiers the qualifiers to merge in
   * @param isGene true when {@code newQualifiers} come from the gene feature
   */
  private void combineQualifiers(
      final QualifierVector qualifiers, final QualifierVector newQualifiers, final boolean isGene) {
    for (int i = 0; i < newQualifiers.size(); i++) {
      final Qualifier incoming = (Qualifier) newQualifiers.get(i);
      final String name = incoming.getName();

      if (!isGene && name.equals("ID")) {
        continue;
      }

      if (name.equals("orthologous_to") || name.equals("paralogous_to")) {
        final StringVector allValues = incoming.getValues();
        final StringVector nonEmpty = new StringVector();
        for (int j = 0; j < allValues.size(); j++) {
          if (!allValues.get(j).equals("")) nonEmpty.add(allValues.get(j));
        }
        if (nonEmpty.size() == 0) {
          continue;
        }

        final String relation = name.equals("orthologous_to") ? "orthologous_to" : "paralogous_to";
        final Pattern linkPattern = Pattern.compile("\\w+:link=\\w+");
        for (int j = 0; j < nonEmpty.size(); j++) {
          final String raw = (String) nonEmpty.get(j);
          final int semicolon = raw.indexOf(';');
          final String suffix = semicolon > -1 ? raw.substring(semicolon) : "";

          final Matcher links = linkPattern.matcher(raw);
          while (links.find()) {
            // drop the "link=" text from the matched "xxx:link=yyy"
            final int linkAt = raw.indexOf("link=", links.start());
            final String flattened =
                raw.substring(links.start(), linkAt)
                    + raw.substring(linkAt + 5, links.end())
                    + suffix;
            qualifiers.addElement(new Qualifier(relation, flattened));
          }
        }
        continue;
      }

      Qualifier toMerge = incoming;
      if (name.equals("GO")) {
        // convert GO evidence to codes (e.g. ND=No biological Data available)
        final StringVector source = incoming.getValues();
        final StringVector converted = new StringVector();
        for (int j = 0; j < source.size(); j++) {
          converted.add(GoBox.getEvidenceCodeGoTextFromText((String) source.get(j)));
        }
        toMerge = new Qualifier("GO", converted);
      } else if (name.equals("product")) {
        final StringVector source = incoming.getValues();
        final StringVector cleaned = new StringVector();
        for (int j = 0; j < source.size(); j++) {
          String text = (String) source.get(j);

          int cut = text.indexOf(";db_xref=");
          if (cut > -1) text = text.substring(0, cut);

          cut = text.indexOf(";evidence=");
          if (cut > -1) text = text.substring(0, cut);

          if (text.startsWith("term=")) text = text.substring(5);

          if (text.endsWith(";")) text = text.substring(0, text.length() - 1);

          cleaned.add(text);
        }
        toMerge = new Qualifier("product", cleaned);
      }

      addNewQualifier(qualifiers, toMerge);
    }
  }
  /**
   * Converts a GFF feature into an EMBL or GenBank stream feature.
   *
   * <p>The conversion works on a copy of the feature's qualifiers. The key is first translated
   * with {@code map}. If the entry information accepts the first entry of {@code
   * DATABASE_QUALIFIERS_TO_REMOVE}, the feature is returned straight away with the mapped key,
   * its own location and the copied qualifiers. Otherwise the gene model is flattened: UTR parts
   * are joined, exon models collect the qualifiers of their protein, transcript and gene,
   * non-coding transcripts collect the qualifiers of their gene, database qualifier names are
   * renamed or removed, and the selenocysteine qualifier is converted.
   *
   * @param feature the GFF feature to convert
   * @return an {@code EmblStreamFeature} if this entry is an {@code EmblDocumentEntry}, otherwise
   *     a {@code GenbankStreamFeature}; {@code null} if the feature is skipped (obsolete, a UTR
   *     part for which {@code joinUtrs} returns null, an exon model of a non-coding transcript,
   *     or a polypeptide, gene, centromere, transcript or mRNA key); an empty stream feature if
   *     an {@code InvalidRelationException} is thrown in the final conversion step
   */
  private Object mapGffToNativeFeature(final Feature feature) {
    if (DATABASE_MAP_KEYS == null) initDatabaseMappings();

    Key key = feature.getKey();
    // all changes below are made to a copy, not to the feature's own qualifiers
    QualifierVector qualifiers = feature.getQualifiers().copy();

    // ignore if obsolete
    if (IGNORE_OBSOLETE_FEATURES) {
      Qualifier isObsoleteQualifier = qualifiers.getQualifierByName("isObsolete");
      if (isObsoleteQualifier != null) {
        String value = (String) isObsoleteQualifier.getValues().get(0);
        if (Boolean.parseBoolean(value)) return null;
      }
    }

    // translate the key; this may also add a qualifier to the copy
    key = map(key, qualifiers);
    // when the entry accepts the first "to remove" qualifier, return the feature without
    // flattening the gene model or renaming/removing qualifiers
    if (getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[0])) {
      try {
        if (this instanceof EmblDocumentEntry)
          return new EmblStreamFeature(key, feature.getLocation(), qualifiers);
        else return new GenbankStreamFeature(key, feature.getLocation(), qualifiers);
      } catch (InvalidRelationException e) {
        // the failure is only reported; execution falls through to the conversion below
        e.printStackTrace();
      }
    }

    // null means this UTR part is covered by another part's joined location
    Location location = joinUtrs(feature, key, qualifiers);
    if (location == null) return null;
    // flatten gene model - combining qualifiers
    if (key.getKeyString().equals(DatabaseDocument.EXONMODEL)) {
      ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene();

      final String name = GeneUtils.getUniqueName(feature);
      final String transcriptName = chadoGene.getTranscriptFromName(name);

      StringVector sv = new StringVector();
      sv.add(transcriptName);
      final Feature transcript = chadoGene.containsTranscript(sv);

      // exon models that belong to a non-coding transcript are not exported
      if (transcript != null && GeneUtils.isNonCodingTranscripts(transcript.getKey())) return null;

      qualifiers.removeQualifierByName("ID");
      int ntranscripts = 0;
      // add transcript & protein qualifiers to CDS
      try {
        final Feature protein = chadoGene.getProteinOfTranscript(transcriptName);
        if (protein != null) combineQualifiers(qualifiers, protein.getQualifiers().copy(), false);

        if (transcript != null)
          ntranscripts = handleTranscripts(qualifiers, transcript, ntranscripts, chadoGene);
      } catch (NullPointerException npe) {
        // any NullPointerException from the merge is silently dropped; ntranscripts keeps the
        // value it had when the exception was thrown
      }

      // add gene qualifiers to CDS
      QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy();

      // multiple transcripts
      // the gene ID is exported as shared_id and removed so the merge below does not add it as ID
      if (ntranscripts > 1 && geneQualifiers.getQualifierByName("ID") != null) {
        Qualifier newIDQualifier =
            new Qualifier(
                "shared_id", (String) geneQualifiers.getQualifierByName("ID").getValues().get(0));
        addNewQualifier(qualifiers, newIDQualifier);
        geneQualifiers.removeQualifierByName("ID");
      }
      combineQualifiers(qualifiers, geneQualifiers, true);
    } else if (GeneUtils.isNonCodingTranscripts(key)) {
      // use gene id for non-coding transcripts
      ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene();
      if (chadoGene != null) {
        qualifiers.removeQualifierByName("ID");
        QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy();
        combineQualifiers(qualifiers, geneQualifiers, true);
      }
    }

    try {
      // rename database qualifiers that the entry does not accept under their original name
      for (int i = 0; i < DATABASE_QUALIFIERS_TO_MAP.length; i++) {
        if (!getEntryInformation().isValidQualifier(DATABASE_QUALIFIERS_TO_MAP[i][0])) {
          changeQualifierName(
              qualifiers, DATABASE_QUALIFIERS_TO_MAP[i][0], DATABASE_QUALIFIERS_TO_MAP[i][1]);
        }
      }

      if (qualifiers.getQualifierByName("stop_codon_redefined_as_selenocysteine") != null) {
        handleSelenocysteine(qualifiers, feature);
      }

      // drop the database qualifiers that the entry does not accept
      for (int i = 0; i < DATABASE_QUALIFIERS_TO_REMOVE.length; i++) {
        if (!getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[i]))
          qualifiers.removeQualifierByName((String) DATABASE_QUALIFIERS_TO_REMOVE[i]);
      }

      // features with these keys are not exported
      if (key.getKeyString().equals("polypeptide")) return null;
      else if (key.getKeyString().equals("gene")) return null;
      else if (key.getKeyString().equals("centromere")) return null;
      else if (key.getKeyString().equals("transcript") || key.getKeyString().equals("mRNA"))
        return null;

      if (this instanceof EmblDocumentEntry)
        return new EmblStreamFeature(key, location, qualifiers);
      else return new GenbankStreamFeature(key, location, qualifiers);
    } catch (InvalidRelationException e) {
      e.printStackTrace();
      // fall back to an empty feature
      // NOTE(review): the type is chosen from the feature's class here, whereas the branches
      // above test "this instanceof EmblDocumentEntry" - confirm the difference is intended
      if (feature instanceof DatabaseStreamFeature) return new EmblStreamFeature();
      else return new GenbankStreamFeature();
    }
  }