コード例 #1
0
  /**
   * Build a version of the given qualifier that keeps only those of its values which are absent
   * from the supplied StringVector.
   *
   * <p>For a qualifier named "history" a value is additionally dropped when {@code
   * HistoryBox.contains} reports it as present in {@code values}.
   *
   * @param qualifier the qualifier whose values are filtered
   * @param values the values to exclude; when {@code null} or empty the qualifier is returned
   *     unchanged
   * @return {@code qualifier} itself when there is nothing to exclude, a new qualifier with the
   *     same name holding the remaining values, or {@code null} when no value remains
   * @throws InvalidRelationException declared by the signature; nothing in the visible body throws
   *     it explicitly
   */
  protected static Qualifier getQualifierWithoutDuplicateValues(
      final Qualifier qualifier, final StringVector values) throws InvalidRelationException {
    // Nothing to filter against: hand back the very same qualifier instance.
    if (values == null || values.size() < 1) {
      return qualifier;
    }

    final StringVector candidates = qualifier.getValues();
    final StringVector retained = new StringVector();

    for (int idx = 0; idx < candidates.size(); idx++) {
      final String candidate = (String) candidates.get(idx);
      if (values.contains(candidate)) {
        continue;
      }

      // History values get a second, HistoryBox-specific duplicate check.
      final boolean historyDuplicate =
          qualifier.getName().equals("history")
              && uk.ac.sanger.artemis.components.genebuilder.cv.HistoryBox.contains(
                  values, candidate);
      if (!historyDuplicate) {
        retained.add(candidate);
      }
    }

    // Every value was already present: signal "nothing left" with null.
    if (retained.size() == 0) {
      return null;
    }
    return new Qualifier(qualifier.getName(), retained);
  }
コード例 #2
0
  /**
   * Prepare qualifier values for transfer, optionally rewriting GO and product values so that they
   * carry the evidence "Inferred from Sequence Orthology" and point back to the source gene in the
   * WITH/FROM column.
   *
   * <p>Values are rewritten only when {@code setEvidenceAndWithFrom} is set and the qualifier name
   * is "GO" or "product". Each value is passed through {@code changeField} for the {@code
   * evidence=} field and, when a gene name is found for the feature, for the {@code with=} field
   * using "GeneDB:" followed by that name.
   *
   * @param setEvidenceAndWithFrom whether the evidence and with/from fields should be rewritten
   * @param feature the feature whose gene name is looked up for the with/from field
   * @param qName the name of the qualifier the values belong to
   * @param values the qualifier values to transfer
   * @return {@code values} itself when nothing is rewritten, otherwise a new StringVector holding
   *     the rewritten values
   */
  private static StringVector getTransferValues(
      final boolean setEvidenceAndWithFrom,
      final Feature feature,
      final String qName,
      final StringVector values) {
    // Short-circuit keeps qName untouched when rewriting is switched off.
    if (!setEvidenceAndWithFrom || !(qName.equals("GO") || qName.equals("product"))) {
      return values;
    }

    final StringVector rewritten = new StringVector();
    final String geneName = getGeneName(feature);

    for (int idx = 0; idx < values.size(); idx++) {
      final String withEvidence =
          changeField("evidence=", "Inferred from Sequence Orthology", null, values.get(idx));

      if (geneName == null) {
        rewritten.add(withEvidence);
      } else {
        rewritten.add(changeField("with=", "GeneDB:" + geneName, "|", withEvidence));
      }
    }
    return rewritten;
  }
コード例 #3
0
  /**
   * Merge a set of qualifiers into an existing qualifier vector, normalising some of them on the
   * way.
   *
   * <ul>
   *   <li>"ID" qualifiers are skipped unless {@code isGene} is set.
   *   <li>"GO" values are converted with {@code GoBox.getEvidenceCodeGoTextFromText}.
   *   <li>"product" values are cut at ";db_xref=" / ";evidence=", and lose a leading "term=" and a
   *       trailing ";".
   *   <li>"orthologous_to" / "paralogous_to" values are scanned for {@code name:link=id} pairs;
   *       each pair is appended directly to {@code qualifiers} as its own qualifier with the
   *       "link=" marker removed and the text from the first ';' onwards re-attached.
   *   <li>Everything else is handed to {@code addNewQualifier} unchanged.
   * </ul>
   *
   * @param qualifiers the vector receiving the merged qualifiers
   * @param newQualifiers the qualifiers to merge in
   * @param isGene whether the new qualifiers come from a gene; controls whether "ID" is kept
   */
  private void combineQualifiers(
      final QualifierVector qualifiers, final QualifierVector newQualifiers, final boolean isGene) {
    for (int q = 0; q < newQualifiers.size(); q++) {
      final Qualifier incoming = (Qualifier) newQualifiers.get(q);
      final String name = incoming.getName();

      if (name.equals("ID") && !isGene) {
        continue;
      }

      final Qualifier merged;
      if (name.equals("GO")) {
        // convert GO evidence to codes (e.g. ND=No biological Data available)
        final StringVector goValues = incoming.getValues();
        final StringVector codedValues = new StringVector();
        for (int v = 0; v < goValues.size(); v++) {
          codedValues.add(GoBox.getEvidenceCodeGoTextFromText((String) goValues.get(v)));
        }
        merged = new Qualifier("GO", codedValues);
      } else if (name.equals("product")) {
        final StringVector productValues = incoming.getValues();
        final StringVector trimmedValues = new StringVector();
        for (int v = 0; v < productValues.size(); v++) {
          String product = (String) productValues.get(v);

          final int dbXrefAt = product.indexOf(";db_xref=");
          if (dbXrefAt > -1) {
            product = product.substring(0, dbXrefAt);
          }

          final int evidenceAt = product.indexOf(";evidence=");
          if (evidenceAt > -1) {
            product = product.substring(0, evidenceAt);
          }

          if (product.startsWith("term=")) {
            product = product.substring(5);
          }

          if (product.endsWith(";")) {
            product = product.substring(0, product.length() - 1);
          }

          trimmedValues.add(product);
        }
        merged = new Qualifier("product", trimmedValues);
      } else if (name.equals("orthologous_to") || name.equals("paralogous_to")) {
        final StringVector rawValues = incoming.getValues();
        final StringVector nonEmptyValues = new StringVector();
        for (int v = 0; v < rawValues.size(); v++) {
          if (!rawValues.get(v).equals("")) {
            nonEmptyValues.add(rawValues.get(v));
          }
        }
        if (nonEmptyValues.size() == 0) {
          continue;
        }

        final String relation = name.equals("orthologous_to") ? "orthologous_to" : "paralogous_to";
        final Pattern linkPattern = Pattern.compile("\\w+:link=\\w+");
        for (int v = 0; v < nonEmptyValues.size(); v++) {
          final String value = (String) nonEmptyValues.get(v);

          // Everything from the first ';' onwards is re-attached to every extracted link.
          final int semicolonAt = value.indexOf(';');
          final String suffix = semicolonAt > -1 ? value.substring(semicolonAt) : "";

          final Matcher links = linkPattern.matcher(value);
          while (links.find()) {
            // Drop the 5 characters of "link=" from inside the match.
            final int linkAt = value.indexOf("link=", links.start());
            final String rewritten =
                value.substring(links.start(), linkAt)
                    + value.substring(linkAt + 5, links.end())
                    + suffix;
            qualifiers.addElement(new Qualifier(relation, rewritten));
          }
        }
        // These qualifiers are added directly above, bypassing addNewQualifier.
        continue;
      } else {
        merged = incoming;
      }

      addNewQualifier(qualifiers, merged);
    }
  }
コード例 #4
0
  /**
   * Map a GFF feature to an EMBL or GenBank stream feature.
   *
   * <p>The feature's key is passed through {@code map} and its qualifiers are copied. Exon-model
   * features are flattened by merging in the qualifiers of the protein, transcript and gene of
   * their gene model; non-coding transcript features receive the gene qualifiers. Qualifiers that
   * the entry information does not accept are then renamed or removed.
   *
   * @param feature the GFF feature to convert; it is cast to {@code GFFStreamFeature} in the
   *     exon-model and non-coding-transcript branches
   * @return a new {@code EmblStreamFeature} (when this entry is an {@code EmblDocumentEntry}) or
   *     {@code GenbankStreamFeature}; {@code null} when the feature is skipped (obsolete, no
   *     location from {@code joinUtrs}, exon of a non-coding transcript, or a polypeptide / gene /
   *     centromere / transcript / mRNA key); an empty stream feature when an {@code
   *     InvalidRelationException} is caught at the end
   */
  private Object mapGffToNativeFeature(final Feature feature) {
    // Set up the mapping tables on first use.
    if (DATABASE_MAP_KEYS == null) initDatabaseMappings();

    Key key = feature.getKey();
    // Work on a copy so the source feature's qualifiers are not modified here.
    QualifierVector qualifiers = feature.getQualifiers().copy();

    // ignore if obsolete
    if (IGNORE_OBSOLETE_FEATURES) {
      Qualifier isObsoleteQualifier = qualifiers.getQualifierByName("isObsolete");
      if (isObsoleteQualifier != null) {
        // Only the first value of the qualifier is consulted.
        String value = (String) isObsoleteQualifier.getValues().get(0);
        if (Boolean.parseBoolean(value)) return null;
      }
    }

    key = map(key, qualifiers);
    // If the entry accepts the first "to remove" qualifier, the feature is built straight away
    // with its original location and none of the flattening / renaming below.
    // NOTE(review): presumably this detects an entry that already accepts database qualifiers -
    // confirm against getEntryInformation().
    if (getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[0])) {
      try {
        if (this instanceof EmblDocumentEntry)
          return new EmblStreamFeature(key, feature.getLocation(), qualifiers);
        else return new GenbankStreamFeature(key, feature.getLocation(), qualifiers);
      } catch (InvalidRelationException e) {
        // NOTE(review): the failure is only printed; execution then falls through to the
        // flattening path below instead of returning.
        e.printStackTrace();
      }
    }

    Location location = joinUtrs(feature, key, qualifiers);
    if (location == null) return null;
    // flatten gene model - combining qualifiers
    if (key.getKeyString().equals(DatabaseDocument.EXONMODEL)) {
      // NOTE(review): chadoGene is not null-checked in this branch (the non-coding branch below
      // does check it).
      ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene();

      final String name = GeneUtils.getUniqueName(feature);
      final String transcriptName = chadoGene.getTranscriptFromName(name);

      StringVector sv = new StringVector();
      sv.add(transcriptName);
      final Feature transcript = chadoGene.containsTranscript(sv);

      // Exons belonging to a non-coding transcript are not emitted.
      if (transcript != null && GeneUtils.isNonCodingTranscripts(transcript.getKey())) return null;

      qualifiers.removeQualifierByName("ID");
      int ntranscripts = 0;
      // add transcript & protein qualifiers to CDS
      try {
        final Feature protein = chadoGene.getProteinOfTranscript(transcriptName);
        if (protein != null) combineQualifiers(qualifiers, protein.getQualifiers().copy(), false);

        if (transcript != null)
          ntranscripts = handleTranscripts(qualifiers, transcript, ntranscripts, chadoGene);
      } catch (NullPointerException npe) {
        // NOTE(review): NullPointerException is silently swallowed here; qualifiers may be left
        // partially combined and ntranscripts may stay 0.
      }

      // add gene qualifiers to CDS
      QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy();

      // multiple transcripts
      // The gene ID is carried over as "shared_id" and removed from the gene qualifier copy
      // before that copy is merged in.
      if (ntranscripts > 1 && geneQualifiers.getQualifierByName("ID") != null) {
        Qualifier newIDQualifier =
            new Qualifier(
                "shared_id", (String) geneQualifiers.getQualifierByName("ID").getValues().get(0));
        addNewQualifier(qualifiers, newIDQualifier);
        geneQualifiers.removeQualifierByName("ID");
      }
      combineQualifiers(qualifiers, geneQualifiers, true);
    } else if (GeneUtils.isNonCodingTranscripts(key)) {
      // use gene id for non-coding transcripts
      ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene();
      if (chadoGene != null) {
        qualifiers.removeQualifierByName("ID");
        QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy();
        combineQualifiers(qualifiers, geneQualifiers, true);
      }
    }

    try {
      // Rename qualifiers the entry does not accept: element [0] of each pair is the name that
      // is checked, element [1] is passed to changeQualifierName as the replacement.
      for (int i = 0; i < DATABASE_QUALIFIERS_TO_MAP.length; i++) {
        if (!getEntryInformation().isValidQualifier(DATABASE_QUALIFIERS_TO_MAP[i][0])) {
          changeQualifierName(
              qualifiers, DATABASE_QUALIFIERS_TO_MAP[i][0], DATABASE_QUALIFIERS_TO_MAP[i][1]);
        }
      }

      if (qualifiers.getQualifierByName("stop_codon_redefined_as_selenocysteine") != null) {
        handleSelenocysteine(qualifiers, feature);
      }

      // Drop the listed qualifiers that the entry does not accept.
      for (int i = 0; i < DATABASE_QUALIFIERS_TO_REMOVE.length; i++) {
        if (!getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[i]))
          qualifiers.removeQualifierByName((String) DATABASE_QUALIFIERS_TO_REMOVE[i]);
      }

      // Features with these keys are not emitted on their own.
      if (key.getKeyString().equals("polypeptide")) return null;
      else if (key.getKeyString().equals("gene")) return null;
      else if (key.getKeyString().equals("centromere")) return null;
      else if (key.getKeyString().equals("transcript") || key.getKeyString().equals("mRNA"))
        return null;

      if (this instanceof EmblDocumentEntry)
        return new EmblStreamFeature(key, location, qualifiers);
      else return new GenbankStreamFeature(key, location, qualifiers);
    } catch (InvalidRelationException e) {
      e.printStackTrace();
      // Fall back to an empty feature.
      // NOTE(review): the format is chosen by `feature instanceof DatabaseStreamFeature` here,
      // whereas the paths above use `this instanceof EmblDocumentEntry` - confirm this is
      // intended.
      if (feature instanceof DatabaseStreamFeature) return new EmblStreamFeature();
      else return new GenbankStreamFeature();
    }
  }