/**
 * Filters the values of {@code qualifier} against {@code values} and returns a qualifier that
 * holds only the values not already present there.
 *
 * <p>When {@code values} is {@code null} or empty, the supplied {@code qualifier} instance itself
 * is returned (no copy is made). For a qualifier named "history" a value is kept only if, in
 * addition, {@code HistoryBox.contains(values, value)} reports {@code false}.
 *
 * @param qualifier the qualifier whose values are to be filtered
 * @param values the values that must not appear in the result; may be {@code null}
 * @return {@code qualifier} itself when there is nothing to filter against; otherwise a new
 *     qualifier with the same name and the remaining values, or {@code null} when no value
 *     remains
 * @throws InvalidRelationException declared on the signature; this method contains no explicit
 *     throw, so it can only be propagated from the calls made here
 */
protected static Qualifier getQualifierWithoutDuplicateValues(
    final Qualifier qualifier, final StringVector values) throws InvalidRelationException {
  // Nothing to compare against: hand the caller's qualifier back untouched.
  if (values == null || values.size() < 1) {
    return qualifier;
  }

  final StringVector candidates = qualifier.getValues();
  final StringVector survivors = new StringVector();

  for (int idx = 0; idx < candidates.size(); idx++) {
    final String candidate = (String) candidates.get(idx);
    if (values.contains(candidate)) {
      continue;
    }

    // "history" values get a second duplicate check through HistoryBox.
    final boolean duplicateHistory =
        qualifier.getName().equals("history")
            && uk.ac.sanger.artemis.components.genebuilder.cv.HistoryBox.contains(
                values, candidate);
    if (!duplicateHistory) {
      survivors.add(candidate);
    }
  }

  // Every value was already present: signal "nothing to add" with null.
  if (survivors.size() == 0) {
    return null;
  }
  return new Qualifier(qualifier.getName(), survivors);
}
/**
 * Optionally rewrites the values of a "GO" or "product" qualifier before they are transferred.
 *
 * <p>When {@code setEvidenceAndWithFrom} is {@code true} and {@code qName} is "GO" or "product",
 * every value is passed through {@code changeField} with the field "evidence=" and the text
 * "Inferred from Sequence Orthology". If {@code getGeneName(feature)} yields a non-null name, the
 * value is additionally passed through {@code changeField} with the field "with=", the text
 * "GeneDB:" followed by that name, and the separator "|". In every other case the supplied
 * {@code values} object is returned unchanged.
 *
 * @param setEvidenceAndWithFrom whether the rewrite should be applied at all
 * @param feature the feature handed to {@code getGeneName}
 * @param qName the name of the qualifier the values belong to
 * @param values the qualifier values to transfer
 * @return {@code values} itself when no rewrite applies, otherwise a new vector holding the
 *     rewritten values in the same order
 */
private static StringVector getTransferValues(
    final boolean setEvidenceAndWithFrom,
    final Feature feature,
    final String qName,
    final StringVector values) {
  // qName is only inspected when the rewrite was requested (short-circuit).
  final boolean rewrite =
      setEvidenceAndWithFrom && (qName.equals("GO") || qName.equals("product"));
  if (!rewrite) {
    return values;
  }

  final StringVector rewritten = new StringVector();
  final String geneName = getGeneName(feature);

  for (int idx = 0; idx < values.size(); idx++) {
    final String withEvidence =
        changeField("evidence=", "Inferred from Sequence Orthology", null, values.get(idx));
    rewritten.add(
        geneName == null
            ? withEvidence
            : changeField("with=", "GeneDB:" + geneName, "|", withEvidence));
  }
  return rewritten;
}
/** * Merge qualifiers * * @param qualifiers * @param newQualifiers */ private void combineQualifiers( final QualifierVector qualifiers, final QualifierVector newQualifiers, final boolean isGene) { for (int i = 0; i < newQualifiers.size(); i++) { Qualifier newQualifier = (Qualifier) newQualifiers.get(i); if (newQualifier.getName().equals("ID") && !isGene) { continue; } // convert GO evidence to codes (e.g. ND=No biological Data available) if (newQualifier.getName().equals("GO")) { final StringVector newValues = newQualifier.getValues(); final StringVector tmpNewValues = new StringVector(); for (int j = 0; j < newValues.size(); j++) { String val = GoBox.getEvidenceCodeGoTextFromText((String) newValues.get(j)); tmpNewValues.add(val); } newQualifier = new Qualifier("GO", tmpNewValues); } if (newQualifier.getName().equals("product")) { final StringVector newValues = newQualifier.getValues(); final StringVector tmpNewValues = new StringVector(); for (int j = 0; j < newValues.size(); j++) { String val = (String) newValues.get(j); int ind = 0; if ((ind = val.indexOf(";db_xref=")) > -1) val = val.substring(0, ind); if ((ind = val.indexOf(";evidence=")) > -1) val = val.substring(0, ind); if (val.startsWith("term=")) val = val.substring(5, val.length()); if (val.endsWith(";")) val = val.substring(0, val.length() - 1); tmpNewValues.add(val); } newQualifier = new Qualifier("product", tmpNewValues); } if (newQualifier.getName().equals("orthologous_to") || newQualifier.getName().equals("paralogous_to")) { final StringVector newValues = newQualifier.getValues(); final StringVector tmpNewValues = new StringVector(); for (int j = 0; j < newValues.size(); j++) { if (!newValues.get(j).equals("")) tmpNewValues.add(newValues.get(j)); } if (tmpNewValues.size() == 0) continue; Pattern p = Pattern.compile("\\w+:link=\\w+"); for (int j = 0; j < tmpNewValues.size(); j++) { String valueStr = (String) tmpNewValues.get(j); String newValueStr; int indexEnd = valueStr.indexOf(';'); String endStr 
= ""; if (indexEnd > -1) endStr = valueStr.substring(indexEnd); Matcher m = p.matcher(valueStr); while (m.find()) { int index = valueStr.indexOf("link=", m.start()); newValueStr = valueStr.substring(m.start(), index) + valueStr.substring(index + 5, m.end()) + endStr; if (newQualifier.getName().equals("orthologous_to")) newQualifier = new Qualifier("orthologous_to", newValueStr); else newQualifier = new Qualifier("paralogous_to", newValueStr); qualifiers.addElement(newQualifier); } } continue; } addNewQualifier(qualifiers, newQualifier); } }
/** * Map GFF features to EMBL/Genbank * * @param feature * @return */ private Object mapGffToNativeFeature(final Feature feature) { if (DATABASE_MAP_KEYS == null) initDatabaseMappings(); Key key = feature.getKey(); QualifierVector qualifiers = feature.getQualifiers().copy(); // ignore if obsolete if (IGNORE_OBSOLETE_FEATURES) { Qualifier isObsoleteQualifier = qualifiers.getQualifierByName("isObsolete"); if (isObsoleteQualifier != null) { String value = (String) isObsoleteQualifier.getValues().get(0); if (Boolean.parseBoolean(value)) return null; } } key = map(key, qualifiers); if (getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[0])) { try { if (this instanceof EmblDocumentEntry) return new EmblStreamFeature(key, feature.getLocation(), qualifiers); else return new GenbankStreamFeature(key, feature.getLocation(), qualifiers); } catch (InvalidRelationException e) { // TODO Auto-generated catch block e.printStackTrace(); } } Location location = joinUtrs(feature, key, qualifiers); if (location == null) return null; // flatten gene model - combining qualifiers if (key.getKeyString().equals(DatabaseDocument.EXONMODEL)) { ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene(); final String name = GeneUtils.getUniqueName(feature); final String transcriptName = chadoGene.getTranscriptFromName(name); StringVector sv = new StringVector(); sv.add(transcriptName); final Feature transcript = chadoGene.containsTranscript(sv); if (transcript != null && GeneUtils.isNonCodingTranscripts(transcript.getKey())) return null; qualifiers.removeQualifierByName("ID"); int ntranscripts = 0; // add transcript & protein qualifiers to CDS try { final Feature protein = chadoGene.getProteinOfTranscript(transcriptName); if (protein != null) combineQualifiers(qualifiers, protein.getQualifiers().copy(), false); if (transcript != null) ntranscripts = handleTranscripts(qualifiers, transcript, ntranscripts, chadoGene); } catch (NullPointerException npe) 
{ } // add gene qualifiers to CDS QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy(); // multiple transcripts if (ntranscripts > 1 && geneQualifiers.getQualifierByName("ID") != null) { Qualifier newIDQualifier = new Qualifier( "shared_id", (String) geneQualifiers.getQualifierByName("ID").getValues().get(0)); addNewQualifier(qualifiers, newIDQualifier); geneQualifiers.removeQualifierByName("ID"); } combineQualifiers(qualifiers, geneQualifiers, true); } else if (GeneUtils.isNonCodingTranscripts(key)) { // use gene id for non-coding transcripts ChadoCanonicalGene chadoGene = ((GFFStreamFeature) feature).getChadoGene(); if (chadoGene != null) { qualifiers.removeQualifierByName("ID"); QualifierVector geneQualifiers = chadoGene.getGene().getQualifiers().copy(); combineQualifiers(qualifiers, geneQualifiers, true); } } try { for (int i = 0; i < DATABASE_QUALIFIERS_TO_MAP.length; i++) { if (!getEntryInformation().isValidQualifier(DATABASE_QUALIFIERS_TO_MAP[i][0])) { changeQualifierName( qualifiers, DATABASE_QUALIFIERS_TO_MAP[i][0], DATABASE_QUALIFIERS_TO_MAP[i][1]); } } if (qualifiers.getQualifierByName("stop_codon_redefined_as_selenocysteine") != null) { handleSelenocysteine(qualifiers, feature); } for (int i = 0; i < DATABASE_QUALIFIERS_TO_REMOVE.length; i++) { if (!getEntryInformation().isValidQualifier((String) DATABASE_QUALIFIERS_TO_REMOVE[i])) qualifiers.removeQualifierByName((String) DATABASE_QUALIFIERS_TO_REMOVE[i]); } if (key.getKeyString().equals("polypeptide")) return null; else if (key.getKeyString().equals("gene")) return null; else if (key.getKeyString().equals("centromere")) return null; else if (key.getKeyString().equals("transcript") || key.getKeyString().equals("mRNA")) return null; if (this instanceof EmblDocumentEntry) return new EmblStreamFeature(key, location, qualifiers); else return new GenbankStreamFeature(key, location, qualifiers); } catch (InvalidRelationException e) { e.printStackTrace(); if (feature instanceof 
DatabaseStreamFeature) return new EmblStreamFeature(); else return new GenbankStreamFeature(); } }