private Integer createGoAnnotation( String productIdentifier, String productType, String termIdentifier, Item organism, String qualifier, String withText, String dataSourceCode) throws ObjectStoreException { Item goAnnotation = createItem(annotationClassName); goAnnotation.setReference("subject", productIdentifier); goAnnotation.setReference("ontologyTerm", termIdentifier); if (!StringUtils.isEmpty(qualifier)) { goAnnotation.setAttribute("qualifier", qualifier); } // with objects if (!StringUtils.isEmpty(withText)) { goAnnotation.setAttribute("withText", withText); List<String> with = createWithObjects(withText, organism, dataSourceCode); if (!with.isEmpty()) { goAnnotation.addCollection(new ReferenceList("with", with)); } } goAnnotation.addToCollection("dataSets", getDataset(dataSourceCode)); if ("gene".equals(productType)) { addProductCollection(productIdentifier, goAnnotation.getIdentifier()); } Integer storedAnnotationId = store(goAnnotation); return storedAnnotationId; }
/**
 * Registers an image URL and attaches it to the given result the first time the URL is seen.
 * When the URL is already a key of imgs nothing happens: no item is created and the result
 * is left untouched.
 *
 * @param result item whose "images" collection receives the new image
 * @param img image URL, also used as the key in imgs
 */
private void setImage(Item result, String img) {
    if (imgs.containsKey(img)) {
        return;
    }
    Item image = createItem("Image");
    image.setAttribute("url", img);
    imgs.put(img, image);
    result.addToCollection("images", image.getIdentifier());
}
/**
 * Adds each named MeSH term to the publication's "meshTerms" collection, creating and caching
 * a MeshTerm item in meshTerms for names that have not been seen before.
 *
 * @param publication item that receives the terms
 * @param newTerms names of the terms to attach
 */
private void processMeshTerms(Item publication, List<String> newTerms) {
    for (String termName : newTerms) {
        Item term = meshTerms.get(termName);
        if (term != null) {
            publication.addToCollection("meshTerms", term);
            continue;
        }
        // first occurrence of this name: create the item and remember it
        term = itemFactory.makeItemForClass("MeshTerm");
        term.setAttribute("name", termName);
        meshTerms.put(termName, term);
        publication.addToCollection("meshTerms", term);
    }
}
// save homologue pair private void processHomologue(String gene1, String gene2) throws ObjectStoreException { Item homologue = createItem("Homologue"); homologue.setReference("gene", gene1); homologue.setReference("homologue", gene2); homologue.addToCollection("evidence", getEvidence()); homologue.setAttribute("type", "homologue"); try { store(homologue); } catch (ObjectStoreException e) { throw new ObjectStoreException(e); } }
/** * @param proteinId * @param modSite * @param modType * @param source * @param pmid * @throws ObjectStoreException * @throws Exception */ private void newProduct( String experiment, String proteinId, String value, String units, String pmid) throws ObjectStoreException, Exception { Item protein = getProteinItem(proteinId); Item pmods = getProteinHalfLife(experiment, value, units, pmid); protein.addToCollection("proteinHalfLife", pmods.getIdentifier()); // Item pmods2 = getProteinHalfLife(experiment, valueMins, stringMinutes, pmid); // protein.addToCollection("proteinHalfLife", pmods2.getIdentifier()); }
/**
 * Returns the item identifier of the ontology term for the given identifier, creating and
 * storing the term item the first time its resolved id is seen.
 *
 * @param identifier term identifier as read from the input; passed through resolveTerm first
 * @param dataSourceCode code used to look up the data set for a newly created term
 * @return the term's item identifier, or null when resolveTerm returns null
 * @throws ObjectStoreException if a new term cannot be stored
 */
private String newGoTerm(String identifier, String dataSourceCode)
        throws ObjectStoreException {
    String resolvedId = resolveTerm(identifier);
    if (resolvedId == null) {
        return null;
    }

    String knownRef = goTerms.get(resolvedId);
    if (knownRef != null) {
        return knownRef;
    }

    Item term = createItem(termClassName);
    term.setAttribute("identifier", resolvedId);
    term.addToCollection("dataSets", getDataset(dataSourceCode));
    store(term);

    String termRef = term.getIdentifier();
    goTerms.put(resolvedId, termRef);
    return termRef;
}
/**
 * Creates a Location item for a GFF3 record. The smaller coordinate is always written as
 * "start" and the larger as "end"; the strand is written as "1", "-1" or "0".
 *
 * @param record record supplying coordinates and strand
 * @param refId item identifier of the located feature
 * @param seq sequence item the feature is located on
 * @param cd class descriptor of the feature; not used by this method
 * @return the new, unstored Location item
 */
private Item getLocation(GFF3Record record, String refId, Item seq, ClassDescriptor cd) {
    Item location = createItem("Location");

    int first = record.getStart();
    int second = record.getEnd();
    location.setAttribute("start", String.valueOf(Math.min(first, second)));
    location.setAttribute("end", String.valueOf(Math.max(first, second)));

    // literal-first equals() is null-safe, so a missing strand falls through to "0"
    String strand = record.getStrand();
    String strandValue;
    if ("+".equals(strand)) {
        strandValue = "1";
    } else if ("-".equals(strand)) {
        strandValue = "-1";
    } else {
        strandValue = "0";
    }
    location.setAttribute("strand", strandValue);

    location.setReference("locatedOn", seq.getIdentifier());
    location.setReference("feature", refId);
    location.addToCollection("dataSets", dataSet);
    return location;
}
/** {@inheritDoc} */ public void process(Reader reader) throws Exception { // Create a chromosome Item chromosome = createItem("Chromosome"); chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID); store(chromosome); @SuppressWarnings("rawtypes") Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader); while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); // remove header line if (!line[0].equals(HEADER_LINE)) { String ecogeneId = line[0]; String geneName = line[1]; String eCK = line[2]; String swissProtId = line[3]; String wisconsinGenBankId = line[4]; String genBankProteinId = line[5]; String genoBaseId = line[6]; String type = line[7]; String strand = line[8]; String start = line[9]; String end = line[10]; String synonym = line[11]; Set<String> symSet = new TreeSet<String>(); if (!eCK.equals(NULL_STRING)) { symSet.add(eCK); } if (!genoBaseId.equals(NULL_STRING)) { symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; "))); } if (!synonym.equals(NONE_STRING)) { symSet.addAll(Arrays.asList(synonym.split(", "))); } if (type.equals(TYPE_GENE)) { Item gene = createItem("Gene"); gene.setReference("chromosome", chromosome); gene.setReference("organism", getOrganism(ECOLI_TAXON)); gene.setAttribute("primaryIdentifier", ecogeneId); gene.setAttribute("secondaryIdentifier", wisconsinGenBankId); gene.setAttribute("name", geneName); gene.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(gene, sym, true); } } if (!swissProtId.equals(NULL_STRING)) { if (proteinMap.containsKey(swissProtId)) { // Reference a protein to a gene (a gene has proteins // collection) gene.addToCollection("proteins", proteinMap.get(swissProtId)); } else { Item protein = createItem("Protein"); protein.setAttribute("primaryAccession", swissProtId); // NCBI Protein id, remove "g" protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1)); gene.addToCollection("proteins", protein); 
store(protein); proteinMap.put(swissProtId, protein); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", gene); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } gene.setReference("chromosomeLocation", location); store(location); } store(gene); } else if (type.equals(TYPE_RNA)) { // TODO code refactory Item rna = createItem("NcRNA"); rna.setReference("chromosome", chromosome); rna.setReference("organism", getOrganism(ECOLI_TAXON)); rna.setAttribute("primaryIdentifier", ecogeneId); rna.setAttribute("secondaryIdentifier", wisconsinGenBankId); rna.setAttribute("name", geneName); rna.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(rna, sym, true); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", rna); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } rna.setReference("chromosomeLocation", location); store(location); } store(rna); } } } }
/** * process GFF3 record and give a xml presentation * * @param record GFF3Record * @throws ObjectStoreException if an error occurs storing items */ public void process(GFF3Record record) throws ObjectStoreException { String identifier = record.getId(); String refId = identifierMap.get(identifier); // get rid of previous record Items from handler handler.clear(); List<?> names = record.getNames(); Item seq = getSeq(record.getSequenceID()); String term = record.getType(); String className = TypeUtil.javaiseClassName(term); String fullClassName = tgtModel.getPackageName() + "." + className; ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName); if (cd == null) { throw new IllegalArgumentException( "no class found in model for: " + className + " (original GFF record type: " + term + ") for " + "record: " + record); } Set<Item> synonymsToAdd = new HashSet<Item>(); Item feature = null; // new feature if (refId == null) { feature = createItem(className); refId = feature.getIdentifier(); } if (!"chromosome".equals(record.getType()) && seq != null) { boolean makeLocation = record.getStart() >= 1 && record.getEnd() >= 1 && !dontCreateLocations && handler.createLocations(record); if (makeLocation) { Item location = getLocation(record, refId, seq, cd); if (feature == null) { // this feature has already been created and stored // we only wanted the location, we're done here. 
store(location); return; } int length = getLength(record); feature.setAttribute("length", String.valueOf(length)); handler.setLocation(location); if ("Chromosome".equals(seqClsName) && (cd.getFieldDescriptorByName("chromosome") != null)) { feature.setReference("chromosome", seq.getIdentifier()); feature.setReference("chromosomeLocation", location); } } } if (feature == null) { // this feature has already been created and stored // feature with discontinous location, this location wasn't valid for some reason return; } if (identifier != null) { feature.setAttribute("primaryIdentifier", identifier); } handler.setFeature(feature); identifierMap.put(identifier, feature.getIdentifier()); if (names != null) { setNames(names, synonymsToAdd, record.getId(), feature, cd); } List<String> parents = record.getParents(); if (parents != null && !parents.isEmpty()) { setRefsAndCollections(parents, feature); } feature.addReference(getOrgRef()); feature.addToCollection("dataSets", dataSet); handler.addDataSet(dataSet); Double score = record.getScore(); if (score != null && !"".equals(String.valueOf(score))) { feature.setAttribute("score", String.valueOf(score)); feature.setAttribute("scoreType", record.getSource()); } for (Item synonym : synonymsToAdd) { handler.addItem(synonym); } handler.process(record); if (handler.getDataSetReferenceList().getRefIds().size() > 0) { feature.addCollection(handler.getDataSetReferenceList()); } handler.clearDataSetReferenceList(); if (handler.getPublicationReferenceList().getRefIds().size() > 0) { feature.addCollection(handler.getPublicationReferenceList()); } handler.clearPublicationReferenceList(); try { Iterator<Item> iter = handler.getItems().iterator(); while (iter.hasNext()) { store(iter.next()); } } catch (ObjectStoreException e) { LOG.error("Problem writing item to the itemwriter"); throw e; } }
/**
 * Builds a Publication item from a map of parsed publication fields.
 *
 * <p>"pubMedId" is always set from the "id" entry. "year" is copied whenever it is non-null;
 * the other text fields are copied only when non-empty. MeSH terms and authors are attached
 * through the shared meshTerms and authorMap caches, and the first author encountered
 * becomes "firstAuthor".
 *
 * @param itemFactory factory used to create the Publication and Author items
 * @param map parsed fields keyed by name
 * @return a set containing only the Publication item
 */
private Set<Item> mapToItems(ItemFactory itemFactory, Map map) {
    Set<Item> items = new HashSet<Item>();
    Item publication = itemFactory.makeItemForClass("Publication");
    items.add(publication);
    publication.setAttribute("pubMedId", (String) map.get("id"));

    // Attributes are written in this fixed order; "year" sits between the two groups.
    String[] fieldsBeforeYear = {"title", "journal", "volume", "issue", "pages"};
    for (String field : fieldsBeforeYear) {
        String text = (String) map.get(field);
        if (!StringUtils.isEmpty(text)) {
            publication.setAttribute(field, text);
        }
    }

    Object year = map.get("year");
    if (year != null) {
        publication.setAttribute("year", (String) year);
    }

    String[] fieldsAfterYear = {"abstractText", "month", "doi"};
    for (String field : fieldsAfterYear) {
        String text = (String) map.get(field);
        if (!StringUtils.isEmpty(text)) {
            publication.setAttribute(field, text);
        }
    }

    final List<String> termsToStore = (List<String>) map.get("meshTerms");
    if (termsToStore != null && !termsToStore.isEmpty()) {
        processMeshTerms(publication, termsToStore);
    }

    List<String> authors = (List<String>) map.get("authors");
    if (authors == null) {
        return items;
    }
    for (String authorName : authors) {
        Item author = authorMap.get(authorName);
        if (author == null) {
            author = itemFactory.makeItemForClass("Author");
            author.setAttribute("name", authorName);
            authorMap.put(authorName, author);
        }
        publication.addToCollection("authors", author);
        if (!publication.hasAttribute("firstAuthor")) {
            publication.setAttribute("firstAuthor", authorName);
        }
    }
    return items;
}
/** * process GFF3 record and give a xml presentation * * @param record GFF3Record * @throws ObjectStoreException if an error occurs storing items * @throws IOException */ public void process(GFF3Record record) throws ObjectStoreException { String term = record.getType(); if (config_exclude != null && !config_exclude.isEmpty()) { // don't process terms in the exclude list if (config_exclude.containsKey(this.orgTaxonId)) { if (config_exclude.get(this.orgTaxonId).contains(term)) { return; } } } if (config_term != null && !config_term.isEmpty()) { // otherwise all terms are processed if (config_term.containsKey(this.orgTaxonId)) { if (!config_term.get(this.orgTaxonId).contains(term)) { return; } } } // By default, use ID field in attributes String primaryIdentifier = record.getId(); // If pid set in gff_config.propeties, look for the attribute field, e.g. locus_tag if (config_attr.containsKey(this.orgTaxonId)) { if (config_attr.get(this.orgTaxonId).containsKey("primaryIdentifier")) { String cls = config_attr_class.get(this.orgTaxonId).get("primaryIdentifier"); if ("all".equals(cls) || term.equals(cls)) { String pidAttr = config_attr.get(this.orgTaxonId).get("primaryIdentifier"); if (pidAttr.contains("Dbxref") && record.getDbxrefs() != null) { String pidAttrPrefix = pidAttr.split("\\.")[1]; for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) { String xref = (String) i.next(); if (xref.contains(pidAttrPrefix)) { primaryIdentifier = xref.split(":")[1]; break; } } } else { if (record.getAttributes().get(pidAttr) != null) { primaryIdentifier = record.getAttributes().get(pidAttr).get(0); } } } } } String refId = identifierMap.get(primaryIdentifier); // get rid of previous record Items from handler handler.clear(); Item seq = getSeq(record.getSequenceID()); String className = TypeUtil.javaiseClassName(term); String fullClassName = tgtModel.getPackageName() + "." 
+ className; ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName); if (cd == null) { throw new IllegalArgumentException( "no class found in model for: " + className + " (original GFF record type: " + term + ") for " + "record: " + record); } Set<Item> synonymsToAdd = new HashSet<Item>(); Item feature = null; // new feature if (refId == null) { feature = createItem(className); refId = feature.getIdentifier(); } if (!"chromosome".equals(term) && seq != null) { boolean makeLocation = record.getStart() >= 1 && record.getEnd() >= 1 && !dontCreateLocations && handler.createLocations(record); if (makeLocation) { Item location = getLocation(record, refId, seq, cd); if (feature == null) { // this feature has already been created and stored // we only wanted the location, we're done here. store(location); return; } int length = getLength(record); feature.setAttribute("length", String.valueOf(length)); handler.setLocation(location); if ("Chromosome".equals(seqClsName) && (cd.getFieldDescriptorByName("chromosome") != null)) { feature.setReference("chromosome", seq.getIdentifier()); feature.setReference("chromosomeLocation", location); } } } if (feature == null) { // this feature has already been created and stored // feature with discontinous location, this location wasn't valid for some reason return; } if (primaryIdentifier != null) { feature.setAttribute("primaryIdentifier", primaryIdentifier); } handler.setFeature(feature); identifierMap.put(primaryIdentifier, feature.getIdentifier()); List<?> names = record.getNames(); String symbol = null; List<String> synonyms = new ArrayList<String>(); // get the attribute set for symbol if (config_attr.containsKey(this.orgTaxonId)) { if (config_attr.get(this.orgTaxonId).containsKey("symbol")) { String cls = config_attr_class.get(this.orgTaxonId).get("symbol"); if ("all".equals(cls) || term.equals(cls)) { String symbolAttr = config_attr.get(this.orgTaxonId).get("symbol"); if (symbolAttr.contains("Dbxref") && 
record.getDbxrefs() != null) { String symbolAttrPrefix = symbolAttr.split("\\.")[1]; for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) { String xref = (String) i.next(); if (xref.contains(symbolAttrPrefix)) { symbol = xref.split(":")[1]; break; } } } else { if (record.getAttributes().get(symbolAttr) != null) { symbol = record.getAttributes().get(symbolAttr).get(0); } } } } } // get the attribute set for synonym if (config_attr.containsKey(this.orgTaxonId)) { if (config_attr.get(this.orgTaxonId).containsKey("synonym")) { String cls = config_attr_class.get(this.orgTaxonId).get("synonym"); if ("all".equals(cls) || term.equals(cls)) { String synonymAttr = config_attr.get(this.orgTaxonId).get("synonym"); if (synonymAttr.contains("Dbxref") && record.getDbxrefs() != null) { String synonymAttrPrefix = synonymAttr.split("\\.")[1]; Set<String> synSet = new HashSet<String>(); for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) { String xref = (String) i.next(); if (xref.contains(synonymAttrPrefix)) { synSet.add(xref.split(":")[1]); } } synonyms.addAll(synSet); } else { synonyms = record.getAttributes().get(synonymAttr); } // synonyms.removeAll(Collections.singleton(null)); } } } if (names != null) { setNames(names, symbol, synonyms, synonymsToAdd, primaryIdentifier, feature, cd); } // Other attributes List<String> primeAttrList = Arrays.asList("primaryIdentifier", "symbol", "synonym"); if (config_attr.containsKey(this.orgTaxonId)) { Map<String, String> attrMapOrg = config_attr.get(this.orgTaxonId); Map<String, String> attrMapClone = new HashMap<String, String>(); // Deep copy of a map for (Entry<String, String> e : attrMapOrg.entrySet()) { attrMapClone.put(e.getKey(), e.getValue()); } for (String pa : primeAttrList) { attrMapClone.remove(pa); } for (Entry<String, String> e : attrMapClone.entrySet()) { String cls = config_attr_class.get(this.orgTaxonId).get(e.getKey()); if ("all".equals(cls) || term.equals(cls)) { String attr = e.getValue(); 
if (attr.contains("Dbxref") && record.getDbxrefs() != null) { String attrPrefix = attr.split("\\.")[1]; for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) { String xref = (String) i.next(); if (xref.contains(attrPrefix)) { if (feature.checkAttribute(e.getKey())) { feature.setAttribute(e.getKey(), xref.split(":")[1]); } break; } } } else { if (record.getAttributes().get(attr) != null) { String attrVal = record.getAttributes().get(attr).get(0); if (attrVal != null) { if (feature.checkAttribute(e.getKey())) { feature.setAttribute(e.getKey(), attrVal); } } } } } } } List<String> parents = record.getParents(); if (parents != null && !parents.isEmpty()) { setRefsAndCollections(parents, feature); } feature.addReference(getOrgRef()); feature.addToCollection("dataSets", dataSet); handler.addDataSet(dataSet); Double score = record.getScore(); if (score != null && !"".equals(String.valueOf(score))) { feature.setAttribute("score", String.valueOf(score)); feature.setAttribute("scoreType", record.getSource()); } for (Item synonym : synonymsToAdd) { handler.addItem(synonym); } handler.process(record); if (handler.getDataSetReferenceList().getRefIds().size() > 0) { feature.addCollection(handler.getDataSetReferenceList()); } handler.clearDataSetReferenceList(); if (handler.getPublicationReferenceList().getRefIds().size() > 0) { feature.addCollection(handler.getPublicationReferenceList()); } handler.clearPublicationReferenceList(); try { Iterator<Item> iter = handler.getItems().iterator(); while (iter.hasNext()) { store(iter.next()); } } catch (ObjectStoreException e) { LOG.error("Problem writing item to the itemwriter"); throw e; } }
private String newProduct( String identifier, String type, Item organism, String dataSourceCode, boolean createOrganism, String field) throws ObjectStoreException { String idField = field; String accession = identifier; String clsName = null; // find gene attribute first to see if organism should be part of key if ("gene".equalsIgnoreCase(type)) { clsName = "Gene"; String taxonId = organism.getAttribute("taxonId").getValue(); if (idField == null) { idField = configs.get(taxonId).identifier; if (idField == null) { throw new RuntimeException( "Could not find a identifier property for taxon: " + taxonId + " check properties file: " + PROP_FILE); } } // if a Dmel gene we need to use FlyBaseIdResolver to find a current id if ("7227".equals(taxonId)) { IdResolver resolver = flybaseResolverFactory.getIdResolver(false); if (resolver != null) { int resCount = resolver.countResolutions(taxonId, accession); if (resCount != 1) { LOG.info( "RESOLVER: failed to resolve gene to one identifier, " + "ignoring gene: " + accession + " count: " + resCount + " FBgn: " + resolver.resolveId(taxonId, accession)); return null; } accession = resolver.resolveId(taxonId, accession).iterator().next(); } } } else if ("protein".equalsIgnoreCase(type)) { // TODO use values in config clsName = "Protein"; idField = "primaryAccession"; } else { String typeCls = TypeUtil.javaiseClassName(type); if (getModel().getClassDescriptorByName(typeCls) != null) { Class<?> cls = getModel().getClassDescriptorByName(typeCls).getType(); if (BioEntity.class.isAssignableFrom(cls)) { clsName = typeCls; } } if (clsName == null) { throw new IllegalArgumentException("Unrecognised annotation type '" + type + "'"); } } boolean includeOrganism; if ("primaryIdentifier".equals(idField) || "protein".equals(type)) { includeOrganism = false; } else { includeOrganism = createOrganism; } String key = makeProductKey(accession, type, organism, includeOrganism); // Have we already seen this product somewhere before? 
// if so, return the product rather than creating a new one... if (productMap.containsKey(key)) { return productMap.get(key); } // if a Dmel gene we need to use FlyBaseIdResolver to find a current id Item product = createItem(clsName); if (organism != null && createOrganism) { product.setReference("organism", organism.getIdentifier()); } product.setAttribute(idField, accession); String dataSetIdentifier = getDataset(dataSourceCode); product.addToCollection("dataSets", dataSetIdentifier); Integer storedProductId = store(product); storedProductIds.put(product.getIdentifier(), storedProductId); productMap.put(key, product.getIdentifier()); return product.getIdentifier(); }
/** * Process the csv file * * @param reader the Reader * @see DataConverter#process * @throws Exception if something goes wrong */ @Override public void process(Reader reader) throws Exception { if (rslv == null) { rslv = IdResolverService.getFlyIdResolver(); } Iterator<String[]> it = FormattedTextParser.parseTabDelimitedReader(reader); while (it.hasNext()) { String[] lineBits = it.next(); String geneCG = lineBits[0]; if (!geneCG.startsWith("CG")) { // ignore clones for now continue; } // Try to create/fetch gene, if null the IdResolver failed so do nothing for this row Item gene = getGene(geneCG); if (gene == null) { continue; } String stage = lineBits[1]; String resultKey = geneCG + stage; Item result = getResult(resultKey, gene.getIdentifier(), pub.getIdentifier(), stage); Integer stageNumber = null; try { stageNumber = new Integer(stage); } catch (NumberFormatException e) { // bad line in file, just keep going continue; } result.setAttribute("stageRange", STAGE_LABELS[stageNumber.intValue()] + " (BDGP in situ)"); if (lineBits.length > 2) { String image = lineBits[2]; if (StringUtils.isNotEmpty(image)) { setImage(result, URL + image); } } if (lineBits.length > 3) { String term = lineBits[3]; Item termItem = getTerm(term); if (termItem != null) { result.addToCollection("mRNAExpressionTerms", termItem); } if ("no staining".equals(term)) { result.setAttribute("expressed", "false"); } } } for (Item result : results.values()) { if (!result.hasCollection("mRNAExpressionTerms") || result.getCollection("mRNAExpressionTerms").getRefIds().isEmpty()) { result.setAttribute("expressed", "false"); } } storeAll(imgs); storeAll(results); }