private Integer createGoAnnotation( String productIdentifier, String productType, String termIdentifier, Item organism, String qualifier, String withText, String dataSourceCode) throws ObjectStoreException { Item goAnnotation = createItem(annotationClassName); goAnnotation.setReference("subject", productIdentifier); goAnnotation.setReference("ontologyTerm", termIdentifier); if (!StringUtils.isEmpty(qualifier)) { goAnnotation.setAttribute("qualifier", qualifier); } // with objects if (!StringUtils.isEmpty(withText)) { goAnnotation.setAttribute("withText", withText); List<String> with = createWithObjects(withText, organism, dataSourceCode); if (!with.isEmpty()) { goAnnotation.addCollection(new ReferenceList("with", with)); } } goAnnotation.addToCollection("dataSets", getDataset(dataSourceCode)); if ("gene".equals(productType)) { addProductCollection(productIdentifier, goAnnotation.getIdentifier()); } Integer storedAnnotationId = store(goAnnotation); return storedAnnotationId; }
// save homologue pair private void processHomologue(String gene1, String gene2) throws ObjectStoreException { Item homologue = createItem("Homologue"); homologue.setReference("gene", gene1); homologue.setReference("homologue", gene2); homologue.addToCollection("evidence", getEvidence()); homologue.setAttribute("type", "homologue"); try { store(homologue); } catch (ObjectStoreException e) { throw new ObjectStoreException(e); } }
private Item getResult(String key, String geneId, String pubId, String stage) { if (results.containsKey(key)) { return results.get(key); } Item result = createItem("MRNAExpressionResult"); result.setAttribute("expressed", "true"); result.setReference("gene", geneId); result.setReference("publication", pubId); result.setCollection("stages", getStages(stage)); // result.setCollection("images", new ArrayList<String>()); // result.setCollection("mRNAExpressionTerms", new ArrayList<String>()); results.put(key, result); return result; }
/**
 * Resolve an identifier with the resolver and return the matching Gene item, creating and
 * storing it the first time the resolved identifier is seen.
 *
 * @param geneCG the identifier to resolve
 * @return the Gene item, or null when no resolver is available for TAXON_FLY or the
 *     identifier does not resolve to exactly one primary identifier
 * @throws ObjectStoreException if a new gene cannot be stored
 */
private Item getGene(String geneCG) throws ObjectStoreException {
    boolean resolverUsable = rslv != null && rslv.hasTaxon(TAXON_FLY);
    if (!resolverUsable) {
        return null;
    }

    int matches = rslv.countResolutions(TAXON_FLY, geneCG);
    if (matches != 1) {
        LOG.info(
                "RESOLVER: failed to resolve gene to one identifier, ignoring gene: "
                        + geneCG
                        + " count: "
                        + matches
                        + " FBgn: "
                        + rslv.resolveId(TAXON_FLY, geneCG));
        return null;
    }

    String resolvedId = rslv.resolveId(TAXON_FLY, geneCG).iterator().next();
    if (!genes.containsKey(resolvedId)) {
        Item created = createItem("Gene");
        created.setAttribute("primaryIdentifier", resolvedId);
        created.setReference("organism", orgDrosophila);
        genes.put(resolvedId, created);
        store(created);
        return created;
    }
    return genes.get(resolvedId);
}
/** {@inheritDoc} */ @Override public void process(Reader reader) throws Exception { // Data has format: // id | description @SuppressWarnings("rawtypes") Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader); int count = 0; while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); try { String entrez = line[0]; String description = line[1]; LOG.error("description " + count++ + " " + description); if (!StringUtils.isBlank(description)) { Item gene = createItem("Gene"); gene.setAttribute("primaryIdentifier", entrez); gene.setAttribute("description", description); gene.setReference("organism", getOrganism(HUMAN_TAXON_ID)); store(gene); } } catch (IndexOutOfBoundsException e) { LOG.info("Failed to read line: " + Arrays.asList(line)); } } }
/**
 * Return the reference id of the Gene item for an identifier, creating and storing the gene
 * the first time the (possibly resolved) identifier is seen.
 *
 * @param identifierType name of the Gene attribute that receives the identifier
 * @param id the identifier as read from the input
 * @param taxonId taxon id used for resolving and for the organism reference
 * @return the gene's item identifier, or null when a resolver is available for the taxon
 *     and resolveGene returns null
 * @throws ObjectStoreException if a new gene cannot be stored
 */
private String getGene(String identifierType, String id, String taxonId)
        throws ObjectStoreException {
    String identifier = id;
    if (rslv != null && rslv.hasTaxon(taxonId)) {
        identifier = resolveGene(identifier, taxonId);
        if (identifier == null) {
            return null;
        }
    }
    String refId = identifiersToGenes.get(identifier);
    if (refId == null) {
        Item gene = createItem("Gene");
        refId = gene.getIdentifier();
        gene.setAttribute(identifierType, identifier);
        gene.setReference("organism", getOrganism(taxonId));
        identifiersToGenes.put(identifier, refId);
        // Propagate a store failure unchanged rather than wrapping it in a second
        // ObjectStoreException.
        store(gene);
    }
    return refId;
}
/**
 * Return the reference id of the shared OrthologueEvidence item. On the first call the
 * evidence code and the evidence item are created and stored; later calls return the
 * remembered identifier.
 *
 * @return the item identifier of the OrthologueEvidence item
 * @throws ObjectStoreException if either item cannot be stored
 */
private String getEvidence() throws ObjectStoreException {
    if (evidenceRefId == null) {
        Item evidenceCode = createItem("OrthologueEvidenceCode");
        evidenceCode.setAttribute("abbreviation", EVIDENCE_CODE_ABBR);
        evidenceCode.setAttribute("name", EVIDENCE_CODE_NAME);
        // Store failures propagate unchanged; re-wrapping them in the same exception
        // type added nothing.
        store(evidenceCode);

        Item evidence = createItem("OrthologueEvidence");
        evidence.setReference("evidenceCode", evidenceCode.getIdentifier());
        store(evidence);

        // Only remembered once both items were stored successfully.
        evidenceRefId = evidence.getIdentifier();
    }
    return evidenceRefId;
}
/**
 * Produce random data: {@code count / 2} pairs of ReferenceTo2 / ReferenceFrom2 items, where
 * each ReferenceTo2 gets a distinct random integer as its "att" attribute and each
 * ReferenceFrom2 references its partner. The input reader is not read.
 *
 * <p>{@inheritDoc}
 */
public void process(Reader inputFile) throws Exception {
    Random random = new Random();
    Set<Integer> doneValues = new HashSet<Integer>();
    long time = System.currentTimeMillis();
    for (int i = 0; i < count / 2; i++) {
        Item itemTo = createItem("ReferenceTo2");
        Item itemFrom = createItem("ReferenceFrom2");
        Integer firstInt;
        do {
            firstInt = Integer.valueOf(random.nextInt());
            // Set.add returns false for a value already present, so draw again
        } while (!doneValues.add(firstInt));
        itemTo.setAttribute("att", "" + firstInt);
        itemFrom.setReference("ref", itemTo.getIdentifier());
        getItemWriter().store(ItemHelper.convert(itemFrom));
        getItemWriter().store(ItemHelper.convert(itemTo));
    }
    long now = System.currentTimeMillis();
    long elapsed = now - time;
    // A fast run can finish within the same millisecond; clamp the divisor so the rate
    // calculation cannot throw ArithmeticException.
    long objectsPerMinute = (60000L * count) / Math.max(1L, elapsed);
    LOG.info(
            "Finished generating "
                    + count
                    + " objects at "
                    + objectsPerMinute
                    + " objects per minute ("
                    + elapsed
                    + " ms total)");
}
/**
 * Create and store a ProteinHalfLife item, creating and storing the referenced Publication
 * the first time a PubMed id is seen.
 *
 * @param experiment experiment description; the attribute is not set when empty
 * @param value half-life value; the attribute is not set when empty
 * @param units units of the value; the attribute is not set when empty
 * @param pmid PubMed id, used as the key into pubmedIdMap and as the publication's pubMedId
 * @return the stored ProteinHalfLife item
 * @throws ObjectStoreException if the publication or the half-life item cannot be stored
 */
private Item getProteinHalfLife(String experiment, String value, String units, String pmid)
        throws ObjectStoreException {
    Item item = createItem("ProteinHalfLife");
    if (StringUtils.isNotEmpty(experiment)) {
        item.setAttribute("experiment", experiment);
    }
    if (StringUtils.isNotEmpty(value)) {
        item.setAttribute("value", value);
    }
    if (StringUtils.isNotEmpty(units)) {
        item.setAttribute("units", units);
    }
    item.setAttribute("source", "SGD");

    Item publication = pubmedIdMap.get(pmid);
    if (publication == null) {
        publication = createItem("Publication");
        publication.setAttribute("pubMedId", pmid);
        pubmedIdMap.put(pmid, publication);
        // Store failures propagate unchanged instead of being re-wrapped in the same type.
        store(publication);
    }
    // The reference is set the same way whether the publication is new or cached.
    item.setReference("publication", publication);

    store(item);
    return item;
}
/**
 * Return the OntologyTerm item for a term name, creating, storing and caching it on first
 * use.
 *
 * @param name the term name
 * @return the term item, or null when isValidTerm rejects the name
 * @throws ObjectStoreException if a new term cannot be stored
 */
private Item getTerm(String name) throws ObjectStoreException {
    if (!isValidTerm(name)) {
        return null;
    }
    if (terms.containsKey(name)) {
        return terms.get(name);
    }

    Item created = createItem("OntologyTerm");
    created.setAttribute("name", name);
    created.setReference("ontology", ontology);
    store(created);
    terms.put(name, created);
    return created;
}
/**
 * Store the "Fly Development" ontology and one DevelopmentTerm per embryonic stage 1..16,
 * recording each term's item identifier in the stages array at the index of its stage
 * number. Index 0 of the array is left unset.
 *
 * @throws ObjectStoreException if an item cannot be stored
 */
private void setStages() throws ObjectStoreException {
    Item ontologyItem = createItem("Ontology");
    ontologyItem.setAttribute("name", "Fly Development");
    store(ontologyItem);

    final int lastStage = 16;
    stages = new String[lastStage + 1];
    int stageNumber = 1;
    while (stageNumber <= lastStage) {
        Item term = createItem("DevelopmentTerm");
        term.setAttribute("name", "embryonic stage " + stageNumber);
        term.setReference("ontology", ontologyItem);
        stages[stageNumber] = term.getIdentifier();
        store(term);
        stageNumber++;
    }
}
/**
 * Create (but do not store) a Location item for a GFF3 record.
 *
 * <p>The smaller of the record's two coordinates becomes "start" and the larger becomes
 * "end". Strand "+" maps to "1", "-" maps to "-1", and anything else (including null) maps
 * to "0".
 *
 * @param record the record providing coordinates and strand
 * @param refId item identifier of the located feature
 * @param seq the sequence item the feature is located on
 * @param cd class descriptor of the feature; not used by this method
 * @return the new Location item
 */
private Item getLocation(GFF3Record record, String refId, Item seq, ClassDescriptor cd) {
    Item location = createItem("Location");

    int recordStart = record.getStart();
    int recordEnd = record.getEnd();
    location.setAttribute("start", String.valueOf(Math.min(recordStart, recordEnd)));
    location.setAttribute("end", String.valueOf(Math.max(recordStart, recordEnd)));

    String recordStrand = record.getStrand();
    String strandValue;
    if ("+".equals(recordStrand)) {
        strandValue = "1";
    } else if ("-".equals(recordStrand)) {
        strandValue = "-1";
    } else {
        strandValue = "0";
    }
    location.setAttribute("strand", strandValue);

    location.setReference("locatedOn", seq.getIdentifier());
    location.setReference("feature", refId);
    location.addToCollection("dataSets", dataSet);
    return location;
}
/**
 * Return the item identifier of the DataSet for a data source code, creating and storing
 * the DataSet the first time the code is seen.
 *
 * @param code the data source code
 * @return the DataSet's item identifier
 * @throws ObjectStoreException if a new DataSet cannot be stored
 */
private String getDataset(String code) throws ObjectStoreException {
    String knownRefId = dataSets.get(code);
    if (knownRefId != null) {
        return knownRefId;
    }

    String sourceName = getDataSourceName(code);
    String dataSetName = "DO Annotation from " + sourceName;
    Item dataSetItem = createItem("DataSet");
    dataSetItem.setAttribute("name", dataSetName);
    dataSetItem.setReference("dataSource", getDataSource(sourceName));

    String newRefId = dataSetItem.getIdentifier();
    dataSets.put(code, newRefId);
    store(dataSetItem);
    return newRefId;
}
/**
 * Store a Synonym item for a subject unless the same subject/type/value combination has
 * already been recorded.
 *
 * @param subjectRefId item identifier of the synonym's subject
 * @param type the synonym type
 * @param value the synonym value
 * @throws SAXException wrapping the ObjectStoreException if the synonym cannot be stored
 */
private void setSynonym(String subjectRefId, String type, String value) throws SAXException {
    String dedupKey = subjectRefId + type + value;
    if (synonyms.contains(dedupKey)) {
        return;
    }

    Item synonymItem = createItem("Synonym");
    synonymItem.setAttribute("type", type);
    synonymItem.setAttribute("value", value);
    synonymItem.setReference("subject", subjectRefId);
    // the key is recorded before the store attempt
    synonyms.add(dedupKey);
    try {
        store(synonymItem);
    } catch (ObjectStoreException e) {
        throw new SAXException(e);
    }
}
/**
 * Link a feature to its parents through the reference or collection that the handler
 * configures for the feature's class.
 *
 * <p>Nothing happens when the handler has no mapping for the class or when there are no
 * parents. If the configured field is a reference, the first parent is set and any further
 * parent is an error; if it is a collection, all parents are added.
 *
 * @param parents identifiers of the parent features
 * @param feature the feature to update
 * @throws RuntimeException if a reference would receive more than one parent, or if the
 *     configured field is neither a reference nor a collection of the class
 */
private void setRefsAndCollections(List<String> parents, Item feature) {
    String clsName = feature.getClassName();
    Map<String, String> fieldByClass = handler.getRefsAndCollections();
    if (fieldByClass == null || !fieldByClass.containsKey(clsName)) {
        return;
    }
    if (parents == null || parents.isEmpty()) {
        return;
    }

    ClassDescriptor cld =
            tgtModel.getClassDescriptorByName(tgtModel.getPackageName() + "." + clsName);
    String refName = fieldByClass.get(clsName);

    if (cld.getReferenceDescriptorByName(refName, true) != null) {
        feature.setReference(refName, getRefId(parents.get(0)));
        if (parents.size() > 1) {
            String primaryIdent = feature.getAttribute("primaryIdentifier").getValue();
            throw new RuntimeException(
                    "Feature has multiple relations for reference: "
                            + refName
                            + " for feature: "
                            + feature.getClassName()
                            + ", "
                            + feature.getIdentifier()
                            + ", "
                            + primaryIdent);
        }
    } else if (cld.getCollectionDescriptorByName(refName, true) != null) {
        List<String> parentRefIds = new ArrayList<String>();
        for (String parent : parents) {
            parentRefIds.add(getRefId(parent));
        }
        feature.setCollection(refName, parentRefIds);
    } else if (!parents.isEmpty()) {
        throw new RuntimeException(
                "No '"
                        + refName
                        + "' reference/collection found in "
                        + "class: "
                        + clsName
                        + " - is map configured correctly?");
    }
}
/**
 * For every evidence set in goTermGeneToEvidence, store one DOEvidence item per evidence
 * entry and then store the resulting "evidence" reference list against the stored
 * annotation id taken from the last entry iterated.
 *
 * @throws ObjectStoreException if an item or reference list cannot be stored
 */
private void storeEvidence() throws ObjectStoreException {
    for (Set<Evidence> evidenceSet : goTermGeneToEvidence.values()) {
        List<String> storedEvidenceIds = new ArrayList<String>();
        Integer annotationId = null;

        for (Evidence entry : evidenceSet) {
            Item evidenceItem = createItem("DOEvidence");
            evidenceItem.setReference("code", evidenceCodes.get(entry.getEvidenceCode()));

            List<String> publications = entry.getPublications();
            if (!publications.isEmpty()) {
                evidenceItem.setCollection("publications", publications);
            }

            store(evidenceItem);
            storedEvidenceIds.add(evidenceItem.getIdentifier());
            annotationId = entry.getStoredAnnotationId();
        }

        List<String> refIdCopy = new ArrayList<String>(storedEvidenceIds);
        store(new ReferenceList("evidence", refIdCopy), annotationId);
    }
}
/**
 * Return the DataSet item with the given title, creating, storing and caching it when no
 * item with that title has been created yet.
 *
 * @param title the DataSet title, also used as the cache key
 * @param url the url attribute, or null if the url shouldn't be set
 * @param description the description attribute, or null if it shouldn't be set
 * @param dataSourceItem the DataSource referenced by the DataSet
 * @return the DataSet Item
 * @throws RuntimeException if a new DataSet cannot be stored
 */
public Item getDataSetItem(String title, String url, String description, Item dataSourceItem) {
    Item cached = dataSets.get(title);
    if (cached != null) {
        return cached;
    }

    Item created = createItem("DataSet");
    created.setAttribute("name", title);
    created.setReference("dataSource", dataSourceItem);
    if (url != null) {
        created.setAttribute("url", url);
    }
    if (description != null) {
        created.setAttribute("description", description);
    }

    try {
        store(created);
    } catch (ObjectStoreException e) {
        throw new RuntimeException("failed to store DataSet with title: " + title, e);
    }
    dataSets.put(title, created);
    return created;
}
private String newProduct( String identifier, String type, Item organism, String dataSourceCode, boolean createOrganism, String field) throws ObjectStoreException { String idField = field; String accession = identifier; String clsName = null; // find gene attribute first to see if organism should be part of key if ("gene".equalsIgnoreCase(type)) { clsName = "Gene"; String taxonId = organism.getAttribute("taxonId").getValue(); if (idField == null) { idField = configs.get(taxonId).identifier; if (idField == null) { throw new RuntimeException( "Could not find a identifier property for taxon: " + taxonId + " check properties file: " + PROP_FILE); } } // if a Dmel gene we need to use FlyBaseIdResolver to find a current id if ("7227".equals(taxonId)) { IdResolver resolver = flybaseResolverFactory.getIdResolver(false); if (resolver != null) { int resCount = resolver.countResolutions(taxonId, accession); if (resCount != 1) { LOG.info( "RESOLVER: failed to resolve gene to one identifier, " + "ignoring gene: " + accession + " count: " + resCount + " FBgn: " + resolver.resolveId(taxonId, accession)); return null; } accession = resolver.resolveId(taxonId, accession).iterator().next(); } } } else if ("protein".equalsIgnoreCase(type)) { // TODO use values in config clsName = "Protein"; idField = "primaryAccession"; } else { String typeCls = TypeUtil.javaiseClassName(type); if (getModel().getClassDescriptorByName(typeCls) != null) { Class<?> cls = getModel().getClassDescriptorByName(typeCls).getType(); if (BioEntity.class.isAssignableFrom(cls)) { clsName = typeCls; } } if (clsName == null) { throw new IllegalArgumentException("Unrecognised annotation type '" + type + "'"); } } boolean includeOrganism; if ("primaryIdentifier".equals(idField) || "protein".equals(type)) { includeOrganism = false; } else { includeOrganism = createOrganism; } String key = makeProductKey(accession, type, organism, includeOrganism); // Have we already seen this product somewhere before? 
// if so, return the product rather than creating a new one... if (productMap.containsKey(key)) { return productMap.get(key); } // if a Dmel gene we need to use FlyBaseIdResolver to find a current id Item product = createItem(clsName); if (organism != null && createOrganism) { product.setReference("organism", organism.getIdentifier()); } product.setAttribute(idField, accession); String dataSetIdentifier = getDataset(dataSourceCode); product.addToCollection("dataSets", dataSetIdentifier); Integer storedProductId = store(product); storedProductIds.put(product.getIdentifier(), storedProductId); productMap.put(key, product.getIdentifier()); return product.getIdentifier(); }
/**
 * Process one GFF3 record: create the feature item for it (or only an extra Location when
 * the feature was already created), fill in identifier, symbol, synonyms and other
 * configured attributes, link parents, let the handler add its own items, and store
 * everything the handler collected.
 *
 * <p>Per-taxon configuration (config_exclude, config_term, config_attr, config_attr_class)
 * is looked up with this.orgTaxonId. Records whose type is excluded, or not in the
 * configured term list, are skipped.
 *
 * @param record GFF3Record
 * @throws ObjectStoreException if an error occurs storing items
 * @throws IllegalArgumentException if the record type has no class in the target model
 */
public void process(GFF3Record record) throws ObjectStoreException {
    String term = record.getType();

    if (config_exclude != null && !config_exclude.isEmpty()) {
        // don't process terms in the exclude list
        if (config_exclude.containsKey(this.orgTaxonId)) {
            if (config_exclude.get(this.orgTaxonId).contains(term)) {
                return;
            }
        }
    }

    if (config_term != null && !config_term.isEmpty()) {
        // otherwise all terms are processed
        if (config_term.containsKey(this.orgTaxonId)) {
            if (!config_term.get(this.orgTaxonId).contains(term)) {
                return;
            }
        }
    }

    // By default, use ID field in attributes
    String primaryIdentifier = record.getId();

    // If pid set in gff_config.properties, look for the attribute field, e.g. locus_tag
    if (config_attr.containsKey(this.orgTaxonId)) {
        if (config_attr.get(this.orgTaxonId).containsKey("primaryIdentifier")) {
            String cls = config_attr_class.get(this.orgTaxonId).get("primaryIdentifier");
            // the override applies to every class ("all") or only to the named term
            if ("all".equals(cls) || term.equals(cls)) {
                String pidAttr = config_attr.get(this.orgTaxonId).get("primaryIdentifier");
                if (pidAttr.contains("Dbxref") && record.getDbxrefs() != null) {
                    // the part after the first '.' of the configured value selects the xref
                    String pidAttrPrefix = pidAttr.split("\\.")[1];
                    for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
                        String xref = (String) i.next();
                        if (xref.contains(pidAttrPrefix)) {
                            // NOTE(review): throws ArrayIndexOutOfBoundsException when the
                            // matching xref contains no ':' separator
                            primaryIdentifier = xref.split(":")[1];
                            break;
                        }
                    }
                } else {
                    if (record.getAttributes().get(pidAttr) != null) {
                        primaryIdentifier = record.getAttributes().get(pidAttr).get(0);
                    }
                }
            }
        }
    }

    // a non-null refId means a feature with this identifier was registered earlier
    String refId = identifierMap.get(primaryIdentifier);

    // get rid of previous record Items from handler
    handler.clear();

    Item seq = getSeq(record.getSequenceID());

    String className = TypeUtil.javaiseClassName(term);
    String fullClassName = tgtModel.getPackageName() + "." + className;

    ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName);

    if (cd == null) {
        throw new IllegalArgumentException(
                "no class found in model for: "
                        + className
                        + " (original GFF record type: "
                        + term
                        + ") for "
                        + "record: "
                        + record);
    }

    Set<Item> synonymsToAdd = new HashSet<Item>();

    Item feature = null;

    // new feature
    if (refId == null) {
        feature = createItem(className);
        refId = feature.getIdentifier();
    }

    if (!"chromosome".equals(term) && seq != null) {
        boolean makeLocation =
                record.getStart() >= 1
                        && record.getEnd() >= 1
                        && !dontCreateLocations
                        && handler.createLocations(record);
        if (makeLocation) {
            Item location = getLocation(record, refId, seq, cd);
            if (feature == null) {
                // this feature has already been created and stored
                // we only wanted the location, we're done here.
                store(location);
                return;
            }
            int length = getLength(record);
            feature.setAttribute("length", String.valueOf(length));
            handler.setLocation(location);
            if ("Chromosome".equals(seqClsName)
                    && (cd.getFieldDescriptorByName("chromosome") != null)) {
                feature.setReference("chromosome", seq.getIdentifier());
                feature.setReference("chromosomeLocation", location);
            }
        }
    }

    if (feature == null) {
        // this feature has already been created and stored
        // feature with discontinuous location, this location wasn't valid for some reason
        return;
    }

    if (primaryIdentifier != null) {
        feature.setAttribute("primaryIdentifier", primaryIdentifier);
    }
    handler.setFeature(feature);
    // NOTE(review): the identifier is registered even when it is null
    identifierMap.put(primaryIdentifier, feature.getIdentifier());

    List<?> names = record.getNames();
    String symbol = null;
    List<String> synonyms = new ArrayList<String>();

    // get the attribute set for symbol
    if (config_attr.containsKey(this.orgTaxonId)) {
        if (config_attr.get(this.orgTaxonId).containsKey("symbol")) {
            String cls = config_attr_class.get(this.orgTaxonId).get("symbol");
            if ("all".equals(cls) || term.equals(cls)) {
                String symbolAttr = config_attr.get(this.orgTaxonId).get("symbol");
                if (symbolAttr.contains("Dbxref") && record.getDbxrefs() != null) {
                    String symbolAttrPrefix = symbolAttr.split("\\.")[1];
                    for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
                        String xref = (String) i.next();
                        if (xref.contains(symbolAttrPrefix)) {
                            symbol = xref.split(":")[1];
                            break;
                        }
                    }
                } else {
                    if (record.getAttributes().get(symbolAttr) != null) {
                        symbol = record.getAttributes().get(symbolAttr).get(0);
                    }
                }
            }
        }
    }

    // get the attribute set for synonym
    if (config_attr.containsKey(this.orgTaxonId)) {
        if (config_attr.get(this.orgTaxonId).containsKey("synonym")) {
            String cls = config_attr_class.get(this.orgTaxonId).get("synonym");
            if ("all".equals(cls) || term.equals(cls)) {
                String synonymAttr = config_attr.get(this.orgTaxonId).get("synonym");
                if (synonymAttr.contains("Dbxref") && record.getDbxrefs() != null) {
                    String synonymAttrPrefix = synonymAttr.split("\\.")[1];
                    // collected in a set first, so repeated values are added only once
                    Set<String> synSet = new HashSet<String>();
                    for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
                        String xref = (String) i.next();
                        if (xref.contains(synonymAttrPrefix)) {
                            synSet.add(xref.split(":")[1]);
                        }
                    }
                    synonyms.addAll(synSet);
                } else {
                    // NOTE(review): unlike the symbol branch there is no null check here,
                    // so synonyms is presumably null when the attribute is absent - confirm
                    // that setNames tolerates that
                    synonyms = record.getAttributes().get(synonymAttr);
                }
                // synonyms.removeAll(Collections.singleton(null));
            }
        }
    }

    if (names != null) {
        setNames(names, symbol, synonyms, synonymsToAdd, primaryIdentifier, feature, cd);
    }

    // Other attributes
    List<String> primeAttrList = Arrays.asList("primaryIdentifier", "symbol", "synonym");

    if (config_attr.containsKey(this.orgTaxonId)) {
        Map<String, String> attrMapOrg = config_attr.get(this.orgTaxonId);
        Map<String, String> attrMapClone = new HashMap<String, String>();
        // Copy the configured map so the attributes already handled above can be removed
        // without modifying the configuration itself
        for (Entry<String, String> e : attrMapOrg.entrySet()) {
            attrMapClone.put(e.getKey(), e.getValue());
        }

        for (String pa : primeAttrList) {
            attrMapClone.remove(pa);
        }

        for (Entry<String, String> e : attrMapClone.entrySet()) {
            String cls = config_attr_class.get(this.orgTaxonId).get(e.getKey());
            if ("all".equals(cls) || term.equals(cls)) {
                String attr = e.getValue();
                if (attr.contains("Dbxref") && record.getDbxrefs() != null) {
                    String attrPrefix = attr.split("\\.")[1];
                    for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
                        String xref = (String) i.next();
                        if (xref.contains(attrPrefix)) {
                            // the value is only set when checkAttribute accepts the name
                            if (feature.checkAttribute(e.getKey())) {
                                feature.setAttribute(e.getKey(), xref.split(":")[1]);
                            }
                            break;
                        }
                    }
                } else {
                    if (record.getAttributes().get(attr) != null) {
                        String attrVal = record.getAttributes().get(attr).get(0);
                        if (attrVal != null) {
                            if (feature.checkAttribute(e.getKey())) {
                                feature.setAttribute(e.getKey(), attrVal);
                            }
                        }
                    }
                }
            }
        }
    }

    List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
        setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);
    handler.addDataSet(dataSet);

    Double score = record.getScore();
    if (score != null && !"".equals(String.valueOf(score))) {
        feature.setAttribute("score", String.valueOf(score));
        feature.setAttribute("scoreType", record.getSource());
    }

    for (Item synonym : synonymsToAdd) {
        handler.addItem(synonym);
    }

    // the handler may add further items and data set / publication references
    handler.process(record);

    if (handler.getDataSetReferenceList().getRefIds().size() > 0) {
        feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();

    if (handler.getPublicationReferenceList().getRefIds().size() > 0) {
        feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    // store every item the handler holds for this record
    try {
        Iterator<Item> iter = handler.getItems().iterator();
        while (iter.hasNext()) {
            store(iter.next());
        }
    } catch (ObjectStoreException e) {
        LOG.error("Problem writing item to the itemwriter");
        throw e;
    }
}
/** {@inheritDoc} */ public void process(Reader reader) throws Exception { // Create a chromosome Item chromosome = createItem("Chromosome"); chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID); store(chromosome); @SuppressWarnings("rawtypes") Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader); while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); // remove header line if (!line[0].equals(HEADER_LINE)) { String ecogeneId = line[0]; String geneName = line[1]; String eCK = line[2]; String swissProtId = line[3]; String wisconsinGenBankId = line[4]; String genBankProteinId = line[5]; String genoBaseId = line[6]; String type = line[7]; String strand = line[8]; String start = line[9]; String end = line[10]; String synonym = line[11]; Set<String> symSet = new TreeSet<String>(); if (!eCK.equals(NULL_STRING)) { symSet.add(eCK); } if (!genoBaseId.equals(NULL_STRING)) { symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; "))); } if (!synonym.equals(NONE_STRING)) { symSet.addAll(Arrays.asList(synonym.split(", "))); } if (type.equals(TYPE_GENE)) { Item gene = createItem("Gene"); gene.setReference("chromosome", chromosome); gene.setReference("organism", getOrganism(ECOLI_TAXON)); gene.setAttribute("primaryIdentifier", ecogeneId); gene.setAttribute("secondaryIdentifier", wisconsinGenBankId); gene.setAttribute("name", geneName); gene.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(gene, sym, true); } } if (!swissProtId.equals(NULL_STRING)) { if (proteinMap.containsKey(swissProtId)) { // Reference a protein to a gene (a gene has proteins // collection) gene.addToCollection("proteins", proteinMap.get(swissProtId)); } else { Item protein = createItem("Protein"); protein.setAttribute("primaryAccession", swissProtId); // NCBI Protein id, remove "g" protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1)); gene.addToCollection("proteins", protein); 
store(protein); proteinMap.put(swissProtId, protein); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", gene); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } gene.setReference("chromosomeLocation", location); store(location); } store(gene); } else if (type.equals(TYPE_RNA)) { // TODO code refactory Item rna = createItem("NcRNA"); rna.setReference("chromosome", chromosome); rna.setReference("organism", getOrganism(ECOLI_TAXON)); rna.setAttribute("primaryIdentifier", ecogeneId); rna.setAttribute("secondaryIdentifier", wisconsinGenBankId); rna.setAttribute("name", geneName); rna.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(rna, sym, true); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", rna); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } rna.setReference("chromosomeLocation", location); store(location); } store(rna); } } } }
/**
 * Create a Synonym item for the given subject. The item is returned to the caller; it is
 * neither stored nor added to anything here.
 *
 * @param subject the subject of the new Synonym
 * @param value the Synonym value
 * @return the new Synonym Item
 */
public Item getSynonym(Item subject, String value) {
    Item created = createItem("Synonym");
    created.setAttribute("value", value);
    String subjectRefId = subject.getIdentifier();
    created.setReference("subject", subjectRefId);
    return created;
}
/** * Process the edge data file. * * @param file the edge data file * @param tfMap a customized map with TF information * @param miRNAMap a customized map with miRNA information */ private void processEdgeFile( File file, Map<String, Map<String, String>> tfMap, Map<String, Map<String, String>> miRNAMap) { try { Reader reader = new FileReader(file); Iterator<?> tsvIter; try { tsvIter = FormattedTextParser.parseTabDelimitedReader(reader); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } while (tsvIter.hasNext()) { String[] line = (String[]) tsvIter.next(); if (line.length > 1) { String sourceIdentifier = line[0]; String targetIdentifier = line[1]; try { if (tfMap.containsKey(sourceIdentifier)) { // Create source gene String sourceSymbol = tfMap.get(sourceIdentifier).keySet().iterator().next(); String sourceLevel = tfMap.get(sourceIdentifier).get(sourceSymbol); String sourceGenePid = createGene(sourceIdentifier, sourceSymbol); // Create networkProperty for source gene Item sourceNetworkProperty = createNetworkProperty(TOPO_TYPE_LEVEL, sourceLevel); sourceNetworkProperty.setReference("node", geneItems.get(sourceGenePid)); store(sourceNetworkProperty); if (tfMap.containsKey(targetIdentifier)) { // Create regulation for both genes Item regulation = createRegulation(INTERACTION_TYPE_TF_TF); // Create target gene String targetSymbol = tfMap.get(targetIdentifier).keySet().iterator().next(); String targetLevel = tfMap.get(targetIdentifier).get(targetSymbol); String targetGenePid = createGene(targetIdentifier, targetSymbol); // Create networkProperty for target gene Item targetNetworkProperty = createNetworkProperty(TOPO_TYPE_LEVEL, targetLevel); targetNetworkProperty.setReference("node", geneItems.get(targetGenePid)); store(targetNetworkProperty); regulation.setReference("source", geneItems.get(sourceGenePid)); regulation.setReference("target", geneItems.get(targetGenePid)); store(regulation); } else if (miRNAMap.containsKey(targetIdentifier)) 
{ // Create regulation for both genes Item regulation = createRegulation(INTERACTION_TYPE_TF_MIRNA); // Create target gene String targetSymbol = miRNAMap.get(targetIdentifier).keySet().iterator().next(); String targetPosition = miRNAMap.get(targetIdentifier).get(targetSymbol); String targetGenePid = createGene(null, targetSymbol); // Create networkProperty for target gene Item targetNetworkProperty = createNetworkProperty(TOPO_TYPE_POSITION, targetPosition); targetNetworkProperty.setReference("node", geneItems.get(targetGenePid)); store(targetNetworkProperty); regulation.setReference("source", geneItems.get(sourceGenePid)); regulation.setReference("target", geneItems.get(targetGenePid)); store(regulation); } else { continue; } } else if (miRNAMap.containsKey(sourceIdentifier)) { // Create source gene String sourceSymbol = miRNAMap.get(sourceIdentifier).keySet().iterator().next(); String sourcePosition = miRNAMap.get(sourceIdentifier).get(sourceSymbol); String sourceGenePid = createGene(null, sourceSymbol); // Create networkProperty for source gene Item sourceNetworkProperty = createNetworkProperty(TOPO_TYPE_POSITION, sourcePosition); sourceNetworkProperty.setReference("node", geneItems.get(sourceGenePid)); store(sourceNetworkProperty); if (tfMap.containsKey(targetIdentifier)) { // Create regulation for both genes Item regulation = createRegulation(INTERACTION_TYPE_MIRNA_TF); // Create target gene String targetSymbol = tfMap.get(targetIdentifier).keySet().iterator().next(); String targetLevel = tfMap.get(targetIdentifier).get(targetSymbol); String targetGenePid = createGene(targetIdentifier, targetSymbol); // Create networkProperty for target gene Item targetNetworkProperty = createNetworkProperty(TOPO_TYPE_LEVEL, targetLevel); targetNetworkProperty.setReference("node", geneItems.get(targetGenePid)); store(targetNetworkProperty); regulation.setReference("source", geneItems.get(sourceGenePid)); regulation.setReference("target", geneItems.get(targetGenePid)); 
store(regulation); } else if (miRNAMap.containsKey(targetIdentifier)) { // Create regulation for both genes Item regulation = createRegulation(INTERACTION_TYPE_MIRNA_MIRNA); // Create target gene String targetSymbol = miRNAMap.get(targetIdentifier).keySet().iterator().next(); String targetPosition = miRNAMap.get(targetIdentifier).get(targetSymbol); String targetGenePid = createGene(null, targetSymbol); // Create networkProperty for target gene Item targetNetworkProperty = createNetworkProperty(TOPO_TYPE_POSITION, targetPosition); targetNetworkProperty.setReference("node", geneItems.get(targetGenePid)); store(targetNetworkProperty); regulation.setReference("source", geneItems.get(sourceGenePid)); regulation.setReference("target", geneItems.get(targetGenePid)); store(regulation); } else { continue; } } else { continue; } } catch (ObjectStoreException e) { e.printStackTrace(); throw new RuntimeException(e); } } } } catch (FileNotFoundException e) { e.printStackTrace(); throw new RuntimeException(e); } }
/**
 * Process one GFF3 record: create the feature item for it (or only an extra Location when
 * a feature with the same ID was already created), set its identifier, names, parents,
 * organism, data set and score, let the handler add its own items, and store everything
 * the handler collected.
 *
 * @param record GFF3Record
 * @throws ObjectStoreException if an error occurs storing items
 * @throws IllegalArgumentException if the record type has no class in the target model
 */
public void process(GFF3Record record) throws ObjectStoreException {
    String identifier = record.getId();
    // a non-null refId means a feature with this identifier was registered earlier
    String refId = identifierMap.get(identifier);

    // get rid of previous record Items from handler
    handler.clear();
    List<?> names = record.getNames();

    Item seq = getSeq(record.getSequenceID());

    String term = record.getType();
    String className = TypeUtil.javaiseClassName(term);
    String fullClassName = tgtModel.getPackageName() + "." + className;

    ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName);

    if (cd == null) {
        throw new IllegalArgumentException(
                "no class found in model for: "
                        + className
                        + " (original GFF record type: "
                        + term
                        + ") for "
                        + "record: "
                        + record);
    }

    Set<Item> synonymsToAdd = new HashSet<Item>();

    Item feature = null;

    // new feature
    if (refId == null) {
        feature = createItem(className);
        refId = feature.getIdentifier();
    }

    if (!"chromosome".equals(record.getType()) && seq != null) {
        boolean makeLocation =
                record.getStart() >= 1
                        && record.getEnd() >= 1
                        && !dontCreateLocations
                        && handler.createLocations(record);
        if (makeLocation) {
            Item location = getLocation(record, refId, seq, cd);
            if (feature == null) {
                // this feature has already been created and stored
                // we only wanted the location, we're done here.
                store(location);
                return;
            }
            int length = getLength(record);
            feature.setAttribute("length", String.valueOf(length));
            handler.setLocation(location);
            if ("Chromosome".equals(seqClsName)
                    && (cd.getFieldDescriptorByName("chromosome") != null)) {
                feature.setReference("chromosome", seq.getIdentifier());
                feature.setReference("chromosomeLocation", location);
            }
        }
    }

    if (feature == null) {
        // this feature has already been created and stored
        // feature with discontinuous location, this location wasn't valid for some reason
        return;
    }

    if (identifier != null) {
        feature.setAttribute("primaryIdentifier", identifier);
    }
    handler.setFeature(feature);
    // NOTE(review): the identifier is registered even when it is null
    identifierMap.put(identifier, feature.getIdentifier());

    if (names != null) {
        setNames(names, synonymsToAdd, record.getId(), feature, cd);
    }

    List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
        setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);
    handler.addDataSet(dataSet);

    Double score = record.getScore();
    if (score != null && !"".equals(String.valueOf(score))) {
        feature.setAttribute("score", String.valueOf(score));
        feature.setAttribute("scoreType", record.getSource());
    }

    for (Item synonym : synonymsToAdd) {
        handler.addItem(synonym);
    }

    // the handler may add further items and data set / publication references
    handler.process(record);

    if (handler.getDataSetReferenceList().getRefIds().size() > 0) {
        feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();

    if (handler.getPublicationReferenceList().getRefIds().size() > 0) {
        feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    // store every item the handler holds for this record
    try {
        Iterator<Item> iter = handler.getItems().iterator();
        while (iter.hasNext()) {
            store(iter.next());
        }
    } catch (ObjectStoreException e) {
        LOG.error("Problem writing item to the itemwriter");
        throw e;
    }
}