Пример #1
0
  /**
   * Build and store a single annotation item linking a product to an ontology term.
   *
   * @param productIdentifier item identifier used for the "subject" reference
   * @param productType product type; when it is exactly "gene" the annotation is also registered
   *     through addProductCollection
   * @param termIdentifier item identifier used for the "ontologyTerm" reference
   * @param organism organism item, passed through to createWithObjects
   * @param qualifier optional qualifier attribute; skipped when empty
   * @param withText optional "with" text; skipped when empty
   * @param dataSourceCode code used to look up the data set and to build the "with" objects
   * @return the value returned by store() for the new annotation
   * @throws ObjectStoreException if an item cannot be stored
   */
  private Integer createGoAnnotation(
      String productIdentifier,
      String productType,
      String termIdentifier,
      Item organism,
      String qualifier,
      String withText,
      String dataSourceCode)
      throws ObjectStoreException {
    Item annotation = createItem(annotationClassName);
    annotation.setReference("subject", productIdentifier);
    annotation.setReference("ontologyTerm", termIdentifier);

    boolean hasQualifier = !StringUtils.isEmpty(qualifier);
    if (hasQualifier) {
      annotation.setAttribute("qualifier", qualifier);
    }

    // Keep the raw "with" text and, where it yields items, reference them as a collection.
    boolean hasWithText = !StringUtils.isEmpty(withText);
    if (hasWithText) {
      annotation.setAttribute("withText", withText);
      List<String> withRefIds = createWithObjects(withText, organism, dataSourceCode);
      if (!withRefIds.isEmpty()) {
        ReferenceList withCollection = new ReferenceList("with", withRefIds);
        annotation.addCollection(withCollection);
      }
    }

    String dataSetRefId = getDataset(dataSourceCode);
    annotation.addToCollection("dataSets", dataSetRefId);

    boolean isGeneProduct = "gene".equals(productType);
    if (isGeneProduct) {
      addProductCollection(productIdentifier, annotation.getIdentifier());
    }
    return store(annotation);
  }
Пример #2
0
 /**
  * Create and store a Homologue item for a pair of genes.
  *
  * @param gene1 item identifier set as the "gene" reference
  * @param gene2 item identifier set as the "homologue" reference
  * @throws ObjectStoreException if the evidence or the homologue cannot be stored
  */
 private void processHomologue(String gene1, String gene2) throws ObjectStoreException {
   Item homologue = createItem("Homologue");
   homologue.setReference("gene", gene1);
   homologue.setReference("homologue", gene2);
   homologue.addToCollection("evidence", getEvidence());
   homologue.setAttribute("type", "homologue");
   // store() already throws ObjectStoreException; re-wrapping it in the same type only
   // added a redundant layer to the stack trace, so it propagates directly.
   store(homologue);
 }
Пример #3
0
 /**
  * Return the MRNAExpressionResult item cached under the given key, creating and caching a new
  * one on first use. The item is not stored by this method.
  *
  * @param key cache key for the result
  * @param geneId item identifier set as the "gene" reference
  * @param pubId item identifier set as the "publication" reference
  * @param stage stage description handed to getStages to build the "stages" collection
  * @return the cached or newly created result item
  */
 private Item getResult(String key, String geneId, String pubId, String stage) {
   if (!results.containsKey(key)) {
     Item created = createItem("MRNAExpressionResult");
     created.setAttribute("expressed", "true");
     created.setReference("gene", geneId);
     created.setReference("publication", pubId);
     created.setCollection("stages", getStages(stage));
     // The "images" and "mRNAExpressionTerms" collections are intentionally not initialised
     // here (their empty-list initialisation was disabled in the original code).
     results.put(key, created);
     return created;
   }
   return results.get(key);
 }
Пример #4
0
  /**
   * Resolve an identifier for taxon TAXON_FLY to a single id and return the Gene item for it,
   * creating, caching and storing the item the first time that id is seen.
   *
   * @param geneCG the identifier to resolve
   * @return the Gene item, or null when no resolver is available for the taxon or the identifier
   *     does not resolve to exactly one id
   * @throws ObjectStoreException if a new Gene item cannot be stored
   */
  private Item getGene(String geneCG) throws ObjectStoreException {
    boolean resolverReady = rslv != null && rslv.hasTaxon(TAXON_FLY);
    if (!resolverReady) {
      return null;
    }

    int matches = rslv.countResolutions(TAXON_FLY, geneCG);
    if (matches != 1) {
      String message =
          "RESOLVER: failed to resolve gene to one identifier, ignoring gene: "
              + geneCG
              + " count: "
              + matches
              + " FBgn: "
              + rslv.resolveId(TAXON_FLY, geneCG);
      LOG.info(message);
      return null;
    }

    // Exactly one resolution exists, so the first element is the resolved id.
    String resolvedId = rslv.resolveId(TAXON_FLY, geneCG).iterator().next();
    if (!genes.containsKey(resolvedId)) {
      Item created = createItem("Gene");
      created.setAttribute("primaryIdentifier", resolvedId);
      created.setReference("organism", orgDrosophila);
      genes.put(resolvedId, created);
      store(created);
      return created;
    }
    return genes.get(resolvedId);
  }
 /**
  * Read a tab-delimited file of {@code id | description} rows and store one Gene item for every
  * row that has a non-blank description. Rows with fewer than two columns are logged and skipped.
  *
  * <p>{@inheritDoc}
  */
 @Override
 public void process(Reader reader) throws Exception {
   // Data has format:
   // id | description
   Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
   int count = 0;
   while (lineIter.hasNext()) {
     String[] line = (String[]) lineIter.next();
     // Check the column count up front instead of relying on an IndexOutOfBoundsException.
     if (line.length < 2) {
       LOG.info("Failed to read line: " + Arrays.asList(line));
       continue;
     }
     String entrez = line[0];
     String description = line[1];
     // Per-row trace output is diagnostic, not an error condition.
     LOG.debug("description " + count++ + " " + description);
     if (!StringUtils.isBlank(description)) {
       Item gene = createItem("Gene");
       gene.setAttribute("primaryIdentifier", entrez);
       gene.setAttribute("description", description);
       gene.setReference("organism", getOrganism(HUMAN_TAXON_ID));
       store(gene);
     }
   }
 }
Пример #6
0
  /**
   * Return the item identifier of the Gene for the given id, creating and storing the Gene the
   * first time the (possibly resolved) identifier is seen.
   *
   * @param identifierType name of the Gene attribute that receives the identifier
   * @param id the gene identifier as read from the input
   * @param taxonId taxon of the gene; used for resolution and for the organism reference
   * @return the Gene's item identifier, or null when a resolver is available for the taxon but
   *     resolveGene returns null
   * @throws ObjectStoreException if the organism or the new Gene cannot be stored
   */
  private String getGene(String identifierType, String id, String taxonId)
      throws ObjectStoreException {
    String identifier = id;

    if (rslv != null && rslv.hasTaxon(taxonId)) {
      identifier = resolveGene(identifier, taxonId);
      if (identifier == null) {
        return null;
      }
    }
    String refId = identifiersToGenes.get(identifier);
    if (refId == null) {
      Item gene = createItem("Gene");
      refId = gene.getIdentifier();
      gene.setAttribute(identifierType, identifier);
      gene.setReference("organism", getOrganism(taxonId));
      identifiersToGenes.put(identifier, refId);
      // store() already throws ObjectStoreException; no same-type re-wrap is needed.
      store(gene);
    }
    return refId;
  }
Пример #7
0
  /**
   * Return the item identifier of the shared OrthologueEvidence item, creating and storing it
   * (together with its OrthologueEvidenceCode) on the first call.
   *
   * @return the cached OrthologueEvidence item identifier
   * @throws ObjectStoreException if either item cannot be stored
   */
  private String getEvidence() throws ObjectStoreException {
    if (evidenceRefId == null) {
      Item evidenceCode = createItem("OrthologueEvidenceCode");
      evidenceCode.setAttribute("abbreviation", EVIDENCE_CODE_ABBR);
      evidenceCode.setAttribute("name", EVIDENCE_CODE_NAME);
      // store() already throws ObjectStoreException; it is propagated as-is rather than being
      // wrapped in a second exception of the same type.
      store(evidenceCode);

      Item evidence = createItem("OrthologueEvidence");
      evidence.setReference("evidenceCode", evidenceCode.getIdentifier());
      store(evidence);

      evidenceRefId = evidence.getIdentifier();
    }
    return evidenceRefId;
  }
Пример #8
0
 /**
  * Produce random data: {@code count / 2} pairs of ReferenceTo2 / ReferenceFrom2 items, where
  * each ReferenceTo2 gets a distinct random integer as its "att" attribute and each
  * ReferenceFrom2 references its partner. The input reader is not read.
  *
  * <p>{@inheritDoc}
  */
 public void process(Reader inputFile) throws Exception {
   Random random = new Random();
   Set<Integer> doneValues = new HashSet<Integer>();
   long time = System.currentTimeMillis();
   for (int i = 0; i < count / 2; i++) {
     Item itemTo = createItem("ReferenceTo2");
     Item itemFrom = createItem("ReferenceFrom2");
     Integer firstInt;
     // Set.add returns false for a value already present, so loop until a new value is drawn.
     do {
       firstInt = Integer.valueOf(random.nextInt());
     } while (!doneValues.add(firstInt));
     itemTo.setAttribute("att", "" + firstInt);
     itemFrom.setReference("ref", itemTo.getIdentifier());
     getItemWriter().store(ItemHelper.convert(itemFrom));
     getItemWriter().store(ItemHelper.convert(itemTo));
   }
   long elapsed = System.currentTimeMillis() - time;
   // A run that finishes within the clock resolution has elapsed == 0; clamp the divisor so
   // the rate calculation cannot throw ArithmeticException.
   long objectsPerMinute = (60000L * count) / Math.max(1L, elapsed);
   LOG.info(
       "Finished generating "
           + count
           + " objects at "
           + objectsPerMinute
           + " objects per minute ("
           + elapsed
           + " ms total)");
 }
  /**
   * Create and store a ProteinHalfLife item, linking it to the Publication for the given PubMed
   * id. The Publication is created, cached and stored the first time the id is seen.
   *
   * @param experiment experiment attribute; skipped when empty
   * @param value half-life value attribute; skipped when empty
   * @param units units attribute; skipped when empty
   * @param pmid PubMed id used to look up or create the Publication
   * @return the stored ProteinHalfLife item
   * @throws ObjectStoreException if the publication or the half-life item cannot be stored
   */
  private Item getProteinHalfLife(String experiment, String value, String units, String pmid)
      throws ObjectStoreException {

    Item item = createItem("ProteinHalfLife");

    if (StringUtils.isNotEmpty(experiment)) {
      item.setAttribute("experiment", experiment);
    }
    if (StringUtils.isNotEmpty(value)) {
      item.setAttribute("value", value);
    }
    if (StringUtils.isNotEmpty(units)) {
      item.setAttribute("units", units);
    }

    item.setAttribute("source", "SGD");

    Item publication = pubmedIdMap.get(pmid);
    if (publication == null) {
      publication = createItem("Publication");
      publication.setAttribute("pubMedId", pmid);
      pubmedIdMap.put(pmid, publication);
      // store() already throws ObjectStoreException; no same-type re-wrap is needed.
      store(publication);
    }
    // The reference is the same whether the publication was cached or just created.
    item.setReference("publication", publication);

    store(item);
    return item;
  }
Пример #10
0
 /**
  * Return the OntologyTerm item for a term name, creating, storing and caching it on first use.
  *
  * @param name the term name
  * @return the term item, or null when isValidTerm rejects the name
  * @throws ObjectStoreException if a new term cannot be stored
  */
 private Item getTerm(String name) throws ObjectStoreException {
   if (!isValidTerm(name)) {
     return null;
   }
   if (terms.containsKey(name)) {
     return terms.get(name);
   }

   Item created = createItem("OntologyTerm");
   created.setAttribute("name", name);
   created.setReference("ontology", ontology);
   store(created);
   // Cached only after a successful store.
   terms.put(name, created);
   return created;
 }
Пример #11
0
 /**
  * Store the "Fly Development" Ontology and one DevelopmentTerm per embryonic stage 1..16,
  * recording each term's item identifier in {@code stages} at the index equal to its stage
  * number (index 0 is left unset).
  *
  * @throws ObjectStoreException if an item cannot be stored
  */
 private void setStages() throws ObjectStoreException {
   Item flyDevelopment = createItem("Ontology");
   flyDevelopment.setAttribute("name", "Fly Development");
   store(flyDevelopment);

   final int lastStage = 16;
   stages = new String[lastStage + 1];
   int stageNumber = 1;
   while (stageNumber <= lastStage) {
     Item term = createItem("DevelopmentTerm");
     term.setAttribute("name", "embryonic stage " + stageNumber);
     term.setReference("ontology", flyDevelopment);
     stages[stageNumber] = term.getIdentifier();
     store(term);
     stageNumber++;
   }
 }
Пример #12
0
 /**
  * Build (but do not store) a Location item placing a feature on a sequence.
  *
  * <p>The smaller of the record's two coordinates becomes "start" and the larger becomes "end".
  * The strand is written as "1" for "+", "-1" for "-" and "0" for anything else, including a
  * missing strand.
  *
  * @param record the GFF3 record supplying coordinates and strand
  * @param refId item identifier of the located feature
  * @param seq the sequence item the feature is located on
  * @param cd class descriptor of the feature (not used by this method)
  * @return the new Location item
  */
 private Item getLocation(GFF3Record record, String refId, Item seq, ClassDescriptor cd) {
   Item location = createItem("Location");

   int recordStart = record.getStart();
   int recordEnd = record.getEnd();
   boolean inOrder = recordStart < recordEnd;
   location.setAttribute("start", String.valueOf(inOrder ? recordStart : recordEnd));
   location.setAttribute("end", String.valueOf(inOrder ? recordEnd : recordStart));

   // "x".equals(null) is false, so a null strand falls through to "0".
   String strandValue;
   if ("+".equals(record.getStrand())) {
     strandValue = "1";
   } else if ("-".equals(record.getStrand())) {
     strandValue = "-1";
   } else {
     strandValue = "0";
   }
   location.setAttribute("strand", strandValue);

   location.setReference("locatedOn", seq.getIdentifier());
   location.setReference("feature", refId);
   location.addToCollection("dataSets", dataSet);
   return location;
 }
Пример #13
0
 /**
  * Return the item identifier of the DataSet for a data source code, creating, caching and
  * storing the DataSet the first time the code is seen.
  *
  * @param code the data source code
  * @return the DataSet item identifier
  * @throws ObjectStoreException if the data set (or its data source) cannot be stored
  */
 private String getDataset(String code) throws ObjectStoreException {
   String cachedId = dataSets.get(code);
   if (cachedId != null) {
     return cachedId;
   }

   String sourceName = getDataSourceName(code);
   Item dataSetItem = createItem("DataSet");
   dataSetItem.setAttribute("name", "DO Annotation from " + sourceName);
   dataSetItem.setReference("dataSource", getDataSource(sourceName));

   String newId = dataSetItem.getIdentifier();
   dataSets.put(code, newId);
   store(dataSetItem);
   return newId;
 }
 /**
  * Store a Synonym item for a subject unless the same subject/type/value combination has
  * already been recorded.
  *
  * @param subjectRefId item identifier of the synonym's subject
  * @param type the synonym type
  * @param value the synonym value
  * @throws SAXException wrapping the ObjectStoreException if the synonym cannot be stored
  */
 private void setSynonym(String subjectRefId, String type, String value) throws SAXException {
   String seenKey = subjectRefId + type + value;
   if (synonyms.contains(seenKey)) {
     return;
   }

   Item created = createItem("Synonym");
   created.setAttribute("type", type);
   created.setAttribute("value", value);
   created.setReference("subject", subjectRefId);
   // Recorded as seen before the store attempt.
   synonyms.add(seenKey);
   try {
     store(created);
   } catch (ObjectStoreException e) {
     throw new SAXException(e);
   }
 }
Пример #15
0
 /**
  * Link a feature to its parents using the reference or collection name that the handler
  * configures for the feature's class.
  *
  * <p>Nothing happens when the handler has no mapping for the class or there are no parents. If
  * the configured name is a reference, the first parent is set and any further parent is an
  * error; if it is a collection, all parents are added.
  *
  * @param parents identifiers of the feature's parents
  * @param feature the feature item to update
  * @throws RuntimeException if a reference has more than one parent, or the configured name is
  *     neither a reference nor a collection of the class
  */
 private void setRefsAndCollections(List<String> parents, Item feature) {
   String clsName = feature.getClassName();
   Map<String, String> refsAndCollections = handler.getRefsAndCollections();
   if (refsAndCollections == null
       || !refsAndCollections.containsKey(clsName)
       || parents == null
       || parents.isEmpty()) {
     return;
   }

   ClassDescriptor cld =
       tgtModel.getClassDescriptorByName(tgtModel.getPackageName() + "." + clsName);
   String refName = refsAndCollections.get(clsName);
   Iterator<String> parentIter = parents.iterator();
   if (cld.getReferenceDescriptorByName(refName, true) != null) {
     String parent = parentIter.next();
     feature.setReference(refName, getRefId(parent));
     if (parentIter.hasNext()) {
       // The feature may not have a primaryIdentifier attribute; guard the lookup so that
       // the descriptive error below is raised instead of a NullPointerException.
       String primaryIdent =
           feature.getAttribute("primaryIdentifier") == null
               ? null
               : feature.getAttribute("primaryIdentifier").getValue();
       throw new RuntimeException(
           "Feature has multiple relations for reference: "
               + refName
               + " for feature: "
               + feature.getClassName()
               + ", "
               + feature.getIdentifier()
               + ", "
               + primaryIdent);
     }
   } else if (cld.getCollectionDescriptorByName(refName, true) != null) {
     List<String> refIds = new ArrayList<String>();
     while (parentIter.hasNext()) {
       refIds.add(getRefId(parentIter.next()));
     }
     feature.setCollection(refName, refIds);
   } else if (parentIter.hasNext()) {
     throw new RuntimeException(
         "No '"
             + refName
             + "' reference/collection found in "
             + "class: "
             + clsName
             + " - is map configured correctly?");
   }
 }
Пример #16
0
  /**
   * For every evidence set in goTermGeneToEvidence, store one DOEvidence item per evidence entry
   * and then store the resulting "evidence" reference list against the annotation id carried by
   * the entries (the id of the last entry iterated is the one used).
   *
   * @throws ObjectStoreException if an item or the reference list cannot be stored
   */
  private void storeEvidence() throws ObjectStoreException {
    for (Set<Evidence> evidenceSet : goTermGeneToEvidence.values()) {
      List<String> storedEvidenceIds = new ArrayList<String>();
      Integer annotationId = null;

      for (Evidence entry : evidenceSet) {
        Item evidenceItem = createItem("DOEvidence");
        evidenceItem.setReference("code", evidenceCodes.get(entry.getEvidenceCode()));

        List<String> publications = entry.getPublications();
        boolean hasPublications = !publications.isEmpty();
        if (hasPublications) {
          evidenceItem.setCollection("publications", publications);
        }

        store(evidenceItem);
        storedEvidenceIds.add(evidenceItem.getIdentifier());
        annotationId = entry.getStoredAnnotationId();
      }

      List<String> idsCopy = new ArrayList<String>(storedEvidenceIds);
      store(new ReferenceList("evidence", idsCopy), annotationId);
    }
  }
Пример #17
0
 /**
  * Return the DataSet item with the given title, creating, storing and caching it on first use.
  * For a title that is already cached, the other arguments are ignored.
  *
  * @param title the DataSet title; also the cache key
  * @param url the url attribute, or null if the url shouldn't be set
  * @param description the description attribute, or null if the field shouldn't be set
  * @param dataSourceItem the DataSource referenced by the DataSet
  * @return the DataSet Item
  * @throws RuntimeException if the new DataSet cannot be stored
  */
 public Item getDataSetItem(String title, String url, String description, Item dataSourceItem) {
   Item cached = dataSets.get(title);
   if (cached != null) {
     return cached;
   }

   Item created = createItem("DataSet");
   created.setAttribute("name", title);
   created.setReference("dataSource", dataSourceItem);
   if (url != null) {
     created.setAttribute("url", url);
   }
   if (description != null) {
     created.setAttribute("description", description);
   }

   try {
     store(created);
   } catch (ObjectStoreException e) {
     throw new RuntimeException("failed to store DataSet with title: " + title, e);
   }
   // Cached only after a successful store.
   dataSets.put(title, created);
   return created;
 }
Пример #18
0
  /**
   * Return the item identifier of the product (gene, protein or other BioEntity) for the given
   * accession, creating and storing the product the first time its key is seen.
   *
   * @param identifier the product accession as read from the input
   * @param type the product type: "gene" or "protein" (case-insensitive), or the name of a model
   *     class assignable to BioEntity
   * @param organism the organism item; its "taxonId" attribute is read for genes
   * @param dataSourceCode code used to look up the data set added to the product
   * @param createOrganism whether to set the organism reference on a new product
   * @param field the identifier attribute to set, or null to take it from the per-taxon config
   *     (genes only)
   * @return the product's item identifier, or null for a taxon 7227 gene that does not resolve
   *     to exactly one identifier
   * @throws ObjectStoreException if the product or its data set cannot be stored
   * @throws RuntimeException if no identifier field can be found for a gene's taxon
   * @throws IllegalArgumentException if the type is not recognised
   */
  private String newProduct(
      String identifier,
      String type,
      Item organism,
      String dataSourceCode,
      boolean createOrganism,
      String field)
      throws ObjectStoreException {
    String idField = field;
    String accession = identifier;
    String clsName = null;
    // find gene attribute first to see if organism should be part of key
    if ("gene".equalsIgnoreCase(type)) {
      clsName = "Gene";
      String taxonId = organism.getAttribute("taxonId").getValue();
      if (idField == null) {
        // A taxon with no config entry must end in the descriptive error below rather than
        // a NullPointerException on the missing entry.
        if (configs.get(taxonId) != null) {
          idField = configs.get(taxonId).identifier;
        }
        if (idField == null) {
          throw new RuntimeException(
              "Could not find a identifier property for taxon: "
                  + taxonId
                  + " check properties file: "
                  + PROP_FILE);
        }
      }

      // if a Dmel gene we need to use FlyBaseIdResolver to find a current id
      if ("7227".equals(taxonId)) {
        IdResolver resolver = flybaseResolverFactory.getIdResolver(false);
        if (resolver != null) {
          int resCount = resolver.countResolutions(taxonId, accession);

          if (resCount != 1) {
            LOG.info(
                "RESOLVER: failed to resolve gene to one identifier, "
                    + "ignoring gene: "
                    + accession
                    + " count: "
                    + resCount
                    + " FBgn: "
                    + resolver.resolveId(taxonId, accession));
            return null;
          }
          accession = resolver.resolveId(taxonId, accession).iterator().next();
        }
      }
    } else if ("protein".equalsIgnoreCase(type)) {
      // TODO use values in config
      clsName = "Protein";
      idField = "primaryAccession";
    } else {
      String typeCls = TypeUtil.javaiseClassName(type);

      if (getModel().getClassDescriptorByName(typeCls) != null) {
        Class<?> cls = getModel().getClassDescriptorByName(typeCls).getType();
        if (BioEntity.class.isAssignableFrom(cls)) {
          clsName = typeCls;
        }
      }
      if (clsName == null) {
        throw new IllegalArgumentException("Unrecognised annotation type '" + type + "'");
      }
    }

    // The organism is left out of the key for primaryIdentifier-keyed products and proteins.
    // NOTE(review): this comparison is case-sensitive while the branch above matches
    // "protein" case-insensitively - confirm whether that difference is intended.
    boolean includeOrganism;
    if ("primaryIdentifier".equals(idField) || "protein".equals(type)) {
      includeOrganism = false;
    } else {
      includeOrganism = createOrganism;
    }
    String key = makeProductKey(accession, type, organism, includeOrganism);

    // Have we already seen this product somewhere before?
    // if so, return the product rather than creating a new one...
    if (productMap.containsKey(key)) {
      return productMap.get(key);
    }

    Item product = createItem(clsName);
    if (organism != null && createOrganism) {
      product.setReference("organism", organism.getIdentifier());
    }
    product.setAttribute(idField, accession);

    String dataSetIdentifier = getDataset(dataSourceCode);
    product.addToCollection("dataSets", dataSetIdentifier);

    Integer storedProductId = store(product);
    storedProductIds.put(product.getIdentifier(), storedProductId);
    productMap.put(key, product.getIdentifier());
    return product.getIdentifier();
  }
Пример #19
0
  /**
   * Process one GFF3 record: build the feature Item (and, where applicable, its Location) and
   * store every Item the handler has collected for the record.
   *
   * <p>A record is skipped when its type is in the exclude list configured for this organism, or
   * when a term list is configured for this organism and does not contain the type. When the
   * record's primary identifier has been seen before, no new feature is created; at most a
   * further Location is stored for the existing feature.
   *
   * @param record GFF3Record
   * @throws ObjectStoreException if an error occurs storing items
   * @throws IllegalArgumentException if the target model has no class for the record type
   */
  public void process(GFF3Record record) throws ObjectStoreException {
    String term = record.getType();

    if (config_exclude != null
        && !config_exclude.isEmpty()) { // don't process terms in the exclude list
      if (config_exclude.containsKey(this.orgTaxonId)) {
        if (config_exclude.get(this.orgTaxonId).contains(term)) {
          return;
        }
      }
    }

    if (config_term != null && !config_term.isEmpty()) { // otherwise all terms are processed
      if (config_term.containsKey(this.orgTaxonId)) {
        if (!config_term.get(this.orgTaxonId).contains(term)) {
          return;
        }
      }
    }

    // By default, use ID field in attributes
    String primaryIdentifier = record.getId();
    // If pid set in gff_config.properties, look for the attribute field, e.g. locus_tag
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("primaryIdentifier")) {
        // the mapping applies only to the configured class, or to every class when "all"
        String cls = config_attr_class.get(this.orgTaxonId).get("primaryIdentifier");
        if ("all".equals(cls) || term.equals(cls)) {
          String pidAttr = config_attr.get(this.orgTaxonId).get("primaryIdentifier");
          if (pidAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            // the part after the first '.' of the configured value selects the matching xref
            String pidAttrPrefix = pidAttr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(pidAttrPrefix)) {
                // take the part after the first ':' of the first matching xref
                primaryIdentifier = xref.split(":")[1];
                break;
              }
            }
          } else {
            if (record.getAttributes().get(pidAttr) != null) {
              primaryIdentifier = record.getAttributes().get(pidAttr).get(0);
            }
          }
        }
      }
    }

    // non-null when a feature with this primary identifier was created by an earlier record
    String refId = identifierMap.get(primaryIdentifier);

    // get rid of previous record Items from handler
    handler.clear();

    Item seq = getSeq(record.getSequenceID());

    String className = TypeUtil.javaiseClassName(term);
    String fullClassName = tgtModel.getPackageName() + "." + className;

    ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName);

    if (cd == null) {
      throw new IllegalArgumentException(
          "no class found in model for: "
              + className
              + " (original GFF record type: "
              + term
              + ") for "
              + "record: "
              + record);
    }

    Set<Item> synonymsToAdd = new HashSet<Item>();

    Item feature = null;

    // new feature
    if (refId == null) {
      feature = createItem(className);
      refId = feature.getIdentifier();
    }

    if (!"chromosome".equals(term) && seq != null) {
      // a location needs positive coordinates, must not be globally disabled, and must be
      // wanted by the handler for this record
      boolean makeLocation =
          record.getStart() >= 1
              && record.getEnd() >= 1
              && !dontCreateLocations
              && handler.createLocations(record);
      if (makeLocation) {
        Item location = getLocation(record, refId, seq, cd);
        if (feature == null) {
          // this feature has already been created and stored
          // we only wanted the location, we're done here.
          store(location);
          return;
        }
        int length = getLength(record);
        feature.setAttribute("length", String.valueOf(length));
        handler.setLocation(location);
        if ("Chromosome".equals(seqClsName)
            && (cd.getFieldDescriptorByName("chromosome") != null)) {
          feature.setReference("chromosome", seq.getIdentifier());
          feature.setReference("chromosomeLocation", location);
        }
      }
    }

    if (feature == null) {
      // this feature has already been created and stored
      // feature with discontinuous location, this location wasn't valid for some reason
      return;
    }

    if (primaryIdentifier != null) {
      feature.setAttribute("primaryIdentifier", primaryIdentifier);
    }
    handler.setFeature(feature);
    // NOTE(review): this put also runs when primaryIdentifier is null - confirm that a null
    // key in identifierMap is intended.
    identifierMap.put(primaryIdentifier, feature.getIdentifier());

    List<?> names = record.getNames();
    String symbol = null;
    List<String> synonyms = new ArrayList<String>();

    // get the attribute set for symbol
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("symbol")) {
        String cls = config_attr_class.get(this.orgTaxonId).get("symbol");
        if ("all".equals(cls) || term.equals(cls)) {
          String symbolAttr = config_attr.get(this.orgTaxonId).get("symbol");
          if (symbolAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            String symbolAttrPrefix = symbolAttr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(symbolAttrPrefix)) {
                symbol = xref.split(":")[1];
                break;
              }
            }
          } else {
            if (record.getAttributes().get(symbolAttr) != null) {
              symbol = record.getAttributes().get(symbolAttr).get(0);
            }
          }
        }
      }
    }

    // get the attribute set for synonym
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("synonym")) {
        String cls = config_attr_class.get(this.orgTaxonId).get("synonym");
        if ("all".equals(cls) || term.equals(cls)) {
          String synonymAttr = config_attr.get(this.orgTaxonId).get("synonym");
          if (synonymAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            String synonymAttrPrefix = synonymAttr.split("\\.")[1];
            // unlike symbol, every matching xref is kept; the set removes duplicates
            Set<String> synSet = new HashSet<String>();
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(synonymAttrPrefix)) {
                synSet.add(xref.split(":")[1]);
              }
            }
            synonyms.addAll(synSet);
          } else {
            // NOTE(review): unlike the symbol lookup above there is no null check here, so
            // synonyms may become null when the attribute is absent - confirm that setNames
            // tolerates a null list.
            synonyms = record.getAttributes().get(synonymAttr);
          }
          // synonyms.removeAll(Collections.singleton(null));
        }
      }
    }

    if (names != null) {
      setNames(names, symbol, synonyms, synonymsToAdd, primaryIdentifier, feature, cd);
    }

    // Other attributes
    List<String> primeAttrList = Arrays.asList("primaryIdentifier", "symbol", "synonym");

    if (config_attr.containsKey(this.orgTaxonId)) {
      Map<String, String> attrMapOrg = config_attr.get(this.orgTaxonId);
      Map<String, String> attrMapClone = new HashMap<String, String>();
      // Copy the map so the three attributes handled above can be removed without touching
      // the configuration
      for (Entry<String, String> e : attrMapOrg.entrySet()) {
        attrMapClone.put(e.getKey(), e.getValue());
      }

      for (String pa : primeAttrList) {
        attrMapClone.remove(pa);
      }

      for (Entry<String, String> e : attrMapClone.entrySet()) {
        String cls = config_attr_class.get(this.orgTaxonId).get(e.getKey());
        if ("all".equals(cls) || term.equals(cls)) {
          String attr = e.getValue();
          if (attr.contains("Dbxref") && record.getDbxrefs() != null) {
            String attrPrefix = attr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(attrPrefix)) {
                // the attribute is set only when checkAttribute accepts its name
                if (feature.checkAttribute(e.getKey())) {
                  feature.setAttribute(e.getKey(), xref.split(":")[1]);
                }
                break;
              }
            }
          } else {
            if (record.getAttributes().get(attr) != null) {
              String attrVal = record.getAttributes().get(attr).get(0);
              if (attrVal != null) {
                if (feature.checkAttribute(e.getKey())) {
                  feature.setAttribute(e.getKey(), attrVal);
                }
              }
            }
          }
        }
      }
    }

    List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
      setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);

    handler.addDataSet(dataSet);
    Double score = record.getScore();
    if (score != null && !"".equals(String.valueOf(score))) {
      feature.setAttribute("score", String.valueOf(score));
      feature.setAttribute("scoreType", record.getSource());
    }
    for (Item synonym : synonymsToAdd) {
      handler.addItem(synonym);
    }
    // give the handler its turn on the record before its reference lists are read back
    handler.process(record);
    if (handler.getDataSetReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();
    if (handler.getPublicationReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    // store everything the handler has collected for this record
    try {
      Iterator<Item> iter = handler.getItems().iterator();
      while (iter.hasNext()) {
        store(iter.next());
      }
    } catch (ObjectStoreException e) {
      LOG.error("Problem writing item to the itemwriter");
      throw e;
    }
  }
  /**
   * Read the tab-delimited gene table and store a Gene or NcRNA item (with synonyms, an optional
   * protein for genes, and an optional chromosome location) for every row whose type column is
   * TYPE_GENE or TYPE_RNA. Rows of any other type, and the header row, are ignored.
   *
   * <p>{@inheritDoc}
   */
  public void process(Reader reader) throws Exception {

    // Create the single chromosome all features are located on
    Item chromosome = createItem("Chromosome");
    chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID);
    store(chromosome);

    Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);

    while (lineIter.hasNext()) {
      String[] line = (String[]) lineIter.next();

      // skip the header line
      if (line[0].equals(HEADER_LINE)) {
        continue;
      }

      String ecogeneId = line[0];
      String geneName = line[1];
      String eCK = line[2];
      String swissProtId = line[3];
      String wisconsinGenBankId = line[4];
      String genBankProteinId = line[5];
      String genoBaseId = line[6];
      String type = line[7];
      String strand = line[8];
      String start = line[9];
      String end = line[10];
      String synonym = line[11];

      // Collect synonym values from three columns; the sorted set removes duplicates.
      Set<String> symSet = new TreeSet<String>();
      if (!eCK.equals(NULL_STRING)) {
        symSet.add(eCK);
      }
      if (!genoBaseId.equals(NULL_STRING)) {
        symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; ")));
      }
      if (!synonym.equals(NONE_STRING)) {
        symSet.addAll(Arrays.asList(synonym.split(", ")));
      }

      if (type.equals(TYPE_GENE)) {
        Item gene =
            createEcoGeneFeature(
                "Gene", chromosome, ecogeneId, wisconsinGenBankId, geneName, symSet);

        if (!swissProtId.equals(NULL_STRING)) {
          if (proteinMap.containsKey(swissProtId)) {
            // Reference a protein to a gene (a gene has proteins
            // collection)
            gene.addToCollection("proteins", proteinMap.get(swissProtId));
          } else {
            Item protein = createItem("Protein");
            protein.setAttribute("primaryAccession", swissProtId);
            // NCBI Protein id, remove "g"
            protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1));
            gene.addToCollection("proteins", protein);
            store(protein);
            proteinMap.put(swissProtId, protein);
          }
        }

        storeEcoGeneLocation(gene, chromosome, start, end, strand);
        store(gene);

      } else if (type.equals(TYPE_RNA)) {
        Item rna =
            createEcoGeneFeature(
                "NcRNA", chromosome, ecogeneId, wisconsinGenBankId, geneName, symSet);
        storeEcoGeneLocation(rna, chromosome, start, end, strand);
        store(rna);
      }
    }
  }

  /**
   * Create (without storing) a feature item of the given class with the attributes shared by
   * Gene and NcRNA rows, and create one synonym per entry of symSet.
   *
   * <p>Declared to throw Exception, like the entry point, so that whatever the called helpers
   * throw propagates unchanged.
   */
  private Item createEcoGeneFeature(
      String className,
      Item chromosome,
      String ecogeneId,
      String wisconsinGenBankId,
      String geneName,
      Set<String> symSet)
      throws Exception {
    Item feature = createItem(className);
    feature.setReference("chromosome", chromosome);
    feature.setReference("organism", getOrganism(ECOLI_TAXON));
    feature.setAttribute("primaryIdentifier", ecogeneId);
    feature.setAttribute("secondaryIdentifier", wisconsinGenBankId);
    feature.setAttribute("name", geneName);
    feature.setAttribute("symbol", geneName);

    for (String sym : symSet) {
      createSynonym(feature, sym, true);
    }
    return feature;
  }

  /**
   * Create and store the chromosome Location of a feature, and reference it from the feature,
   * when both coordinates match DIGIT_REGEX; otherwise do nothing.
   */
  private void storeEcoGeneLocation(
      Item feature, Item chromosome, String start, String end, String strand) throws Exception {
    if (!start.matches(DIGIT_REGEX) || !end.matches(DIGIT_REGEX)) {
      return;
    }

    Item location = createItem("Location");
    location.setAttribute("start", start);
    location.setAttribute("end", end);
    location.setReference("feature", feature);
    location.setReference("locatedOn", chromosome);

    if (strand.equals(CLOCKWISE)) {
      location.setAttribute("strand", "+1");
    } else if (strand.equals(COUNTER_CLOCKWISE)) {
      location.setAttribute("strand", "-1");
    } else {
      location.setAttribute("strand", "0");
    }

    feature.setReference("chromosomeLocation", location);

    store(location);
  }
Пример #21
0
 /**
  * Build a new Synonym Item for the given subject.
  *
  * <p>The item is only created and populated here; it is returned to the caller without being
  * stored or attached to anything else by this method.
  *
  * @param subject the Item the synonym refers to; its identifier is used as the "subject"
  *     reference
  * @param value the text of the synonym, set as the "value" attribute
  * @return the newly created Synonym Item
  */
 public Item getSynonym(Item subject, String value) {
   final Item result = createItem("Synonym");
   result.setAttribute("value", value);
   final String subjectId = subject.getIdentifier();
   result.setReference("subject", subjectId);
   return result;
 }
Пример #22
0
  /**
   * Process the edge data file.
   *
   * <p>Each row with at least two columns is read as a (source, target) identifier pair. A node
   * is looked up first in {@code tfMap} and then in {@code miRNAMap}; rows whose source or target
   * is in neither map are skipped. Note that the source gene and its network property are stored
   * before the target is checked, so they are kept even when the row is then skipped.
   *
   * <p>The reader opened on {@code file} is always closed before this method returns.
   *
   * @param file the edge data file
   * @param tfMap a customized map with TF information (identifier -> {symbol -> level})
   * @param miRNAMap a customized map with miRNA information (identifier -> {symbol -> position})
   * @throws RuntimeException wrapping the cause if the file cannot be opened or parsed, or if an
   *     item cannot be stored
   */
  private void processEdgeFile(
      File file,
      Map<String, Map<String, String>> tfMap,
      Map<String, Map<String, String>> miRNAMap) {

    Reader reader;
    try {
      reader = new FileReader(file);
    } catch (FileNotFoundException e) {
      throw new RuntimeException(e);
    }

    try {
      Iterator<?> tsvIter;
      try {
        tsvIter = FormattedTextParser.parseTabDelimitedReader(reader);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }

      while (tsvIter.hasNext()) {
        String[] line = (String[]) tsvIter.next();
        if (line.length > 1) {
          try {
            storeEdge(line[0], line[1], tfMap, miRNAMap);
          } catch (ObjectStoreException e) {
            throw new RuntimeException(e);
          }
        }
      }
    } finally {
      // The iterator reads from the reader while it is consumed, so the reader can only be
      // closed once the loop has finished (or failed).
      try {
        reader.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing a read-only file fails, and throwing from
        // here would mask an exception raised while processing.
      }
    }
  }

  /**
   * Store the nodes and the regulation for a single edge, or do nothing further once one of the
   * two identifiers turns out to be unknown.
   */
  private void storeEdge(
      String sourceIdentifier,
      String targetIdentifier,
      Map<String, Map<String, String>> tfMap,
      Map<String, Map<String, String>> miRNAMap)
      throws ObjectStoreException {

    boolean sourceIsTf = tfMap.containsKey(sourceIdentifier);
    if (!sourceIsTf && !miRNAMap.containsKey(sourceIdentifier)) {
      return;
    }
    String sourceGenePid =
        storeEdgeNode(sourceIdentifier, sourceIsTf ? tfMap : miRNAMap, sourceIsTf);

    boolean targetIsTf = tfMap.containsKey(targetIdentifier);
    if (!targetIsTf && !miRNAMap.containsKey(targetIdentifier)) {
      return;
    }

    // The regulation item is created before the target gene, as in the original code.
    Item regulation;
    if (sourceIsTf) {
      if (targetIsTf) {
        regulation = createRegulation(INTERACTION_TYPE_TF_TF);
      } else {
        regulation = createRegulation(INTERACTION_TYPE_TF_MIRNA);
      }
    } else {
      if (targetIsTf) {
        regulation = createRegulation(INTERACTION_TYPE_MIRNA_TF);
      } else {
        regulation = createRegulation(INTERACTION_TYPE_MIRNA_MIRNA);
      }
    }

    String targetGenePid =
        storeEdgeNode(targetIdentifier, targetIsTf ? tfMap : miRNAMap, targetIsTf);

    regulation.setReference("source", geneItems.get(sourceGenePid));
    regulation.setReference("target", geneItems.get(targetGenePid));
    store(regulation);
  }

  /**
   * Create the gene for one end of an edge, store its network property and return the gene pid.
   * TF genes are created with their identifier and a "level" property; miRNA genes are created
   * without an identifier and get a "position" property.
   */
  private String storeEdgeNode(
      String identifier, Map<String, Map<String, String>> nodeMap, boolean isTf)
      throws ObjectStoreException {

    Map<String, String> nodeInfo = nodeMap.get(identifier);
    // Only the first entry (symbol -> level/position) of the per-node map is used.
    String symbol = nodeInfo.keySet().iterator().next();
    String topologyValue = nodeInfo.get(symbol);

    String genePid;
    Item networkProperty;
    if (isTf) {
      genePid = createGene(identifier, symbol);
      networkProperty = createNetworkProperty(TOPO_TYPE_LEVEL, topologyValue);
    } else {
      genePid = createGene(null, symbol);
      networkProperty = createNetworkProperty(TOPO_TYPE_POSITION, topologyValue);
    }
    networkProperty.setReference("node", geneItems.get(genePid));
    store(networkProperty);
    return genePid;
  }
Пример #23
0
  /**
   * Turn a single GFF3 record into items and write them out.
   *
   * <p>If the record's id has not been seen before, a new feature item is created, populated
   * and stored together with the items collected by the handler. If the id is already in the
   * identifier map, at most an additional location is stored for it and nothing else is
   * created.
   *
   * @param record GFF3Record to convert
   * @throws ObjectStoreException if an error occurs storing items
   * @throws IllegalArgumentException if the record type has no matching class in the target
   *     model
   */
  public void process(GFF3Record record) throws ObjectStoreException {
    final String identifier = record.getId();
    String refId = identifierMap.get(identifier);

    // the handler still holds the items of the previous record; drop them
    handler.clear();
    final List<?> names = record.getNames();
    final Item seq = getSeq(record.getSequenceID());

    final String term = record.getType();
    final String className = TypeUtil.javaiseClassName(term);
    final ClassDescriptor cd =
        tgtModel.getClassDescriptorByName(tgtModel.getPackageName() + "." + className);
    if (cd == null) {
      String message =
          "no class found in model for: " + className
              + " (original GFF record type: " + term
              + ") for " + "record: " + record;
      throw new IllegalArgumentException(message);
    }

    // an id without a mapped reference means this is a new feature
    Item feature = null;
    if (refId == null) {
      feature = createItem(className);
      refId = feature.getIdentifier();
    }

    final boolean locatable = !"chromosome".equals(record.getType()) && seq != null;
    if (locatable
        && record.getStart() >= 1
        && record.getEnd() >= 1
        && !dontCreateLocations
        && handler.createLocations(record)) {
      final Item location = getLocation(record, refId, seq, cd);
      if (feature == null) {
        // the feature itself was stored earlier; only this extra location is new
        store(location);
        return;
      }
      final int featureLength = getLength(record);
      feature.setAttribute("length", String.valueOf(featureLength));
      handler.setLocation(location);
      final boolean onChromosome = "Chromosome".equals(seqClsName);
      if (onChromosome && cd.getFieldDescriptorByName("chromosome") != null) {
        feature.setReference("chromosome", seq.getIdentifier());
        feature.setReference("chromosomeLocation", location);
      }
    }

    if (feature == null) {
      // known feature and no location was made for this record: nothing left to do
      return;
    }

    if (identifier != null) {
      feature.setAttribute("primaryIdentifier", identifier);
    }
    handler.setFeature(feature);
    identifierMap.put(identifier, feature.getIdentifier());

    final Set<Item> synonymsToAdd = new HashSet<Item>();
    if (names != null) {
      setNames(names, synonymsToAdd, record.getId(), feature, cd);
    }

    final List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
      setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);
    handler.addDataSet(dataSet);

    final Double score = record.getScore();
    if (score != null) {
      final String scoreText = String.valueOf(score);
      if (!"".equals(scoreText)) {
        feature.setAttribute("score", scoreText);
        feature.setAttribute("scoreType", record.getSource());
      }
    }

    for (Item synonym : synonymsToAdd) {
      handler.addItem(synonym);
    }
    handler.process(record);

    // copy over any data set / publication references the handler collected, then reset them
    if (!handler.getDataSetReferenceList().getRefIds().isEmpty()) {
      feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();
    if (!handler.getPublicationReferenceList().getRefIds().isEmpty()) {
      feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    try {
      for (Iterator<Item> pending = handler.getItems().iterator(); pending.hasNext(); ) {
        store(pending.next());
      }
    } catch (ObjectStoreException e) {
      LOG.error("Problem writing item to the itemwriter");
      throw e;
    }
  }