Esempio n. 1
0
  /**
   * process GFF3 record and give a xml presentation
   *
   * @param record GFF3Record
   * @throws ObjectStoreException if an error occurs storing items
   */
  public void process(GFF3Record record) throws ObjectStoreException {
    String identifier = record.getId();
    String refId = identifierMap.get(identifier);

    // get rid of previous record Items from handler
    handler.clear();
    List<?> names = record.getNames();
    Item seq = getSeq(record.getSequenceID());

    String term = record.getType();
    String className = TypeUtil.javaiseClassName(term);
    String fullClassName = tgtModel.getPackageName() + "." + className;

    ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName);

    if (cd == null) {
      throw new IllegalArgumentException(
          "no class found in model for: "
              + className
              + " (original GFF record type: "
              + term
              + ") for "
              + "record: "
              + record);
    }

    Set<Item> synonymsToAdd = new HashSet<Item>();

    Item feature = null;

    // new feature
    if (refId == null) {
      feature = createItem(className);
      refId = feature.getIdentifier();
    }

    if (!"chromosome".equals(record.getType()) && seq != null) {
      boolean makeLocation =
          record.getStart() >= 1
              && record.getEnd() >= 1
              && !dontCreateLocations
              && handler.createLocations(record);
      if (makeLocation) {
        Item location = getLocation(record, refId, seq, cd);
        if (feature == null) {
          // this feature has already been created and stored
          // we only wanted the location, we're done here.
          store(location);
          return;
        }
        int length = getLength(record);
        feature.setAttribute("length", String.valueOf(length));
        handler.setLocation(location);
        if ("Chromosome".equals(seqClsName)
            && (cd.getFieldDescriptorByName("chromosome") != null)) {
          feature.setReference("chromosome", seq.getIdentifier());
          feature.setReference("chromosomeLocation", location);
        }
      }
    }

    if (feature == null) {
      // this feature has already been created and stored
      // feature with discontinous location, this location wasn't valid for some reason
      return;
    }

    if (identifier != null) {
      feature.setAttribute("primaryIdentifier", identifier);
    }
    handler.setFeature(feature);
    identifierMap.put(identifier, feature.getIdentifier());
    if (names != null) {
      setNames(names, synonymsToAdd, record.getId(), feature, cd);
    }

    List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
      setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);

    handler.addDataSet(dataSet);
    Double score = record.getScore();
    if (score != null && !"".equals(String.valueOf(score))) {
      feature.setAttribute("score", String.valueOf(score));
      feature.setAttribute("scoreType", record.getSource());
    }
    for (Item synonym : synonymsToAdd) {
      handler.addItem(synonym);
    }
    handler.process(record);
    if (handler.getDataSetReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();
    if (handler.getPublicationReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    try {
      Iterator<Item> iter = handler.getItems().iterator();
      while (iter.hasNext()) {
        store(iter.next());
      }
    } catch (ObjectStoreException e) {
      LOG.error("Problem writing item to the itemwriter");
      throw e;
    }
  }
Esempio n. 2
0
  /**
   * process GFF3 record and give a xml presentation
   *
   * @param record GFF3Record
   * @throws ObjectStoreException if an error occurs storing items
   * @throws IOException
   */
  public void process(GFF3Record record) throws ObjectStoreException {
    String term = record.getType();

    if (config_exclude != null
        && !config_exclude.isEmpty()) { // don't process terms in the exclude list
      if (config_exclude.containsKey(this.orgTaxonId)) {
        if (config_exclude.get(this.orgTaxonId).contains(term)) {
          return;
        }
      }
    }

    if (config_term != null && !config_term.isEmpty()) { // otherwise all terms are processed
      if (config_term.containsKey(this.orgTaxonId)) {
        if (!config_term.get(this.orgTaxonId).contains(term)) {
          return;
        }
      }
    }

    // By default, use ID field in attributes
    String primaryIdentifier = record.getId();
    // If pid set in gff_config.propeties, look for the attribute field, e.g. locus_tag
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("primaryIdentifier")) {
        String cls = config_attr_class.get(this.orgTaxonId).get("primaryIdentifier");
        if ("all".equals(cls) || term.equals(cls)) {
          String pidAttr = config_attr.get(this.orgTaxonId).get("primaryIdentifier");
          if (pidAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            String pidAttrPrefix = pidAttr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(pidAttrPrefix)) {
                primaryIdentifier = xref.split(":")[1];
                break;
              }
            }
          } else {
            if (record.getAttributes().get(pidAttr) != null) {
              primaryIdentifier = record.getAttributes().get(pidAttr).get(0);
            }
          }
        }
      }
    }

    String refId = identifierMap.get(primaryIdentifier);

    // get rid of previous record Items from handler
    handler.clear();

    Item seq = getSeq(record.getSequenceID());

    String className = TypeUtil.javaiseClassName(term);
    String fullClassName = tgtModel.getPackageName() + "." + className;

    ClassDescriptor cd = tgtModel.getClassDescriptorByName(fullClassName);

    if (cd == null) {
      throw new IllegalArgumentException(
          "no class found in model for: "
              + className
              + " (original GFF record type: "
              + term
              + ") for "
              + "record: "
              + record);
    }

    Set<Item> synonymsToAdd = new HashSet<Item>();

    Item feature = null;

    // new feature
    if (refId == null) {
      feature = createItem(className);
      refId = feature.getIdentifier();
    }

    if (!"chromosome".equals(term) && seq != null) {
      boolean makeLocation =
          record.getStart() >= 1
              && record.getEnd() >= 1
              && !dontCreateLocations
              && handler.createLocations(record);
      if (makeLocation) {
        Item location = getLocation(record, refId, seq, cd);
        if (feature == null) {
          // this feature has already been created and stored
          // we only wanted the location, we're done here.
          store(location);
          return;
        }
        int length = getLength(record);
        feature.setAttribute("length", String.valueOf(length));
        handler.setLocation(location);
        if ("Chromosome".equals(seqClsName)
            && (cd.getFieldDescriptorByName("chromosome") != null)) {
          feature.setReference("chromosome", seq.getIdentifier());
          feature.setReference("chromosomeLocation", location);
        }
      }
    }

    if (feature == null) {
      // this feature has already been created and stored
      // feature with discontinous location, this location wasn't valid for some reason
      return;
    }

    if (primaryIdentifier != null) {
      feature.setAttribute("primaryIdentifier", primaryIdentifier);
    }
    handler.setFeature(feature);
    identifierMap.put(primaryIdentifier, feature.getIdentifier());

    List<?> names = record.getNames();
    String symbol = null;
    List<String> synonyms = new ArrayList<String>();

    // get the attribute set for symbol
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("symbol")) {
        String cls = config_attr_class.get(this.orgTaxonId).get("symbol");
        if ("all".equals(cls) || term.equals(cls)) {
          String symbolAttr = config_attr.get(this.orgTaxonId).get("symbol");
          if (symbolAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            String symbolAttrPrefix = symbolAttr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(symbolAttrPrefix)) {
                symbol = xref.split(":")[1];
                break;
              }
            }
          } else {
            if (record.getAttributes().get(symbolAttr) != null) {
              symbol = record.getAttributes().get(symbolAttr).get(0);
            }
          }
        }
      }
    }

    // get the attribute set for synonym
    if (config_attr.containsKey(this.orgTaxonId)) {
      if (config_attr.get(this.orgTaxonId).containsKey("synonym")) {
        String cls = config_attr_class.get(this.orgTaxonId).get("synonym");
        if ("all".equals(cls) || term.equals(cls)) {
          String synonymAttr = config_attr.get(this.orgTaxonId).get("synonym");
          if (synonymAttr.contains("Dbxref") && record.getDbxrefs() != null) {
            String synonymAttrPrefix = synonymAttr.split("\\.")[1];
            Set<String> synSet = new HashSet<String>();
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(synonymAttrPrefix)) {
                synSet.add(xref.split(":")[1]);
              }
            }
            synonyms.addAll(synSet);
          } else {
            synonyms = record.getAttributes().get(synonymAttr);
          }
          // synonyms.removeAll(Collections.singleton(null));
        }
      }
    }

    if (names != null) {
      setNames(names, symbol, synonyms, synonymsToAdd, primaryIdentifier, feature, cd);
    }

    // Other attributes
    List<String> primeAttrList = Arrays.asList("primaryIdentifier", "symbol", "synonym");

    if (config_attr.containsKey(this.orgTaxonId)) {
      Map<String, String> attrMapOrg = config_attr.get(this.orgTaxonId);
      Map<String, String> attrMapClone = new HashMap<String, String>();
      // Deep copy of a map
      for (Entry<String, String> e : attrMapOrg.entrySet()) {
        attrMapClone.put(e.getKey(), e.getValue());
      }

      for (String pa : primeAttrList) {
        attrMapClone.remove(pa);
      }

      for (Entry<String, String> e : attrMapClone.entrySet()) {
        String cls = config_attr_class.get(this.orgTaxonId).get(e.getKey());
        if ("all".equals(cls) || term.equals(cls)) {
          String attr = e.getValue();
          if (attr.contains("Dbxref") && record.getDbxrefs() != null) {
            String attrPrefix = attr.split("\\.")[1];
            for (Iterator<?> i = record.getDbxrefs().iterator(); i.hasNext(); ) {
              String xref = (String) i.next();
              if (xref.contains(attrPrefix)) {
                if (feature.checkAttribute(e.getKey())) {
                  feature.setAttribute(e.getKey(), xref.split(":")[1]);
                }
                break;
              }
            }
          } else {
            if (record.getAttributes().get(attr) != null) {
              String attrVal = record.getAttributes().get(attr).get(0);
              if (attrVal != null) {
                if (feature.checkAttribute(e.getKey())) {
                  feature.setAttribute(e.getKey(), attrVal);
                }
              }
            }
          }
        }
      }
    }

    List<String> parents = record.getParents();
    if (parents != null && !parents.isEmpty()) {
      setRefsAndCollections(parents, feature);
    }

    feature.addReference(getOrgRef());
    feature.addToCollection("dataSets", dataSet);

    handler.addDataSet(dataSet);
    Double score = record.getScore();
    if (score != null && !"".equals(String.valueOf(score))) {
      feature.setAttribute("score", String.valueOf(score));
      feature.setAttribute("scoreType", record.getSource());
    }
    for (Item synonym : synonymsToAdd) {
      handler.addItem(synonym);
    }
    handler.process(record);
    if (handler.getDataSetReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getDataSetReferenceList());
    }
    handler.clearDataSetReferenceList();
    if (handler.getPublicationReferenceList().getRefIds().size() > 0) {
      feature.addCollection(handler.getPublicationReferenceList());
    }
    handler.clearPublicationReferenceList();

    try {
      Iterator<Item> iter = handler.getItems().iterator();
      while (iter.hasNext()) {
        store(iter.next());
      }
    } catch (ObjectStoreException e) {
      LOG.error("Problem writing item to the itemwriter");
      throw e;
    }
  }