예제 #1
0
  /**
   * Copy a UniProt XML dump to a writer, keeping only entries for organisms of interest.
   *
   * <p>Lines are buffered per entry. When the next {@code <entry} line (or the end of input) is
   * reached the buffered text is written only if the entry is still marked as wanted. An entry is
   * rejected when an NCBI Taxonomy reference seen after an {@code <organism} line has an id that
   * is not in {@code organisms}, or when an organism element closes without any taxon reference.
   * Entries linked to more than one organism are not handled specially.
   *
   * @param in the UniProt XML dump
   * @param out to write output to
   * @throws IOException if problem with input or output
   */
  public void filter(BufferedReader in, BufferedWriter out) throws IOException {
    final String newline = System.getProperty("line.separator");
    StringBuilder pending = new StringBuilder();
    boolean retain = true;
    boolean organismSeen = false;
    boolean taxonSeen = false;

    for (String raw = in.readLine(); raw != null; raw = in.readLine()) {
      // quicker to trim whole file first?
      String content = StringUtil.trimLeft(raw);

      if (content.startsWith("<entry")) {
        // flush whatever preceded this entry (file header or previous entry) if it was wanted
        if (retain) {
          out.write(pending.toString());
        }
        pending = new StringBuilder();
        retain = true;
        organismSeen = false;
        taxonSeen = false;
      } else if (content.startsWith("<organism")) {
        organismSeen = true;
        taxonSeen = false;
      } else if (organismSeen) {
        if (content.startsWith("<dbReference type=\"NCBI Taxonomy")) {
          // ignores the possibility of a protein being linked to multiple organisms
          taxonSeen = true;
          int idStart = content.indexOf("id=\"") + 4;
          int idEnd = content.indexOf('"', idStart);
          String taxonId = content.substring(idStart, idEnd);
          if (!organisms.contains(taxonId)) {
            retain = false;
          }
        } else if (!taxonSeen && content.startsWith("</organism")) {
          // if the organism has no taxon defined then we don't want it
          retain = false;
        }
      }

      // the current line (including the opening <entry line) belongs to the buffer
      if (retain) {
        pending.append(StringUtil.escapeBackslash(raw) + newline);
      }
    }

    // catch final entry; if it was dropped, the closing root tag went with it
    if (retain) {
      out.write(pending.toString());
    } else {
      out.write("</uniprot>");
    }
    out.flush();
  }
  /**
   * Read gene identifiers and their synonyms from a tab-delimited file and register them with the
   * resolver.
   *
   * <p>Rows with fewer than two columns, rows whose first column starts with {@code #}, and rows
   * whose first column does not start with {@code GENE_PATTERN} are skipped. The second column is
   * split on commas to obtain the synonyms.
   *
   * @param f the file to read
   * @throws IOException if the file cannot be opened, read or closed
   */
  protected void createFromFile(File f) throws IOException {
    // data is in format:
    // ZDBID	ID1,ID2,ID3
    BufferedReader reader = new BufferedReader(new FileReader(f));
    try {
      Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
      while (lineIter.hasNext()) {
        String[] line = (String[]) lineIter.next();

        if (line.length < 2 || line[0].startsWith("#") || !line[0].startsWith(GENE_PATTERN)) {
          continue;
        }

        String zfinId = line[0];
        String[] synonyms = StringUtil.split(line[1].trim(), ",");

        resolver.addMainIds(taxonId, zfinId, Collections.singleton(zfinId));
        resolver.addSynonyms(taxonId, zfinId, new HashSet<String>(Arrays.asList(synonyms)));
      }
    } finally {
      // the reader was previously never closed, leaking the file handle
      reader.close();
    }
  }
예제 #3
0
 /**
  * Return the identifier of the Publication item for the first usable PubMed id in a
  * pipe-separated list of codes, creating and storing the item if it has not been seen before.
  *
  * <p>Only codes that start with {@code PMID:} and whose remainder is all digits are considered.
  *
  * @param codes pipe-separated codes
  * @return the publication item identifier, or null if no code holds a numeric PubMed id
  * @throws ObjectStoreException if a new item cannot be stored
  */
 private String newPublication(String codes) throws ObjectStoreException {
   for (String code : codes.split("[|]")) {
     if (!code.startsWith("PMID:")) {
       continue;
     }
     String pubMedId = code.substring(5);
     if (!StringUtil.allDigits(pubMedId)) {
       continue;
     }

     String refId = publications.get(pubMedId);
     if (refId == null) {
       // first time this PubMed id is seen: create the item and remember its identifier
       Item publication = createItem("Publication");
       publication.setAttribute("pubMedId", pubMedId);
       refId = publication.getIdentifier();
       publications.put(pubMedId, refId);
       store(publication);
     }
     return refId;
   }
   return null;
 }
예제 #4
0
  /**
   * Combine the lists selected in the form into a new list using the named operation, and record
   * the outcome as a session message or error.
   *
   * <p>An operation name that matches none of UNION, INTERSECT or SUBTRACT leaves the result size
   * at zero and is reported as "produced no results".
   *
   * @param form the ModifyBagForm holding the selected list names and the new list name
   * @param request the current request, used to obtain the session
   * @param opText the operation to perform, compared against the BagOperations constants
   */
  private void combine(ActionForm form, HttpServletRequest request, String opText) {
    HttpSession session = request.getSession();
    final InterMineAPI im = SessionMethods.getInterMineAPI(session);
    Profile profile = SessionMethods.getProfile(session);
    ModifyBagForm bagForm = (ModifyBagForm) form;

    Map<String, InterMineBag> userBags = im.getBagManager().getBags(profile);
    String[] chosenNames = bagForm.getSelectedBags();
    Collection<InterMineBag> chosenBags = getSelectedBags(userBags, chosenNames);
    String resultName = NameUtil.validateName(userBags.keySet(), bagForm.getNewBagName());

    int resultSize = 0;
    try {
      if (opText.equals(BagOperations.UNION)) {
        resultSize = BagOperations.union(chosenBags, resultName, profile, im.getClassKeys());
      } else if (opText.equals(BagOperations.INTERSECT)) {
        resultSize = BagOperations.intersect(chosenBags, resultName, profile, im.getClassKeys());
      } else if (opText.equals(BagOperations.SUBTRACT)) {
        resultSize = BagOperations.subtract(chosenBags, resultName, profile, im.getClassKeys());
      }
    } catch (IncompatibleTypesException e) {
      String listNames = StringUtil.prettyList(Arrays.asList(chosenNames));
      SessionMethods.recordError(
          "You can only perform operations on lists of the same type."
              + " Lists "
              + listNames
              + " do not match.",
          session);
      return;
    } catch (ObjectStoreException e) {
      LOG.error(e);
      recordError(new ActionMessage("An error occurred while saving the list"), request);
      return;
    }

    if (resultSize <= 0) {
      String listNames = StringUtil.prettyList(Arrays.asList(chosenNames));
      SessionMethods.recordError(
          opText + " operation on lists " + listNames + " produced no results.", session);
      return;
    }

    String listNames = StringUtil.prettyList(Arrays.asList(chosenNames));
    SessionMethods.recordMessage(
        "Created list \"" + resultName + "\" as " + opText + " of  " + listNames + ".", session);
    // track the list creation
    im.getTrackerDelegate()
        .trackListCreation(
            BagOperations.getCommonBagType(chosenBags),
            resultSize,
            ListBuildMode.OPERATION,
            profile,
            session.getId());
  }
예제 #5
0
  /**
   * Configures a datasource from a Properties object.
   *
   * <p>Each property name is split at the first dot into an attribute (a field declared on
   * {@code Database}) and an optional sub-attribute:
   *
   * <ul>
   *   <li>{@code attribute.class} instantiates the named class and stores it in the field;
   *   <li>{@code attribute} on its own stores the property value in the field;
   *   <li>{@code attribute.xyz} calls {@code setXyz} on the object held by the field, trying a
   *       String parameter first and then an int parameter.
   * </ul>
   *
   * <p>Properties naming a field that does not exist are ignored with a warning. Sub-attribute
   * properties whose target object has not been instantiated yet are set aside and applied in a
   * second pass once every other property has been processed.
   *
   * @param props the properties for configuring the Database
   * @throws ClassNotFoundException if the class given in the properties file cannot be found or
   *     instantiated
   * @throws IllegalArgumentException if the configuration properties are empty
   * @throws NullPointerException if props is null
   */
  protected void configure(Properties props) throws ClassNotFoundException {
    if (props == null) {
      throw new NullPointerException("Props cannot be null");
    }

    if (props.size() == 0) {
      throw new IllegalArgumentException("No configuration details");
    }

    // sub-attribute properties whose target object does not exist yet
    Properties deferred = new Properties();

    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      String propertyName = (String) entry.getKey();
      String propertyValue = (String) entry.getValue();

      // Get the first part of the string - this is the attribute we are talking about
      String attribute = propertyName;
      String subAttribute = "";
      int index = propertyName.indexOf(".");
      if (index != -1) {
        attribute = propertyName.substring(0, index);
        subAttribute = propertyName.substring(index + 1);
      }

      Field field;
      try {
        field = Database.class.getDeclaredField(attribute);
      } catch (Exception e) {
        LOG.warn("Ignoring field for Database: " + attribute);
        // Ignore this property - no such field
        continue;
      }

      if ("class".equals(subAttribute)) {
        // make a new instance of this class for this attribute
        Class<?> clazz = Class.forName(propertyValue);
        Object obj;
        try {
          obj = clazz.newInstance();
        } catch (Exception e) {
          // keep the original failure as the cause so the stack trace is not lost
          throw new ClassNotFoundException(
              "Cannot instantiate class " + clazz.getName() + " " + e.getMessage(), e);
        }
        // Set the field to this newly instantiated class
        try {
          field.set(this, obj);
        } catch (Exception e) {
          LOG.warn("Unable to set field " + attribute + " for Database: " + e);
        }
      } else if ("".equals(subAttribute)) {
        // Set this attribute directly
        try {
          field.set(this, propertyValue);
        } catch (Exception e) {
          // the value is deliberately not logged - it may be a credential
          LOG.warn(
              "Unable to set field " + attribute + " for Database (" + e.getClass().getName() + ")");
        }
      } else {
        // Set a parameter on the object held by the attribute
        Object target;
        try {
          target = field.get(this);
        } catch (Exception e) {
          LOG.warn("Unable to read field " + attribute + " for Database: " + e);
          continue;
        }
        // Sometimes the class will not have been instantiated yet
        if (target == null) {
          deferred.put(propertyName, propertyValue);
          continue;
        }
        invokeSetter(target, subAttribute, propertyName, propertyValue);
      }
    }

    if (deferred.size() > 0) {
      if (deferred.size() < props.size()) {
        // Something other than the deferred properties was processed in this pass, so a target
        // object may now exist. The deferred set shrinks on every pass, so this terminates.
        configure(deferred);
      } else {
        LOG.warn("Ignoring properties for attributes with no instance: " + deferred.keySet());
      }
    }
  }

  /**
   * Call {@code set<SubAttribute>} on the target, using the String overload if there is one and
   * otherwise the int overload with the value parsed as an integer. Failures are logged and
   * otherwise ignored, because configuration is best-effort.
   */
  private void invokeSetter(
      Object target, String subAttribute, String propertyName, String propertyValue) {
    Method setter;
    Object argument;
    try {
      // Strings first
      setter = target.getClass().getMethod("set" + StringUtil.capitalise(subAttribute), String.class);
      argument = propertyValue;
    } catch (Exception noStringSetter) {
      try {
        // now integers
        setter = target.getClass().getMethod("set" + StringUtil.capitalise(subAttribute), int.class);
        argument = Integer.valueOf(propertyValue);
      } catch (Exception e) {
        // only the exception class is logged: a NumberFormatException message contains the value
        LOG.warn(
            "Ignoring property "
                + propertyName
                + ": no usable setter on "
                + target.getClass().getName()
                + " ("
                + e.getClass().getName()
                + ")");
        return;
      }
    }
    try {
      setter.invoke(target, argument);
    } catch (Exception e) {
      LOG.warn(
          "Unable to apply property " + propertyName + " (" + e.getClass().getName() + ")");
    }
  }
예제 #6
0
  /**
   * Generate the Java source for the accessor methods of a field.
   *
   * <p>A getter and a setter are always produced. A collection additionally gets an
   * {@code add...} method; a non-collection reference additionally gets {@code proxy...} and
   * {@code proxGet...} methods, and its getter unwraps a ProxyReference. When the field is not
   * present only method declarations terminated by a semicolon are emitted.
   *
   * @param field descriptor for field
   * @param fieldPresent true if this class has the associated field
   * @return string with generated java code
   */
  protected String generateGetSet(FieldDescriptor field, boolean fieldPresent) {
    final String name = field.getName();
    final String type = getType(field);
    final String suffix = StringUtil.reverseCapitalisation(name);
    final boolean isReference = field instanceof ReferenceDescriptor;
    final boolean isCollection = field instanceof CollectionDescriptor;
    final String declarationEnd = ";" + ENDL;
    final String bodyEnd = "; }" + ENDL;

    StringBuilder code = new StringBuilder();

    // Get method
    code.append(INDENT).append("public ").append(type).append(" get").append(suffix).append("()");
    if (fieldPresent) {
      code.append(" { ");
      if (isReference && !isCollection) {
        // This is an object reference.
        code.append("if (").append(name);
        code.append(" instanceof org.intermine.objectstore.proxy.ProxyReference) { return ");
        code.append("((").append(type);
        code.append(") ((org.intermine.objectstore.proxy.ProxyReference) ").append(name);
        code.append(").getObject()); }; return (").append(type);
        code.append(") ").append(name).append(bodyEnd);
      } else {
        code.append("return ").append(name).append(bodyEnd);
      }
    } else {
      code.append(declarationEnd);
    }

    // Set method
    code.append(INDENT).append("public void ").append("set").append(suffix);
    code.append("(final ").append(type).append(" ").append(name).append(")");
    if (fieldPresent) {
      code.append(" { ").append("this.").append(name).append(" = ").append(name).append(bodyEnd);
    } else {
      code.append(declarationEnd);
    }

    if (!isReference) {
      return code.toString();
    }

    if (isCollection) {
      // Add method for collections
      code.append(INDENT).append("public void add").append(suffix).append("(final ");
      code.append(((CollectionDescriptor) field).getReferencedClassDescriptor().getName());
      code.append(" arg)");
      if (fieldPresent) {
        code.append(" { ").append(name).append(".add(arg); }" + ENDL);
      } else {
        code.append(declarationEnd);
      }
      return code.toString();
    }

    // This is an object reference: proxy setter followed by proxy getter.
    code.append(INDENT).append("public void proxy").append(suffix);
    code.append("(final org.intermine.objectstore.proxy.ProxyReference ").append(name).append(")");
    if (fieldPresent) {
      code.append(" { this.").append(name).append(" = ").append(name).append(bodyEnd);
    } else {
      code.append(declarationEnd);
    }

    code.append(INDENT).append("public org.intermine.model.InterMineObject proxGet");
    code.append(suffix).append("()");
    if (fieldPresent) {
      code.append(" { return ").append(name).append(bodyEnd);
    } else {
      code.append(declarationEnd);
    }

    return code.toString();
  }
  /** {@inheritDoc} */
  public void process(Reader reader) throws Exception {

    // Create the single chromosome that every feature in the file is located on
    Item chromosome = createItem("Chromosome");
    chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID);
    store(chromosome);

    Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);

    while (lineIter.hasNext()) {
      String[] line = (String[]) lineIter.next();

      // skip the header line
      if (line[0].equals(HEADER_LINE)) {
        continue;
      }

      String ecogeneId = line[0];
      String geneName = line[1];
      String eCK = line[2];
      String swissProtId = line[3];
      String wisconsinGenBankId = line[4];
      String genBankProteinId = line[5];
      String genoBaseId = line[6];
      String type = line[7];
      String strand = line[8];
      String start = line[9];
      String end = line[10];
      String synonym = line[11];

      // TreeSet: synonyms are de-duplicated and created in sorted order
      Set<String> symSet = new TreeSet<String>();

      if (!eCK.equals(NULL_STRING)) {
        symSet.add(eCK);
      }

      if (!genoBaseId.equals(NULL_STRING)) {
        symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; ")));
      }

      if (!synonym.equals(NONE_STRING)) {
        symSet.addAll(Arrays.asList(synonym.split(", ")));
      }

      if (type.equals(TYPE_GENE)) {
        Item gene =
            createEcoGeneFeature(
                "Gene", chromosome, ecogeneId, wisconsinGenBankId, geneName, symSet);

        if (!swissProtId.equals(NULL_STRING)) {
          addEcoGeneProtein(gene, swissProtId, genBankProteinId);
        }

        storeEcoGeneLocation(gene, chromosome, start, end, strand);
        store(gene);
      } else if (type.equals(TYPE_RNA)) {
        Item rna =
            createEcoGeneFeature(
                "NcRNA", chromosome, ecogeneId, wisconsinGenBankId, geneName, symSet);

        storeEcoGeneLocation(rna, chromosome, start, end, strand);
        store(rna);
      }
    }
  }

  /**
   * Create (but do not store) a feature item of the given class with its chromosome, organism,
   * identifiers, name and symbol set, and create a synonym for each entry in the synonym set.
   */
  private Item createEcoGeneFeature(
      String className,
      Item chromosome,
      String ecogeneId,
      String wisconsinGenBankId,
      String geneName,
      Set<String> synonyms)
      throws Exception {
    Item feature = createItem(className);
    feature.setReference("chromosome", chromosome);
    feature.setReference("organism", getOrganism(ECOLI_TAXON));
    feature.setAttribute("primaryIdentifier", ecogeneId);
    feature.setAttribute("secondaryIdentifier", wisconsinGenBankId);
    feature.setAttribute("name", geneName);
    feature.setAttribute("symbol", geneName);

    for (String sym : synonyms) {
      createSynonym(feature, sym, true);
    }
    return feature;
  }

  /**
   * Add the protein with the given accession to the gene's proteins collection, creating and
   * storing the protein item the first time the accession is seen.
   */
  private void addEcoGeneProtein(Item gene, String swissProtId, String genBankProteinId)
      throws Exception {
    if (proteinMap.containsKey(swissProtId)) {
      // Reference a protein to a gene (a gene has proteins collection)
      gene.addToCollection("proteins", proteinMap.get(swissProtId));
      return;
    }
    Item protein = createItem("Protein");
    protein.setAttribute("primaryAccession", swissProtId);
    // NCBI Protein id, remove "g"
    protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1));
    gene.addToCollection("proteins", protein);
    store(protein);
    proteinMap.put(swissProtId, protein);
  }

  /**
   * Create and store a Location of the feature on the chromosome and reference it from the
   * feature. Nothing is done unless both start and end match DIGIT_REGEX.
   */
  private void storeEcoGeneLocation(
      Item feature, Item chromosome, String start, String end, String strand) throws Exception {
    if (!start.matches(DIGIT_REGEX) || !end.matches(DIGIT_REGEX)) {
      return;
    }

    Item location = createItem("Location");
    location.setAttribute("start", start);
    location.setAttribute("end", end);
    location.setReference("feature", feature);
    location.setReference("locatedOn", chromosome);

    if (strand.equals(CLOCKWISE)) {
      location.setAttribute("strand", "+1");
    } else if (strand.equals(COUNTER_CLOCKWISE)) {
      location.setAttribute("strand", "-1");
    } else {
      location.setAttribute("strand", "0");
    }

    feature.setReference("chromosomeLocation", location);

    store(location);
  }
 /**
  * Obtain the PubMed information for the publications by opening the URL formed from
  * {@code EFETCH_URL} followed by the comma-separated ids.
  *
  * <p>The response is decoded as UTF-8 rather than with the platform default charset, so that
  * non-ASCII characters are read the same way on every machine.
  *
  * @param ids the pubMedIds of the publications
  * @return a Reader for the information; the caller is responsible for closing it
  * @throws Exception if an error occurs
  */
 protected Reader getReader(Set<Integer> ids) throws Exception {
   String urlString = EFETCH_URL + StringUtil.join(ids, ",");
   System.err.println("retrieving: " + urlString);
   return new BufferedReader(
       new InputStreamReader(new URL(urlString).openStream(), "UTF-8"));
 }
예제 #9
0
  /**
   * Return a Set of PrimaryKeys relevant to a given Source for a ClassDescriptor. The Set contains
   * all the primary keys that exist on a particular class that are used by the source, without
   * performing any recursion. The Model.getClassDescriptorsForClass() method is recommended if you
   * wish for all the primary keys of the class' parents as well.
   *
   * <p>Results are cached per (class, source) pair. The first time a source is seen its whole
   * key configuration is verified against the model's key definitions. Keys declared inline as
   * {@code ClassName.keyName} are added to the result and, when the ObjectStore is an
   * ObjectStoreInterMineImpl, an index is created for them.
   *
   * @param cld the ClassDescriptor
   * @param source the Source
   * @param os the ObjectStore that these PrimaryKeys are used in, for creating indexes
   * @return a Set of PrimaryKeys
   * @throws IllegalArgumentException if the source has no key configuration, or names a key that
   *     is not defined for the class
   */
  public static Set<PrimaryKey> getPrimaryKeys(ClassDescriptor cld, Source source, ObjectStore os) {
    GetPrimaryKeyCacheKey key = new GetPrimaryKeyCacheKey(cld, source);
    synchronized (getPrimaryKeyCache) {
      Set<PrimaryKey> keySet = getPrimaryKeyCache.get(key);
      if (keySet == null) {
        keySet = new LinkedHashSet<PrimaryKey>();
        Properties keys = getKeyProperties(source);
        if (keys != null) {
          if (!verifiedSources.contains(source)) {
            // one-off check of every plain (undotted) entry in this source's key config
            String packageNameWithDot =
                cld.getName().substring(0, cld.getName().lastIndexOf('.') + 1);
            LOG.info(
                "Verifying primary key config for source "
                    + source
                    + ", packageName = "
                    + packageNameWithDot);
            for (Map.Entry<Object, Object> entry : keys.entrySet()) {
              String cldName = (String) entry.getKey();
              String keyList = (String) entry.getValue();
              if (!cldName.contains(".")) {
                ClassDescriptor iCld =
                    cld.getModel().getClassDescriptorByName(packageNameWithDot + cldName);
                if (iCld != null) {
                  Map<String, PrimaryKey> map = PrimaryKeyUtil.getPrimaryKeys(iCld);

                  String[] tokens = keyList.split(",");
                  for (int i = 0; i < tokens.length; i++) {
                    String token = tokens[i].trim();
                    if (map.get(token) == null) {
                      throw new IllegalArgumentException(
                          "Primary key "
                              + token
                              + " for class "
                              + cldName
                              + " required by datasource "
                              + source.getName()
                              + " in "
                              + source.getName()
                              + "_keys.properties is not defined in "
                              + cld.getModel().getName()
                              + "_keyDefs.properties");
                    }
                  }
                } else {
                  LOG.warn(
                      "Ignoring entry for "
                          + cldName
                          + " in file "
                          + cld.getModel().getName()
                          + "_keyDefs.properties - not in model!");
                }
              }
            }
            verifiedSources.add(source);
          }

          // keys listed by name for this class: look each one up in the model's definitions
          Map<String, PrimaryKey> map = PrimaryKeyUtil.getPrimaryKeys(cld);
          String cldName = TypeUtil.unqualifiedName(cld.getName());
          String keyList = (String) keys.get(cldName);
          if (keyList != null) {
            String[] tokens = keyList.split(",");
            for (int i = 0; i < tokens.length; i++) {
              String token = tokens[i].trim();
              if (map.get(token) == null) {
                throw new IllegalArgumentException(
                    "Primary key "
                        + token
                        + " for class "
                        + cld.getName()
                        + " required by data source "
                        + source.getName()
                        + " in "
                        + source.getName()
                        + "_keys.properties is not defined in "
                        + cld.getModel().getName()
                        + "_keyDefs.properties");
              } else {
                keySet.add(map.get(token));
              }
            }
          }

          // keys declared inline as "ClassName.keyName = field list"
          for (Map.Entry<Object, Object> entry : keys.entrySet()) {
            String propKey = (String) entry.getKey();
            String fieldList = (String) entry.getValue();
            int posOfDot = propKey.indexOf('.');
            if (posOfDot > 0) {
              String propCldName = propKey.substring(0, posOfDot);
              if (cldName.equals(propCldName)) {
                String keyName = propKey.substring(posOfDot + 1);
                PrimaryKey pk = new PrimaryKey(keyName, fieldList, cld);
                if (!keySet.contains(pk)) {
                  keySet.add(pk);
                  if (os instanceof ObjectStoreInterMineImpl) {
                    createIndexForKey((ObjectStoreInterMineImpl) os, cld, pk, keyName);
                  }
                }
              }
            }
          }
        } else {
          throw new IllegalArgumentException(
              "Unable to find keys for source "
                  + source.getName()
                  + " in file "
                  + source.getName()
                  + "_keys.properties");
        }
        getPrimaryKeyCache.put(key, keySet);
      }
      return keySet;
    }
  }

  /**
   * Create a database index over the columns of a primary key on the table that holds the class.
   * A failure to create the index is logged and otherwise ignored. The statement is closed and
   * the connection released whether or not the index was created.
   */
  private static void createIndexForKey(
      ObjectStoreInterMineImpl osimi, ClassDescriptor cld, PrimaryKey pk, String keyName) {
    DatabaseSchema schema = osimi.getSchema();
    ClassDescriptor tableMaster = schema.getTableMaster(cld);
    String tableName = DatabaseUtil.getTableName(tableMaster);
    List<String> fields = new ArrayList<String>();

    for (String field : pk.getFieldNames()) {
      String colName = DatabaseUtil.generateSqlCompatibleName(field);
      // reference fields get an "id" suffix on their column name
      if (tableMaster.getReferenceDescriptorByName(field, true) != null) {
        colName += "id";
      }
      fields.add(colName);
    }
    String sql =
        "CREATE INDEX "
            + tableName
            + "__"
            + keyName
            + " ON "
            + tableName
            + " ("
            + StringUtil.join(fields, ", ")
            + ")";
    System.out.println("Creating index: " + sql);
    LOG.info("Creating index: " + sql);
    Connection conn = null;
    try {
      conn = osimi.getConnection();
      // the statement was previously never closed
      java.sql.Statement statement = conn.createStatement();
      try {
        statement.execute(sql);
      } finally {
        statement.close();
      }
    } catch (SQLException e) {
      LOG.warn("Index creation failed", e);
    } finally {
      if (conn != null) {
        osimi.releaseConnection(conn);
      }
    }
  }