/** * Filter a UniProt XML dump file, write entries for organisms of interest to a new * BufferedWriter. Currently ignores entries for more than one organism. * * @param in the UniProt XML dump * @param out to write output to * @throws IOException if problem with input or output */ public void filter(BufferedReader in, BufferedWriter out) throws IOException { StringBuffer sb = new StringBuffer(); String line = null; boolean keep = true; boolean inOrganism = false; boolean foundTaxon = false; while ((line = in.readLine()) != null) { // quicker to trim whole file first? String trimmed = StringUtil.trimLeft(line); if (trimmed.startsWith("<entry")) { // make sure opening element is included if (keep) { out.write(sb.toString()); } sb = new StringBuffer(); keep = true; inOrganism = false; foundTaxon = false; } else if (trimmed.startsWith("<organism")) { inOrganism = true; foundTaxon = false; } else if (inOrganism && trimmed.startsWith("<dbReference type=\"NCBI Taxonomy")) { // ignores the possibility of a protein being linked to multiple organisms foundTaxon = true; int start = trimmed.indexOf("id=\"") + 4; String taxonId = trimmed.substring(start, trimmed.indexOf('"', start)); if (!(organisms.contains(taxonId))) { keep = false; } } else if (inOrganism && trimmed.startsWith("</organism") && !foundTaxon) { // if the organism has no taxon defined then we don't want it keep = false; } if (keep) { sb.append(StringUtil.escapeBackslash(line) + System.getProperty("line.separator")); } } // catch final entry if (keep) { out.write(sb.toString()); } else { out.write("</uniprot>"); } out.flush(); }
protected void createFromFile(File f) throws IOException { // data is in format: // ZDBID ID1,ID2,ID3 Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(new BufferedReader(new FileReader(f))); while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); if (line.length < 2 || line[0].startsWith("#") || !line[0].startsWith(GENE_PATTERN)) { continue; } String zfinId = line[0]; String[] synonyms = StringUtil.split(line[1].trim(), ","); resolver.addMainIds(taxonId, zfinId, Collections.singleton(zfinId)); resolver.addSynonyms(taxonId, zfinId, new HashSet<String>(Arrays.asList(synonyms))); } }
/**
 * Resolve the first usable PubMed id in a '|'-separated list of codes to the identifier of a
 * Publication item. A code is usable when it starts with "PMID:" and the remainder consists
 * only of digits. The item is created, remembered in the publications map and stored the
 * first time a PubMed id is seen; later calls reuse the remembered identifier.
 *
 * @param codes '|'-separated codes
 * @return the identifier of the Publication item, or null if no code is usable
 * @throws ObjectStoreException propagated from creating or storing the item
 */
private String newPublication(String codes) throws ObjectStoreException {
    for (String code : codes.split("[|]")) {
        if (!code.startsWith("PMID:")) {
            continue;
        }
        String pubMedId = code.substring(5);
        if (!StringUtil.allDigits(pubMedId)) {
            continue;
        }

        // Only the first well-formed PMID is considered; the method returns from here.
        String refId = publications.get(pubMedId);
        if (refId != null) {
            return refId;
        }
        Item publication = createItem("Publication");
        publication.setAttribute("pubMedId", pubMedId);
        refId = publication.getIdentifier();
        publications.put(pubMedId, refId);
        store(publication);
        return refId;
    }
    return null;
}
private void combine(ActionForm form, HttpServletRequest request, String opText) { HttpSession session = request.getSession(); final InterMineAPI im = SessionMethods.getInterMineAPI(session); Profile profile = SessionMethods.getProfile(session); ModifyBagForm mbf = (ModifyBagForm) form; BagManager bagManager = im.getBagManager(); Map<String, InterMineBag> allBags = bagManager.getBags(profile); String[] selectedBagNames = mbf.getSelectedBags(); Collection<InterMineBag> selectedBags = getSelectedBags(allBags, selectedBagNames); String newBagName = NameUtil.validateName(allBags.keySet(), mbf.getNewBagName()); int newBagSize = 0; try { if (opText.equals(BagOperations.UNION)) { newBagSize = BagOperations.union(selectedBags, newBagName, profile, im.getClassKeys()); } else if (opText.equals(BagOperations.INTERSECT)) { newBagSize = BagOperations.intersect(selectedBags, newBagName, profile, im.getClassKeys()); } else if (opText.equals(BagOperations.SUBTRACT)) { newBagSize = BagOperations.subtract(selectedBags, newBagName, profile, im.getClassKeys()); } } catch (IncompatibleTypesException e) { SessionMethods.recordError( "You can only perform operations on lists of the same type." 
+ " Lists " + StringUtil.prettyList(Arrays.asList(selectedBagNames)) + " do not match.", session); return; } catch (ObjectStoreException e) { LOG.error(e); ActionMessage actionMessage = new ActionMessage("An error occurred while saving the list"); recordError(actionMessage, request); return; } if (newBagSize > 0) { SessionMethods.recordMessage( "Created list \"" + newBagName + "\" as " + opText + " of " + StringUtil.prettyList(Arrays.asList(selectedBagNames)) + ".", session); // track the list creation im.getTrackerDelegate() .trackListCreation( BagOperations.getCommonBagType(selectedBags), newBagSize, ListBuildMode.OPERATION, profile, session.getId()); } else { SessionMethods.recordError( opText + " operation on lists " + StringUtil.prettyList(Arrays.asList(selectedBagNames)) + " produced no results.", session); } }
/** * Configures a datasource from a Properties object * * @param props the properties for configuring the Database * @throws ClassNotFoundException if the class given in the properties file cannot be found * @throws IllegalArgumentException if the configuration properties are empty * @throws NullPointerException if props is null */ protected void configure(Properties props) throws ClassNotFoundException { if (props == null) { throw new NullPointerException("Props cannot be null"); } if (props.size() == 0) { throw new IllegalArgumentException("No configuration details"); } Properties subProps = new Properties(); for (Map.Entry<Object, Object> entry : props.entrySet()) { String propertyName = (String) entry.getKey(); String propertyValue = (String) entry.getValue(); Field field = null; // Get the first part of the string - this is the attribute we are taking about String attribute = propertyName; String subAttribute = ""; int index = propertyName.indexOf("."); if (index != -1) { attribute = propertyName.substring(0, index); subAttribute = propertyName.substring(index + 1); } try { field = Database.class.getDeclaredField(attribute); } catch (Exception e) { LOG.warn("Ignoring field for Database: " + attribute); // Ignore this property - no such field continue; } if ("class".equals(subAttribute)) { // make a new instance of this class for this attribute Class<?> clazz = Class.forName(propertyValue.toString()); Object obj; try { obj = clazz.newInstance(); } catch (Exception e) { throw new ClassNotFoundException( "Cannot instantiate class " + clazz.getName() + " " + e.getMessage()); } // Set the field to this newly instantiated class try { field.set(this, obj); } catch (Exception e) { continue; } } else if ("".equals(subAttribute)) { // Set this attribute directly try { field.set(this, propertyValue); } catch (Exception e) { continue; } } else { // Set parameters on the attribute Method m = null; // Set this configuration parameter on the DataSource; try { // Strings 
first Object o = field.get(this); // Sometimes the class will not have been instantiated yet if (o == null) { subProps.put(propertyName, propertyValue); continue; } Class<?> clazz = o.getClass(); m = clazz.getMethod( "set" + StringUtil.capitalise(subAttribute), new Class[] {String.class}); if (m != null) { m.invoke(field.get(this), new Object[] {propertyValue}); } // now integers } catch (Exception e) { // Don't do anything - either the method not found or cannot be invoked } try { if (m == null) { m = field .get(this) .getClass() .getMethod( "set" + StringUtil.capitalise(subAttribute), new Class[] {int.class}); if (m != null) { m.invoke(field.get(this), new Object[] {Integer.valueOf(propertyValue.toString())}); } } } catch (Exception e) { // Don't do anything - either the method not found or cannot be invoked } } if (subProps.size() > 0) { configure(subProps); } } }
/** * Write code for getters and setters for given field. * * @param field descriptor for field * @param fieldPresent true if this class has the associated field * @return string with generated java code */ protected String generateGetSet(FieldDescriptor field, boolean fieldPresent) { String name = field.getName(); String type = getType(field); StringBuffer sb = new StringBuffer(); // Get method sb.append(INDENT) .append("public ") .append(type) .append(" get") .append(StringUtil.reverseCapitalisation(name)) .append("()"); if (!fieldPresent) { sb.append(";" + ENDL); } else { sb.append(" { "); if ((field instanceof ReferenceDescriptor) && (!(field instanceof CollectionDescriptor))) { // This is an object reference. sb.append("if (") .append(name) .append(" instanceof org.intermine.objectstore.proxy.ProxyReference) { return ") .append("((") .append(type) .append(") ((org.intermine.objectstore.proxy.ProxyReference) ") .append(name) .append(").getObject()); }; return (") .append(type) .append(") ") .append(name) .append("; }" + ENDL); } else { sb.append("return ").append(name).append("; }" + ENDL); } } // Set method sb.append(INDENT) .append("public void ") .append("set") .append(StringUtil.reverseCapitalisation(name)) .append("(final ") .append(type) .append(" ") .append(name) .append(")"); if (!fieldPresent) { sb.append(";" + ENDL); } else { sb.append(" { ").append("this.").append(name).append(" = ").append(name).append("; }" + ENDL); } if (field instanceof ReferenceDescriptor) { if (field instanceof CollectionDescriptor) { sb.append(INDENT) .append("public void add") .append(StringUtil.reverseCapitalisation(name)) .append("(final ") .append(((CollectionDescriptor) field).getReferencedClassDescriptor().getName()) .append(" arg)"); if (fieldPresent) { sb.append(" { ").append(name).append(".add(arg); }" + ENDL); } else { sb.append(";" + ENDL); } } else { // This is an object reference. 
sb.append(INDENT) .append("public void proxy") .append(StringUtil.reverseCapitalisation(name)) .append("(final org.intermine.objectstore.proxy.ProxyReference ") .append(name) .append(")"); if (fieldPresent) { sb.append(" { this.").append(name).append(" = ").append(name).append("; }" + ENDL); } else { sb.append(";" + ENDL); } sb.append(INDENT) .append("public org.intermine.model.InterMineObject proxGet") .append(StringUtil.reverseCapitalisation(name)) .append("()"); if (fieldPresent) { sb.append(" { return ").append(name).append("; }" + ENDL); } else { sb.append(";" + ENDL); } } } return sb.toString(); }
/** {@inheritDoc} */ public void process(Reader reader) throws Exception { // Create a chromosome Item chromosome = createItem("Chromosome"); chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID); store(chromosome); @SuppressWarnings("rawtypes") Iterator lineIter = FormattedTextParser.parseTabDelimitedReader(reader); while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); // remove header line if (!line[0].equals(HEADER_LINE)) { String ecogeneId = line[0]; String geneName = line[1]; String eCK = line[2]; String swissProtId = line[3]; String wisconsinGenBankId = line[4]; String genBankProteinId = line[5]; String genoBaseId = line[6]; String type = line[7]; String strand = line[8]; String start = line[9]; String end = line[10]; String synonym = line[11]; Set<String> symSet = new TreeSet<String>(); if (!eCK.equals(NULL_STRING)) { symSet.add(eCK); } if (!genoBaseId.equals(NULL_STRING)) { symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; "))); } if (!synonym.equals(NONE_STRING)) { symSet.addAll(Arrays.asList(synonym.split(", "))); } if (type.equals(TYPE_GENE)) { Item gene = createItem("Gene"); gene.setReference("chromosome", chromosome); gene.setReference("organism", getOrganism(ECOLI_TAXON)); gene.setAttribute("primaryIdentifier", ecogeneId); gene.setAttribute("secondaryIdentifier", wisconsinGenBankId); gene.setAttribute("name", geneName); gene.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(gene, sym, true); } } if (!swissProtId.equals(NULL_STRING)) { if (proteinMap.containsKey(swissProtId)) { // Reference a protein to a gene (a gene has proteins // collection) gene.addToCollection("proteins", proteinMap.get(swissProtId)); } else { Item protein = createItem("Protein"); protein.setAttribute("primaryAccession", swissProtId); // NCBI Protein id, remove "g" protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1)); gene.addToCollection("proteins", protein); 
store(protein); proteinMap.put(swissProtId, protein); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", gene); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } gene.setReference("chromosomeLocation", location); store(location); } store(gene); } else if (type.equals(TYPE_RNA)) { // TODO code refactory Item rna = createItem("NcRNA"); rna.setReference("chromosome", chromosome); rna.setReference("organism", getOrganism(ECOLI_TAXON)); rna.setAttribute("primaryIdentifier", ecogeneId); rna.setAttribute("secondaryIdentifier", wisconsinGenBankId); rna.setAttribute("name", geneName); rna.setAttribute("symbol", geneName); if (symSet.size() > 0) { for (String sym : symSet) { createSynonym(rna, sym, true); } } // Create chromosome location if (start.matches(DIGIT_REGEX) && end.matches(DIGIT_REGEX)) { Item location = createItem("Location"); location.setAttribute("start", start); location.setAttribute("end", end); location.setReference("feature", rna); location.setReference("locatedOn", chromosome); if (strand.equals(CLOCKWISE)) { location.setAttribute("strand", "+1"); } else if (strand.equals(COUNTER_CLOCKWISE)) { location.setAttribute("strand", "-1"); } else { location.setAttribute("strand", "0"); } rna.setReference("chromosomeLocation", location); store(location); } store(rna); } } } }
/**
 * Obtain the PubMed information for the publications by requesting EFETCH_URL with the
 * comma-joined ids appended. The URL is echoed to standard error before it is opened.
 *
 * @param ids the pubMedIds of the publications
 * @return a Reader for the information, decoding the response as UTF-8
 * @throws Exception if an error occurs
 */
protected Reader getReader(Set<Integer> ids) throws Exception {
    String urlString = EFETCH_URL + StringUtil.join(ids, ",");
    System.err.println("retrieving: " + urlString);
    // Decode explicitly instead of relying on the platform default charset, which corrupts
    // non-ASCII text on machines whose default is not UTF-8.
    // NOTE(review): assumes the service responds in UTF-8 - confirm against EFETCH_URL.
    return new BufferedReader(
        new InputStreamReader(new URL(urlString).openStream(), "UTF-8"));
}
/**
 * Return a Set of PrimaryKeys relevant to a given Source for a ClassDescriptor. The Set contains
 * all the primary keys that exist on a particular class that are used by the source, without
 * performing any recursion. The Model.getClassDescriptorsForClass() method is recommended if you
 * wish for all the primary keys of the class' parents as well.
 *
 * <p>Results are cached per (cld, source) pair, and the whole computation runs while holding
 * the cache's monitor. The first call for a source also checks every unqualified entry of its
 * key configuration against the model. Keys declared inline as "ClassName.keyName" are added
 * to the result and, when the ObjectStore is an ObjectStoreInterMineImpl, a database index is
 * created for them; a failure to create the index is logged and otherwise ignored.
 *
 * @param cld the ClassDescriptor
 * @param source the Source
 * @param os the ObjectStore that these PrimaryKeys are used in, for creating indexes
 * @return a Set of PrimaryKeys
 * @throws IllegalArgumentException if no key configuration is found for the source, or if it
 *     names a primary key that is not defined for the class
 */
public static Set<PrimaryKey> getPrimaryKeys(ClassDescriptor cld, Source source, ObjectStore os) {
    GetPrimaryKeyCacheKey key = new GetPrimaryKeyCacheKey(cld, source);
    synchronized (getPrimaryKeyCache) {
        Set<PrimaryKey> keySet = getPrimaryKeyCache.get(key);
        if (keySet == null) {
            keySet = new LinkedHashSet<PrimaryKey>();
            Properties keys = getKeyProperties(source);
            if (keys != null) {
                // One-off sanity check of the whole key configuration for this source.
                if (!verifiedSources.contains(source)) {
                    String packageNameWithDot =
                        cld.getName().substring(0, cld.getName().lastIndexOf('.') + 1);
                    LOG.info(
                        "Verifying primary key config for source "
                            + source
                            + ", packageName = "
                            + packageNameWithDot);
                    for (Map.Entry<Object, Object> entry : keys.entrySet()) {
                        String cldName = (String) entry.getKey();
                        String keyList = (String) entry.getValue();
                        // Entries containing a dot are inline key definitions, handled below.
                        if (!cldName.contains(".")) {
                            ClassDescriptor iCld =
                                cld.getModel().getClassDescriptorByName(packageNameWithDot + cldName);
                            if (iCld != null) {
                                Map<String, PrimaryKey> map = PrimaryKeyUtil.getPrimaryKeys(iCld);

                                String[] tokens = keyList.split(",");
                                for (int i = 0; i < tokens.length; i++) {
                                    String token = tokens[i].trim();
                                    if (map.get(token) == null) {
                                        throw new IllegalArgumentException(
                                            "Primary key "
                                                + token
                                                + " for class "
                                                + cldName
                                                + " required by datasource "
                                                + source.getName()
                                                + " in "
                                                + source.getName()
                                                + "_keys.properties is not defined in "
                                                + cld.getModel().getName()
                                                + "_keyDefs.properties");
                                    }
                                }
                            } else {
                                LOG.warn(
                                    "Ignoring entry for "
                                        + cldName
                                        + " in file "
                                        + cld.getModel().getName()
                                        + "_keyDefs.properties - not in model!");
                            }
                        }
                    }
                    verifiedSources.add(source);
                }

                // Keys referenced by name for this class.
                Map<String, PrimaryKey> map = PrimaryKeyUtil.getPrimaryKeys(cld);
                String cldName = TypeUtil.unqualifiedName(cld.getName());
                String keyList = (String) keys.get(cldName);
                if (keyList != null) {
                    String[] tokens = keyList.split(",");
                    for (int i = 0; i < tokens.length; i++) {
                        String token = tokens[i].trim();
                        if (map.get(token) == null) {
                            throw new IllegalArgumentException(
                                "Primary key "
                                    + token
                                    + " for class "
                                    + cld.getName()
                                    + " required by data source "
                                    + source.getName()
                                    + " in "
                                    + source.getName()
                                    + "_keys.properties is not defined in "
                                    + cld.getModel().getName()
                                    + "_keyDefs.properties");
                        } else {
                            keySet.add(map.get(token));
                        }
                    }
                }

                // Keys defined inline as "<ClassName>.<keyName> = <field list>".
                for (Map.Entry<Object, Object> entry : keys.entrySet()) {
                    String propKey = (String) entry.getKey();
                    String fieldList = (String) entry.getValue();
                    int posOfDot = propKey.indexOf('.');
                    if (posOfDot > 0) {
                        String propCldName = propKey.substring(0, posOfDot);
                        if (cldName.equals(propCldName)) {
                            String keyName = propKey.substring(posOfDot + 1);
                            PrimaryKey pk = new PrimaryKey(keyName, fieldList, cld);
                            if (!keySet.contains(pk)) {
                                keySet.add(pk);
                                if (os instanceof ObjectStoreInterMineImpl) {
                                    ObjectStoreInterMineImpl osimi = (ObjectStoreInterMineImpl) os;
                                    DatabaseSchema schema = osimi.getSchema();
                                    ClassDescriptor tableMaster = schema.getTableMaster(cld);
                                    String tableName = DatabaseUtil.getTableName(tableMaster);
                                    List<String> fields = new ArrayList<String>();
                                    for (String field : pk.getFieldNames()) {
                                        String colName = DatabaseUtil.generateSqlCompatibleName(field);
                                        // Reference fields get an "id" suffix on the column name.
                                        if (tableMaster.getReferenceDescriptorByName(field, true)
                                            != null) {
                                            colName += "id";
                                        }
                                        fields.add(colName);
                                    }
                                    String sql =
                                        "CREATE INDEX "
                                            + tableName
                                            + "__"
                                            + keyName
                                            + " ON "
                                            + tableName
                                            + " ("
                                            + StringUtil.join(fields, ", ")
                                            + ")";
                                    System.out.println("Creating index: " + sql);
                                    LOG.info("Creating index: " + sql);
                                    Connection conn = null;
                                    java.sql.Statement stmt = null;
                                    try {
                                        conn = osimi.getConnection();
                                        stmt = conn.createStatement();
                                        stmt.execute(sql);
                                    } catch (SQLException e) {
                                        LOG.warn("Index creation failed", e);
                                    } finally {
                                        // Close the statement explicitly; the connection is only
                                        // released back to the object store, not closed here.
                                        if (stmt != null) {
                                            try {
                                                stmt.close();
                                            } catch (SQLException e) {
                                                LOG.warn("Failed to close index creation statement", e);
                                            }
                                        }
                                        if (conn != null) {
                                            osimi.releaseConnection(conn);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                throw new IllegalArgumentException(
                    "Unable to find keys for source "
                        + source.getName()
                        + " in file "
                        + source.getName()
                        + "_keys.properties");
            }
            getPrimaryKeyCache.put(key, keySet);
        }
        return keySet;
    }
}