private boolean checkFeaturesAndMapWeights(Connection con) { boolean result = true; int rowCount = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM marker_feature"); if (rowCount == 0) { ReportManager.problem( this, con, "No marker features in database even though markers are present"); result = false; } int badWeightCount = DBUtils.getRowCount( con, "SELECT marker_id, COUNT(*) AS correct, map_weight FROM marker_feature GROUP BY marker_id HAVING map_weight != correct"); if (badWeightCount > 0) { ReportManager.problem( this, con, badWeightCount + " marker features have not been assigned correct map weights"); result = false; } if (result) { ReportManager.correct(this, con, "Marker features appear to be ok"); } return result; } // checkFeaturesAndMapWeights
public boolean checkTreeStatsArePresent(final DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); if (!tableHasRows(con, "species_tree_root")) { return true; } int n_tags_root = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'root\\_%'"); int n_tags_genes = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'nb%\\_genes%'"); boolean result = true; if (n_tags_root == 0) { ReportManager.problem( this, con, "There are no species_tree_node_tags to describe properties of the root nodes"); result = false; } else if (n_tags_genes == 0) { ReportManager.problem( this, con, "There are no species_tree_node_tags to summarize the gene counts"); result = false; } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // -------------------------------- // MGI - dbprimary_acc should have MGI: prefix int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='MGI' AND x.dbprimary_acc NOT LIKE 'MGI:%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " MGI xrefs do not have MGI: prefixes in the dbprimary_acc column"); result = false; } else { ReportManager.correct(this, con, "All MGI xrefs have the correct prefix"); } // -------------------------------- // GO - dbprimary_acc and display_label should have GO: prefix rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='GO' AND (x.dbprimary_acc NOT LIKE 'GO:%' OR x.display_label NOT LIKE 'GO:%')"); if (rows > 0) { ReportManager.problem( this, con, rows + " GO xrefs do not have GO: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All GO xrefs have the correct prefix"); } // -------------------------------- // ZFIN - dbprimary_acc should begin with ZDB rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='ZFIN_ID' AND x.dbprimary_acc NOT LIKE 'ZDB%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " ZFIN xrefs do not have ZDB: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All ZFIN xrefs have the correct prefix"); } return result; } // run
/** * Test various things about ditag features. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM identity_xref WHERE cigar_line REGEXP '^[MDI]'"); if (rows > 0) { ReportManager.problem( this, con, rows + " cigar lines in identity_xref appear to be in the wrong format (number first)"); result = false; } else { ReportManager.correct( this, con, "All cigar lines in identity_xref are in the correct format"); } return result; }
public DatabaseServer(String host, String port, String user, String pass, String driver) { this.driver = driver; this.host = host; this.port = port; this.user = user; this.pass = pass; this.databaseURL = buildDatabaseURL(); try { this.connection = DBUtils.openConnection(driver, databaseURL, user, pass); connectedSuccessfully = true; } catch (SQLException e) { logger.warning("Couldn't connect to database" + " " + host + " " + port + " " + user); connectedSuccessfully = false; } }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db WHERE db_display_name IS NULL OR db_display_name LIKE ' %'"); if (rows > 0) { ReportManager.problem( this, con, rows + " rows in external_db have null or blank db_display_name - this will mean their label is missing on the web page"); result = false; } else { ReportManager.correct(this, con, "No blank db_display_name fields in external_db"); } return result; } // run
/** Check that all priorities are greater than a certain threshold. */ private boolean checkMarkerPriorities(Connection con) { boolean result = true; int count = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM marker WHERE priority > " + MARKER_PRIORITY_THRESHOLD); if (count == 0) { ReportManager.problem( this, con, " No marker features have priorities greater than the threshold (" + MARKER_PRIORITY_THRESHOLD + ")"); result = false; } else { ReportManager.correct( this, con, "Some marker features have priorities greater than " + MARKER_PRIORITY_THRESHOLD); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // list of transcript analysis logic_names which are allowed to not have supporting features String allowed = "'" + StringUtils.join(allowedNoSupporting, "','") + "'"; String sql = String.format( "SELECT COUNT(*),t.analysis_id FROM transcript t LEFT JOIN transcript_supporting_feature tsf ON t.transcript_id = tsf.transcript_id JOIN analysis a ON a.analysis_id=t.analysis_id WHERE a.analysis_id=t.analysis_id and tsf.transcript_id IS NULL AND a.logic_name NOT IN (%s) group by t.analysis_id", allowed); int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " transcripts which should have transcript_supporting_features do not have them\nUseful SQL: " + sql); result = false; } else { ReportManager.correct( this, con, "All transcripts that require supporting features have them"); } return result; } // run
private boolean checkCodingCountAttributes(final DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); String code = (dbre.getType() == DatabaseType.SANGER_VEGA) ? "KnwnPCCount" : "coding_cnt"; SqlTemplate t = DBUtils.getSqlTemplate(dbre); String sql = "select distinct g.seq_region_id from gene g where g.biotype = ? and g.seq_region_id not in (select distinct g.seq_region_id from gene g, seq_region_attrib sa, attrib_type at where g.seq_region_id = sa.seq_region_id and sa.attrib_type_id = at.attrib_type_id and at.code in (?,?))"; List<String> toplevel = t.queryForDefaultObjectList(sql, String.class, "protein_coding", "LRG", "non_ref"); sql = "select distinct g.seq_region_id from gene g, seq_region_attrib sa, attrib_type at where g.seq_region_id = sa.seq_region_id and sa.attrib_type_id = at.attrib_type_id and code =? "; List<String> known = t.queryForDefaultObjectList(sql, String.class, code); Set<String> missing = new HashSet<String>(toplevel); missing.removeAll(known); if (missing.isEmpty()) { ReportManager.correct( this, con, "All seq_regions with protein_coding genes have a coding_cnt attribute associated with them"); } else { String msg = String.format( "%s regions with protein_coding genes do not have the coding_cnt attribute associated", missing.size()); ReportManager.problem(this, con, msg); result = false; } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); if (tableHasRows(con, "method_link_species_set")) { /* Check if have both BLASTZ_NET and LASTZ_NET entries for the same species set */ int numOfBLASTZ_LASTZSpeciesSets = DBUtils.getRowCount( con, "SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); if (numOfBLASTZ_LASTZSpeciesSets > 0) { ReportManager.problem( this, con, "FAILED method_link_species_set table contains " + numOfBLASTZ_LASTZSpeciesSets + " entries with a BLASTZ_NET and LASTZ_NET entry for the same species_set"); ReportManager.problem( this, con, "USEFUL SQL: SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); result = false; } // Everything below will be ignored on the master database if (isMasterDB(dbre.getConnection())) { return result; } /* Check method_link_species_set <-> synteny_region */ /* All method_link for syntenies must have an internal ID between 101 and 199 */ result &= checkForOrphansWithConstraint( con, "method_link_species_set", "method_link_species_set_id", "synteny_region", "method_link_species_set_id", "method_link_id >= 101 and method_link_id < 200"); result &= checkForOrphans( con, "synteny_region", "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } else { ReportManager.correct( this, con, "NO ENTRIES in method_link_species_set table, so nothing to test IGNORED"); } return result; }
/** Check that the schema_version in the meta table is present and matches the database name. */ private boolean checkSchemaVersionDBName(DatabaseRegistryEntry dbre) { boolean result = true; // get version from database name String dbNameVersion = dbre.getSchemaVersion(); logger.finest("Schema version from database name: " + dbNameVersion); // get version from meta table Connection con = dbre.getConnection(); if (dbNameVersion == null) { ReportManager.warning(this, con, "Can't deduce schema version from database name."); return false; } String schemaVersion = DBUtils.getRowColumnValue( con, "SELECT meta_value FROM meta WHERE meta_key='schema_version'"); logger.finest("schema_version from meta table: " + schemaVersion); if (schemaVersion == null || schemaVersion.length() == 0) { ReportManager.problem(this, con, "No schema_version entry in meta table"); return false; } else if (!schemaVersion.matches("[0-9]+")) { ReportManager.problem(this, con, "Meta schema_version " + schemaVersion + " is not numeric"); return false; } else if (!dbNameVersion.equals(schemaVersion) && !isSangerVega) { // do // not // report // for // sangervega ReportManager.problem( this, con, "Meta schema_version " + schemaVersion + " does not match version inferred from database name (" + dbNameVersion + ")"); return false; } else { ReportManager.correct( this, con, "schema_version " + schemaVersion + " matches database name version " + dbNameVersion); } return result; }
private boolean checkTableExists(Connection con) { boolean result = true; if (!DBUtils.checkTableExists(con, "meta")) { result = false; ReportManager.problem(this, con, "Meta table not present"); } else { ReportManager.correct(this, con, "Meta table present"); } return result; }
public boolean run(DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); if (!DBUtils.checkTableExists(con, "species_tree_node_tag")) { ReportManager.problem(this, con, "species_tree_node_tag table not present"); return false; } // These methods return false if there is any problem with the test boolean result = true; result &= checkTreeStatsArePresent(dbre); return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map tableToLetter = new HashMap(); tableToLetter.put("gene", "G"); tableToLetter.put("transcript", "T"); tableToLetter.put("translation", "P"); Iterator it = tableToLetter.keySet().iterator(); while (it.hasNext()) { String table = (String) it.next(); String letter = (String) tableToLetter.get(table); String regexp = "EST" + letter + "[0-9]+"; String sql = "SELECT COUNT(*) FROM " + table + " x, analysis a WHERE a.analysis_id=x.analysis_id " + "AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP '" + regexp + "'"; if (table.equals("translation")) { // need extra join to transcript table sql = "SELECT COUNT(*) FROM translation x, transcript t, analysis a WHERE a.analysis_id=t.analysis_id AND x.transcript_id=t.transcript_id AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP 'ESTP[0-9]+'"; } int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " " + table + " stable IDs do not contain EST" + letter); result = false; } else { ReportManager.correct(this, con, "All stable IDs contain EST" + letter); } } return result; } // run
private boolean tableHasRows(Connection con) { boolean result = true; int rows = DBUtils.countRowsInTable(con, "meta"); if (rows == 0) { result = false; ReportManager.problem(this, con, "meta table is empty"); } else { ReportManager.correct(this, con, "meta table has data"); } return result; }
public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); if (!DBUtils.checkTableExists(con, "method_link_species_set_tag")) { result = false; ReportManager.problem(this, con, "method_link_species_set_tag table not present"); return result; } result &= checkLowCoverageMLSSAreLinkedToHighCoverageMLSS(dbre); return result; }
public boolean checkLowCoverageMLSSAreLinkedToHighCoverageMLSS(final DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); String sql = "SELECT mlss1.method_link_species_set_id, tag, value, mlss2.method_link_species_set_id, ml2.type" + " FROM method_link_species_set mlss1 JOIN method_link ml1 USING (method_link_id)" + " LEFT JOIN method_link_species_set_tag mlsst ON mlsst.method_link_species_set_id = mlss1.method_link_species_set_id AND tag = 'high_coverage_mlss_id'" + " LEFT JOIN (method_link_species_set mlss2 JOIN method_link ml2 USING (method_link_id)) ON value = mlss2.method_link_species_set_id" + " WHERE ml1.type = 'EPO_LOW_COVERAGE'"; List<String[]> all_rows = DBUtils.getRowValuesList(con, sql); boolean result = true; for (String[] row : all_rows) { // Check all the potential errors if (row[1] == null) { ReportManager.problem( this, con, String.format("The MLSS ID %s is missing its 'high_coverage_mlss_id' tag", row[0])); } else if (row[2] == null) { ReportManager.problem( this, con, String.format( "The 'high_coverage_mlss_id' tag for MLSS ID %s has a NULL value", row[0])); } else if (row[3] == null) { ReportManager.problem( this, con, String.format( "The value of the 'high_coverage_mlss_id' tag for MLSS ID %s does not link to a valid MLSS ID: '%s'", row[0], row[2])); } else if (!row[4].equals("EPO")) { ReportManager.problem( this, con, String.format( "The value of the 'high_coverage_mlss_id' tag for MLSS ID %s does not link to a 'EPO' MLSS but '%s' (ID %s)", row[0], row[4], row[3])); } else { // This row is correct: everything is non-NULL, and the EPO_LOW_COVERAGE mlss is linked to a // EPO continue; } result = false; } return result; }
/** * Check the data in the assembly_exception table. Note referential integrity checks are done in * CoreForeignKeys. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); String qry = "select count(*) from gene,transcript,translation " + "where gene.biotype like '%pseudogene%'" + " and transcript.gene_id=gene.gene_id " + " and translation.transcript_id=transcript.transcript_id and gene.biotype!= 'polymorphic_pseudogene' "; if (dbre.getType() == DatabaseType .SANGER_VEGA) { // for sangervega ignore genes that do not have source havana or WU and // allow // polymorphic_pseudogene to have translations qry += " and (gene.source='havana' or gene.source='WU')"; } if (dbre.getType() == DatabaseType.SANGER_VEGA || dbre.getType() == DatabaseType.VEGA) { // Vega allows translations on translated_processed_pseudogene-s qry += " and gene.biotype != 'translated_processed_pseudogene'"; } int rows = DBUtils.getRowCount(con, qry); if (rows > 0) { result = false; ReportManager.problem( this, con, "Translation table contains " + rows + " rows for pseudogene types - should contain none"); } if (result) { ReportManager.correct(this, con, "No pseudogenes have translations"); } return result; }
@Override protected boolean runTest(DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); int numRows = DBUtils.getRowCount(con, sql_find_wrong_lengths); boolean passed = numRows == 0; if (!passed) { ReportManager.problem(this, con, error_msg + "\n"); try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql_find_wrong_lengths); if (rs != null) { while (rs.next()) { ReportManager.problem( this, con, "update seq_region set length=" + rs.getInt("len") + " where seq_region_id=" + rs.getInt("seq_region_id") + ";"); } } rs.close(); stmt.close(); } catch (Exception e) { e.printStackTrace(); } } return passed; }
/** Check that all chromomes have > 0 markers_map_locations and marker_features. */ private boolean checkAllChromosomesHaveMarkers(Connection con) { boolean result = true; // find all the chromosomes, and for each one check that it has some // markers // note a "chromosome" is assumed to be a seq_region that is: // - on the top-level co-ordinate system and // - doesn't have and _ or . in the name and // - has a seq_region name of less than 3 characters // - doesn't have a name starting with "Un" or "MT" // get top level co-ordinate system ID String sql = "SELECT coord_system_id FROM coord_system WHERE rank=1 LIMIT 1"; String s = DBUtils.getRowColumnValue(con, sql); if (s.length() == 0) { System.err.println( "Error: can't get top-level co-ordinate system for " + DBUtils.getShortDatabaseName(con)); return false; } int topLevelCSID = Integer.parseInt(s); try { // check each top-level seq_region (up to a limit) to see how many // marker_map_locations and marker features there are Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT * FROM seq_region WHERE coord_system_id=" + topLevelCSID + " AND name NOT LIKE '%\\_%' AND name NOT LIKE '%.%' AND name NOT LIKE 'Un%' AND name NOT LIKE 'MT%' AND LENGTH(name) < 3 ORDER BY name"); int numTopLevel = 0; while (rs.next() && numTopLevel++ < MAX_TOP_LEVEL) { long seqRegionID = rs.getLong("seq_region_id"); String seqRegionName = rs.getString("name"); // check marker_map_locations logger.fine("Counting marker_map_locations on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_map_location WHERE chromosome_name='" + seqRegionName + "'"; int rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no entries in marker_map_location"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_map_locations"); } // check marker_features logger.fine("Counting marker_features on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_feature WHERE seq_region_id=" + seqRegionID; rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no marker_features"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_features"); } } rs.close(); stmt.close(); if (numTopLevel == MAX_TOP_LEVEL) { logger.warning("Only checked first " + numTopLevel + " seq_regions"); } } catch (SQLException se) { se.printStackTrace(); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); // Query returns all external_db_id-object type relations // execute it and loop over each row checking for > 1 consecutive row with same ID ResultSet rs = stmt.executeQuery( "SELECT x.external_db_id, ox.ensembl_object_type, COUNT(*), e.db_name FROM object_xref ox, external_db e, xref x LEFT JOIN transcript t ON t.display_xref_id = x.xref_id WHERE x.xref_id = ox.xref_id AND e.external_db_id = x.external_db_id AND isnull(transcript_id) GROUP BY x.external_db_id, ox.ensembl_object_type"); try { long previousID = -1; String previousType = ""; while (rs != null && rs.next()) { long externalDBID = rs.getLong(1); String objectType = rs.getString(2); // int count = rs.getInt(3); String externalDBName = rs.getString(4); if (externalDBID == previousID) { ReportManager.problem( this, con, "External DB with ID " + externalDBID + " (" + externalDBName + ") is associated with " + objectType + " as well as " + previousType); result = false; } previousType = objectType; previousID = externalDBID; } // while rs } finally { DBUtils.closeQuietly(rs); } stmt.close(); } catch (SQLException e) { e.printStackTrace(); } if (result) { ReportManager.correct( this, con, "All external dbs are only associated with one Ensembl object type"); } return result; } // run
public Connection getDatabaseConnection(String databaseName) throws SQLException { return DBUtils.openConnection(driver, databaseURL + databaseName, user, pass); }
public Connection getServerConnection() throws SQLException { return DBUtils.openConnection(driver, databaseURL, user, pass); }