private boolean checkFeaturesAndMapWeights(Connection con) { boolean result = true; int rowCount = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM marker_feature"); if (rowCount == 0) { ReportManager.problem( this, con, "No marker features in database even though markers are present"); result = false; } int badWeightCount = DBUtils.getRowCount( con, "SELECT marker_id, COUNT(*) AS correct, map_weight FROM marker_feature GROUP BY marker_id HAVING map_weight != correct"); if (badWeightCount > 0) { ReportManager.problem( this, con, badWeightCount + " marker features have not been assigned correct map weights"); result = false; } if (result) { ReportManager.correct(this, con, "Marker features appear to be ok"); } return result; } // checkFeaturesAndMapWeights
public boolean checkTreeStatsArePresent(final DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); if (!tableHasRows(con, "species_tree_root")) { return true; } int n_tags_root = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'root\\_%'"); int n_tags_genes = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'nb%\\_genes%'"); boolean result = true; if (n_tags_root == 0) { ReportManager.problem( this, con, "There are no species_tree_node_tags to describe properties of the root nodes"); result = false; } else if (n_tags_genes == 0) { ReportManager.problem( this, con, "There are no species_tree_node_tags to summarize the gene counts"); result = false; } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // -------------------------------- // MGI - dbprimary_acc should have MGI: prefix int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='MGI' AND x.dbprimary_acc NOT LIKE 'MGI:%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " MGI xrefs do not have MGI: prefixes in the dbprimary_acc column"); result = false; } else { ReportManager.correct(this, con, "All MGI xrefs have the correct prefix"); } // -------------------------------- // GO - dbprimary_acc and display_label should have GO: prefix rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='GO' AND (x.dbprimary_acc NOT LIKE 'GO:%' OR x.display_label NOT LIKE 'GO:%')"); if (rows > 0) { ReportManager.problem( this, con, rows + " GO xrefs do not have GO: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All GO xrefs have the correct prefix"); } // -------------------------------- // ZFIN - dbprimary_acc should begin with ZDB rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='ZFIN_ID' AND x.dbprimary_acc NOT LIKE 'ZDB%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " ZFIN xrefs do not have ZDB: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All ZFIN xrefs have the correct prefix"); } return result; } // run
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db WHERE db_display_name IS NULL OR db_display_name LIKE ' %'"); if (rows > 0) { ReportManager.problem( this, con, rows + " rows in external_db have null or blank db_display_name - this will mean their label is missing on the web page"); result = false; } else { ReportManager.correct(this, con, "No blank db_display_name fields in external_db"); } return result; } // run
/** * Test various things about ditag features. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM identity_xref WHERE cigar_line REGEXP '^[MDI]'"); if (rows > 0) { ReportManager.problem( this, con, rows + " cigar lines in identity_xref appear to be in the wrong format (number first)"); result = false; } else { ReportManager.correct( this, con, "All cigar lines in identity_xref are in the correct format"); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // list of transcript analysis logic_names which are allowed to not have supporting features String allowed = "'" + StringUtils.join(allowedNoSupporting, "','") + "'"; String sql = String.format( "SELECT COUNT(*),t.analysis_id FROM transcript t LEFT JOIN transcript_supporting_feature tsf ON t.transcript_id = tsf.transcript_id JOIN analysis a ON a.analysis_id=t.analysis_id WHERE a.analysis_id=t.analysis_id and tsf.transcript_id IS NULL AND a.logic_name NOT IN (%s) group by t.analysis_id", allowed); int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " transcripts which should have transcript_supporting_features do not have them\nUseful SQL: " + sql); result = false; } else { ReportManager.correct( this, con, "All transcripts that require supporting features have them"); } return result; } // run
/** Check that all priorities are greater than a certain threshold. */ private boolean checkMarkerPriorities(Connection con) { boolean result = true; int count = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM marker WHERE priority > " + MARKER_PRIORITY_THRESHOLD); if (count == 0) { ReportManager.problem( this, con, " No marker features have priorities greater than the threshold (" + MARKER_PRIORITY_THRESHOLD + ")"); result = false; } else { ReportManager.correct( this, con, "Some marker features have priorities greater than " + MARKER_PRIORITY_THRESHOLD); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); if (tableHasRows(con, "method_link_species_set")) { /* Check if have both BLASTZ_NET and LASTZ_NET entries for the same species set */ int numOfBLASTZ_LASTZSpeciesSets = DBUtils.getRowCount( con, "SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); if (numOfBLASTZ_LASTZSpeciesSets > 0) { ReportManager.problem( this, con, "FAILED method_link_species_set table contains " + numOfBLASTZ_LASTZSpeciesSets + " entries with a BLASTZ_NET and LASTZ_NET entry for the same species_set"); ReportManager.problem( this, con, "USEFUL SQL: SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); result = false; } // Everything below will be ignored on the master database if (isMasterDB(dbre.getConnection())) { return result; } /* Check method_link_species_set <-> synteny_region */ /* All method_link for syntenies must have an internal ID between 101 and 199 */ result &= checkForOrphansWithConstraint( con, "method_link_species_set", "method_link_species_set_id", "synteny_region", "method_link_species_set_id", "method_link_id >= 101 and method_link_id < 200"); result &= checkForOrphans( con, "synteny_region", "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } else { ReportManager.correct( this, con, "NO ENTRIES in method_link_species_set table, so nothing to test IGNORED"); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map tableToLetter = new HashMap(); tableToLetter.put("gene", "G"); tableToLetter.put("transcript", "T"); tableToLetter.put("translation", "P"); Iterator it = tableToLetter.keySet().iterator(); while (it.hasNext()) { String table = (String) it.next(); String letter = (String) tableToLetter.get(table); String regexp = "EST" + letter + "[0-9]+"; String sql = "SELECT COUNT(*) FROM " + table + " x, analysis a WHERE a.analysis_id=x.analysis_id " + "AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP '" + regexp + "'"; if (table.equals("translation")) { // need extra join to transcript table sql = "SELECT COUNT(*) FROM translation x, transcript t, analysis a WHERE a.analysis_id=t.analysis_id AND x.transcript_id=t.transcript_id AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP 'ESTP[0-9]+'"; } int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " " + table + " stable IDs do not contain EST" + letter); result = false; } else { ReportManager.correct(this, con, "All stable IDs contain EST" + letter); } } return result; } // run
/** * Check the data in the assembly_exception table. Note referential integrity checks are done in * CoreForeignKeys. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); String qry = "select count(*) from gene,transcript,translation " + "where gene.biotype like '%pseudogene%'" + " and transcript.gene_id=gene.gene_id " + " and translation.transcript_id=transcript.transcript_id and gene.biotype!= 'polymorphic_pseudogene' "; if (dbre.getType() == DatabaseType .SANGER_VEGA) { // for sangervega ignore genes that do not have source havana or WU and // allow // polymorphic_pseudogene to have translations qry += " and (gene.source='havana' or gene.source='WU')"; } if (dbre.getType() == DatabaseType.SANGER_VEGA || dbre.getType() == DatabaseType.VEGA) { // Vega allows translations on translated_processed_pseudogene-s qry += " and gene.biotype != 'translated_processed_pseudogene'"; } int rows = DBUtils.getRowCount(con, qry); if (rows > 0) { result = false; ReportManager.problem( this, con, "Translation table contains " + rows + " rows for pseudogene types - should contain none"); } if (result) { ReportManager.correct(this, con, "No pseudogenes have translations"); } return result; }
@Override protected boolean runTest(DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); int numRows = DBUtils.getRowCount(con, sql_find_wrong_lengths); boolean passed = numRows == 0; if (!passed) { ReportManager.problem(this, con, error_msg + "\n"); try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql_find_wrong_lengths); if (rs != null) { while (rs.next()) { ReportManager.problem( this, con, "update seq_region set length=" + rs.getInt("len") + " where seq_region_id=" + rs.getInt("seq_region_id") + ";"); } } rs.close(); stmt.close(); } catch (Exception e) { e.printStackTrace(); } } return passed; }
/** Check that all chromomes have > 0 markers_map_locations and marker_features. */ private boolean checkAllChromosomesHaveMarkers(Connection con) { boolean result = true; // find all the chromosomes, and for each one check that it has some // markers // note a "chromosome" is assumed to be a seq_region that is: // - on the top-level co-ordinate system and // - doesn't have and _ or . in the name and // - has a seq_region name of less than 3 characters // - doesn't have a name starting with "Un" or "MT" // get top level co-ordinate system ID String sql = "SELECT coord_system_id FROM coord_system WHERE rank=1 LIMIT 1"; String s = DBUtils.getRowColumnValue(con, sql); if (s.length() == 0) { System.err.println( "Error: can't get top-level co-ordinate system for " + DBUtils.getShortDatabaseName(con)); return false; } int topLevelCSID = Integer.parseInt(s); try { // check each top-level seq_region (up to a limit) to see how many // marker_map_locations and marker features there are Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT * FROM seq_region WHERE coord_system_id=" + topLevelCSID + " AND name NOT LIKE '%\\_%' AND name NOT LIKE '%.%' AND name NOT LIKE 'Un%' AND name NOT LIKE 'MT%' AND LENGTH(name) < 3 ORDER BY name"); int numTopLevel = 0; while (rs.next() && numTopLevel++ < MAX_TOP_LEVEL) { long seqRegionID = rs.getLong("seq_region_id"); String seqRegionName = rs.getString("name"); // check marker_map_locations logger.fine("Counting marker_map_locations on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_map_location WHERE chromosome_name='" + seqRegionName + "'"; int rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no entries in marker_map_location"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_map_locations"); } // check marker_features logger.fine("Counting marker_features on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_feature WHERE seq_region_id=" + seqRegionID; rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no marker_features"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_features"); } } rs.close(); stmt.close(); if (numTopLevel == MAX_TOP_LEVEL) { logger.warning("Only checked first " + numTopLevel + " seq_regions"); } } catch (SQLException se) { se.printStackTrace(); } return result; }