/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // -------------------------------- // MGI - dbprimary_acc should have MGI: prefix int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='MGI' AND x.dbprimary_acc NOT LIKE 'MGI:%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " MGI xrefs do not have MGI: prefixes in the dbprimary_acc column"); result = false; } else { ReportManager.correct(this, con, "All MGI xrefs have the correct prefix"); } // -------------------------------- // GO - dbprimary_acc and display_label should have GO: prefix rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='GO' AND (x.dbprimary_acc NOT LIKE 'GO:%' OR x.display_label NOT LIKE 'GO:%')"); if (rows > 0) { ReportManager.problem( this, con, rows + " GO xrefs do not have GO: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All GO xrefs have the correct prefix"); } // -------------------------------- // ZFIN - dbprimary_acc should begin with ZDB rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='ZFIN_ID' AND x.dbprimary_acc NOT LIKE 'ZDB%'"); if (rows > 0) { ReportManager.problem( this, con, rows + " ZFIN xrefs do not have ZDB: prefixes in the dbprimary_acc and/or display_label columns"); result = false; } else { ReportManager.correct(this, con, "All ZFIN xrefs have the correct prefix"); } return result; } // run
@Override protected boolean runTest(DatabaseRegistryEntry dbre) { boolean result = true; for (Map.Entry<String, String[]> method_tags : getMandatoryTags().entrySet()) { Vector<String> quoted_tags = new Vector<String>(); for (String t : method_tags.getValue()) { quoted_tags.add(String.format("'%s'", t)); } List<String> mlsss = getTemplate(dbre) .queryForDefaultObjectList( String.format( QUERY, StringUtils.join(quoted_tags, ","), method_tags.getKey(), method_tags.getValue().length), String.class); if (mlsss.size() > 0) { ReportManager.problem( this, dbre.getConnection(), "MLSSs for " + method_tags.getKey() + " found with no statistics: " + StringUtils.join(mlsss, ",")); result = false; } else { ReportManager.correct(this, dbre.getConnection(), "PASSED "); } } return result; }
private boolean checkFeaturesAndMapWeights(Connection con) { boolean result = true; int rowCount = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM marker_feature"); if (rowCount == 0) { ReportManager.problem( this, con, "No marker features in database even though markers are present"); result = false; } int badWeightCount = DBUtils.getRowCount( con, "SELECT marker_id, COUNT(*) AS correct, map_weight FROM marker_feature GROUP BY marker_id HAVING map_weight != correct"); if (badWeightCount > 0) { ReportManager.problem( this, con, badWeightCount + " marker features have not been assigned correct map weights"); result = false; } if (result) { ReportManager.correct(this, con, "Marker features appear to be ok"); } return result; } // checkFeaturesAndMapWeights
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM external_db WHERE db_display_name IS NULL OR db_display_name LIKE ' %'"); if (rows > 0) { ReportManager.problem( this, con, rows + " rows in external_db have null or blank db_display_name - this will mean their label is missing on the web page"); result = false; } else { ReportManager.correct(this, con, "No blank db_display_name fields in external_db"); } return result; } // run
/** Check that all priorities are greater than a certain threshold. */ private boolean checkMarkerPriorities(Connection con) { boolean result = true; int count = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM marker WHERE priority > " + MARKER_PRIORITY_THRESHOLD); if (count == 0) { ReportManager.problem( this, con, " No marker features have priorities greater than the threshold (" + MARKER_PRIORITY_THRESHOLD + ")"); result = false; } else { ReportManager.correct( this, con, "Some marker features have priorities greater than " + MARKER_PRIORITY_THRESHOLD); } return result; }
/** * Test various things about ditag features. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount( con, "SELECT COUNT(*) FROM identity_xref WHERE cigar_line REGEXP '^[MDI]'"); if (rows > 0) { ReportManager.problem( this, con, rows + " cigar lines in identity_xref appear to be in the wrong format (number first)"); result = false; } else { ReportManager.correct( this, con, "All cigar lines in identity_xref are in the correct format"); } return result; }
private boolean checkCodingCountAttributes(final DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); String code = (dbre.getType() == DatabaseType.SANGER_VEGA) ? "KnwnPCCount" : "coding_cnt"; SqlTemplate t = DBUtils.getSqlTemplate(dbre); String sql = "select distinct g.seq_region_id from gene g where g.biotype = ? and g.seq_region_id not in (select distinct g.seq_region_id from gene g, seq_region_attrib sa, attrib_type at where g.seq_region_id = sa.seq_region_id and sa.attrib_type_id = at.attrib_type_id and at.code in (?,?))"; List<String> toplevel = t.queryForDefaultObjectList(sql, String.class, "protein_coding", "LRG", "non_ref"); sql = "select distinct g.seq_region_id from gene g, seq_region_attrib sa, attrib_type at where g.seq_region_id = sa.seq_region_id and sa.attrib_type_id = at.attrib_type_id and code =? "; List<String> known = t.queryForDefaultObjectList(sql, String.class, code); Set<String> missing = new HashSet<String>(toplevel); missing.removeAll(known); if (missing.isEmpty()) { ReportManager.correct( this, con, "All seq_regions with protein_coding genes have a coding_cnt attribute associated with them"); } else { String msg = String.format( "%s regions with protein_coding genes do not have the coding_cnt attribute associated", missing.size()); ReportManager.problem(this, con, msg); result = false; } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // list of transcript analysis logic_names which are allowed to not have supporting features String allowed = "'" + StringUtils.join(allowedNoSupporting, "','") + "'"; String sql = String.format( "SELECT COUNT(*),t.analysis_id FROM transcript t LEFT JOIN transcript_supporting_feature tsf ON t.transcript_id = tsf.transcript_id JOIN analysis a ON a.analysis_id=t.analysis_id WHERE a.analysis_id=t.analysis_id and tsf.transcript_id IS NULL AND a.logic_name NOT IN (%s) group by t.analysis_id", allowed); int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " transcripts which should have transcript_supporting_features do not have them\nUseful SQL: " + sql); result = false; } else { ReportManager.correct( this, con, "All transcripts that require supporting features have them"); } return result; } // run
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); if (tableHasRows(con, "method_link_species_set")) { /* Check if have both BLASTZ_NET and LASTZ_NET entries for the same species set */ int numOfBLASTZ_LASTZSpeciesSets = DBUtils.getRowCount( con, "SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); if (numOfBLASTZ_LASTZSpeciesSets > 0) { ReportManager.problem( this, con, "FAILED method_link_species_set table contains " + numOfBLASTZ_LASTZSpeciesSets + " entries with a BLASTZ_NET and LASTZ_NET entry for the same species_set"); ReportManager.problem( this, con, "USEFUL SQL: SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1"); result = false; } // Everything below will be ignored on the master database if (isMasterDB(dbre.getConnection())) { return result; } /* Check method_link_species_set <-> synteny_region */ /* All method_link for syntenies must have an internal ID between 101 and 199 */ result &= checkForOrphansWithConstraint( con, "method_link_species_set", "method_link_species_set_id", "synteny_region", "method_link_species_set_id", "method_link_id >= 101 and method_link_id < 200"); result &= checkForOrphans( con, "synteny_region", "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } else { ReportManager.correct( this, con, "NO ENTRIES in method_link_species_set table, so nothing to test IGNORED"); } return result; }
/** Check that the schema_version in the meta table is present and matches the database name. */ private boolean checkSchemaVersionDBName(DatabaseRegistryEntry dbre) { boolean result = true; // get version from database name String dbNameVersion = dbre.getSchemaVersion(); logger.finest("Schema version from database name: " + dbNameVersion); // get version from meta table Connection con = dbre.getConnection(); if (dbNameVersion == null) { ReportManager.warning(this, con, "Can't deduce schema version from database name."); return false; } String schemaVersion = DBUtils.getRowColumnValue( con, "SELECT meta_value FROM meta WHERE meta_key='schema_version'"); logger.finest("schema_version from meta table: " + schemaVersion); if (schemaVersion == null || schemaVersion.length() == 0) { ReportManager.problem(this, con, "No schema_version entry in meta table"); return false; } else if (!schemaVersion.matches("[0-9]+")) { ReportManager.problem(this, con, "Meta schema_version " + schemaVersion + " is not numeric"); return false; } else if (!dbNameVersion.equals(schemaVersion) && !isSangerVega) { // do // not // report // for // sangervega ReportManager.problem( this, con, "Meta schema_version " + schemaVersion + " does not match version inferred from database name (" + dbNameVersion + ")"); return false; } else { ReportManager.correct( this, con, "schema_version " + schemaVersion + " matches database name version " + dbNameVersion); } return result; }
private boolean checkTableExists(Connection con) { boolean result = true; if (!DBUtils.checkTableExists(con, "meta")) { result = false; ReportManager.problem(this, con, "Meta table not present"); } else { ReportManager.correct(this, con, "Meta table present"); } return result; }
private boolean tableHasRows(Connection con) { boolean result = true; int rows = DBUtils.countRowsInTable(con, "meta"); if (rows == 0) { result = false; ReportManager.problem(this, con, "meta table is empty"); } else { ReportManager.correct(this, con, "meta table has data"); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map tableToLetter = new HashMap(); tableToLetter.put("gene", "G"); tableToLetter.put("transcript", "T"); tableToLetter.put("translation", "P"); Iterator it = tableToLetter.keySet().iterator(); while (it.hasNext()) { String table = (String) it.next(); String letter = (String) tableToLetter.get(table); String regexp = "EST" + letter + "[0-9]+"; String sql = "SELECT COUNT(*) FROM " + table + " x, analysis a WHERE a.analysis_id=x.analysis_id " + "AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP '" + regexp + "'"; if (table.equals("translation")) { // need extra join to transcript table sql = "SELECT COUNT(*) FROM translation x, transcript t, analysis a WHERE a.analysis_id=t.analysis_id AND x.transcript_id=t.transcript_id AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP 'ESTP[0-9]+'"; } int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem( this, con, rows + " " + table + " stable IDs do not contain EST" + letter); result = false; } else { ReportManager.correct(this, con, "All stable IDs contain EST" + letter); } } return result; } // run
private boolean checkKeysPresent(Connection con) { boolean result = true; // check that certain keys exist String[] metaKeys = { "assembly.default", "assembly.name", "assembly.date", "assembly.coverage_depth", "species.classification", "species.common_name", "species.display_name", "species.production_name", "species.scientific_name", "species.stable_id_prefix", "species.taxonomy_id", "species.url", "repeat.analysis", }; for (String metaKey : metaKeys) { int rows = metaKeyCount(con, metaKey); if (rows == 0) { result = false; ReportManager.problem(this, con, "No entry in meta table for " + metaKey); } } // check that there are some species.alias entries int MIN_ALIASES = 3; int rows = metaKeyCount(con, "species.alias"); if (rows < MIN_ALIASES) { result = false; String msg = String.format("Only %d species.alias entries, should be at least %d", rows, MIN_ALIASES); ReportManager.problem(this, con, msg); } else { ReportManager.correct(this, con, rows + " species.alias entries present"); } return result; }
/** * Check the data in the assembly_exception table. Note referential integrity checks are done in * CoreForeignKeys. * * @param dbre The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); String qry = "select count(*) from gene,transcript,translation " + "where gene.biotype like '%pseudogene%'" + " and transcript.gene_id=gene.gene_id " + " and translation.transcript_id=transcript.transcript_id and gene.biotype!= 'polymorphic_pseudogene' "; if (dbre.getType() == DatabaseType .SANGER_VEGA) { // for sangervega ignore genes that do not have source havana or WU and // allow // polymorphic_pseudogene to have translations qry += " and (gene.source='havana' or gene.source='WU')"; } if (dbre.getType() == DatabaseType.SANGER_VEGA || dbre.getType() == DatabaseType.VEGA) { // Vega allows translations on translated_processed_pseudogene-s qry += " and gene.biotype != 'translated_processed_pseudogene'"; } int rows = DBUtils.getRowCount(con, qry); if (rows > 0) { result = false; ReportManager.problem( this, con, "Translation table contains " + rows + " rows for pseudogene types - should contain none"); } if (result) { ReportManager.correct(this, con, "No pseudogenes have translations"); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); // if this query returns anything then something is wrong ResultSet rs = stmt.executeQuery( "SELECT sr.seq_region_id, sr.name FROM seq_region sr, dna d WHERE sr.seq_region_id=d.seq_region_id AND d.sequence REGEXP '[^ATGCN]'"); while (rs != null && rs.next()) { result = false; ReportManager.problem( this, con, String.format( "%s (seq_region_id %s) has non GTACN sequence", rs.getString(2), rs.getLong(1))); } // while rs stmt.close(); } catch (SQLException e) { e.printStackTrace(); } if (result == true) { ReportManager.correct(this, con, "No entries in dna table have non-GTACN bases."); } return result; } // run
/** Check for values containing the text ARRAY(. */ private boolean checkArrays(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT meta_key, meta_value FROM meta WHERE meta_value LIKE 'ARRAY(%'"); while (rs.next()) { ReportManager.problem( this, con, "Meta table entry for key " + rs.getString(1) + " has value " + rs.getString(2) + " which is probably incorrect"); result = false; } } catch (Exception e) { e.printStackTrace(); } if (result) { ReportManager.correct(this, con, "No duplicates in the meta table"); } return result; }
/** Check for duplicate entries in the meta table. */ private boolean checkDuplicates(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT meta_key, meta_value FROM meta GROUP BY meta_key, meta_value, species_id HAVING COUNT(*)>1"); while (rs.next()) { ReportManager.problem( this, con, "Key/value pair " + rs.getString(1) + "/" + rs.getString(2) + " appears more than once in the meta table"); result = false; } } catch (Exception e) { e.printStackTrace(); } if (result) { ReportManager.correct(this, con, "No duplicates in the meta table"); } return result; }
protected boolean runTestForSpecies(DatabaseRegistryEntry dbre, int speciesId) { String productionName = fetchSingleMetaValueFor(sqlTemplateTestDb, speciesId, "species.production_name"); String assemblyDefault = fetchSingleMetaValueFor(sqlTemplateTestDb, speciesId, "assembly.default"); String genebuildStartDate = fetchSingleMetaValueFor(sqlTemplateTestDb, speciesId, "genebuild.start_date"); if (!speciesConfiguredForDnaCompara(productionName)) { getLogger() .info( "Skipping species " + productionName + ", because it is not linked to any method involving DNA comparisons in the compara master."); return true; } getLogger().info("Testing species " + productionName); boolean hasEntryInMasterDb = fetchHasGenomeDbId(productionName, assemblyDefault, genebuildStartDate); if (!hasEntryInMasterDb) { ReportManager.correct( this, testDbConn, "Species " + productionName + " has no genome_db entry in the master database."); return true; } int genomeDbId = fetchGenomeDbId(productionName, assemblyDefault, genebuildStartDate); int toplevelSeqRegionCount = fetchToplevelSeqRegionCount(); int dnaFragRowCountFor = fetchDnaFragRowCountFor(genomeDbId); boolean sequenceCountsOk = toplevelSeqRegionCount == dnaFragRowCountFor; if (sequenceCountsOk) { ReportManager.correct( this, testDbConn, "Sequence counts for this " + "species are " + toplevelSeqRegionCount + " both in " + "core and compara master database."); } else { ReportManager.problem( this, testDbConn, "Sequence counts for this " + "species are " + toplevelSeqRegionCount + " toplevel " + "sequence regions in the core database and " + dnaFragRowCountFor + " dna frags in the compara " + "master database. The counts should be equal.\n" + "This can happen, if the assembly has been changed, " + "but the assembly.default entry in the meta table has " + "not been changed. In that case the dna_frag table in " + "the compara master database is not updated " + "by the populate_mdb.pl script."); } boolean allToplevelSeqRegionInDnaFragTable = assertToplevelSeqRegionInDnaFragTable(genomeDbId); return sequenceCountsOk && allToplevelSeqRegionInDnaFragTable; }
/** Check that all chromomes have > 0 markers_map_locations and marker_features. */ private boolean checkAllChromosomesHaveMarkers(Connection con) { boolean result = true; // find all the chromosomes, and for each one check that it has some // markers // note a "chromosome" is assumed to be a seq_region that is: // - on the top-level co-ordinate system and // - doesn't have and _ or . in the name and // - has a seq_region name of less than 3 characters // - doesn't have a name starting with "Un" or "MT" // get top level co-ordinate system ID String sql = "SELECT coord_system_id FROM coord_system WHERE rank=1 LIMIT 1"; String s = DBUtils.getRowColumnValue(con, sql); if (s.length() == 0) { System.err.println( "Error: can't get top-level co-ordinate system for " + DBUtils.getShortDatabaseName(con)); return false; } int topLevelCSID = Integer.parseInt(s); try { // check each top-level seq_region (up to a limit) to see how many // marker_map_locations and marker features there are Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "SELECT * FROM seq_region WHERE coord_system_id=" + topLevelCSID + " AND name NOT LIKE '%\\_%' AND name NOT LIKE '%.%' AND name NOT LIKE 'Un%' AND name NOT LIKE 'MT%' AND LENGTH(name) < 3 ORDER BY name"); int numTopLevel = 0; while (rs.next() && numTopLevel++ < MAX_TOP_LEVEL) { long seqRegionID = rs.getLong("seq_region_id"); String seqRegionName = rs.getString("name"); // check marker_map_locations logger.fine("Counting marker_map_locations on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_map_location WHERE chromosome_name='" + seqRegionName + "'"; int rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no entries in marker_map_location"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_map_locations"); } // check marker_features logger.fine("Counting marker_features on chromosome " + seqRegionName); sql = "SELECT COUNT(*) FROM marker_feature WHERE seq_region_id=" + seqRegionID; rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem( this, con, "Chromosome " + seqRegionName + " (seq_region_id " + seqRegionID + ") has no marker_features"); result = false; } else { ReportManager.correct( this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_features"); } } rs.close(); stmt.close(); if (numTopLevel == MAX_TOP_LEVEL) { logger.warning("Only checked first " + numTopLevel + " seq_regions"); } } catch (SQLException se) { se.printStackTrace(); } return result; }
/** * Run the test. * * @param dbre The database to use. * @return true if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); // Query returns all external_db_id-object type relations // execute it and loop over each row checking for > 1 consecutive row with same ID ResultSet rs = stmt.executeQuery( "SELECT x.external_db_id, ox.ensembl_object_type, COUNT(*), e.db_name FROM object_xref ox, external_db e, xref x LEFT JOIN transcript t ON t.display_xref_id = x.xref_id WHERE x.xref_id = ox.xref_id AND e.external_db_id = x.external_db_id AND isnull(transcript_id) GROUP BY x.external_db_id, ox.ensembl_object_type"); try { long previousID = -1; String previousType = ""; while (rs != null && rs.next()) { long externalDBID = rs.getLong(1); String objectType = rs.getString(2); // int count = rs.getInt(3); String externalDBName = rs.getString(4); if (externalDBID == previousID) { ReportManager.problem( this, con, "External DB with ID " + externalDBID + " (" + externalDBName + ") is associated with " + objectType + " as well as " + previousType); result = false; } previousType = objectType; previousID = externalDBID; } // while rs } finally { DBUtils.closeQuietly(rs); } stmt.close(); } catch (SQLException e) { e.printStackTrace(); } if (result) { ReportManager.correct( this, con, "All external dbs are only associated with one Ensembl object type"); } return result; } // run