Пример #1
0
  /**
   * Checks that marker_feature is populated and that each marker's map_weight matches the number
   * of marker_feature rows grouped under that marker_id.
   *
   * <p>Both checks are always executed; each failure is reported through {@code
   * ReportManager.problem}, and a single success message is emitted only when neither failed.
   *
   * @param con The database connection to query.
   * @return true if marker features exist and no map weight mismatch was reported.
   */
  private boolean checkFeaturesAndMapWeights(Connection con) {

    // First check: the table must not be empty.
    final int featureCount = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM marker_feature");
    final boolean hasFeatures = featureCount != 0;
    if (!hasFeatures) {
      ReportManager.problem(
          this, con, "No marker features in database even though markers are present");
    }

    // Second check: map_weight is compared against the per-marker feature count.
    final String weightSql =
        "SELECT marker_id, COUNT(*) AS correct, map_weight FROM marker_feature GROUP BY marker_id HAVING map_weight != correct";
    final int mismatches = DBUtils.getRowCount(con, weightSql);
    final boolean weightsOk = mismatches <= 0;
    if (!weightsOk) {
      ReportManager.problem(
          this, con, mismatches + " marker features have not been assigned correct map weights");
    }

    final boolean passed = hasFeatures && weightsOk;
    if (passed) {
      ReportManager.correct(this, con, "Marker features appear to be ok");
    }
    return passed;
  } // checkFeaturesAndMapWeights
  /**
   * Verifies that species_tree_node_tag holds statistics tags whenever species_tree_root has rows.
   *
   * <p>Two families of tags are counted: tags matching {@code root\_%} and tags matching {@code
   * nb%\_genes%}. Only the first missing family is reported; a missing gene-count family is not
   * reported when the root family is missing as well.
   *
   * @param dbre The database to use.
   * @return true if species_tree_root is empty or both tag families are present.
   */
  public boolean checkTreeStatsArePresent(final DatabaseRegistryEntry dbre) {
    final Connection con = dbre.getConnection();

    // Without any species tree there is nothing to verify.
    if (!tableHasRows(con, "species_tree_root")) {
      return true;
    }

    // Both counts are fetched up front, before either is inspected.
    final int rootTagCount =
        DBUtils.getRowCount(
            con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'root\\_%'");
    final int geneTagCount =
        DBUtils.getRowCount(
            con, "SELECT COUNT(*) FROM species_tree_node_tag WHERE tag LIKE 'nb%\\_genes%'");

    if (rootTagCount == 0) {
      ReportManager.problem(
          this,
          con,
          "There are no species_tree_node_tags to describe properties of the root nodes");
      return false;
    }

    if (geneTagCount == 0) {
      ReportManager.problem(
          this, con, "There are no species_tree_node_tags to summarize the gene counts");
      return false;
    }

    return true;
  }
  /**
   * Checks that xrefs from selected external databases carry the expected accession prefix.
   *
   * <ul>
   *   <li>MGI: dbprimary_acc must start with {@code MGI:}
   *   <li>GO: both dbprimary_acc and display_label must start with {@code GO:}
   *   <li>ZFIN_ID: dbprimary_acc must start with {@code ZDB}
   * </ul>
   *
   * <p>All three checks are always executed and each one reports its own outcome.
   *
   * @param dbre The database to use.
   * @return true if none of the three checks found an offending xref.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    boolean result = true;

    Connection con = dbre.getConnection();

    // --------------------------------
    // MGI - dbprimary_acc should have MGI: prefix
    int rows =
        DBUtils.getRowCount(
            con,
            "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='MGI' AND x.dbprimary_acc NOT LIKE 'MGI:%'");

    if (rows > 0) {
      ReportManager.problem(
          this, con, rows + " MGI xrefs do not have MGI: prefixes in the dbprimary_acc column");
      result = false;
    } else {
      ReportManager.correct(this, con, "All MGI xrefs have the correct prefix");
    }

    // --------------------------------
    // GO - dbprimary_acc and display_label should have GO: prefix
    rows =
        DBUtils.getRowCount(
            con,
            "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='GO' AND (x.dbprimary_acc NOT LIKE 'GO:%' OR x.display_label NOT LIKE 'GO:%')");

    if (rows > 0) {
      ReportManager.problem(
          this,
          con,
          rows
              + " GO xrefs do not have GO: prefixes in the dbprimary_acc and/or display_label columns");
      result = false;
    } else {
      ReportManager.correct(this, con, "All GO xrefs have the correct prefix");
    }

    // --------------------------------
    // ZFIN - dbprimary_acc should begin with ZDB
    rows =
        DBUtils.getRowCount(
            con,
            "SELECT COUNT(*) FROM external_db e, xref x WHERE x.external_db_id=e.external_db_id AND e.db_name='ZFIN_ID' AND x.dbprimary_acc NOT LIKE 'ZDB%'");

    if (rows > 0) {
      // The message mirrors the query: only dbprimary_acc is tested, and the expected prefix is
      // "ZDB" without a colon.
      ReportManager.problem(
          this, con, rows + " ZFIN xrefs do not have ZDB prefixes in the dbprimary_acc column");
      result = false;
    } else {
      ReportManager.correct(this, con, "All ZFIN xrefs have the correct prefix");
    }

    return result;
  } // run
  /**
   * Checks that external_db has no row whose db_display_name is NULL or begins with a space.
   *
   * @param dbre The database to use.
   * @return true if no such row was counted.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    final Connection con = dbre.getConnection();

    final String sql =
        "SELECT COUNT(*) FROM external_db WHERE db_display_name IS NULL OR db_display_name LIKE ' %'";
    final int badRows = DBUtils.getRowCount(con, sql);

    final boolean passed = badRows <= 0;
    if (passed) {
      ReportManager.correct(this, con, "No blank db_display_name fields in external_db");
    } else {
      ReportManager.problem(
          this,
          con,
          badRows
              + " rows in external_db have null or blank db_display_name - this will mean their label is missing on the web page");
    }

    return passed;
  } // run
  /**
   * Checks the format of cigar lines stored in identity_xref.
   *
   * <p>A cigar_line that starts with one of the letters M, D or I is counted as malformed.
   *
   * @param dbre The database to use.
   * @return true if no malformed cigar line was counted.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    final Connection con = dbre.getConnection();

    final int malformed =
        DBUtils.getRowCount(
            con, "SELECT COUNT(*) FROM identity_xref WHERE cigar_line REGEXP '^[MDI]'");

    final boolean passed = malformed <= 0;
    if (passed) {
      ReportManager.correct(
          this, con, "All cigar lines in identity_xref are in the correct format");
    } else {
      ReportManager.problem(
          this,
          con,
          malformed
              + " cigar lines in identity_xref appear to be in the wrong format (number first)");
    }

    return passed;
  }
  /**
   * Checks that transcripts have transcript_supporting_feature rows, except for transcripts whose
   * analysis logic_name is listed in {@code allowedNoSupporting}.
   *
   * <p>The query groups by analysis_id; whatever {@code DBUtils.getRowCount} returns for it is
   * reported as the number of offending transcripts.
   *
   * @param dbre The database to use.
   * @return true if the count was not positive.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    final Connection con = dbre.getConnection();

    // Quoted, comma-separated logic_names that are exempt from the requirement.
    final String exemptLogicNames = "'" + StringUtils.join(allowedNoSupporting, "','") + "'";

    final String template =
        "SELECT COUNT(*),t.analysis_id FROM transcript t LEFT JOIN transcript_supporting_feature tsf ON t.transcript_id = tsf.transcript_id JOIN analysis a ON a.analysis_id=t.analysis_id WHERE a.analysis_id=t.analysis_id and tsf.transcript_id IS NULL AND a.logic_name NOT IN (%s) group by t.analysis_id";
    final String sql = String.format(template, exemptLogicNames);

    final int missing = DBUtils.getRowCount(con, sql);

    if (missing <= 0) {
      ReportManager.correct(
          this, con, "All transcripts that require supporting features have them");
      return true;
    }

    // The SQL is appended to the report so that it can be re-run by hand.
    ReportManager.problem(
        this,
        con,
        missing
            + " transcripts which should have transcript_supporting_features do not have them\nUseful SQL: "
            + sql);
    return false;
  } // run
Пример #7
0
  /**
   * Check that at least one marker has a priority greater than {@code
   * MARKER_PRIORITY_THRESHOLD}.
   *
   * @param con The database connection to query.
   * @return true if the count of markers above the threshold is non-zero.
   */
  private boolean checkMarkerPriorities(Connection con) {

    final String sql = "SELECT COUNT(*) FROM marker WHERE priority > " + MARKER_PRIORITY_THRESHOLD;
    final int aboveThreshold = DBUtils.getRowCount(con, sql);

    if (aboveThreshold != 0) {
      ReportManager.correct(
          this,
          con,
          "Some marker features have priorities greater than " + MARKER_PRIORITY_THRESHOLD);
      return true;
    }

    ReportManager.problem(
        this,
        con,
        " No marker features have priorities greater than the threshold ("
            + MARKER_PRIORITY_THRESHOLD
            + ")");
    return false;
  }
  /**
   * Checks the consistency of method_link_species_set.
   *
   * <ol>
   *   <li>If the table is empty, the test is reported as ignored and passes.
   *   <li>No species set may have more than one entry among the BLASTZ_NET and LASTZ_NET types.
   *   <li>Unless this is the master database, method_link_species_set and synteny_region are
   *       checked against each other for orphans on method_link_species_set_id.
   * </ol>
   *
   * @param dbre The database to use.
   * @return true if the test passed.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    final Connection con = dbre.getConnection();

    if (!tableHasRows(con, "method_link_species_set")) {
      ReportManager.correct(
          this, con, "NO ENTRIES in method_link_species_set table, so nothing to test IGNORED");
      return true;
    }

    boolean result = true;

    /* Check if have both BLASTZ_NET and LASTZ_NET entries for the same species set */
    final String duplicateNetSql =
        "SELECT species_set_id, count(*) FROM method_link_species_set JOIN method_link USING (method_link_id) WHERE TYPE in ('BLASTZ_NET', 'LASTZ_NET') GROUP BY species_set_id HAVING count(*) > 1";
    final int duplicateNetSets = DBUtils.getRowCount(con, duplicateNetSql);
    if (duplicateNetSets > 0) {
      ReportManager.problem(
          this,
          con,
          "FAILED method_link_species_set table contains "
              + duplicateNetSets
              + " entries with a BLASTZ_NET and LASTZ_NET entry for the same species_set");
      ReportManager.problem(this, con, "USEFUL SQL: " + duplicateNetSql);
      result = false;
    }

    // Everything below will be ignored on the master database
    if (isMasterDB(dbre.getConnection())) {
      return result;
    }

    /* Check method_link_species_set <-> synteny_region */
    /* All method_link for syntenies must have an internal ID between 101 and 199 */
    // Both orphan checks are evaluated unconditionally before their outcomes are combined.
    final boolean constrainedCheckPassed =
        checkForOrphansWithConstraint(
            con,
            "method_link_species_set",
            "method_link_species_set_id",
            "synteny_region",
            "method_link_species_set_id",
            "method_link_id >= 101 and method_link_id < 200");
    final boolean reverseCheckPassed =
        checkForOrphans(
            con,
            "synteny_region",
            "method_link_species_set_id",
            "method_link_species_set",
            "method_link_species_set_id");

    return result && constrainedCheckPassed && reverseCheckPassed;
  }
  /**
   * Checks that stable IDs of EST-derived genes, transcripts and translations contain the
   * expected {@code EST<letter><digits>} pattern (G for gene, T for transcript, P for
   * translation).
   *
   * <p>A row is considered EST-derived when its analysis logic_name matches {@code %est%}.
   * Translations have no analysis_id of their own in the query, so they are joined to analysis
   * through their transcript.
   *
   * @param dbre The database to use.
   * @return true if no table has an offending stable ID.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    boolean result = true;

    Connection con = dbre.getConnection();

    // table name -> letter expected after "EST" in that table's stable IDs
    Map<String, String> tableToLetter = new HashMap<String, String>();
    tableToLetter.put("gene", "G");
    tableToLetter.put("transcript", "T");
    tableToLetter.put("translation", "P");

    for (Map.Entry<String, String> entry : tableToLetter.entrySet()) {

      String table = entry.getKey();
      String letter = entry.getValue();

      String regexp = "EST" + letter + "[0-9]+";

      String sql;
      if (table.equals("translation")) {
        // need extra join to transcript table
        sql =
            "SELECT COUNT(*) FROM translation x, transcript t, analysis a WHERE a.analysis_id=t.analysis_id AND x.transcript_id=t.transcript_id AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP '"
                + regexp
                + "'";
      } else {
        sql =
            "SELECT COUNT(*) FROM "
                + table
                + " x, analysis a WHERE a.analysis_id=x.analysis_id "
                + "AND a.logic_name LIKE '%est%' AND x.stable_id NOT REGEXP '"
                + regexp
                + "'";
      }

      int rows = DBUtils.getRowCount(con, sql);

      if (rows > 0) {

        ReportManager.problem(
            this, con, rows + " " + table + " stable IDs do not contain EST" + letter);
        result = false;

      } else {

        ReportManager.correct(this, con, "All stable IDs contain EST" + letter);
      }
    }

    return result;
  } // run
Пример #10
0
  /**
   * Checks that genes with a pseudogene biotype do not have translations.
   *
   * <p>Genes whose biotype matches {@code %pseudogene%} are joined to their transcripts and
   * translations. The biotype {@code polymorphic_pseudogene} is always excluded. Further
   * restrictions depend on the database type:
   *
   * <ul>
   *   <li>SANGER_VEGA: only genes with source 'havana' or 'WU' are considered.
   *   <li>SANGER_VEGA and VEGA: {@code translated_processed_pseudogene} is excluded as well.
   * </ul>
   *
   * @param dbre The database to use.
   * @return true if the query counted no pseudogene translations.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    final Connection con = dbre.getConnection();

    String qry =
        "select count(*) from gene,transcript,translation "
            + "where gene.biotype like '%pseudogene%'"
            + " and transcript.gene_id=gene.gene_id "
            + " and translation.transcript_id=transcript.transcript_id and gene.biotype!= 'polymorphic_pseudogene' ";

    final boolean sangerVega = dbre.getType() == DatabaseType.SANGER_VEGA;
    if (sangerVega) {
      // for sangervega ignore genes that do not have source havana or WU
      qry += " and (gene.source='havana' or gene.source='WU')";
    }
    if (sangerVega || dbre.getType() == DatabaseType.VEGA) {
      // Vega allows translations on translated_processed_pseudogene-s
      qry += " and gene.biotype != 'translated_processed_pseudogene'";
    }

    final int translated = DBUtils.getRowCount(con, qry);
    final boolean passed = translated <= 0;

    if (passed) {
      ReportManager.correct(this, con, "No pseudogenes have translations");
    } else {
      ReportManager.problem(
          this,
          con,
          "Translation table contains "
              + translated
              + " rows for pseudogene types - should contain none");
    }

    return passed;
  }
Пример #11
0
  /**
   * Runs {@code sql_find_wrong_lengths} and fails if it yields any rows.
   *
   * <p>On failure, {@code error_msg} is reported first, followed by one ready-to-run {@code
   * update seq_region ...} statement per offending row, built from the row's {@code len} and
   * {@code seq_region_id} columns.
   *
   * @param dbre The database to use.
   * @return true if the row count for the query was zero.
   */
  @Override
  protected boolean runTest(DatabaseRegistryEntry dbre) {

    Connection con = dbre.getConnection();

    int numRows = DBUtils.getRowCount(con, sql_find_wrong_lengths);

    boolean passed = numRows == 0;
    if (passed) {
      return true;
    }

    ReportManager.problem(this, con, error_msg + "\n");

    // try-with-resources closes the statement and result set even if reading a row fails.
    try (Statement stmt = con.createStatement();
        ResultSet rs = stmt.executeQuery(sql_find_wrong_lengths)) {

      while (rs.next()) {
        ReportManager.problem(
            this,
            con,
            "update seq_region set length="
                + rs.getInt("len")
                + " where seq_region_id="
                + rs.getInt("seq_region_id")
                + ";");
      }

    } catch (Exception e) {
      // Listing the fix-up statements is best effort; the test has already been marked as failed.
      e.printStackTrace();
    }

    return false;
  }
Пример #12
0
  /** Check that all chromomes have > 0 markers_map_locations and marker_features. */
  private boolean checkAllChromosomesHaveMarkers(Connection con) {

    boolean result = true;

    // find all the chromosomes, and for each one check that it has some
    // markers
    // note a "chromosome" is assumed to be a seq_region that is:
    // - on the top-level co-ordinate system and
    // - doesn't have and _ or . in the name and
    // - has a seq_region name of less than 3 characters
    // - doesn't have a name starting with "Un" or "MT"

    // get top level co-ordinate system ID
    String sql = "SELECT coord_system_id FROM coord_system WHERE rank=1 LIMIT 1";

    String s = DBUtils.getRowColumnValue(con, sql);

    if (s.length() == 0) {
      System.err.println(
          "Error: can't get top-level co-ordinate system for " + DBUtils.getShortDatabaseName(con));
      return false;
    }

    int topLevelCSID = Integer.parseInt(s);

    try {

      // check each top-level seq_region (up to a limit) to see how many
      // marker_map_locations and marker features there are
      Statement stmt = con.createStatement();

      ResultSet rs =
          stmt.executeQuery(
              "SELECT * FROM seq_region WHERE coord_system_id="
                  + topLevelCSID
                  + " AND name NOT LIKE '%\\_%' AND name NOT LIKE '%.%' AND name NOT LIKE 'Un%' AND name NOT LIKE 'MT%' AND LENGTH(name) < 3 ORDER BY name");

      int numTopLevel = 0;

      while (rs.next() && numTopLevel++ < MAX_TOP_LEVEL) {

        long seqRegionID = rs.getLong("seq_region_id");
        String seqRegionName = rs.getString("name");

        // check marker_map_locations
        logger.fine("Counting marker_map_locations on chromosome " + seqRegionName);

        sql =
            "SELECT COUNT(*) FROM marker_map_location WHERE chromosome_name='"
                + seqRegionName
                + "'";
        int rows = DBUtils.getRowCount(con, sql);
        if (rows == 0) {

          ReportManager.problem(
              this,
              con,
              "Chromosome "
                  + seqRegionName
                  + " (seq_region_id "
                  + seqRegionID
                  + ") has no entries in marker_map_location");
          result = false;

        } else {

          ReportManager.correct(
              this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_map_locations");
        }

        // check marker_features
        logger.fine("Counting marker_features on chromosome " + seqRegionName);
        sql = "SELECT COUNT(*) FROM marker_feature WHERE seq_region_id=" + seqRegionID;
        rows = DBUtils.getRowCount(con, sql);
        if (rows == 0) {

          ReportManager.problem(
              this,
              con,
              "Chromosome "
                  + seqRegionName
                  + " (seq_region_id "
                  + seqRegionID
                  + ") has no marker_features");
          result = false;

        } else {

          ReportManager.correct(
              this, con, "Chromosome " + seqRegionName + " has " + rows + " marker_features");
        }
      }

      rs.close();
      stmt.close();

      if (numTopLevel == MAX_TOP_LEVEL) {
        logger.warning("Only checked first " + numTopLevel + " seq_regions");
      }

    } catch (SQLException se) {
      se.printStackTrace();
    }

    return result;
  }