/**
 * Registers the test genes, imports {@code src/test/resources/cna_test.txt} into the profile
 * identified by {@code geneticProfileId}, and asserts on the values read back through the DAOs.
 *
 * @throws DaoException declared for the data-access calls made here; not handled locally
 * @throws IOException declared for the file handling done here; not handled locally
 */
private void runImportCnaData() throws DaoException, IOException {

    DaoGeneticAlteration alterationDao = DaoGeneticAlteration.getInstance();
    DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();

    // Test genes use 9-digit IDs: the largest current true Entrez gene ID counts 8 digits.
    geneDao.addGene(new CanonicalGene(999999207, "TESTAKT1"));
    geneDao.addGene(new CanonicalGene(999999208, "TESTAKT2"));
    geneDao.addGene(new CanonicalGene(999910000, "TESTAKT3"));
    geneDao.addGene(new CanonicalGene(999999369, "TESTARAF"));
    geneDao.addGene(new CanonicalGene(999999472, "TESTATM"));
    geneDao.addGene(new CanonicalGene(999999673, "TESTBRAF"));
    geneDao.addGene(new CanonicalGene(999999672, "TESTBRCA1"));
    geneDao.addGene(new CanonicalGene(999999675, "TESTBRCA2"));

    ProgressMonitor.setConsoleMode(false);
    // TBD: change this to use getResourceAsStream()
    File cnaFile = new File("src/test/resources/cna_test.txt");
    ImportTabDelimData importer =
        new ImportTabDelimData(cnaFile, "Barry", geneticProfileId, null);
    importer.importData(FileUtil.getNumLines(cnaFile));

    // Stored values, compared as the strings returned by the DAO.
    assertEquals("0", alterationDao.getGeneticAlteration(geneticProfileId, sample1, 999999207));
    assertEquals("-1", alterationDao.getGeneticAlteration(geneticProfileId, sample4, 999999207));
    assertEquals("0", alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999999207));
    assertEquals("2", alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999910000));
    assertEquals("2", alterationDao.getGeneticAlteration(geneticProfileId, sample3, 999910000));

    // The same values parsed as integers and compared with the CopyNumberStatus constants.
    int statusSample3 =
        Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample3, 999910000));
    assertEquals(CopyNumberStatus.COPY_NUMBER_AMPLIFICATION, statusSample3);
    int statusSample2 =
        Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999910000));
    assertEquals(CopyNumberStatus.COPY_NUMBER_AMPLIFICATION, statusSample2);
    int statusSample4 =
        Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample4, 999999207));
    assertEquals(CopyNumberStatus.HEMIZYGOUS_DELETION, statusSample4);

    // Both samples must be linked to the genetic profile after the import.
    Patient patientSb = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SB");
    Sample sampleSb =
        DaoSample.getSampleByPatientAndSampleId(patientSb.getInternalId(), "TCGA-A1-A0SB-01");
    assertTrue(
        DaoSampleProfile.sampleExistsInGeneticProfile(sampleSb.getInternalId(), geneticProfileId));

    Patient patientSj = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SJ");
    Sample sampleSj =
        DaoSample.getSampleByPatientAndSampleId(patientSj.getInternalId(), "TCGA-A1-A0SJ-01");
    assertTrue(
        DaoSampleProfile.sampleExistsInGeneticProfile(sampleSj.getInternalId(), geneticProfileId));

    // Total number of samples registered for the profile.
    assertEquals(14, DaoSampleProfile.getAllSampleIdsInProfile(geneticProfileId).size());
  }
Esempio n. 2
0
  /**
   * Command line utility: parses the GISTIC file given as the first argument for the cancer study
   * given as the second argument and adds every parsed record through {@code DaoGistic.addGistic}.
   *
   * <p>Prints a usage message and returns when the argument count is not exactly two.
   *
   * @param args {@code <gistic-data-file.txt> <cancer-study-id>}
   * @throws IOException declared for the file handling done here; not handled locally
   * @throws DaoException declared for data-access calls made outside the per-record loop
   */
  public static void main(String[] args) throws IOException, DaoException {

    if (args.length != 2) {
      System.out.printf(
          "command line usage:  importGistic.pl <gistic-data-file.txt> <cancer-study-id>\n"
              + "\t <gistic-data-file.txt> Note that gistic-data-file.txt must be a massaged file, it does not come straight from the Broad\n"
              + "\t <cancer-study-id> e.g. 'tcga_gbm'");
      return;
    }

    SpringUtil.initDataSource();
    GisticReader reader = new GisticReader();

    File gisticFile = new File(args[0]);
    int studyInternalId = reader.getCancerStudyInternalId(args[1]);

    ProgressMonitor.setConsoleMode(false);

    System.out.println("Reading data from: " + gisticFile.getAbsolutePath());
    System.out.println("CancerStudyId: " + studyInternalId);

    int lineCount = FileUtil.getNumLines(gisticFile);
    System.out.println(" --> total number of lines: " + lineCount);
    ProgressMonitor.setMaxValue(lineCount);

    ArrayList<Gistic> parsed = reader.parse(gisticFile, studyInternalId);
    if (parsed == null) {
      System.out.println("Error: didn't get any data");
      return;
    }

    // add to CGDS database; a failing record is reported and the loop moves on to the next one
    for (Gistic record : parsed) {
      try {
        DaoGistic.addGistic(record);
      } catch (validationException invalid) {
        // validation failures are only written to the debug log
        logger.debug(invalid);
      } catch (DaoException daoFailure) {
        // DAO failures are printed to stderr
        System.err.println(daoFailure);
      }
    }
    ConsoleUtil.showWarnings();
  }
  /**
   * Registers the test genes, creates the {@code gbm_mrna} profile, imports
   * {@code src/test/resources/mrna_test.txt} into it, and asserts on two stored values.
   *
   * @throws DaoException declared for the data-access calls made here; not handled locally
   * @throws IOException declared for the file handling done here; not handled locally
   */
  private void runImportRnaData1() throws DaoException, IOException {

    DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();
    DaoGeneticAlteration alterationDao = DaoGeneticAlteration.getInstance();

    geneDao.addGene(new CanonicalGene(999999780, "A"));
    geneDao.addGene(new CanonicalGene(999995982, "B"));
    geneDao.addGene(new CanonicalGene(999993310, "C"));
    geneDao.addGene(new CanonicalGene(999997849, "D"));
    geneDao.addGene(new CanonicalGene(999992978, "E"));
    geneDao.addGene(new CanonicalGene(999997067, "F"));
    geneDao.addGene(new CanonicalGene(999911099, "G"));
    geneDao.addGene(new CanonicalGene(999999675, "6352"));

    // Describe and store the mRNA profile the data file is imported into.
    GeneticProfile mrnaProfile = new GeneticProfile();
    mrnaProfile.setCancerStudyId(studyId);
    mrnaProfile.setStableId("gbm_mrna");
    mrnaProfile.setGeneticAlterationType(GeneticAlterationType.MRNA_EXPRESSION);
    mrnaProfile.setDatatype("CONTINUOUS");
    mrnaProfile.setProfileName("MRNA Data");
    mrnaProfile.setProfileDescription("mRNA Data");
    DaoGeneticProfile.addGeneticProfile(mrnaProfile);

    // Look the profile up again by stable id to obtain its id.
    int mrnaProfileId =
        DaoGeneticProfile.getGeneticProfileByStableId("gbm_mrna").getGeneticProfileId();

    ProgressMonitor.setConsoleMode(true);
    // TBD: change this to use getResourceAsStream()
    File mrnaFile = new File("src/test/resources/mrna_test.txt");
    ImportTabDelimData importer = new ImportTabDelimData(mrnaFile, mrnaProfileId, null);
    importer.importData(FileUtil.getNumLines(mrnaFile));
    ConsoleUtil.showMessages();

    int dd639 = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD639").getInternalId();
    assertEquals("2.01", alterationDao.getGeneticAlteration(mrnaProfileId, dd639, 999992978));

    int dd638 = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD638").getInternalId();
    assertEquals("0.55", alterationDao.getGeneticAlteration(mrnaProfileId, dd638, 999997849));
  }
  /**
   * Imports {@code src/test/resources/cna_test2.txt} into the profile identified by
   * {@code geneticProfileId} and asserts on the values read back through the DAOs.
   *
   * @throws DaoException declared for the data-access calls made here; not handled locally
   * @throws IOException declared for the file handling done here; not handled locally
   */
  private void runImportCnaData2() throws DaoException, IOException {

    DaoGeneticAlteration dao = DaoGeneticAlteration.getInstance();

    ProgressMonitor.setConsoleMode(false);
    // TBD: change this to use getResourceAsStream()
    File file = new File("src/test/resources/cna_test2.txt");
    ImportTabDelimData parser = new ImportTabDelimData(file, geneticProfileId, null);
    int numLines = FileUtil.getNumLines(file);
    parser.importData(numLines);

    // assertEquals takes (expected, actual): the literal goes first so that a failure message
    // reports the expected and the actual value the right way round.
    String value = dao.getGeneticAlteration(geneticProfileId, sample1, 207);
    assertEquals("0", value);
    value = dao.getGeneticAlteration(geneticProfileId, sample4, 207);
    assertEquals("-1", value);
    value = dao.getGeneticAlteration(geneticProfileId, sample2, 207);
    assertEquals("0", value);
    value = dao.getGeneticAlteration(geneticProfileId, sample2, 10000);
    assertEquals("2", value);
    value = dao.getGeneticAlteration(geneticProfileId, sample3, 10000);
    assertEquals("2", value);

    // The same values parsed as integers and compared with the CopyNumberStatus constants.
    int cnaStatus = Integer.parseInt(dao.getGeneticAlteration(geneticProfileId, sample3, 10000));
    assertEquals(CopyNumberStatus.COPY_NUMBER_AMPLIFICATION, cnaStatus);
    cnaStatus = Integer.parseInt(dao.getGeneticAlteration(geneticProfileId, sample2, 10000));
    assertEquals(CopyNumberStatus.COPY_NUMBER_AMPLIFICATION, cnaStatus);
    cnaStatus = Integer.parseInt(dao.getGeneticAlteration(geneticProfileId, sample4, 207));
    assertEquals(CopyNumberStatus.HEMIZYGOUS_DELETION, cnaStatus);

    // Both samples must be linked to the genetic profile after the import.
    Patient patient = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SB");
    Sample sample =
        DaoSample.getSampleByPatientAndSampleId(patient.getInternalId(), "TCGA-A1-A0SB-01");
    assertTrue(
        DaoSampleProfile.sampleExistsInGeneticProfile(sample.getInternalId(), geneticProfileId));

    patient = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SJ");
    sample = DaoSample.getSampleByPatientAndSampleId(patient.getInternalId(), "TCGA-A1-A0SJ-01");
    assertTrue(
        DaoSampleProfile.sampleExistsInGeneticProfile(sample.getInternalId(), geneticProfileId));

    // Assert on the returned list directly instead of holding it in a raw-typed local.
    assertEquals(14, DaoSampleProfile.getAllSampleIdsInProfile(geneticProfileId).size());
  }
  /**
   * Test importing of data_rppa file: registers the test genes, creates the {@code gbm_rppa}
   * profile, imports {@code src/test/resources/tabDelimitedData/data_rppa.txt} into it, and
   * asserts on values read back for SAMPLE1 and SAMPLE4.
   *
   * @throws Exception All Errors.
   */
  @Test
  public void testImportRppaData() throws Exception {
    MySQLbulkLoader.bulkLoadOn();

    DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();
    DaoGeneticAlteration alterationDao = DaoGeneticAlteration.getInstance();

    // Genes with alias:
    geneDao.addGene(makeGeneWithAlias(999999931, "TESTACACA", "TESTACC1"));
    geneDao.addGene(makeGeneWithAlias(999999207, "TESTAKT1", "TESTAKT"));
    geneDao.addGene(makeGeneWithAlias(999999597, "TESTSANDER", "TESTACC1"));
    geneDao.addGene(makeGeneWithAlias(999997158, "TESTTP53BP1", "TEST53BP1"));
    // test for NA being a special case in RPPA, and not the usual alias
    geneDao.addGene(makeGeneWithAlias(999997504, "XK", "NA"));
    // Other genes:
    geneDao.addGene(new CanonicalGene(999999932, "TESTACACB"));
    geneDao.addGene(new CanonicalGene(999999208, "TESTAKT2"));
    geneDao.addGene(new CanonicalGene(999999369, "TESTARAF"));
    geneDao.addGene(new CanonicalGene(999991978, "TESTEIF4EBP1"));
    geneDao.addGene(new CanonicalGene(999995562, "TESTPRKAA1"));
    geneDao.addGene(new CanonicalGene(999997531, "TESTYWHAE"));
    geneDao.addGene(new CanonicalGene(999910000, "TESTAKT3"));
    geneDao.addGene(new CanonicalGene(999995578, "TESTPRKCA"));

    // Describe and store the RPPA profile the data file is imported into.
    GeneticProfile rppaProfile = new GeneticProfile();
    rppaProfile.setCancerStudyId(studyId);
    rppaProfile.setStableId("gbm_rppa");
    rppaProfile.setGeneticAlterationType(GeneticAlterationType.PROTEIN_LEVEL);
    rppaProfile.setDatatype("LOG2-VALUE");
    rppaProfile.setProfileName("RPPA Data");
    rppaProfile.setProfileDescription("RPPA Data");
    DaoGeneticProfile.addGeneticProfile(rppaProfile);

    // Look the profile up again by stable id to obtain its id.
    int rppaProfileId =
        DaoGeneticProfile.getGeneticProfileByStableId("gbm_rppa").getGeneticProfileId();

    ProgressMonitor.setConsoleMode(true);
    // TBD: change this to use getResourceAsStream()
    File rppaFile = new File("src/test/resources/tabDelimitedData/data_rppa.txt");
    ImportTabDelimData importer = new ImportTabDelimData(rppaFile, rppaProfileId, null);
    importer.importData(FileUtil.getNumLines(rppaFile));
    ConsoleUtil.showMessages();

    int internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId();
    String stored = alterationDao.getGeneticAlteration(rppaProfileId, internalSampleId, 999997531);
    assertEquals("1.5", stored);

    internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE4").getInternalId();
    stored = alterationDao.getGeneticAlteration(rppaProfileId, internalSampleId, 999997531);
    assertEquals("2", stored);

    internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE4").getInternalId();
    stored = alterationDao.getGeneticAlteration(rppaProfileId, internalSampleId, 999997504);
    // "NA" is not expected to be stored because of workaround for bug in firehose. See
    // also https://github.com/cBioPortal/cbioportal/issues/839#issuecomment-203523078
    assertEquals("NaN", stored);

    internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId();
    stored = alterationDao.getGeneticAlteration(rppaProfileId, internalSampleId, 999995578);
    assertEquals("1.5", stored);
  }
  /**
   * Test importing of data_expression file: registers the test genes, creates the
   * {@code gbm_mrna} profile, imports
   * {@code src/test/resources/tabDelimitedData/data_expression2.txt} into it, then asserts on the
   * warnings collected by {@code ProgressMonitor} and on the values read back.
   *
   * @throws Exception All Errors.
   */
  @Test
  public void testImportmRnaData2() throws Exception {
    MySQLbulkLoader.bulkLoadOn();

    DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();
    DaoGeneticAlteration alterationDao = DaoGeneticAlteration.getInstance();

    // Gene with alias:
    geneDao.addGene(makeGeneWithAlias(999997504, "TESTXK", "NA"));
    // Other genes:
    geneDao.addGene(new CanonicalGene(999999999, "TESTNAT1"));

    geneDao.addGene(new CanonicalGene(999997124, "TESTTNF"));
    geneDao.addGene(new CanonicalGene(999991111, "TESTCHEK1"));
    geneDao.addGene(new CanonicalGene(999999919, "TESTABCA1"));
    // will get generated negative id:
    geneDao.addGene(new CanonicalGene(-1, "TESTphosphoprotein"));

    // Describe and store the mRNA profile the data file is imported into.
    GeneticProfile mrnaProfile = new GeneticProfile();
    mrnaProfile.setCancerStudyId(studyId);
    mrnaProfile.setStableId("gbm_mrna");
    mrnaProfile.setGeneticAlterationType(GeneticAlterationType.MRNA_EXPRESSION);
    mrnaProfile.setDatatype("CONTINUOUS");
    mrnaProfile.setProfileName("MRNA Data");
    mrnaProfile.setProfileDescription("mRNA Data");
    DaoGeneticProfile.addGeneticProfile(mrnaProfile);

    // Look the profile up again by stable id to obtain its id.
    int mrnaProfileId =
        DaoGeneticProfile.getGeneticProfileByStableId("gbm_mrna").getGeneticProfileId();

    ProgressMonitor.setConsoleMode(true);
    // TBD: change this to use getResourceAsStream()
    File expressionFile = new File("src/test/resources/tabDelimitedData/data_expression2.txt");
    ImportTabDelimData importer = new ImportTabDelimData(expressionFile, mrnaProfileId, null);
    importer.importData(FileUtil.getNumLines(expressionFile));

    // check if expected warnings are given; one warning may match several categories:
    ArrayList<String> collectedWarnings = ProgressMonitor.getWarnings();
    int duplicatedRows = 0;
    int invalidEntrez = 0;
    int skippedRecords = 0;
    for (String message : collectedWarnings) {
      duplicatedRows += message.contains("Duplicated row") ? 1 : 0;
      // invalid Entrez
      invalidEntrez += message.contains("invalid Entrez_Id") ? 1 : 0;
      // Entrez is a valid number, but not found
      skippedRecords += message.contains("Record will be skipped") ? 1 : 0;
    }
    // check that we have 11 warning messages (2 + 3 + 6):
    assertEquals(2, duplicatedRows);
    assertEquals(3, invalidEntrez);
    assertEquals(6, skippedRecords);

    Set<Long> loadedGeneIds = DaoGeneticAlteration.getGenesIdInProfile(mrnaProfileId);
    // data will be loaded for 5 of the genes
    assertEquals(5, loadedGeneIds.size());
    HashMap<Long, HashMap<Integer, String>> alterationsByGene =
        alterationDao.getGeneticAlterationMap(mrnaProfileId, loadedGeneIds);
    assertEquals(5, alterationsByGene.size());

    int internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId();
    String stored = alterationDao.getGeneticAlteration(mrnaProfileId, internalSampleId, 999997124);
    assertEquals("770", stored);

    internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE3").getInternalId();
    stored = alterationDao.getGeneticAlteration(mrnaProfileId, internalSampleId, 999997124);
    assertEquals("220", stored);

    // gene should also be loaded via its alias "NA" as defined above:
    internalSampleId =
        DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE3").getInternalId();
    stored = alterationDao.getGeneticAlteration(mrnaProfileId, internalSampleId, 999997504);
    assertEquals("9940", stored);
  }