/**
 * Prepares the state shared by the tests in this class.
 *
 * <p>Re-caches cancer studies and genes, resets the collected warnings, registers a
 * copy-number-alteration profile with stable id {@code study_tcga_pub_test} under study
 * {@code study_tcga_pub}, and stores the internal ids of the study, the new profile and five of
 * the study's samples in the corresponding fields.
 *
 * @throws DaoException if one of the DAO calls fails
 */
@Before
public void setUp() throws DaoException {
  DaoCancerStudy.reCacheAll();
  DaoGeneOptimized.getInstance().reCache();
  ProgressMonitor.resetWarnings();

  studyId = DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub").getInternalId();

  // Register the profile that the CNA import writes into.
  GeneticProfile cnaProfile = new GeneticProfile();
  cnaProfile.setCancerStudyId(studyId);
  cnaProfile.setGeneticAlterationType(GeneticAlterationType.COPY_NUMBER_ALTERATION);
  cnaProfile.setStableId("study_tcga_pub_test");
  cnaProfile.setProfileName("Barry CNA Results");
  cnaProfile.setDatatype("test");
  DaoGeneticProfile.addGeneticProfile(cnaProfile);

  // Read the profile back to obtain the id it was stored under.
  geneticProfileId =
      DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_test").getGeneticProfileId();

  // Internal ids of the samples referenced by the assertions.
  sample1 =
      DaoSample.getSampleByCancerStudyAndSampleId(studyId, "TCGA-A1-A0SB-01").getInternalId();
  sample2 =
      DaoSample.getSampleByCancerStudyAndSampleId(studyId, "TCGA-A1-A0SD-01").getInternalId();
  sample3 =
      DaoSample.getSampleByCancerStudyAndSampleId(studyId, "TCGA-A1-A0SE-01").getInternalId();
  sample4 =
      DaoSample.getSampleByCancerStudyAndSampleId(studyId, "TCGA-A1-A0SF-01").getInternalId();
  sample5 =
      DaoSample.getSampleByCancerStudyAndSampleId(studyId, "TCGA-A1-A0SG-01").getInternalId();
}
public ObjectNode processMutHm(long entrezGeneId, ArrayList<Integer> sampleList, HashMap mutHm) { ObjectNode _datum = mapper.createObjectNode(); // create map to pair sample and value HashMap<Integer, String> mapSampleValue = new HashMap<>(); for (Integer sampleId : sampleList) { // Assign every sample (included non mutated ones) values -- mutated -> // Mutation Type, non-mutated -> "Non" String mutationStatus = "Non"; String tmpStr = new StringBuilder() .append(Integer.toString(sampleId)) .append(Long.toString(entrezGeneId)) .toString(); if (mutHm.containsKey(tmpStr)) mutationStatus = "Mutated"; mapSampleValue.put(sampleId, mutationStatus); } // remove empty entry Iterator it = mapSampleValue.entrySet().iterator(); while (it.hasNext()) { Map.Entry pair = (Map.Entry) it.next(); if (pair.getValue().equals("NA") || pair.getValue().equals("NaN") || pair.getValue().equals("null")) { it.remove(); } } // get Gene Name and Cytoband DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); String geneName = daoGeneOptimized.getGene(entrezGeneId).getHugoGeneSymbolAllCaps(); String cytoband = daoGeneOptimized.getGene(entrezGeneId).getCytoband(); // statistics analysis if (!(Arrays.asList(queriedGenes)).contains(geneName)) { // remove queried genes from result _datum.put(COL_NAME_GENE, geneName); _datum.put(COL_NAME_CYTOBAND, cytoband); _datum.put(COL_NAME_PCT_ALTERED, calcPct(mapSampleValue, profileType, "altered")); _datum.put(COL_NAME_PCT_UNALTERED, calcPct(mapSampleValue, profileType, "unaltered")); _datum.put( COL_NAME_RATIO, calcRatio( calcPct(mapSampleValue, profileType, "altered"), calcPct(mapSampleValue, profileType, "unaltered"))); _datum.put(COL_NAME_DIRECTION, "place holder"); // calculation is done by the front-end _datum.put(COL_NAME_P_VALUE, calcPval(mapSampleValue, profileType, geneticProfileStableId)); if (!(calcPct(mapSampleValue, profileType, "altered") == 0.0 && calcPct(mapSampleValue, profileType, "unaltered") == 0.0) && 
!Double.isNaN(calcPval(mapSampleValue, profileType, geneticProfileStableId))) { return _datum; } } return null; }
/**
 * Imports {@code src/test/resources/cna_test.txt} into the profile identified by
 * {@code geneticProfileId} and checks the stored values, the sample/profile links and the number
 * of samples in the profile.
 *
 * @throws DaoException if a DAO call fails
 * @throws IOException if the test file cannot be read
 */
private void runImportCnaData() throws DaoException, IOException {
  DaoGeneticAlteration alterationDao = DaoGeneticAlteration.getInstance();
  DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();

  // Real Entrez gene ids currently have at most 8 digits; the test genes use 9-digit ids.
  geneDao.addGene(new CanonicalGene(999999207, "TESTAKT1"));
  geneDao.addGene(new CanonicalGene(999999208, "TESTAKT2"));
  geneDao.addGene(new CanonicalGene(999910000, "TESTAKT3"));
  geneDao.addGene(new CanonicalGene(999999369, "TESTARAF"));
  geneDao.addGene(new CanonicalGene(999999472, "TESTATM"));
  geneDao.addGene(new CanonicalGene(999999673, "TESTBRAF"));
  geneDao.addGene(new CanonicalGene(999999672, "TESTBRCA1"));
  geneDao.addGene(new CanonicalGene(999999675, "TESTBRCA2"));

  ProgressMonitor.setConsoleMode(false);

  // TBD: change this to use getResourceAsStream()
  File cnaFile = new File("src/test/resources/cna_test.txt");
  ImportTabDelimData importer = new ImportTabDelimData(cnaFile, "Barry", geneticProfileId, null);
  importer.importData(FileUtil.getNumLines(cnaFile));

  // Raw values as stored.
  assertEquals("0", alterationDao.getGeneticAlteration(geneticProfileId, sample1, 999999207));
  assertEquals("-1", alterationDao.getGeneticAlteration(geneticProfileId, sample4, 999999207));
  assertEquals("0", alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999999207));
  assertEquals("2", alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999910000));
  assertEquals("2", alterationDao.getGeneticAlteration(geneticProfileId, sample3, 999910000));

  // The same values parsed and compared against the CopyNumberStatus constants.
  assertEquals(
      CopyNumberStatus.COPY_NUMBER_AMPLIFICATION,
      Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample3, 999910000)));
  assertEquals(
      CopyNumberStatus.COPY_NUMBER_AMPLIFICATION,
      Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample2, 999910000)));
  assertEquals(
      CopyNumberStatus.HEMIZYGOUS_DELETION,
      Integer.parseInt(alterationDao.getGeneticAlteration(geneticProfileId, sample4, 999999207)));

  // Both samples must be linked to the profile.
  Patient firstPatient = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SB");
  Sample firstSample =
      DaoSample.getSampleByPatientAndSampleId(firstPatient.getInternalId(), "TCGA-A1-A0SB-01");
  assertTrue(
      DaoSampleProfile.sampleExistsInGeneticProfile(
          firstSample.getInternalId(), geneticProfileId));

  Patient secondPatient = DaoPatient.getPatientByCancerStudyAndPatientId(studyId, "TCGA-A1-A0SJ");
  Sample secondSample =
      DaoSample.getSampleByPatientAndSampleId(secondPatient.getInternalId(), "TCGA-A1-A0SJ-01");
  assertTrue(
      DaoSampleProfile.sampleExistsInGeneticProfile(
          secondSample.getInternalId(), geneticProfileId));

  assertEquals(14, DaoSampleProfile.getAllSampleIdsInProfile(geneticProfileId).size());
}
/**
 * Imports the PiHelper test drug and drug-target files into a freshly reset database and checks
 * the number of drugs, the number of interactions, the targets per drug and selected drug
 * attributes.
 *
 * @throws Exception on any failure
 */
public void testImporter() throws Exception {
  ResetDatabase.resetDatabase();

  // Genes that must exist before the import runs.
  DaoGeneOptimized geneDao = DaoGeneOptimized.getInstance();
  String[] geneSymbols = {
    "F2", "EGFR", "FCGR3B", "C1R", "C1QA", "C1QB", "C1QC", "FCGR3A",
    "C1S", "FCGR1A", "FCGR2A", "FCGR2B", "FCGR2C", "IL2RA", "IL2RB", "IL2RG"
  };
  for (int i = 0; i < geneSymbols.length; i++) {
    geneDao.addGene(new CanonicalGene(geneSymbols[i]));
  }

  // The resource is stamped with the current year and month.
  String currentMonth = new SimpleDateFormat("yyyy-MM").format(Calendar.getInstance().getTime());
  DrugDataResource resource =
      new DrugDataResource(
          "PiHelper",
          "https://bitbucket.org/armish/pihelper/downloads/pihelper_data_20121107.zip",
          currentMonth);

  PiHelperImporter importer = new PiHelperImporter(resource);
  ClassLoader loader = this.getClass().getClassLoader();
  importer.setDrugInfoFile(loader.getResourceAsStream("test_pihelper_drugs.tsv"));
  importer.setDrugTargetsFile(loader.getResourceAsStream("test_pihelper_drugtargets.tsv"));
  importer.importData();

  DaoDrug drugDao = DaoDrug.getInstance();
  DaoDrugInteraction interactionDao = DaoDrugInteraction.getInstance();

  ArrayList<Drug> drugs = drugDao.getAllDrugs();
  int drugCount = drugs.size();
  assertEquals(6, drugCount);
  assertEquals(16, interactionDao.getCount());

  // Expected number of targets for each drug, in the order getAllDrugs() returns them.
  int[] expectedTargetCounts = {1, 12, 0, 3, 0, 0};
  for (int idx = 0; idx < drugCount; idx++) {
    Drug drug = drugs.get(idx);
    assertEquals(expectedTargetCounts[idx], interactionDao.getTargets(drug).size());
  }

  Drug cetuximab = drugDao.getDrug("33612");
  assertEquals(204, cetuximab.getNumberOfClinicalTrials().intValue());
  assertTrue(cetuximab.isCancerDrug());
  assertFalse(cetuximab.isNutraceuitical());
  assertTrue(cetuximab.isApprovedFDA());

  Drug etanercept = drugDao.getDrug("33615");
  assertEquals(-1, etanercept.getNumberOfClinicalTrials().intValue());
  assertFalse(etanercept.isCancerDrug());
  assertFalse(etanercept.isNutraceuitical());
  assertTrue(etanercept.isApprovedFDA());
}
private void runImportRnaData1() throws DaoException, IOException { DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); DaoGeneticAlteration dao = DaoGeneticAlteration.getInstance(); daoGene.addGene(new CanonicalGene(999999780, "A")); daoGene.addGene(new CanonicalGene(999995982, "B")); daoGene.addGene(new CanonicalGene(999993310, "C")); daoGene.addGene(new CanonicalGene(999997849, "D")); daoGene.addGene(new CanonicalGene(999992978, "E")); daoGene.addGene(new CanonicalGene(999997067, "F")); daoGene.addGene(new CanonicalGene(999911099, "G")); daoGene.addGene(new CanonicalGene(999999675, "6352")); GeneticProfile geneticProfile = new GeneticProfile(); geneticProfile.setCancerStudyId(studyId); geneticProfile.setStableId("gbm_mrna"); geneticProfile.setGeneticAlterationType(GeneticAlterationType.MRNA_EXPRESSION); geneticProfile.setDatatype("CONTINUOUS"); geneticProfile.setProfileName("MRNA Data"); geneticProfile.setProfileDescription("mRNA Data"); DaoGeneticProfile.addGeneticProfile(geneticProfile); int newGeneticProfileId = DaoGeneticProfile.getGeneticProfileByStableId("gbm_mrna").getGeneticProfileId(); ProgressMonitor.setConsoleMode(true); // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/mrna_test.txt"); ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null); int numLines = FileUtil.getNumLines(file); parser.importData(numLines); ConsoleUtil.showMessages(); int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD639").getInternalId(); String value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999992978); assertEquals("2.01", value); sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD638").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997849); assertEquals("0.55", value); }
@Override public ObjectNode process(long entrezGeneId, String[] values, ArrayList<Integer> sampleList) { ObjectNode _datum = mapper.createObjectNode(); // create map to pair sample and value HashMap<Integer, String> mapSampleValue = new HashMap<>(); for (int i = 0; i < values.length; i++) { String value = values[i]; Integer sampleId = sampleList.get(i); mapSampleValue.put(sampleId, value); } // remove empty entry Iterator it = mapSampleValue.entrySet().iterator(); while (it.hasNext()) { Map.Entry pair = (Map.Entry) it.next(); if (pair.getValue().equals("NA") || pair.getValue().equals("NaN") || pair.getValue().equals("null")) { it.remove(); } } // get Gene Name and Cytoband DaoGeneOptimized daoGeneOptimized = DaoGeneOptimized.getInstance(); String geneName = daoGeneOptimized.getGene(entrezGeneId).getHugoGeneSymbolAllCaps(); String cytoband = daoGeneOptimized.getGene(entrezGeneId).getCytoband(); // statistics analysis if (profileType.equals(GeneticAlterationType.COPY_NUMBER_ALTERATION.toString())) { if (!(Arrays.asList(queriedGenes)).contains(geneName)) { // remove queried genes from result _datum.put(COL_NAME_GENE, geneName); _datum.put(COL_NAME_CYTOBAND, cytoband); _datum.put( COL_NAME_PCT_ALTERED, Integer.toString(countAltered(mapSampleValue, profileType, "altered")) + "////" + Double.toString(calcPct(mapSampleValue, profileType, "altered"))); _datum.put( COL_NAME_PCT_UNALTERED, Integer.toString(countAltered(mapSampleValue, profileType, "unaltered")) + "////" + Double.toString(calcPct(mapSampleValue, profileType, "unaltered"))); _datum.put( COL_NAME_RATIO, calcRatio( calcPct(mapSampleValue, profileType, "altered"), calcPct(mapSampleValue, profileType, "unaltered"))); _datum.put(COL_NAME_DIRECTION, "place holder"); // calculation is done by the front-end _datum.put(COL_NAME_P_VALUE, calcPval(mapSampleValue, profileType, geneticProfileStableId)); if (!(calcPct(mapSampleValue, profileType, "altered") == 0.0 && calcPct(mapSampleValue, profileType, "unaltered") == 0.0) 
&& !Double.isNaN(calcPval(mapSampleValue, profileType, geneticProfileStableId))) { return _datum; } } } else if (profileType.equals(GeneticAlterationType.MRNA_EXPRESSION.toString())) { _datum.put(COL_NAME_GENE, geneName); _datum.put(COL_NAME_CYTOBAND, cytoband); _datum.put( COL_NAME_MEAN_ALTERED, calcMean(mapSampleValue, "altered", geneticProfileStableId)); _datum.put( COL_NAME_MEAN_UNALTERED, calcMean(mapSampleValue, "unaltered", geneticProfileStableId)); _datum.put( COL_NAME_STDEV_ALTERED, calcSTDev(mapSampleValue, "altered", geneticProfileStableId)); _datum.put( COL_NAME_STDEV_UNALTERED, calcSTDev(mapSampleValue, "unaltered", geneticProfileStableId)); _datum.put(COL_NAME_P_VALUE, calcPval(mapSampleValue, profileType, geneticProfileStableId)); if (!Double.isNaN(calcPval(mapSampleValue, profileType, geneticProfileStableId))) { return _datum; } } else if (profileType.equals(GeneticAlterationType.PROTEIN_LEVEL.toString())) { _datum.put(COL_NAME_GENE, geneName); _datum.put(COL_NAME_CYTOBAND, cytoband); _datum.put( COL_NAME_MEAN_ALTERED, calcMean(mapSampleValue, "altered", geneticProfileStableId)); _datum.put( COL_NAME_MEAN_UNALTERED, calcMean(mapSampleValue, "unaltered", geneticProfileStableId)); _datum.put( COL_NAME_STDEV_ALTERED, calcSTDev(mapSampleValue, "altered", geneticProfileStableId)); _datum.put( COL_NAME_STDEV_UNALTERED, calcSTDev(mapSampleValue, "unaltered", geneticProfileStableId)); _datum.put(COL_NAME_P_VALUE, calcPval(mapSampleValue, profileType, geneticProfileStableId)); if (!Double.isNaN(calcPval(mapSampleValue, profileType, geneticProfileStableId))) { return _datum; } } return null; }
/** * Test importing of data_rppa file. * * @throws Exception All Errors. */ @Test public void testImportRppaData() throws Exception { MySQLbulkLoader.bulkLoadOn(); DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); DaoGeneticAlteration dao = DaoGeneticAlteration.getInstance(); // Genes with alias: daoGene.addGene(makeGeneWithAlias(999999931, "TESTACACA", "TESTACC1")); daoGene.addGene(makeGeneWithAlias(999999207, "TESTAKT1", "TESTAKT")); daoGene.addGene(makeGeneWithAlias(999999597, "TESTSANDER", "TESTACC1")); daoGene.addGene(makeGeneWithAlias(999997158, "TESTTP53BP1", "TEST53BP1")); // test for NA being a special case in RPPA, and not the usual alias daoGene.addGene(makeGeneWithAlias(999997504, "XK", "NA")); // Other genes: daoGene.addGene(new CanonicalGene(999999932, "TESTACACB")); daoGene.addGene(new CanonicalGene(999999208, "TESTAKT2")); daoGene.addGene(new CanonicalGene(999999369, "TESTARAF")); daoGene.addGene(new CanonicalGene(999991978, "TESTEIF4EBP1")); daoGene.addGene(new CanonicalGene(999995562, "TESTPRKAA1")); daoGene.addGene(new CanonicalGene(999997531, "TESTYWHAE")); daoGene.addGene(new CanonicalGene(999910000, "TESTAKT3")); daoGene.addGene(new CanonicalGene(999995578, "TESTPRKCA")); GeneticProfile geneticProfile = new GeneticProfile(); geneticProfile.setCancerStudyId(studyId); geneticProfile.setStableId("gbm_rppa"); geneticProfile.setGeneticAlterationType(GeneticAlterationType.PROTEIN_LEVEL); geneticProfile.setDatatype("LOG2-VALUE"); geneticProfile.setProfileName("RPPA Data"); geneticProfile.setProfileDescription("RPPA Data"); DaoGeneticProfile.addGeneticProfile(geneticProfile); int newGeneticProfileId = DaoGeneticProfile.getGeneticProfileByStableId("gbm_rppa").getGeneticProfileId(); ProgressMonitor.setConsoleMode(true); // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/tabDelimitedData/data_rppa.txt"); ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null); int numLines = 
FileUtil.getNumLines(file); parser.importData(numLines); ConsoleUtil.showMessages(); int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId(); String value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997531); assertEquals("1.5", value); sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE4").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997531); assertEquals("2", value); sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE4").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997504); assertEquals( "NaN", value); // "NA" is not expected to be stored because of workaround for bug in firehose. See // also https://github.com/cBioPortal/cbioportal/issues/839#issuecomment-203523078 sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999995578); assertEquals("1.5", value); }
/** * Test importing of data_expression file. * * @throws Exception All Errors. */ @Test public void testImportmRnaData2() throws Exception { MySQLbulkLoader.bulkLoadOn(); DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); DaoGeneticAlteration dao = DaoGeneticAlteration.getInstance(); // Gene with alias: daoGene.addGene(makeGeneWithAlias(999997504, "TESTXK", "NA")); // Other genes: daoGene.addGene(new CanonicalGene(999999999, "TESTNAT1")); daoGene.addGene(new CanonicalGene(999997124, "TESTTNF")); daoGene.addGene(new CanonicalGene(999991111, "TESTCHEK1")); daoGene.addGene(new CanonicalGene(999999919, "TESTABCA1")); // will get generated negative id: daoGene.addGene(new CanonicalGene(-1, "TESTphosphoprotein")); GeneticProfile geneticProfile = new GeneticProfile(); geneticProfile.setCancerStudyId(studyId); geneticProfile.setStableId("gbm_mrna"); geneticProfile.setGeneticAlterationType(GeneticAlterationType.MRNA_EXPRESSION); geneticProfile.setDatatype("CONTINUOUS"); geneticProfile.setProfileName("MRNA Data"); geneticProfile.setProfileDescription("mRNA Data"); DaoGeneticProfile.addGeneticProfile(geneticProfile); int newGeneticProfileId = DaoGeneticProfile.getGeneticProfileByStableId("gbm_mrna").getGeneticProfileId(); ProgressMonitor.setConsoleMode(true); // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/tabDelimitedData/data_expression2.txt"); ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null); int numLines = FileUtil.getNumLines(file); parser.importData(numLines); // check if expected warnings are given: ArrayList<String> warnings = ProgressMonitor.getWarnings(); int countDuplicatedRowWarnings = 0; int countInvalidEntrez = 0; int countSkippedWarnings = 0; for (String warning : warnings) { if (warning.contains("Duplicated row")) { countDuplicatedRowWarnings++; } if (warning.contains("invalid Entrez_Id")) { // invalid Entrez countInvalidEntrez++; } if (warning.contains("Record will be 
skipped")) { // Entrez is a valid number, but not found countSkippedWarnings++; } } // check that we have 11 warning messages: assertEquals(2, countDuplicatedRowWarnings); assertEquals(3, countInvalidEntrez); assertEquals(6, countSkippedWarnings); Set<Long> entrezGeneIds = DaoGeneticAlteration.getGenesIdInProfile(newGeneticProfileId); // data will be loaded for 5 of the genes assertEquals(5, entrezGeneIds.size()); HashMap<Long, HashMap<Integer, String>> dataMap = dao.getGeneticAlterationMap(newGeneticProfileId, entrezGeneIds); assertEquals(5, dataMap.entrySet().size()); int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId(); String value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997124); assertEquals("770", value); sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE3").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997124); assertEquals("220", value); // gene should also be loaded via its alias "NA" as defined above: sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE3").getInternalId(); value = dao.getGeneticAlteration(newGeneticProfileId, sampleId, 999997504); assertEquals("9940", value); }