/** * Checks whether CEL files and CDF files are from the same platform. * * @param normalCelFileName Affymetrix CEL file of the paired normal sample * @param tumorCelFileName Affymetrix CEL file of the tumor sample * @param cdfFileName Affymetrix library file (CDF file) for the platform on which the samples are * generated * @return <code>true</code> if the CEL files and CDF file are from the same platform, otherwise * <code>false</code> */ private boolean checkChipType( String normalCelFileName, String tumorCelFileName, String cdfFileName) { // Check whether the normal CEL file, tumor CEL file, and the CDF file are consistent, and set // the variable chipType. FusionCELData celNormal; FusionCELData celTumor; FusionCDFData cdf; celNormal = new FusionCELData(); celNormal.setFileName(normalCelFileName); if (celNormal.read() == false) { isSuccessfulOpenFile = false; return false; } celTumor = new FusionCELData(); celTumor.setFileName(tumorCelFileName); if (celTumor.read() == false) { isSuccessfulOpenFile = false; return false; } cdf = new FusionCDFData(); cdf.setFileName(cdfFileName); if (cdf.read() == false) { isSuccessfulOpenFile = false; return false; } if (!celNormal.getChipType().equals(cdf.getChipType())) { isSuccessfulOpenFile = false; return false; } if (!celTumor.getChipType().equals(cdf.getChipType())) { isSuccessfulOpenFile = false; return false; } chipType = celNormal.getChipType(); isSuccessfulOpenFile = true; return true; }
/** * Reads CEL files. <code>readCel</code> reads the intensity signals from the tumor sample and the * normal sample. The copy number at a particular locus is calculated using the ratio of the tumor * intensity at that locus and the correspoding normal intensity times 2. * * @param normalCelFileName Affymetrix CEL file of the paired normal sample * @param tumorCelFileName Affymetrix CEL file of the tumor sample * @param cdfFileName Affymetrix library file (CDF file) for the platform on which the samples are * generated */ private void readCel(String normalCelFileName, String tumorCelFileName, String cdfFileName) { FusionCELData celNormal; FusionCELData celTumor; FusionCDFData cdf; celNormal = new FusionCELData(); celNormal.setFileName(normalCelFileName); if (celNormal.read() == false) { System.out.println("Failed to read the CEL file."); return; } celTumor = new FusionCELData(); celTumor.setFileName(tumorCelFileName); if (celTumor.read() == false) { System.out.println("Failed to read the CEL file."); return; } cdf = new FusionCDFData(); cdf.setFileName(cdfFileName); if (cdf.read() == false) { System.out.println("Failed to read the CDF file."); return; } int nsets = cdf.getHeader().getNumProbeSets(); ProbeSetIntensityData[] probeSetDataNormal = new ProbeSetIntensityData[nsets]; ProbeSetIntensityData[] probeSetDataTumor = new ProbeSetIntensityData[nsets]; // SNP array structure: // 1. Each probeset contains several groups (the number of groups varies among different // probeset) // 2. Each group contains several cells (the number of cells also varies among different // groups) for (int iset = 0; iset < nsets; iset++) { String probeSetName = cdf.getProbeSetName(iset); // get the probeset name FusionCDFProbeSetInformation set = new FusionCDFProbeSetInformation(); cdf.getProbeSetInformation(iset, set); int ngroups = set.getNumGroups(); int numPmANormal = 0; // Pm : perfect match int numPmBNormal = 0; int numPmATumor = 0; int numPmBTumor = 0; int numMmANormal = 0; // Mm: Mis-Match int numMmBNormal = 0; int numMmATumor = 0; int numMmBTumor = 0; probeSetDataNormal[iset] = new ProbeSetIntensityData(); probeSetDataTumor[iset] = new ProbeSetIntensityData(); probeSetDataNormal[iset].probeSetType = set.getProbeSetType(); probeSetDataTumor[iset].probeSetType = set.getProbeSetType(); probeSetDataNormal[iset].probeSetID = cdf.getProbeSetName(iset) + ""; probeSetDataTumor[iset].probeSetID = cdf.getProbeSetName(iset) + ""; for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group = new FusionCDFProbeGroupInformation(); set.getGroup(igroup, group); int ncells = group.getNumCells(); for (int icell = 0; icell < ncells; icell++) { FusionCDFProbeInformation probe = new FusionCDFProbeInformation(); group.getCell(icell, probe); try { char pBase = probe.getPBase(); char tBase = probe.getTBase(); // only If the match is perfect, the intensity of this cell // contributes if ((((pBase + tBase) == 213) || ((pBase + tBase) == 202))) { // Perfect match is the match that with pBase:tBase = a:t or c:g if ((igroup % 2) == 0) { if (!celNormal.isOutlier(probe.getX(), probe.getY())) { probeSetDataNormal[iset].pmA += celNormal.getIntensity(probe.getX(), probe.getY()); numPmANormal++; } if (!celTumor.isOutlier(probe.getX(), probe.getY())) { probeSetDataTumor[iset].pmA += celTumor.getIntensity(probe.getX(), probe.getY()); numPmATumor++; } } else { if (!celNormal.isOutlier(probe.getX(), probe.getY())) { probeSetDataNormal[iset].pmB += celNormal.getIntensity(probe.getX(), probe.getY()); numPmBNormal++; } if (!celTumor.isOutlier(probe.getX(), probe.getY())) { probeSetDataTumor[iset].pmB += celTumor.getIntensity(probe.getX(), probe.getY()); numPmBTumor++; } } } } catch (Exception e) { } } } // using the average of intensity of the perfect match cells as the intensity for certain // probeset if (numPmANormal != 0) { probeSetDataNormal[iset].pmA = probeSetDataNormal[iset].pmA / numPmANormal; } if (numPmBNormal != 0) { probeSetDataNormal[iset].pmB = probeSetDataNormal[iset].pmB / numPmBNormal; } if (numPmATumor != 0) { probeSetDataTumor[iset].pmA = probeSetDataTumor[iset].pmA / numPmATumor; } if (numPmBTumor != 0) { probeSetDataTumor[iset].pmB = probeSetDataTumor[iset].pmB / numPmBTumor; } } try { genotypeCalling(probeSetDataNormal); // to decide whether certain probe is AB type } catch (Exception e) { } Arrays.sort(probeSetDataNormal); Arrays.sort(probeSetDataTumor); ArrayList<Integer> indexFoundList = new ArrayList<Integer>(probeSetDataNormal.length); ArrayList<Integer> indexFoundListSNP = new ArrayList<Integer>(probeSetDataNormal.length); for (int i = 0; i < numAnnotatedProbeSet; i++) { int indexFound = Arrays.binarySearch(probeSetDataNormal, new ProbeSetIntensityData(probeSetID[i])); if (indexFound >= 0) { isGenotypeAB[i] = probeSetDataNormal[indexFound].isGenotypeAB; probeSetType[i] = probeSetDataNormal[indexFound].probeSetType; copyNumber[i] = 2 * (probeSetDataTumor[indexFound].pmA + probeSetDataTumor[indexFound].pmB) / (probeSetDataNormal[indexFound].pmA + probeSetDataNormal[indexFound].pmB); intensityNormal[i] = probeSetDataNormal[indexFound].pmA + probeSetDataNormal[indexFound].pmB; intensityTumor[i] = probeSetDataTumor[indexFound].pmA + probeSetDataTumor[indexFound].pmB; // For 250k affymetrix chip, all the probes are SNP probes if (probeSetType[i] == FusionGeneChipProbeSetType.GenotypingProbeSetType) { alleleA[i] = probeSetDataTumor[indexFound].pmA / probeSetDataNormal[indexFound].pmA; alleleB[i] = probeSetDataTumor[indexFound].pmB / probeSetDataNormal[indexFound].pmB; if (alleleA[i] > 1E-10 && alleleA[i] <= 30 && alleleB[i] >= 1E-10 && alleleB[i] <= 30 && copyNumber[i] > 1E-10 && copyNumber[i] <= 30) { isOutlier[i] = false; indexFoundList.add(i); indexFoundListSNP.add(i); } } // For SNP6.0, there are half SNP probes and half CN probes if (probeSetType[i] == FusionGeneChipProbeSetType.CopyNumberProbeSetType) { if (copyNumber[i] > 1E-10 && copyNumber[i] <= 30) { isOutlier[i] = false; indexFoundList.add(i); } } } } /* Median filtering on the intensity data before calculating the copy numbers*/ // Filters f1 = new Filters(intensityNormal); // f1.medianFilter(3); // Filters f2 = new Filters(intensityTumor); // f2.medianFilter(3); // for (int j = 0; j < indexFoundList.size(); j ++) { // int i = indexFoundList.get(j); // copyNumber[i] = intensityTumor[i] / intensityNormal[i] * 2; // if (!(copyNumber[i] > 1E-10 && copyNumber[i] <=30)) { // copyNumber[i] = 2.0; // isOutlier[i] = true; // } // } globalNormalization(indexFoundList, indexFoundListSNP); }