public void testGetLocationalProbabilities() { ArrayList<Feature> collection = new ArrayList<Feature>(); collection.add(new Feature(Feature.TYPE_TFBS, "TF1", "geneA", "D", -3, 0, 5)); collection.add(new Feature(Feature.TYPE_TFBS, "TF1", "geneC", "D", -2, 0, 5)); collection.add(new Feature(Feature.TYPE_TFBS, "TF2", "geneC", "D", -5, 0, 5)); collection.add(new Feature(Feature.TYPE_TFBS, "TF2", "geneB", "D", -7, 0, 5)); collection.add(new Feature(Feature.TYPE_TFBS, "TF2", "geneD", "D", -9, 0, 5)); collection.add(new Feature(Feature.TYPE_TFBS, "TF3", "geneD", "D", -9, 0, 5)); fixture = new StatAnalyser(collection); OccurrencePerLocationMatrix pm = fixture.getLocationalProbabilities(5, 10, 4); double[][] expected = {{0.5, 0.0, 0.0}, {0.0, 0.75, 0.25}}; // pm.printData(); double[][] actual = pm.getProbabilityMatrix(); for (int i = 0; i < expected.length; i++) { for (int j = 0; j < expected[0].length; j++) { assertTrue(Double.compare(expected[i][j], actual[i][j]) == 0); } } }
private String generatePosExSequence( StatAnalyser stat, ArrayList<RegulatoryElementPWM> regElPWMs, int sequenceLength, int numberOfSeq) throws DataFormatException { String sequence = ""; final int lengthOfSection = 50; Random rand = new Random(); int plantCtr = 0; // Generate probabilities based on C.elegans data /* GeneService geneService = GeneServiceFactory.getService(GeneService.C_ELEGANS_DATA_SERVICE); ArrayList<Feature> cElegansGenes = geneService.getGenes(); if (cElegansGenes == null){ throw new DataFormatException("SyntheticGeneService: Unable to retrieve C.elegans genes to generate positive sequences."); } TfbsService tfbsService = TfbsServiceFactory.getService(TfbsService.PATSER_SERVICE); ArrayList<Feature> tfbsHits = tfbsService.getTfbsHits(cElegansGenes); ArrayList<TfbsPWM> tfbsPWMs = tfbsService.getTfbsPWMs(); StatAnalyser stat = new StatAnalyser(tfbsHits); */ // OccurrencePerLocationMatrix probMtxObj = stat.getLocationalProbabilities(lengthOfSection, // cElegansGenes.get(0).getUpstreamSequence().length(), cElegansGenes.size()); OccurrencePerLocationMatrix probMtxObj = stat.getLocationalProbabilities(lengthOfSection, sequenceLength, numberOfSeq); double[][] pm = probMtxObj.getProbabilityMatrix(); // int currPos = 0; //position till which the sequence has been built for (int i = 0; i < pm.length; i++) { ArrayList<String> regElementNames = new ArrayList<String>(); ArrayList<Double> probabilities = new ArrayList<Double>(); for (int j = 0; j < pm[i].length; j++) { if (Double.compare(pm[i][j], 0.0) > 0) { probabilities.add(pm[i][j]); regElementNames.add(probMtxObj.getNamesOfTfbs().get(j)); } } while (!probabilities.isEmpty()) { if (sequence.length() >= (i + 1) * lengthOfSection) { break; } int randomProbabilPosition = rand.nextInt(probabilities.size()); double currProb = probabilities.get(randomProbabilPosition); if (DataModeller.generateRandomEvent(currProb)) { int[][] pwm = getPwmByName(regElementNames.get(randomProbabilPosition), regElPWMs); String regElementSeq = DataModeller.getPssmSequence(pwm); int randomPwmPosition = rand.nextInt((i + 1) * lengthOfSection - sequence.length()); String randSeq = ""; if (randomPwmPosition > 0) { randSeq = DataModeller.getRandomSequence(randomPwmPosition, 0.25, 0.25, 0.25, 0.25); } sequence = sequence.concat(randSeq); sequence = sequence.concat(regElementSeq); plantCtr++; } regElementNames.remove(randomProbabilPosition); probabilities.remove(randomProbabilPosition); } // fill what's left in a section with a random sequence if (sequence.length() < (i + 1) * lengthOfSection) { int len = (i + 1) * lengthOfSection - sequence.length(); String seq = DataModeller.getRandomSequence(len, 0.25, 0.25, 0.25, 0.25); sequence = sequence.concat(seq); } } sequence = sequence.substring(0, sequenceLength); // TODO print System.out.println("=== Number of PWMs planted in this sequence: " + plantCtr); return sequence; }