private String generatePosExSequence( StatAnalyser stat, ArrayList<RegulatoryElementPWM> regElPWMs, int sequenceLength, int numberOfSeq) throws DataFormatException { String sequence = ""; final int lengthOfSection = 50; Random rand = new Random(); int plantCtr = 0; // Generate probabilities based on C.elegans data /* GeneService geneService = GeneServiceFactory.getService(GeneService.C_ELEGANS_DATA_SERVICE); ArrayList<Feature> cElegansGenes = geneService.getGenes(); if (cElegansGenes == null){ throw new DataFormatException("SyntheticGeneService: Unable to retrieve C.elegans genes to generate positive sequences."); } TfbsService tfbsService = TfbsServiceFactory.getService(TfbsService.PATSER_SERVICE); ArrayList<Feature> tfbsHits = tfbsService.getTfbsHits(cElegansGenes); ArrayList<TfbsPWM> tfbsPWMs = tfbsService.getTfbsPWMs(); StatAnalyser stat = new StatAnalyser(tfbsHits); */ // OccurrencePerLocationMatrix probMtxObj = stat.getLocationalProbabilities(lengthOfSection, // cElegansGenes.get(0).getUpstreamSequence().length(), cElegansGenes.size()); OccurrencePerLocationMatrix probMtxObj = stat.getLocationalProbabilities(lengthOfSection, sequenceLength, numberOfSeq); double[][] pm = probMtxObj.getProbabilityMatrix(); // int currPos = 0; //position till which the sequence has been built for (int i = 0; i < pm.length; i++) { ArrayList<String> regElementNames = new ArrayList<String>(); ArrayList<Double> probabilities = new ArrayList<Double>(); for (int j = 0; j < pm[i].length; j++) { if (Double.compare(pm[i][j], 0.0) > 0) { probabilities.add(pm[i][j]); regElementNames.add(probMtxObj.getNamesOfTfbs().get(j)); } } while (!probabilities.isEmpty()) { if (sequence.length() >= (i + 1) * lengthOfSection) { break; } int randomProbabilPosition = rand.nextInt(probabilities.size()); double currProb = probabilities.get(randomProbabilPosition); if (DataModeller.generateRandomEvent(currProb)) { int[][] pwm = getPwmByName(regElementNames.get(randomProbabilPosition), regElPWMs); String regElementSeq = DataModeller.getPssmSequence(pwm); int randomPwmPosition = rand.nextInt((i + 1) * lengthOfSection - sequence.length()); String randSeq = ""; if (randomPwmPosition > 0) { randSeq = DataModeller.getRandomSequence(randomPwmPosition, 0.25, 0.25, 0.25, 0.25); } sequence = sequence.concat(randSeq); sequence = sequence.concat(regElementSeq); plantCtr++; } regElementNames.remove(randomProbabilPosition); probabilities.remove(randomProbabilPosition); } // fill what's left in a section with a random sequence if (sequence.length() < (i + 1) * lengthOfSection) { int len = (i + 1) * lengthOfSection - sequence.length(); String seq = DataModeller.getRandomSequence(len, 0.25, 0.25, 0.25, 0.25); sequence = sequence.concat(seq); } } sequence = sequence.substring(0, sequenceLength); // TODO print System.out.println("=== Number of PWMs planted in this sequence: " + plantCtr); return sequence; }