/**
 * Builds the synthetic positive and negative regulatory regions.
 *
 * <p>A {@code CElegansRegRegionService} supplies the model regions and a
 * {@code PatserRegElementService} supplies the regulatory elements; both are
 * handed to the positive-region generator. Negative regions are generated
 * from {@code sequenceNtProbabilities} alone.
 *
 * <p>NOTE: the sequence length read from the first model region is currently
 * overwritten with the fixed value 25 (left in place "just for testing").
 *
 * @param numOfPositiveRegRegions   number of positive regions to generate
 * @param negExMultiplicationFactor number of negative regions to generate per
 *                                  positive region
 * @throws DataFormatException if the model service has no positive regulatory
 *                             regions
 */
public SyntheticRegRegionService(int numOfPositiveRegRegions, int negExMultiplicationFactor)
        throws DataFormatException {
    modelRegRegionService = new CElegansRegRegionService(0);

    final SystemVariables settings = SystemVariables.getInstance();
    final File pwmDir = new File(settings.getString("C.elegans.PWMs.dir"));
    // TODO create a proper tmp job dir for experiments
    final String tmpJobDir = settings.getString("temp.output.dir");
    modelRegElService = new PatserRegElementService(pwmDir, tmpJobDir);

    // Synthetic regions are modelled on real ones, so at least one model region is required.
    if (modelRegRegionService.getPositiveRegulatoryRegions().isEmpty()) {
        throw new DataFormatException(
                "Model regulatory regions are empty. Can not model synthetic regions based on an empty regions.");
    }
    regulatorySequenceLength =
            modelRegRegionService.getPositiveRegulatoryRegions().get(0).getSequence().length();

    // TODO: remove - just for testing
    regulatorySequenceLength = 25;

    final int numOfNegativeRegRegions = negExMultiplicationFactor * numOfPositiveRegRegions;
    negativeRegRegions = FeaturesTools.generateSimulatedRegulatoryRegions(
            numOfNegativeRegRegions,
            regulatorySequenceLength,
            NEGATIVE_REGION_PREFIX,
            sequenceNtProbabilities);

    positiveRegRegions = FeaturesTools.generateSimulatedRegulatoryRegionsWithPositionalPSSMs(
            numOfPositiveRegRegions,
            regulatorySequenceLength,
            sequenceNtProbabilities,
            settings.getPositivePatserCutOffScore(),
            POSITIVE_REGION_PREFIX,
            modelRegRegionService.getPositiveRegulatoryRegions(),
            modelRegElService);
}
@Override public void updateNumberOfNegativeRegRegions(int multiplicationFactor) { // TODO check for empty regions // TODO test this int deltaSeqNum = positiveRegRegions.size() * multiplicationFactor - negativeRegRegions.size(); if (deltaSeqNum < 0) { // desired number of negative sequences is less than is available now for (int i = 0; i > deltaSeqNum; i--) { negativeRegRegions.remove(0); } } else { // desired number is more than available -> need to generate extra ArrayList<Feature> extraSimulatedGenes = FeaturesTools.generateSimulatedRegulatoryRegions( deltaSeqNum, positiveRegRegions.get(0).getSequence().length(), NEGATIVE_REGION_PREFIX, sequenceNtProbabilities); negativeRegRegions.addAll(extraSimulatedGenes); } }