public String induceRulesWithTestSet(double testSetPercent) throws DataFormatException { SystemVariables.getInstance() .appendToExperimentNotes("\\n* Total number of PSSM matches in positive sequences: "); // System.out.println("Total number of PSSM matches in positive sequences: "); ArrayList<Feature> posTrainingSet = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getPositiveRegulatoryRegions(), positiveCutOffScore, posATcomposition, posCGcomposition); SystemVariables.getInstance() .appendToExperimentNotes("\\n* Total number of PSSM matches in negative sequences: "); // System.out.println("Total number of PSSM matches in negative sequences: "); ArrayList<Feature> negTrainingSet = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getNegativeRegulatoryRegions(), negativeCutOffScore, negATcomposition, negCGcomposition); IlpService formatter = new IlpService(tempIlpJobDirName); formatter.createIlpFilesWithTestSet( regulatoryRegionService.getPositiveRegulatoryRegions(), regulatoryRegionService.getNegativeRegulatoryRegions(), posTrainingSet, negTrainingSet, testSetPercent); String ilpTheory = formatter.runILP(); return ilpTheory; }
public IlpService createIlpFiles() throws DataFormatException { ArrayList<Feature> posRegElements = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getPositiveRegulatoryRegions(), positiveCutOffScore, posATcomposition, posCGcomposition); if (posRegElements == null) { posRegElements = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getPositiveRegulatoryRegions(), regulatoryRegionService.getNegativeRegulatoryRegions(), positiveCutOffScore); } // ***** Add statistics addMotifMatchingStatisticsToNotes("positive"); SystemVariables.getInstance().setPosSeqRelElMatchesNum(posRegElements.size()); ArrayList<Feature> negRegElements = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getNegativeRegulatoryRegions(), negativeCutOffScore, negATcomposition, negCGcomposition); if (negRegElements == null) { negRegElements = regulatoryElementService.getRegulatoryElements( regulatoryRegionService.getNegativeRegulatoryRegions(), null, negativeCutOffScore); } // ***** Add statistics addMotifMatchingStatisticsToNotes("negative"); SystemVariables.getInstance().setNegSeqRelElMatchesNum(negRegElements.size()); /* System.out.println("\n Number of matches of pairs of matrices in jurkat and erythroid sequences"); String[] pwmPair = new String[]{"MA0055.1", "MA0152.1"}; System.out.println(pwmPair[0] + " and " + pwmPair[1]+" :\t" + FeaturesTools.numSequencesContainingAll(pwmPair, posRegElements) + "\t"+ FeaturesTools.numSequencesContainingAll(pwmPair, negRegElements)); System.out.println("\nDistances between Gata and Ebox for positive sequences:"); FeaturesTools.printRegElDistances("Gata", "Ebox", posRegElements); */ IlpService ilpService = new IlpService(tempIlpJobDirName); ilpService.createIlpFiles( regulatoryRegionService.getPositiveRegulatoryRegions(), regulatoryRegionService.getNegativeRegulatoryRegions(), posRegElements, negRegElements); return ilpService; }
/* Note: unsound method * Based on the statistics, collected when extracting motif matches by RegElementService, * collects text note of motif matching statistics, which will be added to ILP file. * This method has to be called immediately after RegElementService.getRegulatoryElements call, * since every such call overrides the pssmMatchStatistics in RegElementService * * @param sequenceKind - can only be "positive" or "negative", depending on the kind of sequences */ private void addMotifMatchingStatisticsToNotes(String sequenceKind) throws DataFormatException { Hashtable<String, Double> pssmMatchStats = regulatoryElementService.getPssmMatchingStatistics(); if (pssmMatchStats != null && !pssmMatchStats.isEmpty()) { SystemVariables.getInstance() .appendToExperimentNotes( "\\n* Number of PSSM matches in " + sequenceKind + " sequences: "); int numberOfRegions = 0; if ("positive".equals(sequenceKind)) { numberOfRegions = regulatoryRegionService.getPositiveRegulatoryRegions().size(); } else { numberOfRegions = regulatoryRegionService.getNegativeRegulatoryRegions().size(); } Enumeration<String> pssmMatchNames = pssmMatchStats.keys(); int totalNumMatches = 0; String r_pssmNames = ""; String r_matches = ""; while (pssmMatchNames.hasMoreElements()) { String pssmName = pssmMatchNames.nextElement(); double pssmStat = pssmMatchStats.get(pssmName); int numMatches = (int) (pssmStat * numberOfRegions); totalNumMatches = totalNumMatches + numMatches; r_pssmNames = r_pssmNames + "\'" + pssmName + "\', "; r_matches = r_matches + numMatches + ", "; SystemVariables.getInstance() .appendToExperimentNotes("\\n\\t" + pssmName + "\\t" + numMatches); } SystemVariables.getInstance() .appendToExperimentNotes("\\n Total number of matches: " + totalNumMatches); // System.out.println("In positive sequences: "); // System.out.println(r_pssmNames); // System.out.println(r_matches); } }