Beispiel #1
0
  public String induceRulesWithTestSet(double testSetPercent) throws DataFormatException {

    SystemVariables.getInstance()
        .appendToExperimentNotes("\\n* Total number of PSSM matches in positive sequences: ");
    // System.out.println("Total number of PSSM matches in positive sequences: ");
    ArrayList<Feature> posTrainingSet =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getPositiveRegulatoryRegions(),
            positiveCutOffScore,
            posATcomposition,
            posCGcomposition);

    SystemVariables.getInstance()
        .appendToExperimentNotes("\\n* Total number of PSSM matches in negative sequences: ");
    // System.out.println("Total number of PSSM matches in negative sequences: ");
    ArrayList<Feature> negTrainingSet =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getNegativeRegulatoryRegions(),
            negativeCutOffScore,
            negATcomposition,
            negCGcomposition);

    IlpService formatter = new IlpService(tempIlpJobDirName);
    formatter.createIlpFilesWithTestSet(
        regulatoryRegionService.getPositiveRegulatoryRegions(),
        regulatoryRegionService.getNegativeRegulatoryRegions(),
        posTrainingSet,
        negTrainingSet,
        testSetPercent);

    String ilpTheory = formatter.runILP();

    return ilpTheory;
  }
Beispiel #2
0
  public IlpService createIlpFiles() throws DataFormatException {

    ArrayList<Feature> posRegElements =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getPositiveRegulatoryRegions(),
            positiveCutOffScore,
            posATcomposition,
            posCGcomposition);

    if (posRegElements == null) {
      posRegElements =
          regulatoryElementService.getRegulatoryElements(
              regulatoryRegionService.getPositiveRegulatoryRegions(),
              regulatoryRegionService.getNegativeRegulatoryRegions(),
              positiveCutOffScore);
    }

    // ***** Add statistics
    addMotifMatchingStatisticsToNotes("positive");
    SystemVariables.getInstance().setPosSeqRelElMatchesNum(posRegElements.size());

    ArrayList<Feature> negRegElements =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getNegativeRegulatoryRegions(),
            negativeCutOffScore,
            negATcomposition,
            negCGcomposition);

    if (negRegElements == null) {
      negRegElements =
          regulatoryElementService.getRegulatoryElements(
              regulatoryRegionService.getNegativeRegulatoryRegions(), null, negativeCutOffScore);
    }

    // ***** Add statistics
    addMotifMatchingStatisticsToNotes("negative");
    SystemVariables.getInstance().setNegSeqRelElMatchesNum(negRegElements.size());

    /*
    	System.out.println("\n Number of matches of pairs of matrices in jurkat and erythroid sequences");

    	String[] pwmPair = new String[]{"MA0055.1", "MA0152.1"};
    	System.out.println(pwmPair[0] + " and " + pwmPair[1]+" :\t" +
    			FeaturesTools.numSequencesContainingAll(pwmPair, posRegElements) + "\t"+
    			FeaturesTools.numSequencesContainingAll(pwmPair, negRegElements));

    	System.out.println("\nDistances between Gata and Ebox for positive sequences:");
    	FeaturesTools.printRegElDistances("Gata", "Ebox", posRegElements);
    */

    IlpService ilpService = new IlpService(tempIlpJobDirName);
    ilpService.createIlpFiles(
        regulatoryRegionService.getPositiveRegulatoryRegions(),
        regulatoryRegionService.getNegativeRegulatoryRegions(),
        posRegElements,
        negRegElements);

    return ilpService;
  }