Ejemplo n.º 1
0
  /**
   * Extracts the PSSM matches for the positive and the negative regulatory regions, hands them to
   * a new {@code IlpService} via {@code createIlpFilesWithTestSet}, and runs ILP.
   *
   * <p>A header line is appended to the experiment notes before each of the two extractions.
   *
   * @param testSetPercent forwarded unchanged to {@code IlpService.createIlpFilesWithTestSet};
   *     presumably the share of the data held out for testing (TODO confirm whether a fraction or
   *     a percentage is expected)
   * @return the value returned by {@code IlpService.runILP()}
   * @throws DataFormatException if one of the called services reports it
   */
  public String induceRulesWithTestSet(double testSetPercent) throws DataFormatException {

    // Positive sequences: the note header is written before the extraction runs.
    SystemVariables.getInstance()
        .appendToExperimentNotes("\\n* Total number of PSSM matches in positive sequences: ");
    ArrayList<Feature> positiveMatches =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getPositiveRegulatoryRegions(),
            positiveCutOffScore,
            posATcomposition,
            posCGcomposition);

    // Negative sequences: same sequence of steps with the negative thresholds.
    SystemVariables.getInstance()
        .appendToExperimentNotes("\\n* Total number of PSSM matches in negative sequences: ");
    ArrayList<Feature> negativeMatches =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getNegativeRegulatoryRegions(),
            negativeCutOffScore,
            negATcomposition,
            negCGcomposition);

    // Write the ILP input files into the temporary job directory, then run ILP on them.
    IlpService ilpService = new IlpService(tempIlpJobDirName);
    ilpService.createIlpFilesWithTestSet(
        regulatoryRegionService.getPositiveRegulatoryRegions(),
        regulatoryRegionService.getNegativeRegulatoryRegions(),
        positiveMatches,
        negativeMatches,
        testSetPercent);

    return ilpService.runILP();
  }
Ejemplo n.º 2
0
  /**
   * Extracts the regulatory elements of the positive and negative regulatory regions, records the
   * matching statistics and element counts, and creates the ILP files.
   *
   * <p>For each kind of sequence the steps are: extract the elements, append the motif matching
   * statistics to the experiment notes, and store the number of extracted elements in {@code
   * SystemVariables}. The statistics call directly follows the extraction it reports on.
   *
   * @return the {@code IlpService} (created for {@code tempIlpJobDirName}) on which {@code
   *     createIlpFiles} was invoked
   * @throws DataFormatException if one of the called services reports it
   */
  public IlpService createIlpFiles() throws DataFormatException {

    ArrayList<Feature> positiveElements = matchPositiveRegulatoryElements();
    // Keep this right after the extraction: the statistics are read back from the element service.
    addMotifMatchingStatisticsToNotes("positive");
    SystemVariables.getInstance().setPosSeqRelElMatchesNum(positiveElements.size());

    ArrayList<Feature> negativeElements = matchNegativeRegulatoryElements();
    // Same ordering constraint as for the positive sequences.
    addMotifMatchingStatisticsToNotes("negative");
    SystemVariables.getInstance().setNegSeqRelElMatchesNum(negativeElements.size());

    IlpService ilpService = new IlpService(tempIlpJobDirName);
    ilpService.createIlpFiles(
        regulatoryRegionService.getPositiveRegulatoryRegions(),
        regulatoryRegionService.getNegativeRegulatoryRegions(),
        positiveElements,
        negativeElements);

    return ilpService;
  }

  /**
   * Extracts the elements of the positive regions using the positive cut-off and AT/CG composition
   * values; when that yields {@code null}, retries with the three-argument overload, passing the
   * negative regions as second argument.
   */
  private ArrayList<Feature> matchPositiveRegulatoryElements() throws DataFormatException {
    ArrayList<Feature> matches =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getPositiveRegulatoryRegions(),
            positiveCutOffScore,
            posATcomposition,
            posCGcomposition);
    if (matches != null) {
      return matches;
    }
    return regulatoryElementService.getRegulatoryElements(
        regulatoryRegionService.getPositiveRegulatoryRegions(),
        regulatoryRegionService.getNegativeRegulatoryRegions(),
        positiveCutOffScore);
  }

  /**
   * Extracts the elements of the negative regions using the negative cut-off and AT/CG composition
   * values; when that yields {@code null}, retries with the three-argument overload, passing {@code
   * null} as second argument.
   */
  private ArrayList<Feature> matchNegativeRegulatoryElements() throws DataFormatException {
    ArrayList<Feature> matches =
        regulatoryElementService.getRegulatoryElements(
            regulatoryRegionService.getNegativeRegulatoryRegions(),
            negativeCutOffScore,
            negATcomposition,
            negCGcomposition);
    if (matches != null) {
      return matches;
    }
    return regulatoryElementService.getRegulatoryElements(
        regulatoryRegionService.getNegativeRegulatoryRegions(), null, negativeCutOffScore);
  }
Ejemplo n.º 3
0
  /**
   * Appends the number of matches of every PSSM, followed by their total, to the experiment notes.
   *
   * <p>Note: unsound method. The statistics are read from {@code regulatoryElementService}, and
   * every {@code getRegulatoryElements} call overrides them, so this method has to be called
   * immediately after the {@code getRegulatoryElements} call whose matches are to be reported.
   *
   * <p>Nothing is appended when the service has no statistics (null or empty table).
   *
   * <p>Each count is obtained by multiplying the stored statistic by the number of regions of the
   * given kind and rounding to the nearest integer. Rounding, rather than truncating, keeps a
   * product such as {@code 0.29 * 100} (28.999999999999996 in floating point) from being reported
   * as 28.
   *
   * @param sequenceKind expected to be "positive" or "negative"; "positive" selects the positive
   *     regulatory regions, any other value selects the negative ones
   * @throws DataFormatException if one of the called services reports it
   */
  private void addMotifMatchingStatisticsToNotes(String sequenceKind) throws DataFormatException {

    Hashtable<String, Double> pssmMatchStats = regulatoryElementService.getPssmMatchingStatistics();
    if (pssmMatchStats == null || pssmMatchStats.isEmpty()) {
      return;
    }

    SystemVariables notes = SystemVariables.getInstance();
    notes.appendToExperimentNotes(
        "\\n* Number of PSSM matches in " + sequenceKind + " sequences: ");

    int numberOfRegions =
        "positive".equals(sequenceKind)
            ? regulatoryRegionService.getPositiveRegulatoryRegions().size()
            : regulatoryRegionService.getNegativeRegulatoryRegions().size();

    int totalNumMatches = 0;
    Enumeration<String> pssmMatchNames = pssmMatchStats.keys();
    while (pssmMatchNames.hasMoreElements()) {
      String pssmName = pssmMatchNames.nextElement();
      double pssmStat = pssmMatchStats.get(pssmName);
      // Round to nearest: a plain (int) cast truncates toward zero and under-counts whenever the
      // floating-point product lands just below the intended integer.
      int numMatches = (int) Math.round(pssmStat * numberOfRegions);
      totalNumMatches += numMatches;

      notes.appendToExperimentNotes("\\n\\t" + pssmName + "\\t" + numMatches);
    }
    notes.appendToExperimentNotes("\\n Total number of matches: " + totalNumMatches);
  }