예제 #1
0
  /**
   * Checks {@code StatAnalyser.getLocationalProbabilities(5, 10, 4)} against a hand-computed
   * probability matrix for six TFBS features spread over three factor names (TF1..TF3).
   *
   * <p>NOTE(review): the expected 2x3 matrix presumably has one row per section and one column per
   * TF name -- confirm against {@code OccurrencePerLocationMatrix}.
   */
  public void testGetLocationalProbabilities() {
    // {TF name, gene name} for each feature, in insertion order.
    final String[][] hits = {
      {"TF1", "geneA"},
      {"TF1", "geneC"},
      {"TF2", "geneC"},
      {"TF2", "geneB"},
      {"TF2", "geneD"},
      {"TF3", "geneD"},
    };
    // Fifth constructor argument for hits[k] (presumably a location -- TODO confirm).
    final int[] locations = {-3, -2, -5, -7, -9, -9};

    ArrayList<Feature> features = new ArrayList<Feature>();
    for (int k = 0; k < hits.length; k++) {
      features.add(
          new Feature(Feature.TYPE_TFBS, hits[k][0], hits[k][1], "D", locations[k], 0, 5));
    }

    fixture = new StatAnalyser(features);
    OccurrencePerLocationMatrix matrix = fixture.getLocationalProbabilities(5, 10, 4);
    // matrix.printData() can be called here when debugging a mismatch.
    double[][] actual = matrix.getProbabilityMatrix();

    double[][] expected = {{0.5, 0.0, 0.0}, {0.0, 0.75, 0.25}};

    // Cell-by-cell exact comparison via Double.compare (no tolerance).
    for (int row = 0; row < expected.length; row++) {
      double[] expectedRow = expected[row];
      for (int col = 0; col < expectedRow.length; col++) {
        boolean sameValue = Double.compare(expectedRow[col], actual[row][col]) == 0;
        assertTrue(sameValue);
      }
    }
  }
  /**
   * Generates one synthetic "positive example" sequence by planting regulatory-element instances
   * into a uniformly random background, section by section.
   *
   * <p>The locational probability matrix is obtained from {@code stat} using a fixed section length
   * of 50. For every row of that matrix, the elements with a probability greater than zero are
   * visited in random order, each at most once. A random event with the element's probability
   * decides whether it is planted. A planted element is preceded by a random-length stretch of
   * background sequence; the rest of the section is then filled with background sequence.
   *
   * <p>A planted element may run past the end of its section; the following section then simply
   * continues from wherever the sequence currently ends.
   *
   * <p>If the sections built from the matrix do not add up to {@code sequenceLength}, the remainder
   * is padded with background sequence, so the result always has exactly {@code sequenceLength}
   * characters (previously this case failed with a {@code StringIndexOutOfBoundsException}).
   *
   * @param stat analyser that supplies the locational probability matrix
   * @param regElPWMs PWMs that are searched by element name via {@code getPwmByName}
   * @param sequenceLength length of the returned sequence; also forwarded to {@code
   *     getLocationalProbabilities}
   * @param numberOfSeq forwarded to {@code getLocationalProbabilities}
   * @return a sequence of exactly {@code sequenceLength} characters
   * @throws DataFormatException declared by this method; NOTE(review): presumably propagated from
   *     one of the helper calls -- confirm which
   */
  private String generatePosExSequence(
      StatAnalyser stat,
      ArrayList<RegulatoryElementPWM> regElPWMs,
      int sequenceLength,
      int numberOfSeq)
      throws DataFormatException {
    final int lengthOfSection = 50;
    Random rand = new Random();
    int plantCtr = 0;
    StringBuilder sequence = new StringBuilder();

    OccurrencePerLocationMatrix probMtxObj =
        stat.getLocationalProbabilities(lengthOfSection, sequenceLength, numberOfSeq);
    double[][] pm = probMtxObj.getProbabilityMatrix();

    for (int i = 0; i < pm.length; i++) {
      // Index (exclusive) up to which the sequence should be built after this section.
      final int sectionEnd = (i + 1) * lengthOfSection;

      // Candidates for this section: parallel lists of element names and their probabilities.
      ArrayList<String> regElementNames = new ArrayList<String>();
      ArrayList<Double> probabilities = new ArrayList<Double>();
      for (int j = 0; j < pm[i].length; j++) {
        if (Double.compare(pm[i][j], 0.0) > 0) {
          probabilities.add(pm[i][j]);
          regElementNames.add(probMtxObj.getNamesOfTfbs().get(j));
        }
      }

      // Try each candidate once, in random order, while the section still has room.
      while (!probabilities.isEmpty() && sequence.length() < sectionEnd) {
        int randomProbabilPosition = rand.nextInt(probabilities.size());
        double currProb = probabilities.get(randomProbabilPosition);

        if (DataModeller.generateRandomEvent(currProb)) {
          int[][] pwm = getPwmByName(regElementNames.get(randomProbabilPosition), regElPWMs);
          String regElementSeq = DataModeller.getPssmSequence(pwm);
          // Random offset inside the remaining part of the section; the bound is > 0 because of
          // the loop condition above.
          int randomPwmPosition = rand.nextInt(sectionEnd - sequence.length());
          if (randomPwmPosition > 0) {
            sequence.append(
                DataModeller.getRandomSequence(randomPwmPosition, 0.25, 0.25, 0.25, 0.25));
          }
          sequence.append(regElementSeq);
          plantCtr++;
        }

        // The int argument selects remove(int index), keeping both lists aligned.
        regElementNames.remove(randomProbabilPosition);
        probabilities.remove(randomProbabilPosition);
      }

      // Fill what is left of the section with background sequence.
      if (sequence.length() < sectionEnd) {
        sequence.append(
            DataModeller.getRandomSequence(
                sectionEnd - sequence.length(), 0.25, 0.25, 0.25, 0.25));
      }
    }

    // The sections may not cover sequenceLength; pad so the truncation below cannot fail.
    if (sequence.length() < sequenceLength) {
      sequence.append(
          DataModeller.getRandomSequence(
              sequenceLength - sequence.length(), 0.25, 0.25, 0.25, 0.25));
    }
    String result = sequence.substring(0, sequenceLength);

    // TODO print
    System.out.println("=== Number of PWMs planted in this sequence: " + plantCtr);

    return result;
  }