@Override
  public void updateNumberOfPositiveRegRegions(int num) throws DataFormatException {
    // TODO check for empty regions
    // TODO test this
    int deltaSeqNum = num - positiveRegRegions.size();

    if (deltaSeqNum < 0) { // desired number of positive sequences is less than is available now
      for (int i = 0; i > deltaSeqNum; i--) {
        positiveRegRegions.remove(0);
      }
    } else { // desired number is more than available -> need to generate extra
      ArrayList<Feature> extraSimulatedGenes =
          FeaturesTools.generateSimulatedRegulatoryRegionsWithPositionalPSSMs(
              deltaSeqNum,
              regulatorySequenceLength,
              sequenceNtProbabilities,
              SystemVariables.getInstance().getPositivePatserCutOffScore(),
              POSITIVE_REGION_PREFIX,
              modelRegRegionService.getPositiveRegulatoryRegions(),
              modelRegElService);

      // generateRegulatoryRegions(deltaSeqNum, POSITIVE_REGION_PREFIX);

      negativeRegRegions.addAll(extraSimulatedGenes);
    }
  }
  public SyntheticRegRegionService(int numOfPositiveRegRegions, int negExMultiplicationFactor)
      throws DataFormatException {

    modelRegRegionService = new CElegansRegRegionService(0);

    File pwmDir = new File(SystemVariables.getInstance().getString("C.elegans.PWMs.dir"));
    String tmpJobDir =
        SystemVariables.getInstance()
            .getString("temp.output.dir"); // TODO create a proper tmp job dir for experiments

    modelRegElService = new PatserRegElementService(pwmDir, tmpJobDir);

    if (modelRegRegionService.getPositiveRegulatoryRegions().size() > 0) {
      regulatorySequenceLength =
          modelRegRegionService.getPositiveRegulatoryRegions().get(0).getSequence().length();
    } else {
      throw new DataFormatException(
          "Model regulatory regions are empty. Can not model synthetic regions based on an empty regions.");
    }

    // TODO: remove - just for testing
    regulatorySequenceLength = 25;
    negativeRegRegions =
        FeaturesTools.generateSimulatedRegulatoryRegions(
            negExMultiplicationFactor * numOfPositiveRegRegions,
            regulatorySequenceLength,
            NEGATIVE_REGION_PREFIX,
            sequenceNtProbabilities);

    positiveRegRegions =
        FeaturesTools.generateSimulatedRegulatoryRegionsWithPositionalPSSMs(
            numOfPositiveRegRegions,
            regulatorySequenceLength,
            sequenceNtProbabilities,
            SystemVariables.getInstance().getPositivePatserCutOffScore(),
            POSITIVE_REGION_PREFIX,
            modelRegRegionService.getPositiveRegulatoryRegions(),
            modelRegElService);
    // this.generateRegulatoryRegions(numOfPositiveRegRegions, );

  }
  @Override
  public void updateNumberOfNegativeRegRegions(int multiplicationFactor) {
    // TODO check for empty regions
    // TODO test this
    int deltaSeqNum = positiveRegRegions.size() * multiplicationFactor - negativeRegRegions.size();

    if (deltaSeqNum < 0) { // desired number of negative sequences is less than is available now
      for (int i = 0; i > deltaSeqNum; i--) {
        negativeRegRegions.remove(0);
      }
    } else { // desired number is more than available -> need to generate extra
      ArrayList<Feature> extraSimulatedGenes =
          FeaturesTools.generateSimulatedRegulatoryRegions(
              deltaSeqNum,
              positiveRegRegions.get(0).getSequence().length(),
              NEGATIVE_REGION_PREFIX,
              sequenceNtProbabilities);

      negativeRegRegions.addAll(extraSimulatedGenes);
    }
  }
Пример #4
0
  public Explorer(
      RegulatoryRegionService regRegionService,
      RegulatoryElementService regElService,
      String tempIlpJobDirName)
      throws DataFormatException {
    this.regulatoryRegionService = regRegionService;
    this.regulatoryElementService = regElService;
    this.tempIlpJobDirName = tempIlpJobDirName;

    positiveCutOffScore = SystemVariables.getInstance().getPositivePatserCutOffScore();
    negativeCutOffScore = SystemVariables.getInstance().getNegativePatserCutOffScore();

    ///////  Statistics
    SystemVariables.getInstance().cleanStatistics(); // clean old;

    SystemVariables.getInstance()
        .setPosSeqNum(regulatoryRegionService.getPositiveRegulatoryRegions().size());
    SystemVariables.getInstance()
        .setNegSeqNum(regulatoryRegionService.getNegativeRegulatoryRegions().size());

    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* Number of positive sequences: "
                + regulatoryRegionService.getPositiveRegulatoryRegions().size());
    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* Number of negative sequences: "
                + regulatoryRegionService.getNegativeRegulatoryRegions().size());

    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* Patser cut-off score for positive sequences: " + positiveCutOffScore);
    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* Patser cut-off score for negative sequences: " + negativeCutOffScore);

    double[] posNtComposition =
        FeaturesTools.getNucleotideComposition(
            regulatoryRegionService.getPositiveRegulatoryRegions());
    posATcomposition = posNtComposition[0] + posNtComposition[3];
    posCGcomposition = posNtComposition[1] + posNtComposition[2];
    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* A:T and C:G composition of positive sequences: "
                + posATcomposition
                + " "
                + posCGcomposition);

    SystemVariables.getInstance().setPosATcomposition(posATcomposition);
    SystemVariables.getInstance().setPosCGcomposition(posCGcomposition);

    double[] negNtComposition =
        FeaturesTools.getNucleotideComposition(
            regulatoryRegionService.getNegativeRegulatoryRegions());
    negATcomposition = negNtComposition[0] + negNtComposition[3];
    negCGcomposition = negNtComposition[1] + negNtComposition[2];
    SystemVariables.getInstance()
        .appendToExperimentNotes(
            "\\n* A:T and C:G composition of negative sequences: "
                + negATcomposition
                + " "
                + negCGcomposition);

    SystemVariables.getInstance().setNegATcomposition(negATcomposition);
    SystemVariables.getInstance().setNegCGcomposition(negCGcomposition);
  }