예제 #1
0
  /**
   * Exports a dataset as a tab-separated CGH input file.
   *
   * <p>The file starts with a header line made of {@code #ID}, {@code Chr}, {@code Start},
   * {@code End} and the sample names, all tab-separated. It is followed by a table whose row names
   * come from the {@code ProbeName} feature column and whose columns are {@code loc.chromosome},
   * {@code loc.start}, {@code loc.end} and then one column of matrix values per sample.
   *
   * @param dataset dataset to export; {@code load()} is invoked on it before any data is read
   * @param outFileName path of the file to write
   * @throws IOException declared for the dataset loading and file writing calls
   * @throws InvalidIndexException declared for the column lookups performed on the data
   */
  public static void saveAsCGHInput(Dataset dataset, String outFileName)
      throws IOException, InvalidIndexException {
    dataset.load();

    List<String> sampleNames = dataset.getSampleNames();

    String header = "#ID\tChr\tStart\tEnd\t" + ListUtils.toString(sampleNames, "\t") + "\n";

    DataFrame featureDataFrame = dataset.getFeatureData().getDataFrame();

    // Probe identifiers become the row names; genomic location columns come first.
    DataFrame dataFrame = new DataFrame();
    dataFrame.setRowNames(featureDataFrame.getColumn("ProbeName"));
    dataFrame.addColumn(featureDataFrame.getColumn("loc.chromosome"));
    dataFrame.addColumn(featureDataFrame.getColumn("loc.start"));
    dataFrame.addColumn(featureDataFrame.getColumn("loc.end"));

    // One value column per sample, in sample-name order.
    for (int i = 0; i < sampleNames.size(); i++) {
      dataFrame.addColumn(ArrayUtils.toStringList(dataset.getDoubleMatrix().getColumn(i)));
    }

    System.out.println("---> writting " + outFileName);
    // NOTE(review): toString(true, false) presumably prints row names but no column names, since
    // the header is written by hand above - confirm against DataFrame.
    IOUtils.write(new File(outFileName), header + dataFrame.toString(true, false));
  }
예제 #2
0
  /**
   * Runs a limma differential-expression analysis on the selected class values and registers the
   * results as output items.
   *
   * <p>The R script is chosen from the number of entries in {@code classValues}: more than two,
   * exactly two, or exactly one. With no class values the run is aborted and nothing else is
   * executed.
   *
   * <p>This method declares no checked exceptions: anything thrown while computing or writing the
   * results is caught, printed, and reported through {@code abort(...)}.
   */
  public void executeLimma() {

    int nbClasses = classValues.size();
    String script;
    if (nbClasses > 2) {
      script = "/bin/diffexp/limma_multiclasses.r";
    } else if (nbClasses == 2) {
      script = "/bin/diffexp/limma_twoclasses.r";
    } else if (nbClasses == 1) {
      script = "/bin/diffexp/limma_oneclass.r";
    } else {
      abort(
          "testmismatched_executelimma_classcomparison",
          "test " + test + " not supported for " + nbClasses + "-class test",
          "test " + test + " not supported for " + nbClasses + "-class test",
          "test " + test + " not supported for " + nbClasses + "-class test");
      // No script was selected: stop here instead of dereferencing a missing Limma instance
      // should abort() ever return.
      return;
    }
    Limma limma = new Limma(babelomicsHomePath + script);

    // System.out.println("dataset = " + dataset.toString());
    System.out.println("class name = " + className);

    limma.setInputFilename(dataset.getDatasetFile().getAbsolutePath());
    limma.setClasses(dataset.getVariables().getByName(className).getValues());
    limma.setContrast(classValues);

    try {
      Dataset subDataset = dataset.getSubDataset(className, classValues);

      // apply the test, then the configured multiple-test correction
      TestResultList<LimmaTestResult> res = limma.compute();
      DiffExpressionUtils.multipleTestCorrection(res, correction);

      // create output file: columns ordered by class value, rows ordered by statistic
      int[] columnOrder =
          ListUtils.order(subDataset.getVariables().getByName(className).getValues());
      int[] rowOrder = ListUtils.order(ArrayUtils.toList(res.getStatistics()), true);

      DataFrame dataFrame = new DataFrame(subDataset.getFeatureNames().size(), 0);
      dataFrame.addColumn(
          "statistic",
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getStatistics()), rowOrder)));
      dataFrame.addColumn(
          "p-value",
          ListUtils.toStringList(ListUtils.ordered(ArrayUtils.toList(res.getPValues()), rowOrder)));
      dataFrame.addColumn(
          "adj. p-value",
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getAdjPValues()), rowOrder)));
      dataFrame.setRowNames(ListUtils.ordered(subDataset.getFeatureNames(), rowOrder));

      FeatureData featureData = new FeatureData(dataFrame);
      File file = new File(outdir + "/" + test + ".txt");
      featureData.save(file);
      // Only advertise the file if it was actually written.
      if (file.exists()) {
        result.addOutputItem(
            new Item(
                test + "file",
                file.getName(),
                "Limma output file",
                TYPE.FILE,
                new ArrayList<String>(2),
                new HashMap<String, String>(2),
                "Limma output files"));
      }

      // getting significative genes
      DiffExpressionUtils.addSignificativeResults(
          subDataset,
          test,
          "statistic",
          res.getStatistics(),
          "adj. p-value",
          res.getAdjPValues(),
          "p-value",
          res.getPValues(),
          null,
          null,
          null,
          null,
          className,
          columnOrder,
          pValue,
          maxDisplay,
          this);
      DiffExpressionUtils.createFatiScanRedirection(dataFrame, test, "statistic", result, outdir);
    } catch (Exception e) {
      e.printStackTrace();
      abort(
          "exception_executelimma_classcomparison",
          "error running limma",
          "error running limma: " + e.toString(),
          "error running limma: " + e.toString());
    }
  }
예제 #3
0
  /**
   * Writes the fold-change results of a test to {@code outdir} and registers the output items.
   *
   * <p>Files written, all prefixed with {@code test}:
   *
   * <ul>
   *   <li>{@code _foldchange.txt}: every feature with its fold-change value, in the order given by
   *       {@code ListUtils.order(res, true)}
   *   <li>{@code _foldchange_significative_dataset.txt}: matrix rows of the features whose
   *       absolute fold-change is at least {@code foldChangeValue}
   *   <li>{@code _foldchange_significative_table.txt}: those features with their values
   *   <li>{@code _foldchange_significative_table_up.txt} and {@code
   *       _foldchange_significative_table_down.txt}: names of the significant features with a
   *       value {@code >= 0} and {@code < 0} respectively
   * </ul>
   *
   * <p>Output items are only added when the full fold-change file exists after writing.
   *
   * @param subDataset dataset the fold-change was computed on
   * @param test test identifier, used as column name and file-name prefix
   * @param testLabel label used in the titles and groups of the output items
   * @param res fold-change values, expected to be index-aligned with {@code
   *     subDataset.getFeatureNames()}
   * @param className not used by the current implementation; kept for signature compatibility
   * @throws InvalidIndexException declared for the data-frame and matrix accesses
   * @throws IOException declared for the file writing calls
   */
  private void setFoldChangeResults(
      Dataset subDataset, String test, String testLabel, double[] res, String className)
      throws InvalidIndexException, IOException {

    // rowOrder[k] is the index, in subDataset, of the feature placed at position k.
    int[] rowOrder = ListUtils.order(ArrayUtils.toList(res), true);
    List<String> orderedValues = ListUtils.ordered(ArrayUtils.toStringList(res), rowOrder);
    List<String> orderedNames = ListUtils.ordered(subDataset.getFeatureNames(), rowOrder);

    DataFrame dataFrame = new DataFrame(subDataset.getFeatureNames().size(), 0);
    dataFrame.addColumn(test, orderedValues);
    dataFrame.setRowNames(orderedNames);

    File file = new File(outdir + "/" + test + "_foldchange.txt");
    IOUtils.write(file, dataFrame.toString(true, true));

    // Get significative terms, babelomics 5
    List<String> featureNames = new ArrayList<String>();
    List<String> featureValues = new ArrayList<String>();
    List<String> featureNamesUp = new ArrayList<String>();
    List<String> featureNamesDown = new ArrayList<String>();
    List<Integer> sigRowIndexes = new ArrayList<Integer>();

    for (int k = 0; k < orderedNames.size(); k++) {
      String value = orderedValues.get(k);
      double stats = Double.parseDouble(value);
      if (Math.abs(stats) >= foldChangeValue) {
        String rowName = orderedNames.get(k);
        featureNames.add(rowName);
        featureValues.add(value);
        // Take the source row from the ordering itself rather than searching by name: this stays
        // one-to-one with featureNames even when names collide case-insensitively.
        sigRowIndexes.add(rowOrder[k]);
        if (stats >= 0) {
          featureNamesUp.add(rowName);
        } else {
          featureNamesDown.add(rowName);
        }
      }
    }

    DataFrame sigDataFrame = new DataFrame(featureNames.size(), 0);
    sigDataFrame.addColumn(test, featureValues);
    sigDataFrame.setRowNames(featureNames);

    DoubleMatrix doubleMatrix =
        new DoubleMatrix(sigDataFrame.getRowDimension(), subDataset.getColumnDimension());
    for (int i = 0; i < sigRowIndexes.size(); i++) {
      doubleMatrix.setRow(i, subDataset.getDoubleMatrix().getRow(sigRowIndexes.get(i)));
    }
    File sigDatasetFile =
        new File(outdir + "/" + test + "_foldchange_significative_dataset.txt");
    Dataset sigDataset = new Dataset(subDataset.getSampleNames(), featureNames, doubleMatrix);
    sigDataset.save(sigDatasetFile);

    File fileAux = new File(outdir + "/" + test + "_foldchange_significative_table.txt");
    IOUtils.write(fileAux, sigDataFrame.toString(true, true));

    fileAux = new File(outdir + "/" + test + "_foldchange_significative_table_up.txt");
    IOUtils.write(fileAux, featureNamesUp);

    fileAux = new File(outdir + "/" + test + "_foldchange_significative_table_down.txt");
    IOUtils.write(fileAux, featureNamesDown);

    if (file.exists()) {
      result.addOutputItem(
          new Item(
              test + "_foldchange",
              file.getName(),
              testLabel + " fold-change output file",
              TYPE.FILE,
              new ArrayList<String>(),
              new HashMap<String, String>(),
              testLabel + " fold-change.Output files"));

      // The quotes are emitted backslash-escaped on purpose; the value is passed on as the item
      // value for the DIFF_EXPRESSION_VIEWER tag.
      String json =
          "{\\\"paramfilename\\\": \\\"input_params.txt\\\", \\\"testfilename\\\": \\\""
              + file.getName()
              + "\\\"}";
      result.addOutputItem(
          new Item(
              "diff_expr_" + StringUtils.randomString(8),
              json,
              "Significative results",
              TYPE.FILE,
              StringUtils.toList("DIFF_EXPRESSION_VIEWER"),
              new HashMap<String, String>(),
              testLabel + " fold-change. Significative results"));

      // The redirection is built from the significant subset only.
      DiffExpressionUtils.createFatiScanRedirection(
          sigDataFrame, test, test, result, outdir, testLabel + " fold-change.");
    }
  }
예제 #4
0
  /**
   * Runs an ANOVA test over the samples that belong to the selected class values, applies the
   * configured multiple-test correction, and registers the resulting files and significant
   * results as output items.
   *
   * <p>Any exception raised while computing or writing the results is printed and reported
   * through {@code abort(...)}.
   */
  public void executeAnova() {

    // Class label of every sample; position i is used as the column index into the matrix.
    final List<String> sampleClasses = dataset.getVariables().getByName(className).getValues();

    DoubleMatrix matrix;
    List<String> vars;
    if (sampleClasses.size() != classValues.size()) {
      // Keep only the columns whose class label is one of the selected values.
      vars = new ArrayList<String>();
      List<Integer> selectedColumns = new ArrayList<Integer>();
      int column = 0;
      for (String label : sampleClasses) {
        if (classValues.contains(label)) {
          selectedColumns.add(column);
          vars.add(label);
        }
        column++;
      }
      matrix = dataset.getSubMatrixByColumns(ListUtils.toIntArray(selectedColumns));
    } else {
      // Same number of labels as selected values: the whole matrix is used as is.
      matrix = dataset.getDoubleMatrix();
      vars = sampleClasses;
    }

    try {
      Dataset subDataset = dataset.getSubDataset(className, classValues);

      // Test first, then multiple-test correction on the same result list.
      AnovaTest anova = new AnovaTest(matrix, vars);
      TestResultList<AnovaTestResult> res = anova.compute();
      DiffExpressionUtils.multipleTestCorrection(res, correction);

      // Orderings used for the output table (rows) and for the significant results (columns).
      int[] columnOrder =
          ListUtils.order(subDataset.getVariables().getByName(className).getValues());
      int[] rowOrder = ListUtils.order(ArrayUtils.toList(res.getStatistics()), true);

      // Build the three value columns, each re-ordered by the statistic.
      List<String> statisticColumn =
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getStatistics()), rowOrder));
      List<String> pValueColumn =
          ListUtils.toStringList(ListUtils.ordered(ArrayUtils.toList(res.getPValues()), rowOrder));
      List<String> adjPValueColumn =
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getAdjPValues()), rowOrder));

      DataFrame dataFrame = new DataFrame(subDataset.getFeatureNames().size(), 0);
      dataFrame.addColumn("statistic", statisticColumn);
      dataFrame.addColumn("p-value", pValueColumn);
      dataFrame.addColumn("adj. p-value", adjPValueColumn);
      dataFrame.setRowNames(ListUtils.ordered(subDataset.getFeatureNames(), rowOrder));

      // Persist the table and advertise it only when the file is really there.
      String outputPath = outdir + "/" + test + ".txt";
      File file = new File(outputPath);
      new FeatureData(dataFrame).save(file);
      if (file.exists()) {
        Item fileItem =
            new Item(
                test + "file",
                file.getName(),
                "Anova output file",
                TYPE.FILE,
                new ArrayList<String>(2),
                new HashMap<String, String>(2),
                "Anova output files");
        result.addOutputItem(fileItem);
      }

      // Significant genes, followed by the FatiScan redirection on the full table.
      DiffExpressionUtils.addSignificativeResults(
          subDataset,
          test,
          "statistic",
          res.getStatistics(),
          "adj. p-value",
          res.getAdjPValues(),
          "p-value",
          res.getPValues(),
          null,
          null,
          null,
          null,
          className,
          columnOrder,
          pValue,
          maxDisplay,
          this);
      DiffExpressionUtils.createFatiScanRedirection(dataFrame, test, "statistic", result, outdir);
    } catch (Exception e) {
      e.printStackTrace();
      String detail = "error running anova: " + e.getMessage();
      abort("exception_executeanova_classcomparison", "error running anova", detail, detail);
    }
  }
예제 #5
0
  /**
   * Runs a two-sample t-test between the samples of the first and the second selected class
   * value, applies the configured multiple-test correction, and registers the resulting file and
   * significant results as output items.
   *
   * <p>Any exception raised while computing or writing the results is printed and reported
   * through {@code abort(...)}.
   */
  public void executeT() {

    // One sub-matrix per group: columns whose class variable equals the 1st / 2nd class value.
    int[] firstGroupColumns =
        dataset.getColumnIndexesByVariableValue(className, classValues.get(0));
    DoubleMatrix sample1 = dataset.getSubMatrixByColumns(firstGroupColumns);

    int[] secondGroupColumns =
        dataset.getColumnIndexesByVariableValue(className, classValues.get(1));
    DoubleMatrix sample2 = dataset.getSubMatrixByColumns(secondGroupColumns);

    try {
      Dataset subDataset = dataset.getSubDataset(className, classValues);

      // Test first, then multiple-test correction on the same result list.
      TTest tTest = new TTest();
      TestResultList<TTestResult> res = tTest.tTest(sample1, sample2);
      DiffExpressionUtils.multipleTestCorrection(res, correction);

      // Orderings used for the output table (rows) and for the significant results (columns).
      int[] columnOrder =
          ListUtils.order(subDataset.getVariables().getByName(className).getValues());
      int[] rowOrder = ListUtils.order(ArrayUtils.toList(res.getStatistics()), true);

      // Build the three value columns, each re-ordered by the statistic.
      List<String> statisticColumn =
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getStatistics()), rowOrder));
      List<String> pValueColumn =
          ListUtils.toStringList(ListUtils.ordered(ArrayUtils.toList(res.getPValues()), rowOrder));
      List<String> adjPValueColumn =
          ListUtils.toStringList(
              ListUtils.ordered(ArrayUtils.toList(res.getAdjPValues()), rowOrder));

      DataFrame dataFrame = new DataFrame(subDataset.getFeatureNames().size(), 0);
      dataFrame.addColumn("statistic", statisticColumn);
      dataFrame.addColumn("p-value", pValueColumn);
      dataFrame.addColumn("adj. p-value", adjPValueColumn);
      dataFrame.setRowNames(ListUtils.ordered(subDataset.getFeatureNames(), rowOrder));

      // Persist the table and advertise it only when the file is really there.
      String outputPath = outdir + "/t.txt";
      File file = new File(outputPath);
      new FeatureData(dataFrame).save(file);
      if (file.exists()) {
        Item fileItem =
            new Item(
                "tfile",
                file.getName(),
                "T-test output file",
                TYPE.FILE,
                new ArrayList<String>(),
                new HashMap<String, String>(),
                "T-test output files");
        result.addOutputItem(fileItem);
      }

      // Significant genes, followed by the FatiScan redirection on the full table.
      DiffExpressionUtils.addSignificativeResults(
          subDataset,
          test,
          "statistic",
          res.getStatistics(),
          "adj. p-value",
          res.getAdjPValues(),
          "p-value",
          res.getPValues(),
          null,
          null,
          null,
          null,
          className,
          columnOrder,
          pValue,
          maxDisplay,
          this);
      DiffExpressionUtils.createFatiScanRedirection(dataFrame, test, "statistic", result, outdir);
    } catch (Exception e) {
      e.printStackTrace();
      abort("exception_executet_classcomparison", "ERROR", "Error running t-test", "");
    }
  }