/// default constructor public SignalToNoiseEstimatorMedian() { setName("SignalToNoiseEstimatorMedian"); defaults_.setValue( "MaxIntensity", -1., "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see AutoMode)." + " Only provide this parameter if you know what you are doing (and change 'AutoMode' to '-1')!" + " All intensities EQUAL/ABOVE 'MaxIntensity' will be added to the LAST histogram bin." + " If you choose 'MaxIntensity' too small, the noise estimate might be too small as well. " + " If chosen too big, the bins become quite large (which you could counter by increasing 'BinCount', which increases runtime)." + " In general, the Median-S/N estimator is more robust to a manual MaxIntensity than the MeanIterative-S/N."); defaults_.setValue( "AutoMaxStdevFactor", 3.0, "parameter for 'MaxIntensity' estimation (if 'AutoMode' == 0): mean + 'AutoMaxStdevFactor' * stdev"); defaults_.setValue( "AutoMaxPercentile", 95, "parameter for 'MaxIntensity' estimation (if 'AutoMode' == 1): AutoMaxPercentile th percentile"); defaults_.setValue( "AutoMode", 0, "method to use to determine maximal intensity: -1 --> use 'MaxIntensity'; 0 --> 'AutoMaxStdevFactor' method (default); 1 --> 'AutoMaxPercentile' method"); defaults_.setValue("WinLen", 200.0, "window length in Thomson"); defaults_.setValue("BinCount", 30, "number of bins used for histogram"); defaults_.setValue( "MinRequiredElements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)"); defaults_.setValue( "NoiseForEmptyWindow", Math.pow(10.0, 20), "noise value used for sparse windows"); defaultsToParam_(); }
/// calculate StN values for all datapoints given, by using a sliding window approach /// @param scan_first_ first element in the scan /// @param scan_last_ last element in the scan (disregarded) protected void computeSTN_(Peak[] data_, int scan_first_, int scan_last_) throws Exception { // reset counter for sparse windows double sparse_window_percent = 0; // reset counter for histogram overflow double histogram_oob_percent = 0; // reset the results stn_estimates_.clear(); // maximal range of histogram needs to be calculated first if (auto_mode_ == AUTOMAXBYSTDEV) { // use MEAN+auto_max_intensity_*STDEV as threshold GaussianEstimate gauss_global = estimate_(data_, scan_first_, scan_last_); max_intensity_ = gauss_global.mean + Math.sqrt(gauss_global.variance) * auto_max_stdev_factor_; } else if (auto_mode_ == AUTOMAXBYPERCENT) { // get value at "auto_max_percentile_"th percentile // we use a histogram approach here as well. if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100)) { String s = "" + auto_max_percentile_; throw new Exception( "AutoMode is on AUTOMAXBYPERCENT! AutoMaxPercentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!"); } int[] histogram_auto = new int[100]; Arrays.fill(histogram_auto, 0); // find maximum of current scan int size = 0; double maxInt = 0; int run = scan_first_; while (run != scan_last_) { maxInt = Math.max(maxInt, data_[run].getIntensity()); ++size; ++run; } double bin_size = maxInt / 100; // fill histogram run = scan_first_; while (run != scan_last_) { ++histogram_auto[(int) ((data_[run].getIntensity() - 1) / bin_size)]; ++run; } // add up element counts in histogram until ?th percentile is reached int elements_below_percentile = (int) (auto_max_percentile_ * size / 100); int elements_seen = 0; int i = -1; run = scan_first_; while (run != scan_last_ && elements_seen < elements_below_percentile) { ++i; elements_seen += histogram_auto[i]; ++run; } max_intensity_ = (((double) i) + 0.5) * bin_size; } else { // if (auto_mode_ == MANUAL) if (max_intensity_ <= 0) { String s = "" + max_intensity_; throw new Exception( "AutoMode is on MANUAL! MaxIntensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable AutoMode!"); } } if (max_intensity_ <= 0) { System.err.println( "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " + max_intensity_); return; } int window_pos_center = scan_first_; int window_pos_borderleft = scan_first_; int window_pos_borderright = scan_first_; double window_half_size = win_len_ / 2; double bin_size = max_intensity_ / bin_count_; int bin_count_minus_1 = bin_count_ - 1; int[] histogram = new int[bin_count_]; Arrays.fill(histogram, 0); double[] bin_value = new double[bin_count_]; Arrays.fill(bin_value, 0.); // calculate average intensity that is represented by a bin for (int bin = 0; bin < bin_count_; bin++) { histogram[bin] = 0; bin_value[bin] = (bin + 0.5) * bin_size; } // bin in which a datapoint would fall int to_bin = 0; // index of bin where the median is located int median_bin = 0; // additive number of elements from left to x in histogram int element_inc_count = 0; // tracks elements in current window, which may vary because of uneven spaced data int elements_in_window = 0; // number of windows int window_count = 0; // number of elements where we find the median int element_in_window_half = 0; double noise; // noise value of a datapoint // determine how many elements we need to estimate (for progress estimation) int windows_overall = 0; int run = scan_first_; while (run != scan_last_) { ++windows_overall; ++run; } // MAIN LOOP while (window_pos_center != scan_last_) { // erase all elements from histogram that will leave the window on the LEFT side while (data_[window_pos_borderleft].getMZ() < data_[window_pos_center].getMZ() - window_half_size) { to_bin = Math.min( (int) ((data_[window_pos_borderleft].getIntensity()) / bin_size), bin_count_minus_1); --histogram[to_bin]; --elements_in_window; ++window_pos_borderleft; } // add all elements to histogram that will enter the window on the RIGHT side while ((window_pos_borderright != scan_last_) && (data_[window_pos_borderright].getMZ() <= data_[window_pos_center].getMZ() + window_half_size)) { to_bin = Math.min( (int) ((data_[window_pos_borderright].getIntensity()) / bin_size), bin_count_minus_1); ++histogram[to_bin]; ++elements_in_window; ++window_pos_borderright; } if (elements_in_window < min_required_elements_) { noise = noise_for_empty_window_; ++sparse_window_percent; } else { // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] } median_bin = -1; element_inc_count = 0; element_in_window_half = (elements_in_window + 1) / 2; while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half) { ++median_bin; element_inc_count += histogram[median_bin]; } // increase the error count if (median_bin == bin_count_minus_1) ++histogram_oob_percent; // just avoid division by 0 noise = Math.max(1.0, bin_value[median_bin]); } // store result stn_estimates_.put(data_[window_pos_center], data_[window_pos_center].getIntensity() / noise); // advance the window center by one datapoint ++window_pos_center; ++window_count; } // end while sparse_window_percent = sparse_window_percent * 100 / window_count; histogram_oob_percent = histogram_oob_percent * 100 / window_count; // warn if percentage of sparse windows is above 20% if (sparse_window_percent > 20) { System.err.println( "WARNING in SignalToNoiseEstimatorMedian: " + sparse_window_percent + "% of all windows were sparse. You should consider increasing WindowLength or decreasing MinReqElementsInWindow"); } // warn if percentage of possibly wrong median estimates is above 1% if (histogram_oob_percent > 1) { System.err.println( "WARNING in SignalToNoiseEstimatorMedian: " + histogram_oob_percent + "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. " + "You should consider increasing MaxIntensity (and maybe BinCount with it, to keep bin width reasonable)"); } } // end of shiftWindow_