/**
 * Reports how often each known gene symbol shows nonzero expression in the
 * given data set. For every HGNC symbol present in {@code expMan}, the
 * nonzero ratio is folded into a bucketed histogram (bucket width 0.1) and
 * the gene is tallied into one of three groups: ratio exactly 0 (never
 * nonzero), exactly 1 (always nonzero), or strictly in between. The three
 * counts and the histogram are printed to stdout.
 *
 * @param expMan expression-data source queried per gene symbol
 */
private static void checkZeros(ExpDataManager expMan)
{
	int zeroCnt = 0;
	int partialCnt = 0;
	int fullCnt = 0;

	// Bucket the nonzero ratios into 0.1-wide bins for the printed histogram.
	Histogram ratioHist = new Histogram(0.1);

	for (String symbol : HGNC.getAllSymbols())
	{
		// Only genes actually covered by the expression data contribute.
		if (expMan.contains(symbol))
		{
			double ratio = expMan.getNonZeroRatio(symbol);
			ratioHist.count(ratio);

			// Endpoint comparisons are exact by design: 0 and 1 are the
			// only "never" / "always" sentinel values of the ratio.
			if (ratio == 0)
			{
				zeroCnt++;
			}
			else if (ratio == 1)
			{
				fullCnt++;
			}
			else
			{
				partialCnt++;
			}
		}
	}

	System.out.println("neverabove = " + zeroCnt);
	System.out.println("always = " + fullCnt);
	System.out.println("sometimes = " + partialCnt);
	ratioHist.print();
}
/**
 * Loads the workload trace and workload statistics named in {@code this.params}.
 * <p>
 * If {@code PARAM_WORKLOAD} is set, a chain of {@code Filter}s is assembled in
 * {@code this.workload_filter} and the trace at that path is loaded into
 * {@code this.workload}. The attachment ORDER of the filters is significant:
 * duplicate removal, then transaction offset (prepended to the front of the
 * chain), then base-partition filtering, then procedure include/exclude (or
 * sampling), then the overall transaction limit, then the query limit.
 * <p>
 * If {@code this.catalog_db} is set, workload statistics are loaded from
 * {@code PARAM_STATS} into {@code this.stats} and optionally scaled by
 * {@code PARAM_STATS_SCALE_FACTOR}.
 *
 * @throws Exception if the workload trace or statistics file cannot be loaded
 */
private void loadWorkload() throws Exception {
    final boolean debug = LOG.isDebugEnabled();

    // ---------------------------------------------------------------------
    // Workload Trace
    // ---------------------------------------------------------------------
    if (this.params.containsKey(PARAM_WORKLOAD)) {
        assert (this.catalog_db != null) : "Missing catalog!";
        String path = new File(this.params.get(PARAM_WORKLOAD)).getAbsolutePath();
        boolean weightedTxns = this.getBooleanParam(PARAM_WORKLOAD_XACT_WEIGHTS, false);
        if (debug) LOG.debug("Use Transaction Weights in Limits: " + weightedTxns);

        // DUPLICATE REMOVAL
        // This will prune out duplicate trace records...
        if (params.containsKey(PARAM_WORKLOAD_REMOVE_DUPES)) {
            DuplicateTraceFilter filter = new DuplicateTraceFilter();
            // New filter becomes the head: existing chain hangs off of it.
            this.workload_filter = (this.workload_filter != null ? filter.attach(this.workload_filter) : filter);
            if (debug) LOG.debug("Attached " + filter.debugImpl());
        }

        // TRANSACTION OFFSET
        // Skips the first N transactions of the trace (limit of -1 = no cap here;
        // the real cap is applied by the TRANSACTION LIMIT filter further below).
        if (params.containsKey(PARAM_WORKLOAD_XACT_OFFSET)) {
            this.workload_xact_offset = Long.parseLong(params.get(PARAM_WORKLOAD_XACT_OFFSET));
            ProcedureLimitFilter filter = new ProcedureLimitFilter(-1l, this.workload_xact_offset, weightedTxns);
            // Important! The offset should go in the front!
            this.workload_filter = (this.workload_filter != null ? filter.attach(this.workload_filter) : filter);
            if (debug) LOG.debug("Attached " + filter.debugImpl());
        }

        // BASE PARTITIONS
        // Restrict the trace to txns whose base partition is in workload_base_partitions,
        // chosen either from an explicit list or as a random fraction of all partitions.
        if (params.containsKey(PARAM_WORKLOAD_RANDOM_PARTITIONS) || params.containsKey(PARAM_WORKLOAD_BASE_PARTITIONS)) {
            BasePartitionTxnFilter filter = new BasePartitionTxnFilter(new PartitionEstimator(catalog_db));

            // FIXED LIST: comma-separated partition ids
            if (params.containsKey(PARAM_WORKLOAD_BASE_PARTITIONS)) {
                for (String p_str : this.getParam(PARAM_WORKLOAD_BASE_PARTITIONS).split(",")) {
                    workload_base_partitions.add(Integer.valueOf(p_str));
                } // FOR
            // RANDOM: take a `factor` fraction of a shuffled list of all partition ids.
            // NOTE(review): unseeded Random makes this selection non-reproducible
            // across runs — confirm that is acceptable for experiments.
            } else {
                double factor = this.getDoubleParam(PARAM_WORKLOAD_RANDOM_PARTITIONS);
                List<Integer> all_partitions = new ArrayList<Integer>(CatalogUtil.getAllPartitionIds(catalog_db));
                Collections.shuffle(all_partitions, new Random());
                workload_base_partitions.addAll(all_partitions.subList(0, (int) (all_partitions.size() * factor)));
            }
            filter.addPartitions(workload_base_partitions);
            // Unlike the offset filter, this one goes at the TAIL of the chain.
            this.workload_filter = (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
            if (debug) LOG.debug("Attached " + filter.debugImpl());
        }

        // Txn Limit (read here; the corresponding filter is attached further below,
        // after the include/exclude logic, which may consult this value).
        this.workload_xact_limit = this.getLongParam(PARAM_WORKLOAD_XACT_LIMIT);
        // Lazily-built histogram of procedure counts in the trace file; shared by
        // the include-limit and sampling logic below.
        Histogram<String> proc_histogram = null;

        // PROCEDURE INCLUDE / EXCLUDE
        // Include/exclude procedures from the traces
        if (params.containsKey(PARAM_WORKLOAD_PROC_INCLUDE) || params.containsKey(PARAM_WORKLOAD_PROC_EXCLUDE)) {
            Filter filter = new ProcedureNameFilter(weightedTxns);

            // INCLUDE: entries are "ProcName" or "ProcName:limit"; a limit of -1
            // (no explicit limit) is replaced by the procedure's count in the trace.
            String temp = params.get(PARAM_WORKLOAD_PROC_INCLUDE);
            if (temp != null && !temp.equals(ProcedureNameFilter.INCLUDE_ALL)) {
                // We can take the counts for PROC_INCLUDE and scale them
                // with the multiplier
                double multiplier = 1.0d;
                if (this.hasDoubleParam(PARAM_WORKLOAD_PROC_INCLUDE_MULTIPLIER)) {
                    multiplier = this.getDoubleParam(PARAM_WORKLOAD_PROC_INCLUDE_MULTIPLIER);
                    if (debug) LOG.debug("Workload Procedure Multiplier: " + multiplier);
                }

                // Default Txn Frequencies: "default" expands to the project
                // builder's declared transaction frequency string.
                String procinclude = params.get(PARAM_WORKLOAD_PROC_INCLUDE);
                if (procinclude.equalsIgnoreCase("default")) {
                    procinclude = AbstractProjectBuilder.getProjectBuilder(catalog_type).getTransactionFrequencyString();
                }

                Map<String, Integer> limits = new HashMap<String, Integer>();
                int total_unlimited = 0;   // sum of limits derived from the trace histogram
                int total = 0;             // sum of explicitly-specified limits
                for (String proc_name : procinclude.split(",")) {
                    int limit = -1;
                    // Check if there is a limit for this procedure
                    if (proc_name.contains(":")) {
                        String pieces[] = proc_name.split(":");
                        proc_name = pieces[0];
                        limit = (int) Math.round(Integer.parseInt(pieces[1]) * multiplier);
                    }

                    if (limit < 0) {
                        // No explicit limit: fall back to this procedure's actual
                        // count in the trace (histogram built lazily, once).
                        if (proc_histogram == null) {
                            if (debug) LOG.debug("Generating procedure histogram from workload file");
                            proc_histogram = WorkloadUtil.getProcedureHistogram(new File(path));
                        }
                        limit = (int) proc_histogram.get(proc_name, 0);
                        total_unlimited += limit;
                    } else {
                        total += limit;
                    }
                    limits.put(proc_name, limit);
                } // FOR

                // If we have a workload limit and some txns that we want
                // to get unlimited records from, then we want to modify the
                // other txns so that we fill in the "gap".
                // NOTE(review): remaining is distributed over ALL entries in
                // proportion to `total` (the explicit-limit sum) — entries with
                // histogram-derived limits also get scaled; confirm intended.
                if (this.workload_xact_limit != null && total_unlimited > 0) {
                    int remaining = this.workload_xact_limit.intValue() - total - total_unlimited;
                    if (remaining > 0) {
                        for (Entry<String, Integer> e : limits.entrySet()) {
                            double ratio = e.getValue() / (double) total;
                            e.setValue((int) Math.ceil(e.getValue() + (ratio * remaining)));
                        } // FOR
                    }
                }

                // Debug-only histogram of the final per-procedure limits.
                Histogram<String> proc_multiplier_histogram = null;
                if (debug) {
                    if (proc_histogram != null) LOG.debug("Full Workload Histogram:\n" + proc_histogram);
                    proc_multiplier_histogram = new Histogram<String>();
                }

                total = 0;
                for (Entry<String, Integer> e : limits.entrySet()) {
                    if (debug) proc_multiplier_histogram.put(e.getKey(), e.getValue());
                    ((ProcedureNameFilter) filter).include(e.getKey(), e.getValue());
                    total += e.getValue();
                } // FOR
                if (debug) LOG.debug("Multiplier Histogram [total=" + total + "]:\n" + proc_multiplier_histogram);
            }

            // EXCLUDE: comma-separated procedure names to drop outright
            temp = params.get(PARAM_WORKLOAD_PROC_EXCLUDE);
            if (temp != null) {
                for (String proc_name : params.get(PARAM_WORKLOAD_PROC_EXCLUDE).split(",")) {
                    ((ProcedureNameFilter) filter).exclude(proc_name);
                } // FOR
            }

            // SAMPLING
            // NOTE(review): the SamplingFilter REPLACES the ProcedureNameFilter
            // (built only from its include map), so any excludes registered above
            // do not carry over — confirm this is intentional.
            if (this.getBooleanParam(PARAM_WORKLOAD_PROC_SAMPLE, false)) {
                if (debug) LOG.debug("Attaching sampling filter");
                if (proc_histogram == null) proc_histogram = WorkloadUtil.getProcedureHistogram(new File(path));
                Map<String, Integer> proc_includes = ((ProcedureNameFilter) filter).getProcIncludes();
                SamplingFilter sampling_filter = new SamplingFilter(proc_includes, proc_histogram);
                filter = sampling_filter;
                if (debug) LOG.debug("Workload Procedure Histogram:\n" + proc_histogram);
            }

            // Attach our new filter to the chain (or make it the head if
            // it's the first one)
            this.workload_filter = (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
            if (debug) LOG.debug("Attached " + filter.debugImpl());
        }

        // TRANSACTION LIMIT: cap the total number of txns taken from the trace
        if (this.workload_xact_limit != null) {
            ProcedureLimitFilter filter = new ProcedureLimitFilter(this.workload_xact_limit, weightedTxns);
            this.workload_filter = (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
            if (debug) LOG.debug("Attached " + filter.debugImpl());
        }

        // QUERY LIMIT: cap the number of queries taken per transaction trace
        if (params.containsKey(PARAM_WORKLOAD_QUERY_LIMIT)) {
            this.workload_query_limit = Long.parseLong(params.get(PARAM_WORKLOAD_QUERY_LIMIT));
            QueryLimitFilter filter = new QueryLimitFilter(this.workload_query_limit);
            this.workload_filter = (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
        }

        if (this.workload_filter != null && debug) LOG.debug("Workload Filters: " + this.workload_filter.toString());

        // Load the trace through the assembled filter chain, then reset the
        // chain so it can be reused.
        this.workload = new Workload(this.catalog);
        this.workload.load(path, this.catalog_db, this.workload_filter);
        this.workload_path = new File(path).getAbsolutePath();
        if (this.workload_filter != null) this.workload_filter.reset();
    }

    // ---------------------------------------------------------------------
    // Workload Statistics
    // ---------------------------------------------------------------------
    if (this.catalog_db != null) {
        this.stats = new WorkloadStatistics(this.catalog_db);
        if (this.params.containsKey(PARAM_STATS)) {
            String path = this.params.get(PARAM_STATS);
            if (debug) LOG.debug("Loading in workload statistics from '" + path + "'");
            this.stats_path = new File(path).getAbsolutePath();
            try {
                this.stats.load(path, this.catalog_db);
            } catch (Throwable ex) {
                // Wrap with the resolved path so the failing file is identifiable.
                throw new RuntimeException("Failed to load stats file '" + this.stats_path + "'", ex);
            }
        }

        // Scaling: optionally rescale the table statistics by a constant factor
        if (this.params.containsKey(PARAM_STATS_SCALE_FACTOR)) {
            double scale_factor = this.getDoubleParam(PARAM_STATS_SCALE_FACTOR);
            LOG.info("Scaling TableStatistics: " + scale_factor);
            AbstractTableStatisticsGenerator generator = AbstractTableStatisticsGenerator.factory(this.catalog_db, this.catalog_type, scale_factor);
            generator.apply(this.stats);
        }
    }
}