/**
 * Classifies the non-zero ratio of every symbol from {@code HGNC.getAllSymbols()} that
 * {@code expMan} contains, and prints a summary to standard output.
 *
 * <p>Each ratio is counted as "neverabove" when it equals 0, "always" when it equals 1, and
 * "sometimes" otherwise. Every ratio is also added to a histogram, which is printed after the
 * three counters.
 *
 * @param expMan manager queried through {@code contains} and {@code getNonZeroRatio}
 */
private static void checkZeros(ExpDataManager expMan) {
  Histogram ratioHistogram = new Histogram(0.1);
  int zeroCount = 0;
  int oneCount = 0;
  int otherCount = 0;

  for (String symbol : HGNC.getAllSymbols()) {
    if (expMan.contains(symbol)) {
      double ratio = expMan.getNonZeroRatio(symbol);
      ratioHistogram.count(ratio);

      if (ratio == 0) {
        zeroCount++;
      } else if (ratio == 1) {
        oneCount++;
      } else {
        otherCount++;
      }
    }
  }

  System.out.println("neverabove = " + zeroCount);
  System.out.println("always = " + oneCount);
  System.out.println("sometimes = " + otherCount);
  ratioHistogram.print();
}
Esempio n. 2
0
  /**
   * Loads the workload trace and the workload statistics selected by the parsed parameters.
   *
   * <p>When {@code PARAM_WORKLOAD} is present, a filter chain is assembled from the optional
   * workload parameters (duplicate removal, transaction offset, base partitions, procedure
   * include/exclude lists, sampling, transaction limit, query limit). The trace is then loaded
   * through that chain into {@code this.workload}, and the chain is reset afterwards.
   *
   * <p>When {@code this.catalog_db} is set, a new {@code WorkloadStatistics} is created. It is
   * populated from the {@code PARAM_STATS} file when that parameter is present, and scaled when
   * {@code PARAM_STATS_SCALE_FACTOR} is present.
   *
   * @throws Exception propagated from the parameter parsing and workload loading calls; a
   *     failure while loading the statistics file is rethrown as a {@link RuntimeException}
   *     that wraps the original cause
   */
  private void loadWorkload() throws Exception {
    final boolean debug = LOG.isDebugEnabled();
    // Workload Trace
    if (this.params.containsKey(PARAM_WORKLOAD)) {
      assert (this.catalog_db != null) : "Missing catalog!";
      String path = new File(this.params.get(PARAM_WORKLOAD)).getAbsolutePath();

      boolean weightedTxns = this.getBooleanParam(PARAM_WORKLOAD_XACT_WEIGHTS, false);
      if (debug) LOG.debug("Use Transaction Weights in Limits: " + weightedTxns);

      // This will prune out duplicate trace records...
      // Note the receiver: the new filter attaches the existing chain to itself.
      if (params.containsKey(PARAM_WORKLOAD_REMOVE_DUPES)) {
        DuplicateTraceFilter filter = new DuplicateTraceFilter();
        this.workload_filter =
            (this.workload_filter != null ? filter.attach(this.workload_filter) : filter);
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      // TRANSACTION OFFSET
      if (params.containsKey(PARAM_WORKLOAD_XACT_OFFSET)) {
        this.workload_xact_offset = Long.parseLong(params.get(PARAM_WORKLOAD_XACT_OFFSET));
        ProcedureLimitFilter filter =
            new ProcedureLimitFilter(-1L, this.workload_xact_offset, weightedTxns);
        // Important! The offset should go in the front!
        this.workload_filter =
            (this.workload_filter != null ? filter.attach(this.workload_filter) : filter);
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      // BASE PARTITIONS
      if (params.containsKey(PARAM_WORKLOAD_RANDOM_PARTITIONS)
          || params.containsKey(PARAM_WORKLOAD_BASE_PARTITIONS)) {
        BasePartitionTxnFilter filter =
            new BasePartitionTxnFilter(new PartitionEstimator(catalog_db));

        // FIXED LIST: an explicit comma-separated list takes precedence over the random pick
        if (params.containsKey(PARAM_WORKLOAD_BASE_PARTITIONS)) {
          for (String p_str : this.getParam(PARAM_WORKLOAD_BASE_PARTITIONS).split(",")) {
            workload_base_partitions.add(Integer.valueOf(p_str));
          } // FOR
          // RANDOM: keep the first (size * factor) partitions of a shuffled list
        } else {
          double factor = this.getDoubleParam(PARAM_WORKLOAD_RANDOM_PARTITIONS);
          List<Integer> all_partitions =
              new ArrayList<Integer>(CatalogUtil.getAllPartitionIds(catalog_db));
          Collections.shuffle(all_partitions, new Random());
          workload_base_partitions.addAll(
              all_partitions.subList(0, (int) (all_partitions.size() * factor)));
        }
        filter.addPartitions(workload_base_partitions);
        this.workload_filter =
            (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      // Txn Limit
      this.workload_xact_limit = this.getLongParam(PARAM_WORKLOAD_XACT_LIMIT);
      // Built lazily from the workload file, and shared by the include and sampling steps
      Histogram<String> proc_histogram = null;

      // Include/exclude procedures from the traces
      if (params.containsKey(PARAM_WORKLOAD_PROC_INCLUDE)
          || params.containsKey(PARAM_WORKLOAD_PROC_EXCLUDE)) {
        Filter filter = new ProcedureNameFilter(weightedTxns);

        // INCLUDE
        String temp = params.get(PARAM_WORKLOAD_PROC_INCLUDE);
        if (temp != null && !temp.equals(ProcedureNameFilter.INCLUDE_ALL)) {

          // We can take the counts for PROC_INCLUDE and scale them
          // with the multiplier
          double multiplier = 1.0d;
          if (this.hasDoubleParam(PARAM_WORKLOAD_PROC_INCLUDE_MULTIPLIER)) {
            multiplier = this.getDoubleParam(PARAM_WORKLOAD_PROC_INCLUDE_MULTIPLIER);
            if (debug) LOG.debug("Workload Procedure Multiplier: " + multiplier);
          }

          // Default Txn Frequencies
          String procinclude = params.get(PARAM_WORKLOAD_PROC_INCLUDE);
          if (procinclude.equalsIgnoreCase("default")) {
            procinclude =
                AbstractProjectBuilder.getProjectBuilder(catalog_type)
                    .getTransactionFrequencyString();
          }

          // Each entry is either "name" (unlimited) or "name:limit"
          Map<String, Integer> limits = new HashMap<String, Integer>();
          int total_unlimited = 0; // sum of histogram counts for entries without a limit
          int total = 0; // sum of the explicit (scaled) limits
          for (String proc_name : procinclude.split(",")) {
            int limit = -1;
            // Check if there is a limit for this procedure
            if (proc_name.contains(":")) {
              String pieces[] = proc_name.split(":");
              proc_name = pieces[0];
              limit = (int) Math.round(Integer.parseInt(pieces[1]) * multiplier);
            }

            if (limit < 0) {
              // No explicit limit: fall back to the procedure's count in the workload file
              if (proc_histogram == null) {
                if (debug) LOG.debug("Generating procedure histogram from workload file");
                proc_histogram = WorkloadUtil.getProcedureHistogram(new File(path));
              }
              limit = (int) proc_histogram.get(proc_name, 0);
              total_unlimited += limit;
            } else {
              total += limit;
            }
            limits.put(proc_name, limit);
          } // FOR
          // If we have a workload limit and some txns that we want
          // to get unlimited
          // records from, then we want to modify the other txns so
          // that we fill in the "gap"
          if (this.workload_xact_limit != null && total_unlimited > 0) {
            int remaining = this.workload_xact_limit.intValue() - total - total_unlimited;
            // Redistribute only when at least one procedure has an explicit limit. With
            // total == 0 the ratio below is a division by zero, which would turn every
            // non-zero limit into Integer.MAX_VALUE and overflow the summed total.
            if (remaining > 0 && total > 0) {
              for (Entry<String, Integer> e : limits.entrySet()) {
                double ratio = e.getValue() / (double) total;
                e.setValue((int) Math.ceil(e.getValue() + (ratio * remaining)));
              } // FOR
            }
          }

          // Only allocated in debug mode; every use below is guarded by the same flag
          Histogram<String> proc_multiplier_histogram = null;
          if (debug) {
            if (proc_histogram != null) LOG.debug("Full Workload Histogram:\n" + proc_histogram);
            proc_multiplier_histogram = new Histogram<String>();
          }
          total = 0;
          for (Entry<String, Integer> e : limits.entrySet()) {
            if (debug) proc_multiplier_histogram.put(e.getKey(), e.getValue());
            ((ProcedureNameFilter) filter).include(e.getKey(), e.getValue());
            total += e.getValue();
          } // FOR
          if (debug)
            LOG.debug("Multiplier Histogram [total=" + total + "]:\n" + proc_multiplier_histogram);
        }

        // EXCLUDE
        temp = params.get(PARAM_WORKLOAD_PROC_EXCLUDE);
        if (temp != null) {
          for (String proc_name : params.get(PARAM_WORKLOAD_PROC_EXCLUDE).split(",")) {
            ((ProcedureNameFilter) filter).exclude(proc_name);
          } // FOR
        }

        // Sampling!!
        // The sampling filter replaces the name filter, and is built from its include map
        if (this.getBooleanParam(PARAM_WORKLOAD_PROC_SAMPLE, false)) {
          if (debug) LOG.debug("Attaching sampling filter");
          if (proc_histogram == null)
            proc_histogram = WorkloadUtil.getProcedureHistogram(new File(path));
          Map<String, Integer> proc_includes = ((ProcedureNameFilter) filter).getProcIncludes();
          SamplingFilter sampling_filter = new SamplingFilter(proc_includes, proc_histogram);
          filter = sampling_filter;
          if (debug) LOG.debug("Workload Procedure Histogram:\n" + proc_histogram);
        }

        // Attach our new filter to the chain (or make it the head if
        // it's the first one)
        this.workload_filter =
            (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      // TRANSACTION LIMIT
      if (this.workload_xact_limit != null) {
        ProcedureLimitFilter filter =
            new ProcedureLimitFilter(this.workload_xact_limit, weightedTxns);
        this.workload_filter =
            (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      // QUERY LIMIT
      if (params.containsKey(PARAM_WORKLOAD_QUERY_LIMIT)) {
        this.workload_query_limit = Long.parseLong(params.get(PARAM_WORKLOAD_QUERY_LIMIT));
        QueryLimitFilter filter = new QueryLimitFilter(this.workload_query_limit);
        this.workload_filter =
            (this.workload_filter != null ? this.workload_filter.attach(filter) : filter);
        // Logged like every other filter attachment above
        if (debug) LOG.debug("Attached " + filter.debugImpl());
      }

      if (this.workload_filter != null && debug)
        LOG.debug("Workload Filters: " + this.workload_filter.toString());
      this.workload = new Workload(this.catalog);
      this.workload.load(path, this.catalog_db, this.workload_filter);
      this.workload_path = new File(path).getAbsolutePath();
      if (this.workload_filter != null) this.workload_filter.reset();
    }

    // Workload Statistics
    if (this.catalog_db != null) {
      this.stats = new WorkloadStatistics(this.catalog_db);
      if (this.params.containsKey(PARAM_STATS)) {
        String path = this.params.get(PARAM_STATS);
        if (debug) LOG.debug("Loading in workload statistics from '" + path + "'");
        this.stats_path = new File(path).getAbsolutePath();
        try {
          this.stats.load(path, this.catalog_db);
        } catch (Throwable ex) {
          // Rethrow with the absolute path in the message; the original cause is preserved
          throw new RuntimeException("Failed to load stats file '" + this.stats_path + "'", ex);
        }
      }

      // Scaling
      if (this.params.containsKey(PARAM_STATS_SCALE_FACTOR)) {
        double scale_factor = this.getDoubleParam(PARAM_STATS_SCALE_FACTOR);
        LOG.info("Scaling TableStatistics: " + scale_factor);
        AbstractTableStatisticsGenerator generator =
            AbstractTableStatisticsGenerator.factory(
                this.catalog_db, this.catalog_type, scale_factor);
        generator.apply(this.stats);
      }
    }
  }