  /**
   * Adds the configurations for data stores whose entities will be mapped using the engine created
   * by this class.
   *
   * @param dataStores configurations of data stores that will have their entities mapped through
   *     this engine.
   */
  public final void addDataStoreConfigurations(DataStoreConfiguration... dataStores) {
    Args.notEmpty(dataStores, "Data stores");
    for (DataStoreConfiguration config : dataStores) {
      Args.notNull(config, "Data store configuration");
      dataStoreConfigurations.add(config);
    }
  }
  /**
   * Adds instances of {@link CustomDataStoreFactory} that uniVocity should use to read any custom
   * {@link DataStoreConfiguration} provided by the user in the constructor of this class, so that
   * instances of {@link CustomDataStore} can be created properly.
   *
   * @param customFactories the factories that process user-provided data store configurations and
   *     generate custom data store instances.
   */
  public final void addCustomDataStoreFactories(CustomDataStoreFactory<?>... customFactories) {
    Args.notEmpty(customFactories, "Custom data store factories");
    for (CustomDataStoreFactory<?> customFactory : customFactories) {
      Args.notNull(customFactory, "Custom data store factory");
      customDataStoreFactories.add(customFactory);
    }
  }
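
A hedged sketch of a factory that such a call might register. The CustomDataStoreFactory method names used here (getConfigurationType, newDataStore) are assumptions about the uniVocity API rather than facts taken from this snippet, and MyStoreConfiguration/MyDataStore are hypothetical types:

  // Hypothetical factory mapping a user-defined configuration type to a custom data store.
  public class MyDataStoreFactory implements CustomDataStoreFactory<MyStoreConfiguration> {
    @Override
    public Class<MyStoreConfiguration> getConfigurationType() {
      return MyStoreConfiguration.class; // the configuration class this factory understands
    }

    @Override
    public CustomDataStore<?> newDataStore(MyStoreConfiguration config) {
      return new MyDataStore(config); // hypothetical CustomDataStore implementation
    }
  }
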
  /**
   * Creates a new engine configuration with the essential settings required by uniVocity to
   * enable the definition and execution of data mappings.
   *
   * @param engineName the name of the new engine. The engine name is used to obtain instances of
   *     {@link DataIntegrationEngine} and to manage them using {@link Univocity}.
   * @param dataStores optional configurations of data stores that will have their entities mapped
   *     through this engine. More data stores can be added later using {@link
   *     #addDataStoreConfigurations(DataStoreConfiguration...)}.
   */
  public EngineConfiguration(String engineName, DataStoreConfiguration... dataStores) {
    Args.notBlank(engineName, "Engine name");
    this.engineName = engineName;

    // The varargs parameter is optional: only validate and register when values were supplied.
    if (dataStores != null && dataStores.length > 0) {
      addDataStoreConfigurations(dataStores);
    }
  }
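
For context, a minimal usage sketch of this configuration class, assuming uniVocity's Univocity.registerEngine and Univocity.getEngine entry points; the engine name and myStoreConfig (a previously built DataStoreConfiguration) are illustrative:

  // Hedged sketch: register an engine configuration, then obtain the engine by name.
  EngineConfiguration config = new EngineConfiguration("myEngine");
  config.addDataStoreConfigurations(myStoreConfig); // hypothetical DataStoreConfiguration instance
  Univocity.registerEngine(config);
  DataIntegrationEngine engine = Univocity.getEngine("myEngine");
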
Example #4
  protected JobOutput executeJellyfishMerger(
      Args args, String ecqName, Set<File> fileSet, File outputDir)
      throws InterruptedException, ProcessExecutionException, ConanParameterException, IOException {

    String suffix = "jellyfish_" + ecqName + "_all.jf31_0";

    String jobName = args.getJobPrefix() + "-merge-" + suffix;

    List<File> files = new ArrayList<>(fileSet);

    File outputFile = new File(outputDir, suffix);

    JellyfishMergeV11 jellyfishMerge =
        this.makeJellyfishMerge(files, outputFile, args.getOrganism());

    ExecutionResult result =
        this.conanExecutorService.executeProcess(
            jellyfishMerge,
            args.getOutputDir(),
            jobName,
            args.getThreadsPerProcess(),
            args.getMemoryPerProcess(),
            args.isRunParallel());

    result.setName("merge-" + suffix);

    return new JobOutput(id, outputFile);
  }
Example #5
  protected JobOutput executeJellyfishCount(Args args, String ecqName, File outputDir, Library lib)
      throws ProcessExecutionException, InterruptedException, ConanParameterException, IOException {
    String suffix = "jellyfish_" + ecqName + "_" + lib.getName() + ".jf31";

    // Create the process
    JellyfishCountV11 jellyfishProcess =
        this.makeJellyfishCount(
            this.makeInputStringFromLib(lib),
            new File(new File(outputDir, ecqName), suffix).getAbsolutePath(),
            args.getOrganism(),
            args.getThreadsPerProcess());

    File outputFile = jellyfishProcess.getArgs().getOutputFile();

    // Create a job name
    String jobName = args.getJobPrefix() + "-count-" + suffix;

    // Start jellyfish
    final ExecutionResult result =
        this.conanExecutorService.executeProcess(
            jellyfishProcess,
            new File(outputDir, ecqName),
            jobName,
            args.getThreadsPerProcess(),
            args.getMemoryPerProcess(),
            args.isRunParallel());

    result.setName("count-" + suffix);

    return new JobOutput(id, outputFile);
  }
Example #6
 @Override
 public List<Shape<V>> caseBuiltin(Builtin builtin, Args<V> arg) {
   if (Debug) System.out.println("inside ShapePropgator, builtin fn is " + builtin);
   if (Debug) System.out.println("the number of output variables is " + arg.getNargout());
   if (builtin instanceof HasShapePropagationInfo) {
     // call shape prop tool
     ShapePropTool<V> shapePropTool = new ShapePropTool<V>();
     @SuppressWarnings({"unchecked"})
     List<Shape<V>> result =
         shapePropTool.matchByValues(
             ((HasShapePropagationInfo<V>) builtin).getShapePropagationInfo(), arg);
     return result;
   }
   throw new UnsupportedOperationException();
 }
Example #7
  public CryptoMapper(final IRequestMapper wrappedMapper, final IProvider<ICrypt> cryptProvider) {
    this.wrappedMapper = Args.notNull(wrappedMapper, "wrappedMapper");
    this.cryptProvider = Args.notNull(cryptProvider, "cryptProvider");
  }
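
A hedged sketch of where this constructor is typically wired up in a Wicket application; Wicket also offers a CryptoMapper(IRequestMapper, Application) convenience constructor that pulls the ICrypt provider from the application's security settings:

  // In a WebApplication subclass: wrap the root mapper so generated URLs are encrypted.
  @Override
  protected void init() {
    super.init();
    setRootRequestMapper(new CryptoMapper(getRootRequestMapper(), this));
  }
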
Example #8
  void doTest(String name) throws Exception {
    Method m = tests.get(name);
    Method m_check = tests.get(name + "_check");
    Class<?>[] paramTypes = m.getParameterTypes();
    Object[] params = new Object[paramTypes.length];
    Class<?> retType = m.getReturnType();
    // Decide whether the test operates on int[] (primitive) or A[] (object) arrays,
    // based on the return type or, for void methods, the first parameter's component type.
    boolean isIntArray =
        (retType.isPrimitive() && !retType.equals(Void.TYPE))
            || (retType.equals(Void.TYPE) && paramTypes[0].getComponentType().isPrimitive())
            || (retType.isArray() && retType.getComponentType().isPrimitive());

    Args args = m.getAnnotation(Args.class);

    Object src = null;
    switch (args.src()) {
      case SMALL:
        {
          if (isIntArray) {
            src = small_int_src;
          } else {
            src = small_a_src;
          }
          break;
        }
      case LARGE:
        {
          if (isIntArray) {
            src = large_int_src;
          } else {
            src = large_a_src;
          }
          break;
        }
      case ZERO:
        {
          if (isIntArray) {
            src = zero_int_src;
          } else {
            src = zero_a_src;
          }
          break;
        }
    }

    // Run the test many times (enough to trigger JIT compilation of the method
    // under test) and verify the result on every iteration.
    for (int i = 0; i < 20000; i++) {
      boolean failure = false;

      int p = 0;

      if (params.length > 0) {
        if (isIntArray) {
          params[0] = ((int[]) src).clone();
        } else {
          params[0] = ((A[]) src).clone();
        }
        p++;
      }

      if (params.length > 1) {
        switch (args.dst()) {
          case NEW:
            {
              if (isIntArray) {
                params[1] = new int[((int[]) params[0]).length];
              } else {
                params[1] = new A[((A[]) params[0]).length];
              }
              p++;
              break;
            }
          case SRC:
            {
              params[1] = params[0];
              p++;
              break;
            }
          case NONE:
            break;
        }
      }

      for (int j = 0; j < args.extra_args().length; j++) {
        params[p + j] = args.extra_args()[j];
      }

      Object res = m.invoke(null, params);

      if (retType.isPrimitive() && !retType.equals(Void.TYPE)) {
        int s = (int) res;
        int sum = 0;
        int[] int_res = (int[]) src;
        for (int j = 0; j < int_res.length; j++) {
          sum += int_res[j];
        }
        failure = (s != sum);
        if (failure) {
          System.out.println("Test " + name + " failed: result = " + s + " != " + sum);
        }
      } else {
        Object dest = null;
        if (!retType.equals(Void.TYPE)) {
          dest = res;
        } else {
          dest = params[1];
        }

        if (m_check != null) {
          failure = (boolean) m_check.invoke(null, new Object[] {src, dest});
        } else {
          if (isIntArray) {
            int[] int_res = (int[]) src;
            int[] int_dest = (int[]) dest;
            for (int j = 0; j < int_res.length; j++) {
              if (int_res[j] != int_dest[j]) {
                System.out.println(
                    "Test "
                        + name
                        + " failed for "
                        + j
                        + " src["
                        + j
                        + "]="
                        + int_res[j]
                        + ", dest["
                        + j
                        + "]="
                        + int_dest[j]);
                failure = true;
              }
            }
          } else {
            Object[] object_res = (Object[]) src;
            Object[] object_dest = (Object[]) dest;
            for (int j = 0; j < object_res.length; j++) {
              if (object_res[j] != object_dest[j]) {
                System.out.println(
                    "Test "
                        + name
                        + " failed for "
                        + j
                        + " src["
                        + j
                        + "]="
                        + object_res[j]
                        + ", dest["
                        + j
                        + "]="
                        + object_dest[j]);
                failure = true;
              }
            }
          }
        }
      }

      if (failure) {
        success = false;
        break;
      }
    }
  }
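
For reference, a minimal reconstruction of the @Args annotation this harness reads. The enum and element names below are inferred from the switch statements above and should be treated as assumptions; the real test bundle defines its own version:

  import java.lang.annotation.ElementType;
  import java.lang.annotation.Retention;
  import java.lang.annotation.RetentionPolicy;
  import java.lang.annotation.Target;

  // Assumed enum names; only the constants SMALL/LARGE/ZERO and NEW/SRC/NONE
  // appear in the harness above.
  enum ArraySrc { SMALL, LARGE, ZERO }
  enum ArrayDst { NEW, SRC, NONE }

  @Retention(RetentionPolicy.RUNTIME) // must be runtime-visible for m.getAnnotation(Args.class)
  @Target(ElementType.METHOD)
  @interface Args {
    ArraySrc src();                       // which source array to use
    ArrayDst dst() default ArrayDst.NONE; // how to supply the destination array, if any
    int[] extra_args() default {};        // assumed int-valued extras, boxed into the params array
  }
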
Example #9
  @Override
  public ExecutionResult execute(ExecutionContext executionContext)
      throws ProcessExecutionException, InterruptedException {

    try {

      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      log.info("Starting Kmer Counting on all Reads");

      // Create a shortcut to args for convenience
      Args args = this.getArgs();

      // Force run parallel to false if not using a scheduler
      if (!executionContext.usingScheduler() && args.isRunParallel()) {
        log.warn("Forcing linear execution due to lack of job scheduler");
        args.setRunParallel(false);
      }

      // Create the output directory
      args.getOutputDir().mkdirs();

      JobOutputMap jfCountOutputs = new JobOutputMap();
      List<ExecutionResult> jobResults = new ArrayList<>();
      List<ExecutionResult> allJobResults = new ArrayList<>();

      // Create the output directory for the RAW datasets
      File rawOutputDir = new File(args.getOutputDir(), "raw");

      if (!rawOutputDir.exists()) {
        rawOutputDir.mkdirs();
      }

      // Start jellyfish on all RAW datasets
      for (Library lib : args.getAllLibraries()) {

        // Execute jellyfish and add id to list of job ids
        JobOutput jfOut = this.executeJellyfishCount(args, "raw", args.getOutputDir(), lib);
        jobResults.add(jfOut.getResult());
        allJobResults.add(jfOut.getResult());
        jfCountOutputs.updateTracker("raw", jfOut.getOutputFile());
      }

      // Also start jellyfish on all the pre-processed libraries from MECQ
      if (args.getAllMecqs() != null) {
        for (Mecq.EcqArgs ecqArgs : args.getAllMecqs()) {

          // Create the output directory for this MECQ group
          File ecqOutputDir = new File(args.getOutputDir(), ecqArgs.getName());

          if (!ecqOutputDir.exists()) {
            ecqOutputDir.mkdirs();
          }

          for (Library lib : ecqArgs.getOutputLibraries()) {

            // Add jellyfish id to list of job ids
            JobOutput jfOut =
                this.executeJellyfishCount(args, ecqArgs.getName(), args.getOutputDir(), lib);

            jobResults.add(jfOut.getResult());
            allJobResults.add(jfOut.getResult());
            jfCountOutputs.updateTracker(ecqArgs.getName(), jfOut.getOutputFile());
          }
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Kmer counting all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-count-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-count-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      JobOutputMap mergedOutputs = new JobOutputMap();

      // Now execute merge jobs if required
      for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {

        String ecqName = entry.getKey();
        Set<File> fileSet = entry.getValue();

        // Only merge if there's more than one library
        if (fileSet.size() > 1) {
          JobOutput jfOut =
              this.executeJellyfishMerger(
                  args, ecqName, fileSet, new File(args.getOutputDir(), ecqName));

          jobResults.add(jfOut.getResult());
          allJobResults.add(jfOut.getResult());
          mergedOutputs.updateTracker(ecqName, jfOut.getOutputFile());
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info(
            "Creating merged kmer counts for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-merge-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-merge-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      // Combine all jellyfish out maps
      jfCountOutputs.combine(mergedOutputs);

      String katGcpJobPrefix = args.getJobPrefix() + "-kat-gcp";

      // Run KAT GCP on everything
      List<ExecutionResult> katGcpResults =
          this.executeKatGcp(
              jfCountOutputs,
              katGcpJobPrefix,
              args.getThreadsPerProcess(),
              args.getMemoryPerProcess(),
              args.isRunParallel());

      for (ExecutionResult result : katGcpResults) {
        result.setName(result.getName().substring(args.getJobPrefix().length() + 1));
        jobResults.add(result);
        allJobResults.add(result);
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Running \"kat gcp\" for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            katGcpJobPrefix + "*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kat-gcp-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      log.info("Kmer counting of all reads finished.");

      stopWatch.stop();

      TaskResult taskResult =
          new DefaultTaskResult(
              "rampart-read_analysis-kmer", true, allJobResults, stopWatch.getTime() / 1000L);

      // Output the resource usage to file
      FileUtils.writeLines(
          new File(args.getOutputDir(), args.getJobPrefix() + ".summary"), taskResult.getOutput());

      return new DefaultExecutionResult(
          taskResult.getTaskName(),
          0,
          new String[] {},
          null,
          -1,
          new ResourceUsage(
              taskResult.getMaxMemUsage(),
              taskResult.getActualTotalRuntime(),
              taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException e) {
      throw new ProcessExecutionException(-1, e);
    }
  }