Ejemplo n.º 1
0
  /**
   * Returns the head pipes of the assembly that ends at this pipe.
   *
   * <p>The result is derived from the array returned by {@code getPrevious()}:
   *
   * <ul>
   *   <li>no previous pipes: this pipe is itself the only head;
   *   <li>exactly one previous pipe: that pipe's heads are returned as-is;
   *   <li>several previous pipes: the heads of all of them, with duplicates removed. The order of
   *       the returned array is unspecified in this case because the heads are gathered in a
   *       {@link HashSet}.
   * </ul>
   *
   * @return the head pipes (type Pipe[]) reachable from this Pipe object.
   */
  public Pipe[] getHeads() {
    Pipe[] previous = getPrevious();

    switch (previous.length) {
      case 0:
        // nothing upstream, so this pipe starts the assembly
        return new Pipe[] {this};
      case 1:
        // single predecessor: delegate without building a set
        return previous[0].getHeads();
      default:
        break;
    }

    // several predecessors may share heads; the set removes the duplicates
    Set<Pipe> collected = new HashSet<Pipe>();

    for (Pipe predecessor : previous) {
      for (Pipe head : predecessor.getHeads()) {
        collected.add(head);
      }
    }

    return collected.toArray(new Pipe[collected.size()]);
  }
  /**
   * Assembles a {@link FlowDef} from this object's settings and connects it into a
   * {@link HadoopFlow}.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>copy sinks, sources, traps, tails, assertion/debug levels, checkpoints, tags and name
   *       from {@code flowDef}, when one is set;
   *   <li>when {@code sources} or {@code sinks} holds a single tap registered under
   *       {@code MARKER}, re-register that tap under the name of the only head pipe reachable
   *       from {@code tails};
   *   <li>add {@code sources}, {@code sinks}, {@code traps} and {@code tails} to the definition;
   *   <li>tag the definition with {@code beanName} and use it as the flow name when the
   *       definition has no name yet;
   *   <li>derive the connector properties from {@code configuration} and {@code properties} and
   *       apply the jar, polling-interval and max-concurrent-steps settings.
   * </ol>
   *
   * @return the flow obtained by connecting the assembled definition through a
   *     {@link HadoopFlowConnector}
   * @throws IllegalStateException if a tap registered under {@code MARKER} has to be renamed but
   *     {@code tails} does not resolve to exactly one head pipe
   * @throws IOException if one of the resource or configuration operations performed here fails
   */
  @Override
  HadoopFlow createFlow() throws IOException {
    // start from a fresh definition so the configured flowDef itself is never modified
    FlowDef def = FlowDef.flowDef();

    if (flowDef != null) {
      def.addSinks(flowDef.getSinksCopy())
          .addSources(flowDef.getSourcesCopy())
          .addTraps(flowDef.getTrapsCopy())
          .addTails(flowDef.getTailsArray())
          .setAssertionLevel(flowDef.getAssertionLevel())
          .setDebugLevel(flowDef.getDebugLevel())
          .addCheckpoints(flowDef.getCheckpointsCopy())
          .addTags(flowDef.getTags())
          .setName(flowDef.getName());
    }

    // distinct head pipes of all tails, in encounter order
    Set<Pipe> heads = new LinkedHashSet<Pipe>();

    if (tails != null) {
      for (Pipe pipe : tails) {
        Collections.addAll(heads, pipe.getHeads());
      }
    }

    // only an unambiguous (single) head can lend its name to a tap stored under MARKER
    Pipe head = null;

    if (heads.size() == 1) {
      head = heads.iterator().next();
    }

    boolean renameSource = sources != null && sources.size() == 1 && sources.get(MARKER) != null;
    boolean renameSink = sinks != null && sinks.size() == 1 && sinks.get(MARKER) != null;

    // Fail before touching either map: previously this case surfaced as a bare
    // NullPointerException after the tap had already been removed from its map.
    if ((renameSource || renameSink) && head == null) {
      throw new IllegalStateException(
          "Cannot name the tap registered under the marker key: expected exactly one head pipe"
              + " but found "
              + heads.size());
    }

    if (sources != null && sources.size() == 1) {
      Tap tap = sources.remove(MARKER);
      if (tap != null) {
        sources.put(head.getName(), tap);
      }
    }

    // NOTE(review): the sink is also keyed by the head pipe's name, not a tail's name;
    // kept as in the original - confirm this is intended.
    if (sinks != null && sinks.size() == 1) {
      Tap tap = sinks.remove(MARKER);
      if (tap != null) {
        sinks.put(head.getName(), tap);
      }
    }

    def.addSources(sources).addSinks(sinks).addTraps(traps);

    if (tails != null) {
      def.addTails(tails);
    }

    if (StringUtils.hasText(beanName)) {
      def.addTag(beanName);

      // the bean name doubles as the flow name only when none was given
      if (!StringUtils.hasText(def.getName())) {
        def.setName(beanName);
      }
    }

    Configuration cfg = ConfigurationUtils.createFrom(configuration, properties);
    Properties props = ConfigurationUtils.asProperties(cfg);

    if (jarSetup) {
      if (jar != null) {
        AppProps.setApplicationJarPath(props, ResourceUtils.decode(jar.getURI().toString()));
      } else if (jarClass != null) {
        AppProps.setApplicationJarClass(props, jarClass);
      } else {
        // neither jar nor jarClass given: auto-detect the required libraries on the classpath
        ClassLoader cascadingCL = Cascade.class.getClassLoader();
        Resource cascadingCore = ResourceUtils.findContainingJar(Cascade.class);
        Resource cascadingHadoop =
            ResourceUtils.findContainingJar(cascadingCL, "cascading/flow/hadoop/HadoopFlow.class");
        // find jgrapht
        Resource jgrapht = ResourceUtils.findContainingJar(cascadingCL, "org/jgrapht/Graph.class");

        Assert.notNull(cascadingCore, "Cannot find cascading-core.jar");
        Assert.notNull(cascadingHadoop, "Cannot find cascading-hadoop.jar");
        Assert.notNull(jgrapht, "Cannot find jgraphts-jdk.jar");

        if (log.isDebugEnabled()) {
          log.debug(
              "Auto-detecting Cascading Libs ["
                  + Arrays.toString(new Resource[] {cascadingCore, cascadingHadoop, jgrapht})
                  + "]");
        }

        ConfigurationUtils.addLibs(cfg, cascadingCore, cascadingHadoop, jgrapht);

        // cfg was modified after props was derived from it, so derive props again
        props = ConfigurationUtils.asProperties(cfg);
      }
    }

    if (jobPoolingInterval != null) {
      FlowProps.setJobPollingInterval(props, jobPoolingInterval);
    }

    if (maxConcurrentSteps != null) {
      FlowProps.setMaxConcurrentSteps(props, maxConcurrentSteps);
    }

    return (HadoopFlow) new HadoopFlowConnector(props).connect(def);
  }