Example #1
  /**
   * Create a new datum with field names defined by {@code fields}, and field values contained in
   * {@code tuple}.
   *
   * <p>WARNING - {@code tuple} will be kept as the data container, so don't call this with a tuple
   * provided by a Cascading operation/iterator, as those get reused.
   *
   * @param fields Names of fields
   * @param tuple Data for the datum
   */
  public BaseDatum(Fields fields, Tuple tuple) {
    if (fields.size() != tuple.size()) {
      throw new IllegalArgumentException(
          "Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple);
    }

    _tupleEntry = new TupleEntry(fields, tuple);
  }
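
A minimal usage sketch (the field names are illustrative, and BaseDatum is typically instantiated through a concrete subclass). Per the warning above, the tuple is freshly constructed rather than borrowed from a Cascading operation:

  Fields fields = new Fields("name", "age");
  Tuple tuple = new Tuple("alice", 42);            // a fresh Tuple, never one reused by Cascading
  BaseDatum datum = new BaseDatum(fields, tuple);  // OK: two field names, two values
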
Example #2
  public Pipe addAssembly(
      String value, Map<String, String> subParams, Map<String, Pipe> pipes, Pipe pipe) {
    Fields fields = asFields(getString(subParams, "args", null));

    if (fields == null) fields = Fields.FIRST;

    return new Each(pipe, fields, new ExpressionFunction(Fields.size(1), value, String.class));
  }
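
A hypothetical call, assuming the getString/asFields helpers used above resolve the "args" sub-parameter to an argument selector. ExpressionFunction compiles the expression with the argument field name as its variable, so selecting a "line" field lets the expression reference line directly:

  Map<String, String> subParams = new HashMap<String, String>();
  subParams.put("args", "line");  // hypothetical: use the "line" field as the single String argument

  Pipe result = addAssembly("line.toUpperCase()", subParams, pipes, pipe);
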
Example #3
  public static <T> void populateOutputTupleEntry(
      CombinerDefinition<T> definition, TupleEntry output, Tuple resultTuple) {
    // set the ID so we can differentiate later
    output.setRaw(MultiCombiner.ID_FIELD, definition.getId());

    // our tuples are of the form groupFields+outputFields, set the TupleEntry fields appropriately
    Fields groupFields = definition.getGroupFields();
    int index = 0;
    for (int i = 0; i < groupFields.size(); i++) {
      output.setRaw(groupFields.get(i), resultTuple.getObject(index));
      index++;
    }
    Fields outputFields = definition.getOutputFields();
    for (int i = 0; i < outputFields.size(); i++) {
      output.setRaw(outputFields.get(i), resultTuple.getObject(index));
      index++;
    }
  }
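
To illustrate the groupFields + outputFields layout (all names and values here are hypothetical), a combiner emitting one group field and two output fields would unpack its result tuple like this:

  Fields groupFields = new Fields("country");
  Fields outputFields = new Fields("clicks", "impressions");
  Tuple resultTuple = new Tuple("US", 10L, 250L);  // group values first, then output values

  // populateOutputTupleEntry would copy "US" into "country", 10L into "clicks", and
  // 250L into "impressions", after stamping the definition's ID into MultiCombiner.ID_FIELD.
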
Example #4
    static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
      TupleEntry entry = sinkCall.getOutgoingEntry();
      Fields fields = entry.getFields();
      Tuple tuple = entry.getTuple();

      if (fields.hasTypes()) {
        Type[] types = new Type[fields.size()];
        for (int index = 0; index < fields.size(); index++) {
          Type type = fields.getType(index);
          if (type instanceof CoercibleType<?>) {
            types[index] = String.class;
          } else {
            types[index] = type;
          }
        }

        tuple = entry.getCoercedTuple(types);
      }
      return tuple;
    }
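
The coercion matters for fields typed with a CoercibleType, whose canonical in-tuple form differs from its String form. A small sketch, assuming Cascading's DateType (canonical form: epoch millis as a long):

  Fields fields = new Fields("when").applyTypes(new DateType("yyyy-MM-dd"));
  TupleEntry entry = new TupleEntry(fields, Tuple.size(1));
  entry.setLong("when", 0L);  // stored in the canonical long form
  Tuple coerced = entry.getCoercedTuple(new Type[] { String.class });
  // coerced.getObject(0) is now the formatted date String, e.g. "1970-01-01"
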
Example #5
  static List<String> asStrings(Fields fields) {
    if (fields == null || !fields.isDefined()) {
      // use auto-generated name
      return Collections.emptyList();
    }

    int size = fields.size();
    List<String> names = new ArrayList<String>(size);
    for (int fieldIndex = 0; fieldIndex < size; fieldIndex++) {
      names.add(fields.get(fieldIndex).toString());
    }

    return names;
  }
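
For example (illustrative values), named fields yield their names, while null or undefined selectors such as Fields.ALL fall through to the auto-generated-name branch:

  asStrings(new Fields("name", "age"));  // ["name", "age"]
  asStrings(Fields.ALL);                 // [] -- Fields.ALL is a selector, not defined names
  asStrings(null);                       // []
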
Example #6
  @Test
  public void testParserDeclared6() throws IOException {
    RegexParser splitter = new RegexParser(new Fields("lhs"), "(\\S+)\\s+\\S+", new int[] {1});
    Tuple arguments = new Tuple("foo\tbar");
    Fields resultFields = Fields.size(1);

    TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

    assertEquals("wrong size", 1, collector.size());

    Iterator<Tuple> iterator = collector.iterator();

    Tuple tuple = iterator.next();

    assertEquals("wrong tupel size", 1, tuple.size());
    assertEquals("not equal: tuple.get(0)", "foo", tuple.getObject(0));
  }
Example #7
  /** Contributed by gicode */
  @Test
  public void testParserDeclared5() throws IOException {
    RegexParser splitter = new RegexParser(new Fields("bar"), "^GET /foo\\?bar=([^\\&]+)&");
    Tuple arguments = new Tuple("GET /foo?bar=z123&baz=2");
    Fields resultFields = Fields.size(1);

    TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

    assertEquals("wrong size", 1, collector.size());

    Iterator<Tuple> iterator = collector.iterator();

    Tuple tuple = iterator.next();

    assertEquals("wrong tuple size", 1, tuple.size());
    assertEquals("not equal: tuple.get(0)", "z123", tuple.getObject(0));
  }
Example #8
 /**
  * Selects and returns the first argument Tuple encountered.
  *
  * @param fieldDeclaration of type Fields
  */
 @ConstructorProperties({"fieldDeclaration"})
 public First(Fields fieldDeclaration) {
   super(fieldDeclaration.size(), fieldDeclaration);
 }
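
A usage sketch (the pipe and field names are hypothetical). First is an Aggregator, so it runs inside an Every following a grouping:

  Pipe grouped = new GroupBy(assembly, new Fields("num"));
  Pipe firsts = new Every(grouped, new Fields("char"), new First(new Fields("first_char")), Fields.ALL);
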
Example #9
  public void failingListenerTest(FailingFlowListener.OnFail onFail) throws Exception {
    if (!new File(inputFileLower).exists()) fail("data file not found");

    copyFromLocal(inputFileLower);
    copyFromLocal(inputFileUpper);

    Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
    Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

    Map<String, Tap> sources = new HashMap<String, Tap>();

    sources.put("lower", sourceLower);
    sources.put("upper", sourceUpper);

    Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

    // using null pos so all fields are written
    Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

    Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);

    if (onFail == FailingFlowListener.OnFail.THROWABLE) {
      pipeLower =
          new Each(
              pipeLower,
              new Debug() {
                @Override
                public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
                  throw new RuntimeException("failing inside pipe assembly intentionally");
                }
              });
    }

    pipeLower = new GroupBy(pipeLower, new Fields("num"));

    Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);

    pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

    Pipe splice =
        new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

    Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

    //    countFlow.writeDOT( "stopped.dot" );

    FailingFlowListener listener = new FailingFlowListener(onFail);

    flow.addListener(listener);

    System.out.println("calling start");
    flow.start();

    assertTrue("did not start", listener.started.tryAcquire(120, TimeUnit.SECONDS));

    if (onFail == FailingFlowListener.OnFail.STOPPING) {
      while (true) {
        System.out.println("testing if running");
        Thread.sleep(1000);

        Map<String, Callable<Throwable>> map = flow.getJobsMap();

        if (map == null || map.values().size() == 0) continue;

        if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
      }

      System.out.println("calling stop");

      flow.stop();
    }

    assertTrue("did not complete", listener.completed.tryAcquire(120, TimeUnit.SECONDS));
    assertTrue("did not stop", listener.stopped.tryAcquire(120, TimeUnit.SECONDS));

    try {
      flow.complete();
      fail("did not rethrow exception from listener");
    } catch (Exception exception) {
      // ignore
    }
  }
Example #10
  public void testStop() throws Exception {
    if (!new File(inputFileLower).exists()) fail("data file not found");

    copyFromLocal(inputFileLower);
    copyFromLocal(inputFileUpper);

    Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
    Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

    Map<String, Tap> sources = new HashMap<String, Tap>();

    sources.put("lower", sourceLower);
    sources.put("upper", sourceUpper);

    Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

    // using null pos so all fields are written
    Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

    Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);

    pipeLower = new GroupBy(pipeLower, new Fields("num"));

    Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);

    pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

    Pipe splice =
        new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

    Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

    //    countFlow.writeDOT( "stopped.dot" );

    LockingFlowListener listener = new LockingFlowListener();

    flow.addListener(listener);

    System.out.println("calling start");
    flow.start();

    assertTrue("did not start", listener.started.tryAcquire(60, TimeUnit.SECONDS));

    while (true) {
      System.out.println("testing if running");
      Thread.sleep(1000);

      Map<String, Callable<Throwable>> map = flow.getJobsMap();

      if (map == null || map.values().size() == 0) continue;

      if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
    }

    System.out.println("calling stop");

    flow.stop();

    assertTrue("did not stop", listener.stopped.tryAcquire(60, TimeUnit.SECONDS));
    assertTrue("did not complete", listener.completed.tryAcquire(60, TimeUnit.SECONDS));
  }
Example #11
  public static void validate(File solrCoreDir, String dataDirPropertyName, Fields schemeFields)
      throws IOException {

    // Verify solrCoreDir exists and is a directory
    if (!solrCoreDir.exists() || !solrCoreDir.isDirectory()) {
      throw new TapException("Solr core directory doesn't exist or isn't a directory: " + solrCoreDir);
    }

    File tmpSolrHome = makeTempSolrHome(solrCoreDir);

    // Set up a temp location for Solr home, where we write out a synthetic solr.xml
    // that references the core directory.
    String coreName = solrCoreDir.getName();
    String corePath = solrCoreDir.getAbsolutePath();
    String solrXmlContent =
        String.format(
            "<solr><cores><core name=\"%s\" instanceDir=\"%s\"></core></cores></solr>",
            coreName, corePath);
    File solrXmlFile = new File(tmpSolrHome, "solr.xml");
    FileUtils.write(solrXmlFile, solrXmlContent);

    // Set up a temp location for data, so when we instantiate the coreContainer,
    // we don't pollute the solr home with a /data sub-dir.
    String tmpFolder = System.getProperty("java.io.tmpdir");
    File tmpDataDir = new File(tmpFolder, UUID.randomUUID().toString());
    tmpDataDir.mkdir();

    System.setProperty("solr.solr.home", tmpSolrHome.getAbsolutePath());
    System.setProperty(dataDirPropertyName, tmpDataDir.getAbsolutePath());
    System.setProperty(
        "enable.special-handlers", "false"); // All we need is the update request handler
    System.setProperty(
        "enable.cache-warming", "false"); // We certainly don't need to warm the cache

    CoreContainer.Initializer initializer = new CoreContainer.Initializer();
    CoreContainer coreContainer = null;

    try {
      coreContainer = initializer.initialize();
      Collection<SolrCore> cores = coreContainer.getCores();
      SolrCore core = null;

      if (cores.size() == 0) {
        throw new TapException("No Solr cores are available");
      } else if (cores.size() > 1) {
        throw new TapException("Only one Solr core is supported");
      } else {
        core = cores.iterator().next();
      }

      IndexSchema schema = core.getSchema();
      Map<String, SchemaField> solrFields = schema.getFields();
      Set<String> schemeFieldnames = new HashSet<String>();

      for (int i = 0; i < schemeFields.size(); i++) {
        String fieldName = schemeFields.get(i).toString();
        if (!solrFields.containsKey(fieldName)) {
          throw new TapException("Sink field name doesn't exist in Solr schema: " + fieldName);
        }

        schemeFieldnames.add(fieldName);
      }

      for (String solrFieldname : solrFields.keySet()) {
        SchemaField solrField = solrFields.get(solrFieldname);
        if (solrField.isRequired() && !schemeFieldnames.contains(solrFieldname)) {
          throw new TapException("No sink field name for required Solr field: " + solrFieldname);
        }
      }
    } finally {
      if (coreContainer != null) {
        coreContainer.shutdown();
      }
    }
  }
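
A call sketch (the path and field names are hypothetical; the data-dir property name depends on the Solr configuration in use):

  validate(new File("/path/to/solr/core"), "solr.data.dir", new Fields("id", "text"));
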
Example #12
 private TupleEntry getEntry(Tuple tuple) {
   return new TupleEntry(Fields.size(tuple.size()), tuple);
 }
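
For example, a three-element tuple yields an entry whose fields are the positions 0, 1, and 2, since Fields.size(n) declares unnamed, position-addressed fields:

  TupleEntry entry = getEntry(new Tuple("a", "b", "c"));
  entry.getObject(1);  // "b", addressed by position
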
Example #13
 /**
  * Create an empty datum with field names defined by {@code fields}.
  *
  * @param fields Names of fields
  */
 public BaseDatum(Fields fields) {
   this(new TupleEntry(fields, Tuple.size(fields.size())));
 }
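
A sketch of the result (again assuming a concrete subclass, as in Example #1): Tuple.size(fields.size()) allocates a tuple of matching arity whose values all start out null:

  BaseDatum datum = new BaseDatum(new Fields("name", "age"));
  // the underlying TupleEntry has fields "name"/"age", both values null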