/**
 * Create a new datum with field names defined by {@code fields}, and field values contained in
 * {@code tuple}.
 *
 * <p>WARNING - {@code tuple} will be kept as the data container, so don't call this with a tuple
 * provided by a Cascading operation/iterator, as those get reused.
 *
 * @param fields Names of fields
 * @param tuple Data for the datum
 */
public BaseDatum(Fields fields, Tuple tuple) {
  if (fields.size() != tuple.size()) {
    throw new IllegalArgumentException(
        "Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple);
  }

  _tupleEntry = new TupleEntry(fields, tuple);
}
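/*
 * A minimal usage sketch of the warning above (helper name is hypothetical, and assumes
 * BaseDatum is instantiable here): copy any tuple obtained from a Cascading
 * operation/iterator before handing it to the constructor, since the datum keeps the
 * container it is given.
 */
public static BaseDatum datumFromReusedTuple(Fields fields, Tuple reusedTuple) {
  // new Tuple(Tuple) makes a copy, so the datum owns a container Cascading won't recycle
  return new BaseDatum(fields, new Tuple(reusedTuple));
}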
public Pipe addAssembly(
    String value, Map<String, String> subParams, Map<String, Pipe> pipes, Pipe pipe) {
  Fields fields = asFields(getString(subParams, "args", null));
  if (fields == null) {
    fields = Fields.FIRST;
  }

  return new Each(pipe, fields, new ExpressionFunction(Fields.size(1), value, String.class));
}
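/*
 * A hedged sketch of the kind of Each this builds (pipe and field names are hypothetical):
 * the Janino expression sees each argument field as a variable of the same name, typed as
 * String here, and the single result lands in an unnamed field of size 1.
 */
Pipe lowered = new Each(
    new Pipe("docs"),
    new Fields("line"),
    new ExpressionFunction(Fields.size(1), "line.toLowerCase()", String.class));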
public static <T> void populateOutputTupleEntry(
    CombinerDefinition<T> definition, TupleEntry output, Tuple resultTuple) {
  // set the ID so we can differentiate later
  output.setRaw(MultiCombiner.ID_FIELD, definition.getId());

  // our tuples are of the form groupFields+outputFields, set the TupleEntry fields appropriately
  Fields groupFields = definition.getGroupFields();
  int index = 0;
  for (int i = 0; i < groupFields.size(); i++) {
    output.setRaw(groupFields.get(i), resultTuple.getObject(index));
    index++;
  }

  Fields outputFields = definition.getOutputFields();
  for (int i = 0; i < outputFields.size(); i++) {
    output.setRaw(outputFields.get(i), resultTuple.getObject(index));
    index++;
  }
}
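/*
 * Worked example of the groupFields+outputFields layout (names and values are hypothetical):
 */
Fields groupFields = new Fields("user");
Fields outputFields = new Fields("sum", "count");
Tuple resultTuple = new Tuple("gina", 10L, 3); // group values first, then output values
// populateOutputTupleEntry would set user="gina", sum=10L, count=3 on the output entry,
// plus the definition's id under MultiCombiner.ID_FIELD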
static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
  TupleEntry entry = sinkCall.getOutgoingEntry();
  Fields fields = entry.getFields();
  Tuple tuple = entry.getTuple();

  if (fields.hasTypes()) {
    Type[] types = new Type[fields.size()];
    for (int index = 0; index < fields.size(); index++) {
      Type type = fields.getType(index);
      if (type instanceof CoercibleType<?>) {
        types[index] = String.class;
      } else {
        types[index] = type;
      }
    }

    tuple = entry.getCoercedTuple(types);
  }

  return tuple;
}
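/*
 * A minimal sketch of the coercion this relies on (assumes Cascading 2.2+ typed fields and
 * DateType, a CoercibleType; the field name and format are hypothetical): a typed entry hands
 * back the canonical String form instead of its internal representation.
 */
Fields fields = new Fields("ts").applyTypes(new DateType("yyyy-MM-dd"));
TupleEntry entry = new TupleEntry(fields, Tuple.size(1));
entry.setObject("ts", "2014-01-31"); // stored internally as a Long timestamp
Tuple asText = entry.getCoercedTuple(new Type[] {String.class}); // back to "2014-01-31"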
static List<String> asStrings(Fields fields) {
  if (fields == null || !fields.isDefined()) {
    // use auto-generated name
    return Collections.emptyList();
  }

  int size = fields.size();
  List<String> names = new ArrayList<String>(size);
  for (int fieldIndex = 0; fieldIndex < size; fieldIndex++) {
    names.add(fields.get(fieldIndex).toString());
  }

  return names;
}
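/*
 * Usage sketch (field names are hypothetical): defined fields come back as their names; null,
 * unknown, or substitution fields yield an empty list, signalling callers to auto-generate
 * names downstream.
 */
List<String> names = asStrings(new Fields("user", "score")); // ["user", "score"]
List<String> none = asStrings(Fields.UNKNOWN); // []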
@Test
public void testParserDeclared6() throws IOException {
  RegexParser splitter = new RegexParser(new Fields("lhs"), "(\\S+)\\s+\\S+", new int[] {1});
  Tuple arguments = new Tuple("foo\tbar");
  Fields resultFields = Fields.size(1);

  TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

  assertEquals("wrong size", 1, collector.size());

  Iterator<Tuple> iterator = collector.iterator();
  Tuple tuple = iterator.next();

  assertEquals("wrong tuple size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "foo", tuple.getObject(0));
}
/** Contributed by gicode */
@Test
public void testParserDeclared5() throws IOException {
  RegexParser splitter = new RegexParser(new Fields("bar"), "^GET /foo\\?bar=([^\\&]+)&");
  Tuple arguments = new Tuple("GET /foo?bar=z123&baz=2");
  Fields resultFields = Fields.size(1);

  TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

  assertEquals("wrong size", 1, collector.size());

  Iterator<Tuple> iterator = collector.iterator();
  Tuple tuple = iterator.next();

  assertEquals("wrong tuple size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "z123", tuple.getObject(0));
}
/**
 * Selects and returns the first argument Tuple encountered.
 *
 * @param fieldDeclaration of type Fields
 */
@ConstructorProperties({"fieldDeclaration"})
public First(Fields fieldDeclaration) {
  super(fieldDeclaration.size(), fieldDeclaration);
}
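/*
 * Usage sketch (pipe and field names are hypothetical): sort within each group, then keep the
 * first tuple seen per group.
 */
Pipe firsts = new GroupBy(assembly, new Fields("userId"), new Fields("time"));
firsts = new Every(firsts, new Fields("time", "event"), new First(new Fields("time", "event")));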
public void failingListenerTest(FailingFlowListener.OnFail onFail) throws Exception {
  if (!new File(inputFileLower).exists()) fail("data file not found");

  copyFromLocal(inputFileLower);
  copyFromLocal(inputFileUpper);

  Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
  Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

  Map sources = new HashMap();
  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

  // using null pos so all fields are written
  Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);

  if (onFail == FailingFlowListener.OnFail.THROWABLE) {
    pipeLower = new Each(
        pipeLower,
        new Debug() {
          @Override
          public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
            throw new RuntimeException("failing inside pipe assembly intentionally");
          }
        });
  }

  pipeLower = new GroupBy(pipeLower, new Fields("num"));

  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);
  pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

  Pipe splice =
      new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

  Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

  // countFlow.writeDOT( "stopped.dot" );

  FailingFlowListener listener = new FailingFlowListener(onFail);
  flow.addListener(listener);

  System.out.println("calling start");
  flow.start();

  assertTrue("did not start", listener.started.tryAcquire(120, TimeUnit.SECONDS));

  if (onFail == FailingFlowListener.OnFail.STOPPING) {
    while (true) {
      System.out.println("testing if running");
      Thread.sleep(1000);

      Map<String, Callable<Throwable>> map = flow.getJobsMap();

      if (map == null || map.values().size() == 0) continue;

      if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
    }

    System.out.println("calling stop");
    flow.stop();
  }

  assertTrue("did not complete", listener.completed.tryAcquire(120, TimeUnit.SECONDS));
  assertTrue("did not stop", listener.stopped.tryAcquire(120, TimeUnit.SECONDS));

  try {
    flow.complete();
    fail("did not rethrow exception from listener");
  } catch (Exception exception) {
    // ignore
  }
}
public void testStop() throws Exception {
  if (!new File(inputFileLower).exists()) fail("data file not found");

  copyFromLocal(inputFileLower);
  copyFromLocal(inputFileUpper);

  Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
  Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

  Map sources = new HashMap();
  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

  // using null pos so all fields are written
  Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);
  pipeLower = new GroupBy(pipeLower, new Fields("num"));

  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);
  pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

  Pipe splice =
      new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

  Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

  // countFlow.writeDOT( "stopped.dot" );

  LockingFlowListener listener = new LockingFlowListener();
  flow.addListener(listener);

  System.out.println("calling start");
  flow.start();

  assertTrue("did not start", listener.started.tryAcquire(60, TimeUnit.SECONDS));

  while (true) {
    System.out.println("testing if running");
    Thread.sleep(1000);

    Map<String, Callable<Throwable>> map = flow.getJobsMap();

    if (map == null || map.values().size() == 0) continue;

    if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
  }

  System.out.println("calling stop");
  flow.stop();

  assertTrue("did not stop", listener.stopped.tryAcquire(60, TimeUnit.SECONDS));
  assertTrue("did not complete", listener.completed.tryAcquire(60, TimeUnit.SECONDS));
}
public static void validate(File solrCoreDir, String dataDirPropertyName, Fields schemeFields)
    throws IOException {
  // Verify solrCoreDir exists
  if (!solrCoreDir.exists() || !solrCoreDir.isDirectory()) {
    throw new TapException("Solr core directory doesn't exist: " + solrCoreDir);
  }

  // Set up a temp location for Solr home, where we'll write out a synthetic solr.xml
  // that references the core directory.
  File tmpSolrHome = makeTempSolrHome(solrCoreDir);
  String coreName = solrCoreDir.getName();
  String corePath = solrCoreDir.getAbsolutePath();
  String solrXmlContent = String.format(
      "<solr><cores><core name=\"%s\" instanceDir=\"%s\"></core></cores></solr>",
      coreName, corePath);
  File solrXmlFile = new File(tmpSolrHome, "solr.xml");
  FileUtils.write(solrXmlFile, solrXmlContent);

  // Set up a temp location for data, so when we instantiate the coreContainer,
  // we don't pollute the solr home with a /data sub-dir.
  String tmpFolder = System.getProperty("java.io.tmpdir");
  File tmpDataDir = new File(tmpFolder, UUID.randomUUID().toString());
  tmpDataDir.mkdir();

  System.setProperty("solr.solr.home", tmpSolrHome.getAbsolutePath());
  System.setProperty(dataDirPropertyName, tmpDataDir.getAbsolutePath());
  // All we need is the update request handler
  System.setProperty("enable.special-handlers", "false");
  // We certainly don't need to warm the cache
  System.setProperty("enable.cache-warming", "false");

  CoreContainer.Initializer initializer = new CoreContainer.Initializer();
  CoreContainer coreContainer = null;

  try {
    coreContainer = initializer.initialize();
    Collection<SolrCore> cores = coreContainer.getCores();
    SolrCore core = null;

    if (cores.size() == 0) {
      throw new TapException("No Solr cores are available");
    } else if (cores.size() > 1) {
      throw new TapException("Only one Solr core is supported");
    } else {
      core = cores.iterator().next();
    }

    IndexSchema schema = core.getSchema();
    Map<String, SchemaField> solrFields = schema.getFields();
    Set<String> schemeFieldnames = new HashSet<String>();

    // Every scheme field must exist in the Solr schema
    for (int i = 0; i < schemeFields.size(); i++) {
      String fieldName = schemeFields.get(i).toString();
      if (!solrFields.containsKey(fieldName)) {
        throw new TapException("Sink field name doesn't exist in Solr schema: " + fieldName);
      }
      schemeFieldnames.add(fieldName);
    }

    // Every required Solr field must be covered by a scheme field
    for (String solrFieldname : solrFields.keySet()) {
      SchemaField solrField = solrFields.get(solrFieldname);
      if (solrField.isRequired() && !schemeFieldnames.contains(solrFieldname)) {
        throw new TapException("No sink field name for required Solr field: " + solrFieldname);
      }
    }
  } finally {
    if (coreContainer != null) {
      coreContainer.shutdown();
    }
  }
}
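/*
 * A hedged usage sketch (path and field names are hypothetical): check that a sink scheme's
 * fields line up with the core's schema before wiring up the tap.
 */
validate(
    new File("src/test/resources/solr/core1"), // directory containing conf/schema.xml
    "solr.data.dir",
    new Fields("id", "title", "text"));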
private TupleEntry getEntry(Tuple tuple) {
  return new TupleEntry(Fields.size(tuple.size()), tuple);
}
/**
 * Create an empty datum with field names defined by {@code fields}.
 *
 * @param fields Names of fields
 */
public BaseDatum(Fields fields) {
  this(new TupleEntry(fields, Tuple.size(fields.size())));
}
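/*
 * A minimal sketch of what these two helpers produce (field names and values are
 * hypothetical):
 */
BaseDatum empty = new BaseDatum(new Fields("name", "score")); // backed by a (null, null) tuple
TupleEntry anonymous = getEntry(new Tuple("gina", 42)); // positional fields 0 and 1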