@Test
public void testFieldCoercion() throws IOException {
  // 75.185.76.245 - - [01/Sep/2007:00:01:03 +0000] "POST /mt-tb.cgi/235 HTTP/1.1" 403 174 "-"
  // "Opera/9.10 (Windows NT 5.1; U; ru)" "-"
  DateType dateType = new DateType(TestConstants.APACHE_DATE_FORMAT);

  Type[] types = new Type[] {
    String.class, // ip
    String.class, // -
    String.class, // -
    dateType, // date
    String.class, // request
    int.class, // code
    long.class, // bytes
    String.class, // -
    String.class, // agent
    String.class // -
  };

  Fields fields = new Fields(
      "ip", "client", "user", "date", "request", "code", "bytes", "referrer", "agent", "na");
  fields = fields.applyTypes(types);

  Tap input = getPlatform()
      .getDelimitedFile(
          fields, true, true, ",", "\"", null, inputFileApacheClean, SinkMode.KEEP);
  Tap output = getPlatform()
      .getDelimitedFile(
          fields, true, true, ",", "\"", null, getOutputPath(getTestName()), SinkMode.REPLACE);

  Pipe pipe = new Pipe("pipe");
  pipe = new Each(
      pipe,
      new Fields("date"),
      AssertionLevel.STRICT,
      new AssertExpression("date instanceof Long", Object.class));

  Flow flow = getPlatform().getFlowConnector().connect(input, output, pipe);
  flow.complete();

  validateLength(flow, 9, 10);
}
static List<String> asStrings(Fields fields) {
  if (fields == null || !fields.isDefined()) {
    // use auto-generated name
    return Collections.emptyList();
  }

  int size = fields.size();
  List<String> names = new ArrayList<String>(size);

  for (int fieldIndex = 0; fieldIndex < size; fieldIndex++) {
    names.add(fields.get(fieldIndex).toString());
  }

  return names;
}
@Override
public void notifyWriteSpillBegin(Spillable spillable, int spillSize, String spillReason) {
  int numFiles = spillable.spillCount();

  if (numFiles % 10 == 0) {
    LOG.info(
        "spilling group: {}, on grouping: {}, num times: {}, with reason: {}",
        new Object[] {
          joinField.printVerbose(), spillable.getGrouping().print(), numFiles + 1, spillReason
        });

    Runtime runtime = Runtime.getRuntime();
    long freeMem = runtime.freeMemory() / 1024 / 1024;
    long maxMem = runtime.maxMemory() / 1024 / 1024;
    long totalMem = runtime.totalMemory() / 1024 / 1024;

    LOG.info("mem on spill (mb), free: " + freeMem + ", total: " + totalMem + ", max: " + maxMem);
  }

  LOG.info("spilling {} tuples in list to file number {}", spillSize, numFiles + 1);

  flowProcess.increment(Spill.Num_Spills_Written, 1);
  flowProcess.increment(Spill.Num_Tuples_Spilled, spillSize);
}
/**
 * Create a new datum with field names defined by {@code fields}, and field values contained in
 * {@code tuple}.
 *
 * <p>WARNING - {@code tuple} will be kept as the data container, so don't call this with a tuple
 * provided by a Cascading operation/iterator, as those get reused.
 *
 * @param fields Names of fields
 * @param tuple Data for the datum
 */
public BaseDatum(Fields fields, Tuple tuple) {
  if (fields.size() != tuple.size()) {
    throw new IllegalArgumentException(
        "Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple);
  }

  _tupleEntry = new TupleEntry(fields, tuple);
}
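// Hedged usage sketch (not part of the original source): since BaseDatum(Fields, Tuple) keeps the
// passed Tuple as its live data container, copy any tuple obtained from a Cascading
// operation/iterator before wrapping it; the Tuple copy constructor is assumed to be available.
static BaseDatum datumFromReusedEntry(TupleEntry reusedEntry) {
  Tuple safeCopy = new Tuple(reusedEntry.getTuple()); // defensive copy, since Cascading reuses tuples
  return new BaseDatum(reusedEntry.getFields(), safeCopy);
}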
public Pipe addAssembly(
    String value, Map<String, String> subParams, Map<String, Pipe> pipes, Pipe pipe) {
  Fields fields = asFields(getString(subParams, "args", null));

  if (fields == null) fields = Fields.FIRST;

  return new Each(pipe, fields, new ExpressionFunction(Fields.size(1), value, String.class));
}
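// Hedged usage sketch (field name and expression are assumptions, not from the original source):
// with subParams mapping "args" to "line", the Each applies the Janino expression to that String
// argument, e.g. emitting a lower-cased copy of each line.
//
//   Map<String, String> subParams = Collections.singletonMap("args", "line");
//   pipe = addAssembly("line.toLowerCase()", subParams, pipes, pipe);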
private static Fields determineGroupFields(List<CombinerDefinition> combinerDefinitions) {
  Fields summedGroupFields = new Fields(MultiCombiner.ID_FIELD);

  for (CombinerDefinition def : combinerDefinitions) {
    summedGroupFields = Fields.merge(summedGroupFields, def.getGroupFields());
  }

  return summedGroupFields;
}
static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
  TupleEntry entry = sinkCall.getOutgoingEntry();
  Fields fields = entry.getFields();
  Tuple tuple = entry.getTuple();

  if (fields.hasTypes()) {
    Type[] types = new Type[fields.size()];

    for (int index = 0; index < fields.size(); index++) {
      Type type = fields.getType(index);

      if (type instanceof CoercibleType<?>) {
        types[index] = String.class;
      } else {
        types[index] = type;
      }
    }

    tuple = entry.getCoercedTuple(types);
  }

  return tuple;
}
public static Fields getInputFields(List<CombinerDefinition> combinerDefinitions) {
  Fields summedInputFields = new Fields();

  for (CombinerDefinition combinerDefinition : combinerDefinitions) {
    summedInputFields =
        Fields.merge(
            summedInputFields,
            combinerDefinition.getGroupFields(),
            combinerDefinition.getInputFields());
  }

  return summedInputFields;
}
public static Fields getIntermediateFields(List<CombinerDefinition> combinerDefinitions) {
  Fields summedIntermediateFields = new Fields(MultiCombiner.ID_FIELD);

  for (CombinerDefinition combinerDefinition : combinerDefinitions) {
    summedIntermediateFields =
        Fields.merge(
            summedIntermediateFields,
            combinerDefinition.getGroupFields(),
            combinerDefinition.getIntermediateFields());
  }

  return summedIntermediateFields;
}
@Override
public boolean equals(Object object) {
  if (this == object) return true;
  if (!(object instanceof BaseOperation)) return false;

  BaseOperation that = (BaseOperation) object;

  if (numArgs != that.numArgs) return false;
  if (fieldDeclaration != null
      ? !fieldDeclaration.equals(that.fieldDeclaration)
      : that.fieldDeclaration != null) return false;

  return true;
}
/** Contributed by gicode */
@Test
public void testParserDeclared5() throws IOException {
  RegexParser splitter = new RegexParser(new Fields("bar"), "^GET /foo\\?bar=([^\\&]+)&");
  Tuple arguments = new Tuple("GET /foo?bar=z123&baz=2");
  Fields resultFields = Fields.size(1);

  TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

  assertEquals("wrong size", 1, collector.size());

  Iterator<Tuple> iterator = collector.iterator();
  Tuple tuple = iterator.next();

  assertEquals("wrong tuple size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "z123", tuple.getObject(0));
}
@Test
public void testParserDeclared6() throws IOException {
  RegexParser splitter = new RegexParser(new Fields("lhs"), "(\\S+)\\s+\\S+", new int[] {1});
  Tuple arguments = new Tuple("foo\tbar");
  Fields resultFields = Fields.size(1);

  TupleListCollector collector = invokeFunction(splitter, arguments, resultFields);

  assertEquals("wrong size", 1, collector.size());

  Iterator<Tuple> iterator = collector.iterator();
  Tuple tuple = iterator.next();

  assertEquals("wrong tuple size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "foo", tuple.getObject(0));
}
public static <T> void populateOutputTupleEntry(
    CombinerDefinition<T> definition, TupleEntry output, Tuple resultTuple) {
  // set the ID so we can differentiate later
  output.setRaw(MultiCombiner.ID_FIELD, definition.getId());

  // our tuples are of the form groupFields+outputFields, set the TupleEntry fields appropriately
  Fields groupFields = definition.getGroupFields();
  int index = 0;
  for (int i = 0; i < groupFields.size(); i++) {
    output.setRaw(groupFields.get(i), resultTuple.getObject(index));
    index++;
  }

  Fields outputFields = definition.getOutputFields();
  for (int i = 0; i < outputFields.size(); i++) {
    output.setRaw(outputFields.get(i), resultTuple.getObject(index));
    index++;
  }
}
/**
 * Constructor Unique creates a new Unique instance.
 *
 * @param name of type String
 * @param pipes of type Pipe[]
 * @param uniqueFields of type Fields
 * @param include of type Include
 * @param threshold of type int
 */
@ConstructorProperties({"name", "pipes", "uniqueFields", "include", "threshold"})
public Unique(String name, Pipe[] pipes, Fields uniqueFields, Include include, int threshold) {
  super(pipes);

  if (uniqueFields == null) throw new IllegalArgumentException("uniqueFields may not be null");

  Pipe[] filters = new Pipe[pipes.length];

  TupleHasher tupleHasher = null;
  Comparator[] comparators = uniqueFields.getComparators();

  if (!TupleHasher.isNull(comparators)) tupleHasher = new TupleHasher(null, comparators);

  FilterPartialDuplicates partialDuplicates =
      new FilterPartialDuplicates(include, threshold, tupleHasher);

  for (int i = 0; i < filters.length; i++)
    filters[i] = new Each(pipes[i], uniqueFields, partialDuplicates);

  Pipe pipe = new GroupBy(name, filters, uniqueFields);
  pipe = new Every(pipe, Fields.ALL, new FirstNBuffer(), Fields.RESULTS);

  setTails(pipe);
}
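// Hedged usage sketch (pipe name, field name, and threshold are assumptions, not from the
// original source): wiring the Unique sub-assembly above over a single upstream pipe so only
// the first tuple per "url" value survives.
//
//   Pipe events = new Pipe("events");
//   Pipe uniqueUrls =
//       new Unique("unique-urls", Pipe.pipes(events), new Fields("url"), Include.ALL, 10000);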
public void testStop() throws Exception {
  if (!new File(inputFileLower).exists()) fail("data file not found");

  copyFromLocal(inputFileLower);
  copyFromLocal(inputFileUpper);

  Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
  Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

  Map sources = new HashMap();

  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

  // using null pos so all fields are written
  Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);
  pipeLower = new GroupBy(pipeLower, new Fields("num"));

  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);
  pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

  Pipe splice =
      new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

  Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

  // countFlow.writeDOT( "stopped.dot" );

  LockingFlowListener listener = new LockingFlowListener();

  flow.addListener(listener);

  System.out.println("calling start");
  flow.start();

  assertTrue("did not start", listener.started.tryAcquire(60, TimeUnit.SECONDS));

  while (true) {
    System.out.println("testing if running");
    Thread.sleep(1000);

    Map<String, Callable<Throwable>> map = flow.getJobsMap();

    if (map == null || map.values().size() == 0) continue;

    if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
  }

  System.out.println("calling stop");
  flow.stop();

  assertTrue("did not stop", listener.stopped.tryAcquire(60, TimeUnit.SECONDS));
  assertTrue("did not complete", listener.completed.tryAcquire(60, TimeUnit.SECONDS));
}
protected void validateFields(Fields superFields, Fields myFields) {
  if (!superFields.contains(myFields)) {
    throw new IllegalArgumentException("Fields passed to constructor don't contain " + myFields);
  }
}
/**
 * Create an empty datum with field names defined by {@code fields}.
 *
 * @param fields Names of fields
 */
public BaseDatum(Fields fields) {
  this(new TupleEntry(fields, Tuple.size(fields.size())));
}
private TupleEntry getEntry(Tuple tuple) {
  return new TupleEntry(Fields.size(tuple.size()), tuple);
}
public static void validate(File solrCoreDir, String dataDirPropertyName, Fields schemeFields)
    throws IOException {
  // Verify solrCoreDir exists
  if (!solrCoreDir.exists() || !solrCoreDir.isDirectory()) {
    throw new TapException("Solr core directory doesn't exist: " + solrCoreDir);
  }

  // Set up a temp location for Solr home, where we'll write out a synthetic solr.xml
  // that references the core directory.
  File tmpSolrHome = makeTempSolrHome(solrCoreDir);
  String coreName = solrCoreDir.getName();
  String corePath = solrCoreDir.getAbsolutePath();
  String solrXmlContent =
      String.format(
          "<solr><cores><core name=\"%s\" instanceDir=\"%s\"></core></cores></solr>",
          coreName, corePath);
  File solrXmlFile = new File(tmpSolrHome, "solr.xml");
  FileUtils.write(solrXmlFile, solrXmlContent);

  // Set up a temp location for data, so when we instantiate the coreContainer,
  // we don't pollute the solr home with a /data sub-dir.
  String tmpFolder = System.getProperty("java.io.tmpdir");
  File tmpDataDir = new File(tmpFolder, UUID.randomUUID().toString());
  tmpDataDir.mkdir();

  System.setProperty("solr.solr.home", tmpSolrHome.getAbsolutePath());
  System.setProperty(dataDirPropertyName, tmpDataDir.getAbsolutePath());
  // All we need is the update request handler
  System.setProperty("enable.special-handlers", "false");
  // We certainly don't need to warm the cache
  System.setProperty("enable.cache-warming", "false");

  CoreContainer.Initializer initializer = new CoreContainer.Initializer();
  CoreContainer coreContainer = null;

  try {
    coreContainer = initializer.initialize();
    Collection<SolrCore> cores = coreContainer.getCores();
    SolrCore core = null;

    if (cores.size() == 0) {
      throw new TapException("No Solr cores are available");
    } else if (cores.size() > 1) {
      throw new TapException("Only one Solr core is supported");
    } else {
      core = cores.iterator().next();
    }

    IndexSchema schema = core.getSchema();
    Map<String, SchemaField> solrFields = schema.getFields();
    Set<String> schemeFieldnames = new HashSet<String>();

    for (int i = 0; i < schemeFields.size(); i++) {
      String fieldName = schemeFields.get(i).toString();
      if (!solrFields.containsKey(fieldName)) {
        throw new TapException("Sink field name doesn't exist in Solr schema: " + fieldName);
      }

      schemeFieldnames.add(fieldName);
    }

    for (String solrFieldname : solrFields.keySet()) {
      SchemaField solrField = solrFields.get(solrFieldname);
      if (solrField.isRequired() && !schemeFieldnames.contains(solrFieldname)) {
        throw new TapException("No sink field name for required Solr field: " + solrFieldname);
      }
    }
  } finally {
    if (coreContainer != null) {
      coreContainer.shutdown();
    }
  }
}
@Override
public int hashCode() {
  int result = fieldDeclaration != null ? fieldDeclaration.hashCode() : 0;
  result = 31 * result + numArgs;
  return result;
}
/**
 * Selects and returns the first argument Tuple encountered.
 *
 * @param fieldDeclaration of type Fields
 */
@ConstructorProperties({"fieldDeclaration"})
public First(Fields fieldDeclaration) {
  super(fieldDeclaration.size(), fieldDeclaration);
}
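// Hedged usage sketch (pipe and field names are assumptions, not from the original source):
// applying First as an Every aggregator after a GroupBy keeps the first "value" argument Tuple
// seen in each "key" group.
//
//   Pipe grouped = new GroupBy(pipe, new Fields("key"));
//   grouped =
//       new Every(grouped, new Fields("value"), new First(new Fields("first-value")), Fields.ALL);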
public void failingListenerTest(FailingFlowListener.OnFail onFail) throws Exception {
  if (!new File(inputFileLower).exists()) fail("data file not found");

  copyFromLocal(inputFileLower);
  copyFromLocal(inputFileUpper);

  Tap sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), inputFileLower);
  Tap sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), inputFileUpper);

  Map sources = new HashMap();

  sources.put("lower", sourceLower);
  sources.put("upper", sourceUpper);

  Function splitter = new RegexSplitter(new Fields("num", "char"), " ");

  // using null pos so all fields are written
  Tap sink = new Hfs(new TextLine(), outputPath + "/stopped/", true);

  Pipe pipeLower = new Each(new Pipe("lower"), new Fields("line"), splitter);

  if (onFail == FailingFlowListener.OnFail.THROWABLE) {
    pipeLower =
        new Each(
            pipeLower,
            new Debug() {
              @Override
              public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
                throw new RuntimeException("failing inside pipe assembly intentionally");
              }
            });
  }

  pipeLower = new GroupBy(pipeLower, new Fields("num"));

  Pipe pipeUpper = new Each(new Pipe("upper"), new Fields("line"), splitter);
  pipeUpper = new GroupBy(pipeUpper, new Fields("num"));

  Pipe splice =
      new CoGroup(pipeLower, new Fields("num"), pipeUpper, new Fields("num"), Fields.size(4));

  Flow flow = new FlowConnector(getProperties()).connect(sources, sink, splice);

  // countFlow.writeDOT( "stopped.dot" );

  FailingFlowListener listener = new FailingFlowListener(onFail);

  flow.addListener(listener);

  System.out.println("calling start");
  flow.start();

  assertTrue("did not start", listener.started.tryAcquire(120, TimeUnit.SECONDS));

  if (onFail == FailingFlowListener.OnFail.STOPPING) {
    while (true) {
      System.out.println("testing if running");
      Thread.sleep(1000);

      Map<String, Callable<Throwable>> map = flow.getJobsMap();

      if (map == null || map.values().size() == 0) continue;

      if (((FlowStepJob) map.values().iterator().next()).wasStarted()) break;
    }

    System.out.println("calling stop");
    flow.stop();
  }

  assertTrue("did not complete", listener.completed.tryAcquire(120, TimeUnit.SECONDS));
  assertTrue("did not stop", listener.stopped.tryAcquire(120, TimeUnit.SECONDS));

  try {
    flow.complete();
    fail("did not rethrow exception from listener");
  } catch (Exception exception) {
    // ignore
  }
}