/**
 * Copies the values of the given tuple into the given positions of this tuple, optionally
 * coercing each value on the way in.
 *
 * <p>The n-th value of {@code tuple} is written to position {@code pos[n]} of this tuple. When
 * {@code types} is non-null the value is first passed through {@code coercions[n]} using {@code
 * types[pos[n]]} as the target type.
 *
 * @param pos positions in this tuple to overwrite
 * @param types target types indexed by position in this tuple, or null to store values as-is
 * @param tuple source of the new values; must hold exactly {@code pos.length} values
 * @param coercions coercers indexed by position in {@code tuple}; only read when {@code types}
 *     is non-null
 * @throws TupleException if {@code pos} and {@code tuple} differ in size
 */
private void set(int[] pos, Type[] types, Tuple tuple, CoercibleType[] coercions) {
  verifyModifiable();

  if (pos.length != tuple.size()) {
    throw new TupleException(
        "given tuple not same size as position array: "
            + pos.length
            + ", tuple: "
            + tuple.print());
  }

  for (int source = 0; source < pos.length; source++) {
    int target = pos[source];
    Object value = tuple.elements.get(source);

    if (types != null) {
      // the type is looked up by target position, the coercer by source position
      Type targetType = types[target];
      value = coercions[source].coerce(value, targetType);
    }

    elements.set(target, value);
  }
}
/**
 * Moves the values at the given positions out of this tuple: each value is appended to a new
 * Tuple, in the order the positions are listed, and its slot in this tuple is set to null.
 *
 * @param pos of type int[]
 * @return Tuple holding the values that were stored at {@code pos}
 */
Tuple extract(int[] pos) {
  Tuple extracted = new Tuple();

  for (int index = 0; index < pos.length; index++) {
    // List.set hands back the previous value, so one call both reads and nulls the slot
    extracted.add(elements.set(pos[index], null));
  }

  return extracted;
}
public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) { String filename = functionCall.getArguments().getString("line"); Tuple result = new Tuple(""); BufferedReader reader; try { InputStream stream = new FileInputStream(filename); reader = new BufferedReader(new InputStreamReader(stream, "us-ascii")); } catch (UnsupportedEncodingException e) { throw new RuntimeException("Impossible exception!", e); } catch (Exception e) { throw new RuntimeException(String.format("Exception splitting mbox file %s", filename), e); } StringBuilder email = new StringBuilder(); for (String curLine = safeReadLine(reader); curLine != null; curLine = safeReadLine(reader)) { if (curLine.startsWith(MBOX_RECORD_DIVIDER)) { if (email.length() > 0) { result.setString(0, email.toString()); functionCall.getOutputCollector().add(result); } email.setLength(0); } email.append(curLine); email.append('\n'); } // Output the final record. if (email.length() > 0) { result.setString(0, email.toString()); functionCall.getOutputCollector().add(result); } }
/**
 * Method append returns a copy of this instance with the values of every given Tuple added to
 * it, in argument order. This instance is not modified.
 *
 * @param tuples of type Tuple
 * @return Tuple
 */
public Tuple append(Tuple... tuples) {
  Tuple combined = new Tuple(this);

  for (int index = 0; index < tuples.length; index++) {
    combined.addAll(tuples[index]);
  }

  return combined;
}
/**
 * Method size creates a new Tuple and adds the given Comparable to it {@code size} times.
 *
 * @param size of type int; a value of zero or less yields an empty Tuple
 * @param value of type Comparable
 * @return Tuple
 */
public static Tuple size(int size, Comparable value) {
  Tuple filled = new Tuple();

  int remaining = size;
  while (remaining-- > 0) {
    filled.add(value);
  }

  return filled;
}
@Override public boolean isRemove( FlowProcess flowProcess, FilterCall<LinkedHashMap<Tuple, Object>> filterCall) { // we assume its more painful to create lots of tuple copies vs comparisons Tuple args = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTuple()); switch (include) { case ALL: break; case NO_NULLS: if (Tuples.frequency(args, null) == args.size()) return true; break; } if (filterCall.getContext().containsKey(args)) { flowProcess.increment(Cache.Num_Keys_Hit, 1); return true; } // only do the copy here filterCall .getContext() .put(TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTupleCopy()), null); flowProcess.increment(Cache.Num_Keys_Missed, 1); return false; }
/**
 * Method size creates a new Tuple backed by a list presized to {@code size} and adds the given
 * Comparable to it {@code size} times.
 *
 * @param size of type int
 * @param value of type Comparable
 * @return Tuple
 */
public static Tuple size(int size, Comparable value) {
  Tuple filled = new Tuple(new ArrayList<Object>(size));

  for (int remaining = size; remaining > 0; remaining--) {
    filled.add(value);
  }

  return filled;
}
/**
 * Reads the next record from the input and publishes it as the incoming tuple.
 *
 * <p>For each name in {@code columnFieldNames} the matching value of the record is appended to
 * the tuple. A missing (null) value is replaced by an empty string, except for the key column,
 * for which nothing is appended.
 *
 * @param flowProcess the current flow process (not used here)
 * @param sourceCall supplies the reusable key/value pair (context slots 0 and 1), the record
 *     reader and the incoming entry that receives the result
 * @return true if a record was read, false if the input is exhausted
 * @throws IOException if the record reader fails
 */
@Override
public boolean source(
    FlowProcess<JobConf> flowProcess, SourceCall<BSONWritable[], RecordReader> sourceCall)
    throws IOException {
  Tuple result = new Tuple();

  BSONWritable key = sourceCall.getContext()[0];
  BSONWritable value = sourceCall.getContext()[1];

  if (!sourceCall.getInput().next(key, value)) {
    logger.info("Nothing left to read, exiting");
    return false;
  }

  for (String columnFieldName : columnFieldNames) {
    Object tupleEntry = value.get(columnFieldName);

    if (tupleEntry != null) {
      result.add(tupleEntry);
      continue;
    }

    // compare by content: two equal names are not guaranteed to be the same String instance,
    // so a reference comparison could wrongly treat the key column as an ordinary column
    boolean isKeyColumn =
        columnFieldName == null
            ? this.keyColumnName == null
            : columnFieldName.equals(this.keyColumnName);

    if (!isKeyColumn) {
      result.add("");
    }
  }

  sourceCall.getIncomingEntry().setTuple(result);
  return true;
}
/**
 * Emits one two-value tuple holding the current {@code min} followed by the current {@code max}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param aggregatorCall supplies the output collector
 */
@Override
public void complete(FlowProcess flowProcess, AggregatorCall aggregatorCall) {
  Tuple minMax = new Tuple();

  // order matters: min is the first output value, max the second
  minMax.add(min);
  minMax.add(max);

  aggregatorCall.getOutputCollector().add(minMax);
}
/**
 * Returns a copy of this tuple in which the values at the given positions are set to null.
 *
 * <p>When {@code pos} is null, the result of {@code size(size())} is returned instead of a copy.
 *
 * @param pos positions to null out, or null
 * @return Tuple
 */
Tuple nulledCopy(int[] pos) {
  if (pos == null) {
    return size(size());
  }

  Tuple copy = new Tuple(this);

  for (int index = 0; index < pos.length; index++) {
    copy.set(pos[index], null);
  }

  return copy;
}
/**
 * Method get returns a new Tuple instance populated with the values found at the given
 * positions, in the order the positions are listed. A null or empty position array yields a copy
 * of this whole Tuple.
 *
 * @param pos of type int[]
 * @return Tuple
 */
public Tuple get(int[] pos) {
  boolean selectAll = pos == null || pos.length == 0;

  if (selectAll) {
    return new Tuple(this);
  }

  Tuple selected = new Tuple();

  for (int index = 0; index < pos.length; index++) {
    selected.add(elements.get(pos[index]));
  }

  return selected;
}
/**
 * Overwrites the values at the given positions with the values of the given Tuple: the n-th
 * value of {@code tuple} is stored at position {@code pos[n]}.
 *
 * @param pos of type int[]
 * @param tuple of type Tuple; must hold exactly {@code pos.length} values
 * @throws TupleException if {@code pos} and {@code tuple} differ in size
 */
void set(int[] pos, Tuple tuple) {
  verifyModifiable();

  if (pos.length != tuple.size()) {
    throw new TupleException(
        "given tuple not same size as position array, tuple: " + tuple.print());
  }

  for (int source = 0; source < pos.length; source++) {
    elements.set(pos[source], tuple.elements.get(source));
  }
}
/**
 * Method is the inverse of {@link #remove(int[])}: it calls {@code remove(pos)} and then swaps
 * the backing lists of this Tuple and the Tuple returned by that call. Afterwards this Tuple
 * holds what {@code remove} returned, and the returned Tuple holds what {@code remove} left
 * behind.
 *
 * @param pos of type int[]
 * @return Tuple
 */
public Tuple leave(int[] pos) {
  verifyModifiable();

  Tuple other = remove(pos);

  // exchange the two element lists instead of copying any values
  List<Comparable> removedValues = other.elements;
  other.elements = this.elements;
  this.elements = removedValues;

  return other;
}
/**
 * Selects fields "3" and "2" from a four-value tuple in three ways (by fields, by resolved
 * positions, and through a narrow view) and checks each result with {@code assertTuple}.
 */
@Test
public void testArgumentSelector() {
  Tuple incoming = new Tuple(1, 2, 3, 4);
  Fields declarator = new Fields("1", "2", "3", "4");
  Fields selector = new Fields("3", "2");

  // selection by field names
  assertTuple(incoming.get(declarator, selector));

  // selection by the positions those names resolve to
  int[] pos = incoming.getPos(declarator, selector);
  assertTuple(incoming.get(pos));

  // selection through a narrow view over the same positions
  assertTuple(TupleViews.createNarrow(pos, incoming));
}
/**
 * Method put stores the values of the given tuple into this Tuple at the positions the given
 * fields resolve to. The declarator Fields value declares the fields in this Tuple instance. The
 * n-th value of {@code tuple} goes to the n-th resolved position.
 *
 * @param declarator of type Fields
 * @param fields of type Fields
 * @param tuple of type Tuple
 */
public void put(Fields declarator, Fields fields, Tuple tuple) {
  verifyModifiable();

  int[] targets = getPos(declarator, fields);

  int source = 0;
  for (int target : targets) {
    internalSet(target, tuple.getObject(source++));
  }
}
/** Replacing every whitespace run with "-" must turn "foo\t bar" into the single value "foo-bar". */
@Test
public void testReplace() throws IOException {
  Tuple arguments = new Tuple("foo\t bar");
  RegexReplace splitter = new RegexReplace(new Fields("words"), "\\s+", "-", true);

  TupleListCollector collector = invokeFunction(splitter, arguments, Fields.UNKNOWN);

  assertEquals("wrong size", 1, collector.size());

  Tuple tuple = collector.iterator().next();

  assertEquals("not equal: tuple.get(0)", "foo-bar", tuple.getObject(0));
}
/**
 * Method put stores the values of the given tuple into this Tuple at the positions the given
 * fields resolve to. The declarator Fields value declares the fields in this Tuple instance. The
 * n-th value of {@code tuple} goes to the n-th resolved position.
 *
 * @param declarator of type Fields
 * @param fields of type Fields
 * @param tuple of type Tuple
 */
public void put(Fields declarator, Fields fields, Tuple tuple) {
  verifyModifiable();

  int[] targets = declarator.getPos(fields, size());

  int source = 0;
  while (source < targets.length) {
    elements.set(targets[source], tuple.get(source));
    source++;
  }
}
/**
 * Emits one result tuple per given URL without fetching it, and updates the skip counters.
 *
 * <p>Each emitted tuple is the tuple of a {@code FetchedDatum} built from the URL datum, with
 * the status string appended, cloned before it is handed to the collector.
 *
 * @param urls URLs to skip
 * @param status status recorded on every emitted tuple
 * @param traceMsg format string that receives the number of skipped URLs, or null for no trace
 *     output
 */
private void skipUrls(List<ScoredUrlDatum> urls, UrlStatus status, String traceMsg) {
  for (ScoredUrlDatum datum : urls) {
    Tuple tuple = new FetchedDatum(datum).getTuple();
    tuple.add(status.toString());
    _collector.add(BixoPlatform.clone(tuple, _flowProcess));
  }

  int skipped = urls.size();
  _flowProcess.increment(FetchCounters.URLS_SKIPPED, skipped);

  if (status == UrlStatus.SKIPPED_PER_SERVER_LIMIT) {
    _flowProcess.increment(FetchCounters.URLS_SKIPPED_PER_SERVER_LIMIT, skipped);
  }

  if (traceMsg == null || !LOGGER.isTraceEnabled()) {
    return;
  }

  LOGGER.trace(String.format(traceMsg, skipped));
}
/**
 * Create a new datum whose field names are given by {@code fields} and whose field values are
 * held in {@code tuple}.
 *
 * <p>WARNING - {@code tuple} is kept as the data container rather than copied, so don't call
 * this with a tuple provided by a Cascading operation/iterator, as those get reused.
 *
 * @param fields Names of fields
 * @param tuple Data for the datum
 * @throws IllegalArgumentException if {@code fields} and {@code tuple} differ in size
 */
public BaseDatum(Fields fields, Tuple tuple) {
  boolean sameSize = fields.size() == tuple.size();

  if (!sameSize) {
    throw new IllegalArgumentException(
        "Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple);
  }

  _tupleEntry = new TupleEntry(fields, tuple);
}
/**
 * Replaces the tuple held by this datum's tuple entry and then calls {@code reset()}.
 *
 * @param tuple new data; must have as many values as this datum has fields
 * @throws IllegalArgumentException if the sizes differ
 */
public void setTuple(Tuple tuple) {
  int expectedSize = getFields().size();

  if (expectedSize != tuple.size()) {
    throw new IllegalArgumentException("Size of tuple doesn't match current fields");
  }

  _tupleEntry.setTuple(tuple);
  reset();
}
/** Parsing "foo\tbar" while keeping only capture group 1 must yield the single value "foo". */
@Test
public void testParserDeclared6() throws IOException {
  Tuple arguments = new Tuple("foo\tbar");
  RegexParser splitter = new RegexParser(new Fields("lhs"), "(\\S+)\\s+\\S+", new int[] {1});

  TupleListCollector collector = invokeFunction(splitter, arguments, Fields.size(1));

  assertEquals("wrong size", 1, collector.size());

  Tuple tuple = collector.iterator().next();

  assertEquals("wrong tupel size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "foo", tuple.getObject(0));
}
/** Splitting "foo\tbar" on tab must yield one tuple with the values "foo" and "bar". */
@Test
public void testSplitter() throws IOException {
  Tuple arguments = new Tuple("foo\tbar");
  RegexSplitter splitter = new RegexSplitter("\t");

  TupleListCollector collector = invokeFunction(splitter, arguments, Fields.UNKNOWN);

  assertEquals("wrong size", 1, collector.size());

  Tuple tuple = collector.iterator().next();

  assertEquals("not equal: tuple.get(0)", "foo", tuple.getObject(0));
  assertEquals("not equal: tuple.get(1)", "bar", tuple.getObject(1));
}
/**
 * Contributed by gicode.
 *
 * <p>Parsing a request line with a single capture group must yield the captured query value
 * "z123" as the only tuple value.
 */
@Test
public void testParserDeclared5() throws IOException {
  Tuple arguments = new Tuple("GET /foo?bar=z123&baz=2");
  RegexParser splitter = new RegexParser(new Fields("bar"), "^GET /foo\\?bar=([^\\&]+)&");

  TupleListCollector collector = invokeFunction(splitter, arguments, Fields.size(1));

  assertEquals("wrong size", 1, collector.size());

  Tuple tuple = collector.iterator().next();

  assertEquals("wrong tuple size", 1, tuple.size());
  assertEquals("not equal: tuple.get(0)", "z123", tuple.getObject(0));
}
public static <T> void populateOutputTupleEntry( CombinerDefinition<T> definition, TupleEntry output, Tuple resultTuple) { // set the ID so we can differentiate later output.setRaw(MultiCombiner.ID_FIELD, definition.getId()); // our tuples are of the form groupFields+outputFields, set the TupleEntry fields appropriately Fields groupFields = definition.getGroupFields(); int index = 0; for (int i = 0; i < groupFields.size(); i++) { output.setRaw(groupFields.get(i), resultTuple.getObject(index)); index++; } Fields outputFields = definition.getOutputFields(); for (int i = 0; i < outputFields.size(); i++) { output.setRaw(outputFields.get(i), resultTuple.getObject(index)); index++; } }
/**
 * Method matchWholeTuple renders the whole input Tuple as one tab-delimited string, searches it
 * with the given matcher, and compares the outcome with the {@code negateMatch} flag.
 *
 * @param matcher matcher to reset onto the rendered tuple
 * @param input of type Tuple
 * @return boolean true when the search outcome equals {@code negateMatch}
 */
protected boolean matchWholeTuple(Matcher matcher, Tuple input) {
  boolean found = matcher.reset(input.toString("\t", false)).find();

  LOG.debug("pattern: {}, matches: {}", getPatternString(), found);

  return negateMatch == found;
}
/**
 * Builds a tuple from one row: the row key as a string, followed by the deserialized value of
 * every column listed in the source mappings, in mapping order.
 *
 * @param settings supplies the column data types and the source mappings
 * @param boxedKey the row key; cast to {@code ByteBuffer}
 * @param boxedColumns the row's columns; cast to {@code SortedMap<ByteBuffer, IColumn>}
 * @return Tuple holding the key and the mapped column values
 * @throws IOException declared by this method; presumably raised when decoding the key or a
 *     column name fails — TODO confirm against ByteBufferUtil
 * @throws RuntimeException if a mapped column has no type, or its value cannot be read or
 *     deserialized
 */
public Tuple source(Map<String, Object> settings, Object boxedKey, Object boxedColumns)
    throws IOException {
  SortedMap<ByteBuffer, IColumn> columns = (SortedMap<ByteBuffer, IColumn>) boxedColumns;
  ByteBuffer key = (ByteBuffer) boxedKey;

  Tuple row = new Tuple();
  row.add(ByteBufferUtil.string(key));

  Map<String, String> dataTypes = SettingsHelper.getTypes(settings);
  List<String> sourceMappings = SettingsHelper.getSourceMappings(settings);

  // re-key the columns by their decoded names so the mappings can look them up directly
  Map<String, IColumn> columnsByName = new HashMap<String, IColumn>();
  for (ByteBuffer rawName : columns.keySet()) {
    String decodedName = ByteBufferUtil.string(rawName);
    logger.debug("column name: {}", decodedName);

    IColumn col = columns.get(rawName);
    logger.debug("column: {}", col);

    columnsByName.put(decodedName, col);
  }

  for (String columnName : sourceMappings) {
    AbstractType columnValueType = SerializerHelper.inferType(dataTypes.get(columnName));

    if (columnValueType == null) {
      throw new RuntimeException("no type given for column: " + columnName);
    }

    try {
      // NOTE(review): a mapped column that is absent from the row makes the lookup return
      // null, so value() throws a NullPointerException that is reported by the catch below
      ByteBuffer serializedVal = columnsByName.get(columnName).value();

      Object val = null;
      if (serializedVal != null) {
        val = SerializerHelper.deserialize(serializedVal, columnValueType);
      }

      logger.debug("Putting deserialized column: {}. {}", columnName, val);
      row.add(val);
    } catch (Exception e) {
      throw new RuntimeException("Couldn't deserialize column: " + columnName, e);
    }
  }

  return row;
}
// where outgoing fields are ALL @Test public void testSelectorAll() { // if( getOutputSelector().isAll() ) // return inputEntry.getTuple().append( output ); Fields incomingFields = new Fields("1", "2", "3", "4"); Tuple incoming = new Tuple(1, 2, 3, 4); Fields resultFields = new Fields("5", "6", "7"); Tuple result = new Tuple(5, 6, 7); Tuple view = TupleViews.createComposite(incoming, result); assertEquals(new Tuple(1, 2, 3, 4, 5, 6, 7), view); assertEquals(new Tuple(1, 2, 3, 4, 5, 6, 7), new Tuple(view)); Fields allFields = Fields.join(incomingFields, resultFields); Fields selector = new Fields("3", "2"); assertTuple(view.get(allFields, selector)); }
/** * Method remove removes the values specified by the given pos array and returns a new Tuple * containing the removed values. * * @param pos of type int[] * @return Tuple */ public Tuple remove(int[] pos) { verifyModifiable(); // calculate offsets to apply when removing values from elements int offset[] = new int[pos.length]; for (int i = 0; i < pos.length; i++) { offset[i] = 0; for (int j = 0; j < i; j++) { if (pos[j] < pos[i]) offset[i]++; } } Tuple results = new Tuple(); for (int i = 0; i < pos.length; i++) results.add(elements.remove(pos[i] - offset[i])); return results; }
/**
 * Method parse reads the {@link #print()} String representation of a Tuple instance and returns
 * a new Tuple instance.
 *
 * <p>Leading and trailing spaces and square brackets are stripped, and the remainder is split on
 * the single-quote delimiters. Each token is added as an int if it scans as one, else as a
 * double if it scans as one, else as a String.
 *
 * @param string of type String
 * @return Tuple, or null if {@code string} is null or empty
 */
public static Tuple parse(String string) {
  if (string == null || string.isEmpty()) {
    return null;
  }

  String body = string.replaceAll("^ *\\[*", "").replaceAll("\\]* *$", "");

  Scanner tokens = new Scanner(new StringReader(body));
  tokens.useDelimiter("(' *, *')|(^ *')|(' *$)");

  Tuple parsed = new Tuple();

  while (tokens.hasNext()) {
    if (tokens.hasNextInt()) {
      parsed.add(tokens.nextInt());
    } else if (tokens.hasNextDouble()) {
      parsed.add(tokens.nextDouble());
    } else {
      parsed.add(tokens.next());
    }
  }

  tokens.close();

  return parsed;
}
/**
 * Appends the bracketed representation of this tuple to the given buffer: values are separated
 * by {@code ", "}, nested Tuples are printed recursively, and every other value is wrapped in
 * single quotes.
 *
 * @param buffer buffer to append to
 * @return the same buffer, for chaining
 */
private StringBuffer print(StringBuffer buffer) {
  buffer.append("[");

  for (int index = 0; index < elements.size(); index++) {
    // the separator goes in front of every value except the first
    if (index > 0) {
      buffer.append(", ");
    }

    Comparable element = elements.get(index);

    if (element instanceof Tuple) {
      ((Tuple) element).print(buffer);
    } else {
      buffer.append("\'").append(element).append("\'");
    }
  }

  buffer.append("]");

  return buffer;
}