Пример #1
0
  /**
   * Writes the values of the given tuple into this tuple at the given positions, optionally
   * coercing each value before it is stored.
   *
   * @param pos target positions in this tuple; must have as many entries as {@code tuple}
   * @param types target types indexed by target position, or {@code null} to skip coercion
   * @param tuple the tuple supplying the values, read in order
   * @param coercions coercible types indexed by source position; only used when {@code types} is
   *     not {@code null}
   * @throws TupleException if {@code pos} and {@code tuple} differ in size
   */
  private void set(int[] pos, Type[] types, Tuple tuple, CoercibleType[] coercions) {
    verifyModifiable();

    if (pos.length != tuple.size()) {
      String message =
          "given tuple not same size as position array: "
              + pos.length
              + ", tuple: "
              + tuple.print();

      throw new TupleException(message);
    }

    boolean coerceValues = types != null;

    for (int source = 0; source < pos.length; source++) {
      int target = pos[source];
      Object value = tuple.elements.get(source);

      if (coerceValues) {
        Type targetType = types[target];
        value = coercions[source].coerce(value, targetType);
      }

      elements.set(target, value);
    }
  }
Пример #2
0
  /**
   * Moves the values at the given positions into a new Tuple, leaving {@code null} in their place
   * in this tuple.
   *
   * @param pos positions to extract, in the order the values should appear in the result
   * @return a new Tuple holding the previous values of the given positions
   */
  Tuple extract(int[] pos) {
    Tuple extracted = new Tuple();

    for (int index = 0; index < pos.length; index++) {
      // List.set returns the value previously stored at that position
      extracted.add(elements.set(pos[index], null));
    }

    return extracted;
  }
    /**
     * Reads the mbox file whose path is given by the "line" argument and emits one single-field
     * tuple per email found in it.
     *
     * <p>A new email starts at every line beginning with {@code MBOX_RECORD_DIVIDER}; the divider
     * line itself is kept as the first line of the email. Lines are joined with {@code '\n'}.
     *
     * @param flowProcess the current flow process (not used here)
     * @param functionCall supplies the argument tuple and the output collector
     * @throws RuntimeException if the file cannot be opened
     */
    public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
      String filename = functionCall.getArguments().getString("line");
      Tuple result = new Tuple("");

      BufferedReader reader;
      try {
        InputStream stream = new FileInputStream(filename);
        reader = new BufferedReader(new InputStreamReader(stream, "us-ascii"));
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("Impossible exception!", e);
      } catch (Exception e) {
        throw new RuntimeException(String.format("Exception splitting mbox file %s", filename), e);
      }

      try {
        StringBuilder email = new StringBuilder();
        for (String curLine = safeReadLine(reader);
            curLine != null;
            curLine = safeReadLine(reader)) {
          if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
            // A divider marks the start of the next email, so flush the one collected so far.
            if (email.length() > 0) {
              result.setString(0, email.toString());
              functionCall.getOutputCollector().add(result);
            }

            email.setLength(0);
          }

          email.append(curLine);
          email.append('\n');
        }

        // Output the final record.
        if (email.length() > 0) {
          result.setString(0, email.toString());
          functionCall.getOutputCollector().add(result);
        }
      } finally {
        // Always release the file handle, even when reading or collecting fails.
        try {
          reader.close();
        } catch (java.io.IOException ignored) {
          // Best effort: a failure to close must not mask the outcome of the processing above.
        }
      }
    }
Пример #4
0
  /**
   * Builds a copy of this tuple and appends to it, in order, all values of every given tuple.
   * This instance is not modified.
   *
   * @param tuples the tuples whose values are appended
   * @return a new Tuple containing this tuple's values followed by the appended values
   */
  public Tuple append(Tuple... tuples) {
    Tuple combined = new Tuple(this);

    for (int index = 0; index < tuples.length; index++) {
      combined.addAll(tuples[index]);
    }

    return combined;
  }
Пример #5
0
  /**
   * Creates a new Tuple holding {@code size} elements, each of them the given value.
   *
   * @param size number of elements to create; zero or less yields an empty Tuple
   * @param value the value stored at every position
   * @return the newly created Tuple
   */
  public static Tuple size(int size, Comparable value) {
    Tuple filled = new Tuple();

    int remaining = size;

    while (remaining > 0) {
      filled.add(value);
      remaining--;
    }

    return filled;
  }
Пример #6
0
    /**
     * Decides whether the current argument tuple should be removed, using the context map as a
     * cache of tuples already seen.
     *
     * <p>A tuple found in the cache is removed and counted as a hit; otherwise a copy of it is
     * stored in the cache, a miss is counted, and the tuple is kept. With {@code NO_NULLS}, a
     * tuple whose values are all {@code null} is removed without touching the cache.
     *
     * @param flowProcess used to increment the hit and miss counters
     * @param filterCall supplies the argument tuple and the cache held in the context
     * @return {@code true} if the tuple should be removed
     */
    @Override
    public boolean isRemove(
        FlowProcess flowProcess, FilterCall<LinkedHashMap<Tuple, Object>> filterCall) {
      // wrap the live argument tuple for the lookup; copying is deferred until a miss
      Tuple args = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTuple());

      switch (include) {
        case NO_NULLS:
          if (Tuples.frequency(args, null) == args.size()) {
            return true;
          }
          break;

        case ALL:
        default:
          break;
      }

      LinkedHashMap<Tuple, Object> seen = filterCall.getContext();

      if (!seen.containsKey(args)) {
        // only a miss pays for the tuple copy
        Tuple copy = TupleHasher.wrapTuple(tupleHasher, filterCall.getArguments().getTupleCopy());
        seen.put(copy, null);

        flowProcess.increment(Cache.Num_Keys_Missed, 1);

        return false;
      }

      flowProcess.increment(Cache.Num_Keys_Hit, 1);

      return true;
    }
Пример #7
0
  /**
   * Creates a new Tuple holding {@code size} elements, each of them the given value. The backing
   * list is allocated with {@code size} as its initial capacity.
   *
   * @param size number of elements to create
   * @param value the value stored at every position
   * @return the newly created Tuple
   */
  public static Tuple size(int size, Comparable value) {
    Tuple filled = new Tuple(new ArrayList<Object>(size));

    int remaining = size;

    while (remaining > 0) {
      filled.add(value);
      remaining--;
    }

    return filled;
  }
Пример #8
0
  /**
   * Reads the next record from the input and stores its column values, in the order of {@code
   * columnFieldNames}, as the tuple of the incoming entry.
   *
   * <p>A column missing from the record is replaced by an empty string, unless it is the key
   * column, in which case nothing is added for it.
   *
   * @param flowProcess the current flow process (not used here)
   * @param sourceCall supplies the record reader, the reusable key/value pair held in the context,
   *     and the incoming entry to populate
   * @return {@code true} if a record was read, {@code false} when the input is exhausted
   * @throws IOException if reading from the input fails
   */
  @Override
  public boolean source(
      FlowProcess<JobConf> flowProcess, SourceCall<BSONWritable[], RecordReader> sourceCall)
      throws IOException {
    Tuple result = new Tuple();

    BSONWritable key = sourceCall.getContext()[0];
    BSONWritable value = sourceCall.getContext()[1];

    if (!sourceCall.getInput().next(key, value)) {
      logger.info("Nothing left to read, exiting");
      return false;
    }

    for (String columnFieldName : columnFieldNames) {
      Object tupleEntry = value.get(columnFieldName);
      if (tupleEntry != null) {
        result.add(tupleEntry);
        continue;
      }

      // Compare by value: reference comparison of Strings only works for interned instances.
      boolean isKeyColumn =
          columnFieldName == null
              ? this.keyColumnName == null
              : columnFieldName.equals(this.keyColumnName);

      if (!isKeyColumn) {
        result.add("");
      }
    }

    sourceCall.getIncomingEntry().setTuple(result);
    return true;
  }
Пример #9
0
  /**
   * Emits a single two-element tuple holding the values of {@code min} and {@code max}, in that
   * order.
   *
   * @param flowProcess the current flow process (not used here)
   * @param aggregatorCall supplies the output collector the tuple is added to
   */
  @Override
  public void complete(FlowProcess flowProcess, AggregatorCall aggregatorCall) {
    Tuple extremes = new Tuple();

    extremes.add(min);
    extremes.add(max);

    aggregatorCall.getOutputCollector().add(extremes);
  }
Пример #10
0
  /**
   * Returns a copy of this tuple in which the values at the given positions are set to {@code
   * null}. This instance is not modified.
   *
   * @param pos positions to null out; when {@code null}, the result of {@code size(size())} is
   *     returned instead
   * @return the new Tuple
   */
  Tuple nulledCopy(int[] pos) {
    if (pos == null) {
      return size(size());
    }

    Tuple copy = new Tuple(this);

    for (int index = 0; index < pos.length; index++) {
      copy.set(pos[index], null);
    }

    return copy;
  }
Пример #11
0
  /**
   * Returns a new Tuple instance populated with the element values found at the given positions,
   * in the order the positions are given.
   *
   * @param pos positions to select; when {@code null} or empty, a copy of this whole tuple is
   *     returned
   * @return the new Tuple
   */
  public Tuple get(int[] pos) {
    boolean hasPositions = pos != null && pos.length != 0;

    if (hasPositions) {
      Tuple selected = new Tuple();

      for (int index = 0; index < pos.length; index++) {
        selected.add(elements.get(pos[index]));
      }

      return selected;
    }

    return new Tuple(this);
  }
Пример #12
0
  /**
   * Overwrites the values at the given positions of this tuple with the values of the given
   * tuple, taken in order.
   *
   * @param pos target positions in this tuple; must have as many entries as {@code tuple}
   * @param tuple the tuple supplying the new values
   * @throws TupleException if {@code pos} and {@code tuple} differ in size
   */
  void set(int[] pos, Tuple tuple) {
    verifyModifiable();

    if (pos.length != tuple.size()) {
      String message = "given tuple not same size as position array, tuple: " + tuple.print();

      throw new TupleException(message);
    }

    for (int source = 0; source < pos.length; source++) {
      elements.set(pos[source], tuple.elements.get(source));
    }
  }
Пример #13
0
  /**
   * The inverse of {@link #remove(int[])}: after the call this tuple holds only the values that
   * were at the given positions, and the returned Tuple holds the values that were not.
   *
   * @param pos positions of the values to keep in this tuple
   * @return a Tuple containing the values that were not at the given positions
   */
  public Tuple leave(int[] pos) {
    verifyModifiable();

    // remove() leaves the unselected values here and hands back the selected ones ...
    Tuple others = remove(pos);

    // ... so exchanging the two backing lists yields the inverse result
    List<Comparable> unselected = this.elements;
    this.elements = others.elements;
    others.elements = unselected;

    return others;
  }
Пример #14
0
  /**
   * Selects fields "3" and "2" from a four-value tuple in three ways (by field names, by resolved
   * positions, and through a narrow view) and checks each result with {@code assertTuple}.
   */
  @Test
  public void testArgumentSelector() {
    Fields declaredFields = new Fields("1", "2", "3", "4");
    Tuple source = new Tuple(1, 2, 3, 4);
    Fields wantedFields = new Fields("3", "2");

    // selection by field names
    Tuple byFields = source.get(declaredFields, wantedFields);
    assertTuple(byFields);

    // selection by positions resolved from the same fields
    int[] positions = source.getPos(declaredFields, wantedFields);
    Tuple byPositions = source.get(positions);
    assertTuple(byPositions);

    // selection through a narrowing view over the source tuple
    assertTuple(TupleViews.createNarrow(positions, source));
  }
Пример #15
0
  /**
   * Places the values of the given tuple into the positions of this tuple selected by {@code
   * fields}. The {@code declarator} declares the fields of this Tuple instance and is used to
   * resolve those positions.
   *
   * @param declarator the fields declaring this tuple
   * @param fields the fields selecting the positions to overwrite
   * @param tuple the tuple supplying the new values, read in order
   */
  public void put(Fields declarator, Fields fields, Tuple tuple) {
    verifyModifiable();

    int source = 0;

    for (int target : getPos(declarator, fields)) {
      internalSet(target, tuple.getObject(source));
      source++;
    }
  }
Пример #16
0
  /**
   * Runs a {@code RegexReplace} over "foo\t bar" with the pattern {@code \s+} and the replacement
   * "-", and expects a single result tuple whose first value is "foo-bar".
   */
  @Test
  public void testReplace() throws IOException {
    RegexReplace replacer = new RegexReplace(new Fields("words"), "\\s+", "-", true);

    TupleListCollector collected =
        invokeFunction(replacer, new Tuple("foo\t bar"), Fields.UNKNOWN);

    assertEquals("wrong size", 1, collected.size());

    Tuple first = collected.iterator().next();

    assertEquals("not equal: tuple.get(0)", "foo-bar", first.getObject(0));
  }
Пример #17
0
  /**
   * Places the values of the given tuple into the positions of this tuple selected by {@code
   * fields}. The {@code declarator} declares the fields of this Tuple instance and resolves the
   * positions against this tuple's size.
   *
   * @param declarator the fields declaring this tuple
   * @param fields the fields selecting the positions to overwrite
   * @param tuple the tuple supplying the new values, read in order
   */
  public void put(Fields declarator, Fields fields, Tuple tuple) {
    verifyModifiable();

    int source = 0;

    for (int target : declarator.getPos(fields, size())) {
      elements.set(target, tuple.get(source));
      source++;
    }
  }
Пример #18
0
  /**
   * Emits one fetched-datum tuple, tagged with the given status, for every URL in the list, then
   * updates the skip counters and optionally writes a trace message.
   *
   * @param urls the URLs being skipped
   * @param status the status appended to each emitted tuple
   * @param traceMsg format string receiving the number of URLs, or {@code null} for no tracing
   */
  private void skipUrls(List<ScoredUrlDatum> urls, UrlStatus status, String traceMsg) {
    for (ScoredUrlDatum url : urls) {
      Tuple fetched = new FetchedDatum(url).getTuple();
      fetched.add(status.toString());
      _collector.add(BixoPlatform.clone(fetched, _flowProcess));
    }

    int skipped = urls.size();

    _flowProcess.increment(FetchCounters.URLS_SKIPPED, skipped);

    if (status == UrlStatus.SKIPPED_PER_SERVER_LIMIT) {
      _flowProcess.increment(FetchCounters.URLS_SKIPPED_PER_SERVER_LIMIT, skipped);
    }

    if (traceMsg == null) {
      return;
    }

    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace(String.format(traceMsg, skipped));
    }
  }
Пример #19
0
  /**
   * Creates a new datum whose field names are given by {@code fields} and whose field values are
   * held in {@code tuple}.
   *
   * <p>WARNING - {@code tuple} is kept as the data container, so don't call this with a tuple
   * provided by a Cascading operation/iterator, as those get reused.
   *
   * @param fields names of the fields
   * @param tuple data for the datum
   * @throws IllegalArgumentException if {@code fields} and {@code tuple} differ in size
   */
  public BaseDatum(Fields fields, Tuple tuple) {
    boolean sameSize = fields.size() == tuple.size();

    if (!sameSize) {
      String message =
          "Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple;

      throw new IllegalArgumentException(message);
    }

    _tupleEntry = new TupleEntry(fields, tuple);
  }
Пример #20
0
  /**
   * Replaces the tuple backing this datum and then calls {@code reset()}.
   *
   * @param tuple the new tuple; must have as many values as this datum has fields
   * @throws IllegalArgumentException if the tuple size differs from the number of fields
   */
  public void setTuple(Tuple tuple) {
    int expectedSize = getFields().size();

    if (tuple.size() != expectedSize) {
      throw new IllegalArgumentException("Size of tuple doesn't match current fields");
    }

    _tupleEntry.setTuple(tuple);
    reset();
  }
Пример #21
0
  /**
   * Parses "foo\tbar" with a {@code RegexParser} restricted to capture group 1 and expects a
   * single one-value result tuple containing "foo".
   */
  @Test
  public void testParserDeclared6() throws IOException {
    RegexParser parser = new RegexParser(new Fields("lhs"), "(\\S+)\\s+\\S+", new int[] {1});

    TupleListCollector collected = invokeFunction(parser, new Tuple("foo\tbar"), Fields.size(1));

    assertEquals("wrong size", 1, collected.size());

    Tuple first = collected.iterator().next();

    assertEquals("wrong tupel size", 1, first.size());
    assertEquals("not equal: tuple.get(0)", "foo", first.getObject(0));
  }
Пример #22
0
  /**
   * Splits "foo\tbar" on a tab with a {@code RegexSplitter} and expects a single result tuple
   * whose first two values are "foo" and "bar".
   */
  @Test
  public void testSplitter() throws IOException {
    RegexSplitter tabSplitter = new RegexSplitter("\t");

    TupleListCollector collected =
        invokeFunction(tabSplitter, new Tuple("foo\tbar"), Fields.UNKNOWN);

    assertEquals("wrong size", 1, collected.size());

    Tuple first = collected.iterator().next();

    assertEquals("not equal: tuple.get(0)", "foo", first.getObject(0));
    assertEquals("not equal: tuple.get(1)", "bar", first.getObject(1));
  }
Пример #23
0
  /**
   * Contributed by gicode.
   *
   * <p>Parses a request line with a {@code RegexParser} capturing the value of the {@code bar}
   * query parameter and expects a single one-value result tuple containing "z123".
   */
  @Test
  public void testParserDeclared5() throws IOException {
    RegexParser parser = new RegexParser(new Fields("bar"), "^GET /foo\\?bar=([^\\&]+)&");

    TupleListCollector collected =
        invokeFunction(parser, new Tuple("GET /foo?bar=z123&baz=2"), Fields.size(1));

    assertEquals("wrong size", 1, collected.size());

    Tuple first = collected.iterator().next();

    assertEquals("wrong tuple size", 1, first.size());
    assertEquals("not equal: tuple.get(0)", "z123", first.getObject(0));
  }
Пример #24
0
  /**
   * Copies a combiner result tuple into the given output entry.
   *
   * <p>The result tuple is laid out as group fields followed by output fields. Each value is
   * written to the entry under the corresponding field, and the definition's id is stored under
   * {@code MultiCombiner.ID_FIELD}.
   *
   * @param definition supplies the id, the group fields and the output fields
   * @param output the entry to populate
   * @param resultTuple the values, group fields first, then output fields
   * @param <T> the type parameter of the combiner definition
   */
  public static <T> void populateOutputTupleEntry(
      CombinerDefinition<T> definition, TupleEntry output, Tuple resultTuple) {
    // tag the entry with the definition id so results can be told apart later
    output.setRaw(MultiCombiner.ID_FIELD, definition.getId());

    // leading section of the tuple: the group fields
    Fields groupFields = definition.getGroupFields();
    int position = 0;

    while (position < groupFields.size()) {
      output.setRaw(groupFields.get(position), resultTuple.getObject(position));
      position++;
    }

    // trailing section: the output fields, continuing where the group fields ended
    Fields outputFields = definition.getOutputFields();

    for (int field = 0; field < outputFields.size(); field++, position++) {
      output.setRaw(outputFields.get(field), resultTuple.getObject(position));
    }
  }
Пример #25
0
  /**
   * Joins all values of the given tuple with tabs and searches the resulting text for the pattern.
   *
   * @param matcher the matcher to reset with the joined tuple text
   * @param input the tuple to match against
   * @return {@code true} when the search outcome is equal to {@code negateMatch}
   */
  protected boolean matchWholeTuple(Matcher matcher, Tuple input) {
    String joined = input.toString("\t", false);

    matcher.reset(joined);

    boolean found = matcher.find();

    LOG.debug("pattern: {}, matches: {}", getPatternString(), found);

    return negateMatch == found;
  }
  /**
   * Converts one row, given as its key and its columns, into a Tuple.
   *
   * <p>The first element is the row key decoded as a string. It is followed by one element per
   * entry of the source mappings from the settings, in order, each deserialized with the type
   * configured for that column. A column whose serialized value is {@code null} yields a {@code
   * null} element.
   *
   * @param settings supplies the column data types and the source mappings
   * @param boxedKey the row key; must be a {@code ByteBuffer}
   * @param boxedColumns the row's columns; must be a {@code SortedMap<ByteBuffer, IColumn>}
   * @return the populated Tuple
   * @throws IOException if the key or a column name cannot be decoded as a string
   * @throws RuntimeException if a mapped column has no configured type, is absent from the row, or
   *     cannot be deserialized
   */
  public Tuple source(Map<String, Object> settings, Object boxedKey, Object boxedColumns)
      throws IOException {
    SortedMap<ByteBuffer, IColumn> columns = (SortedMap<ByteBuffer, IColumn>) boxedColumns;
    ByteBuffer key = (ByteBuffer) boxedKey;

    Tuple result = new Tuple();
    result.add(ByteBufferUtil.string(key));

    Map<String, String> dataTypes = SettingsHelper.getTypes(settings);
    List<String> sourceMappings = SettingsHelper.getSourceMappings(settings);

    // Index the columns by their decoded name; entrySet avoids a second lookup per column.
    Map<String, IColumn> columnsByStringName = new HashMap<String, IColumn>();
    for (Map.Entry<ByteBuffer, IColumn> entry : columns.entrySet()) {
      String stringName = ByteBufferUtil.string(entry.getKey());
      logger.debug("column name: {}", stringName);
      IColumn col = entry.getValue();
      logger.debug("column: {}", col);
      columnsByStringName.put(stringName, col);
    }

    for (String columnName : sourceMappings) {
      AbstractType columnValueType = SerializerHelper.inferType(dataTypes.get(columnName));
      if (columnValueType == null) {
        throw new RuntimeException("no type given for column: " + columnName);
      }

      // Report an absent column as such instead of surfacing it as a deserialization failure.
      IColumn column = columnsByStringName.get(columnName);
      if (column == null) {
        throw new RuntimeException("no column found for source mapping: " + columnName);
      }

      try {
        ByteBuffer serializedVal = column.value();
        Object val = null;
        if (serializedVal != null) {
          val = SerializerHelper.deserialize(serializedVal, columnValueType);
        }
        logger.debug("Putting deserialized column: {}. {}", columnName, val);
        result.add(val);
      } catch (Exception e) {
        throw new RuntimeException("Couldn't deserialize column: " + columnName, e);
      }
    }

    return result;
  }
Пример #27
0
  /**
   * Covers the case where the outgoing fields are ALL, i.e. the operation output is appended to
   * the incoming tuple. A composite view over both tuples must equal the concatenated values, and
   * selecting from it by field name is checked with {@code assertTuple}.
   */
  @Test
  public void testSelectorAll() {
    Fields incomingFields = new Fields("1", "2", "3", "4");
    Tuple incoming = new Tuple(1, 2, 3, 4);
    Fields resultFields = new Fields("5", "6", "7");
    Tuple result = new Tuple(5, 6, 7);

    Tuple composite = TupleViews.createComposite(incoming, result);

    // the view itself, and a plain copy made from it, both expose all seven values
    assertEquals(new Tuple(1, 2, 3, 4, 5, 6, 7), composite);

    Tuple copyOfView = new Tuple(composite);
    assertEquals(new Tuple(1, 2, 3, 4, 5, 6, 7), copyOfView);

    Fields joinedFields = Fields.join(incomingFields, resultFields);
    Tuple selected = composite.get(joinedFields, new Fields("3", "2"));

    assertTuple(selected);
  }
Пример #28
0
  /**
   * Removes the values at the given positions from this tuple and returns them, in the order the
   * positions were given, as a new Tuple.
   *
   * @param pos positions to remove, expressed against this tuple as it is before the call
   * @return a new Tuple containing the removed values
   */
  public Tuple remove(int[] pos) {
    verifyModifiable();

    Tuple removed = new Tuple();

    for (int current = 0; current < pos.length; current++) {
      // Every earlier removal at a lower position has shifted this value one slot to the left.
      int shift = 0;

      for (int earlier = 0; earlier < current; earlier++) {
        if (pos[earlier] < pos[current]) {
          shift++;
        }
      }

      removed.add(elements.remove(pos[current] - shift));
    }

    return removed;
  }
Пример #29
0
  /**
   * Parses the {@link #print()} String representation of a Tuple instance and returns a new Tuple
   * instance.
   *
   * <p>Leading and trailing brackets and spaces are stripped, the remainder is split on the
   * quote-and-comma separators, and each token is added as an int, a double, or a String,
   * whichever the scanner recognizes first.
   *
   * @param string the text to parse
   * @return the parsed Tuple, or {@code null} if {@code string} is {@code null} or empty
   */
  public static Tuple parse(String string) {
    if (string == null || string.isEmpty()) {
      return null;
    }

    String stripped = string.replaceAll("^ *\\[*", "").replaceAll("\\]* *$", "");

    Scanner tokens =
        new Scanner(new StringReader(stripped)).useDelimiter("(' *, *')|(^ *')|(' *$)");

    Tuple parsed = new Tuple();

    while (tokens.hasNext()) {
      if (tokens.hasNextInt()) {
        parsed.add(tokens.nextInt());
      } else if (tokens.hasNextDouble()) {
        parsed.add(tokens.nextDouble());
      } else {
        parsed.add(tokens.next());
      }
    }

    tokens.close();

    return parsed;
  }
Пример #30
0
  /**
   * Appends the bracketed representation of this tuple to the given buffer. Elements are
   * separated by ", "; a nested Tuple is printed recursively, any other element is wrapped in
   * single quotes.
   *
   * @param buffer the buffer to append to
   * @return the same buffer, for chaining
   */
  private StringBuffer print(StringBuffer buffer) {
    buffer.append("[");

    int count = elements.size();

    for (int index = 0; index < count; index++) {
      // write the separator in front of every element but the first
      if (index > 0) {
        buffer.append(", ");
      }

      Comparable current = elements.get(index);

      if (current instanceof Tuple) {
        ((Tuple) current).print(buffer);
      } else {
        buffer.append("\'").append(current).append("\'");
      }
    }

    buffer.append("]");

    return buffer;
  }