@Override
    public void coGroup(
        Iterator<Record> candidates, Iterator<Record> current, Collector<Record> out)
        throws Exception {
      // Compares field 1 of the first "current" record against field 1 of every candidate and
      // emits the current record, with field 1 lowered, only when a candidate is strictly smaller.
      if (!current.hasNext()) {
        throw new Exception("Error: Id not encountered before.");
      }
      final Record existing = current.next();
      final long existingComponentId = existing.getField(1, LongValue.class).getValue();

      // Reduce all candidate ids to their minimum; stays Long.MAX_VALUE when there are none.
      long smallestCandidate = Long.MAX_VALUE;
      while (candidates.hasNext()) {
        final long candidateId = candidates.next().getField(1, LongValue.class).getValue();
        smallestCandidate = Math.min(smallestCandidate, candidateId);
      }

      // No improvement over the id already held: emit nothing.
      if (smallestCandidate >= existingComponentId) {
        return;
      }

      newComponentId.setValue(smallestCandidate);
      existing.setField(1, newComponentId);
      out.collect(existing);
    }
Пример #2
0
  /**
   * Projects a "supplier" record onto two fields.
   *
   * <p>Output schema: field 0 (key) = nationkey, field 1 (value) = suppkey.
   */
  @Override
  public void map(Record record, Collector<Record> out) throws Exception {
    // Read the incoming key and tuple into the suppKey / inputTuple holders declared outside
    // this method.
    suppKey = record.getField(0, suppKey);
    inputTuple = record.getField(1, inputTuple);

    /* Project (suppkey | name, address, nationkey, phone, acctbal, comment): */
    // NOTE(review): tuple position 3 is taken to be nationkey per the layout above -- confirm.
    final String nationKeyText = inputTuple.getStringValueAt(3);
    final IntValue nationKey = new IntValue(Integer.parseInt(nationKeyText));

    // Overwrite the record in place: nation key first, supplier key second.
    record.setField(0, nationKey);
    record.setField(1, suppKey);

    out.collect(record);
  }
    /**
     * Writes one record to the output stream as a single {@code key_value} line terminated by a
     * newline, using fields 0 and 1 of the record as the two integers.
     *
     * @param rec record whose fields 0 and 1 are read as {@code IntValue}s
     * @throws IOException if writing to the underlying stream fails
     */
    @Override
    public void writeRecord(Record rec) throws IOException {
      final IntValue keyField = rec.getField(0, IntValue.class);
      final IntValue valueField = rec.getField(1, IntValue.class);

      // Reuse the shared builder: clear it, then assemble "<key>_<value>\n".
      this.bld.setLength(0);
      this.bld.append(keyField.getValue()).append('_').append(valueField.getValue()).append('\n');

      final String line = this.bld.toString();
      this.stream.write(line.getBytes());
    }
  /**
   * Tests the resettable iterator with too little memory, so that the data has to be written to
   * disk.
   *
   * <p>Walks the iterator once, then resets and re-walks it ten times. On every pass field 0 of the
   * n-th returned record must equal n, and the number of records must equal
   * {@code NUM_TESTRECORDS}.
   */
  @Test
  public void testResettableIterator() {
    try {
      final AbstractInvokable memOwner = new DummyInvokable();

      // create the resettable Iterator
      SpillingResettableMutableObjectIterator<Record> iterator =
          new SpillingResettableMutableObjectIterator<Record>(
              this.reader, this.serializer, this.memman, this.ioman, 2, memOwner);

      // open the iterator
      iterator.open();

      // now test walking through the iterator
      int count = 0;
      Record target = new Record();
      while ((target = iterator.next(target)) != null) {
        Assert.assertEquals(
            "In initial run, element " + count + " does not match expected value!",
            count++,
            target.getField(0, IntValue.class).getValue());
      }
      Assert.assertEquals(
          "Too few elements were deserialzied in initial run!", NUM_TESTRECORDS, count);
      // test resetting the iterator a few times
      for (int j = 0; j < 10; ++j) {
        count = 0;
        iterator.reset();
        target = new Record();
        // now we should get the same results
        while ((target = iterator.next(target)) != null) {
          // (j + 1) must be parenthesized: string concatenation is left-associative, so
          // "..." + j + 1 would append the digits of j and then "1" instead of the reset number.
          Assert.assertEquals(
              "After reset nr. "
                  + (j + 1)
                  + " element "
                  + count
                  + " does not match expected value!",
              count++,
              target.getField(0, IntValue.class).getValue());
        }
        Assert.assertEquals(
            "Too few elements were deserialzied after reset nr. " + (j + 1) + "!",
            NUM_TESTRECORDS,
            count);
      }
      // close the iterator
      iterator.close();
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail("Test encountered an exception.");
    }
  }
Пример #5
0
    /**
     * Parses one {@code key_value} line from the given byte range into fields 0 and 1 of
     * {@code target}.
     *
     * @param target record to fill
     * @param record buffer holding the line
     * @param offset start of the line within {@code record}
     * @param numBytes length of the line in bytes
     * @return {@code target} with both fields set, or {@code null} if the line has no
     *     underscore or either part is not a valid integer
     */
    @Override
    public Record readRecord(Record target, byte[] record, int offset, int numBytes) {
      final String text = new String(record, offset, numBytes);

      try {
        // A missing separator yields -1, which makes substring throw and rejects the line.
        final int separator = text.indexOf("_");

        // Key is stored before the value is parsed, so a bad value still leaves the key updated.
        final String keyText = text.substring(0, separator);
        this.key.setValue(Integer.parseInt(keyText));

        final String valueText = text.substring(separator + 1);
        this.value.setValue(Integer.parseInt(valueText));
      } catch (RuntimeException malformed) {
        return null;
      }

      target.setField(0, this.key);
      target.setField(1, this.value);
      return target;
    }
 /**
  * Serializes a record as {@code <field 0>|<field 1>} into the given buffer, starting at index 0.
  *
  * @param rec record whose field 0 is read as a {@code StringValue} and field 1 as a
  *     {@code Tuple}; both are rendered via {@code toString()}
  * @param target destination buffer
  * @return the number of bytes written, or the negated required length if {@code target} is too
  *     small (in which case nothing is written)
  */
 @Override
 public int serializeRecord(Record rec, byte[] target) throws Exception {
   final byte[] head = rec.getField(0, StringValue.class).toString().getBytes();
   final byte[] tail = rec.getField(1, Tuple.class).toString().getBytes();

   // head + one separator byte + tail
   final int required = head.length + 1 + tail.length;

   // Buffer too small: report the needed size as a negative number and leave target untouched.
   if (target.length < required) {
     return -required;
   }

   final int separatorPos = head.length;
   System.arraycopy(head, 0, target, 0, head.length);
   target[separatorPos] = '|';
   System.arraycopy(tail, 0, target, separatorPos + 1, tail.length);
   return required;
 }
  /**
   * Pulls {@code num} records from the generator and groups copies of their values (field 1) by
   * a copy of their key (field 0).
   *
   * @param iter generator to pull records from
   * @param num number of records to pull
   * @return map from each encountered key to the values seen for it, in encounter order
   */
  private Map<TestData.Key, Collection<TestData.Value>> collectData(Generator iter, int num)
      throws Exception {
    final Map<TestData.Key, Collection<TestData.Value>> valuesByKey =
        new HashMap<TestData.Key, Collection<TestData.Value>>();
    final Record reused = new Record();

    for (int pulled = 0; pulled < num; pulled++) {
      iter.next(reused);
      final TestData.Key currentKey = reused.getField(0, TestData.Key.class);

      Collection<TestData.Value> bucket = valuesByKey.get(currentKey);
      if (bucket == null) {
        // Store a fresh Key copy rather than the instance obtained from the reused record.
        bucket = new ArrayList<TestData.Value>();
        valuesByKey.put(new TestData.Key(currentKey.getKey()), bucket);
      }

      final TestData.Value currentValue = reused.getField(1, TestData.Value.class);
      bucket.add(new TestData.Value(currentValue.getValue()));
    }
    return valuesByKey;
  }
Пример #8
0
    /**
     * Writes every record from {@code inIt} to the file at {@code inputFilePath}, one
     * {@code key_value} line per record, built from fields 0 and 1 read as {@code IntValue}s.
     *
     * @param inIt source of records; consumed until it returns {@code null}
     * @param inputFilePath path of the file to create or overwrite
     * @param insertInvalidData if {@code true}, an unparseable marker line is written both before
     *     and after the record lines
     * @throws IOException if the file cannot be opened or written, or reading a record fails
     */
    public static void prepareInputFile(
        MutableObjectIterator<Record> inIt, String inputFilePath, boolean insertInvalidData)
        throws IOException {
      final String invalidLine = "####_I_AM_INVALID_########\n";

      // try-with-resources closes (and thereby flushes) the writer even when a write or a
      // record read throws, so the file handle is never leaked.
      try (BufferedWriter bw = new BufferedWriter(new FileWriter(inputFilePath))) {
        if (insertInvalidData) {
          bw.write(invalidLine);
        }

        Record rec = new Record();
        while ((rec = inIt.next(rec)) != null) {
          IntValue key = rec.getField(0, IntValue.class);
          IntValue value = rec.getField(1, IntValue.class);

          bw.write(key.getValue() + "_" + value.getValue() + "\n");
        }

        if (insertInvalidData) {
          bw.write(invalidLine);
        }
      }
    }
  /**
   * Feeds ten million generated records with random keys through a {@code UnilateralSortMerger}
   * and checks that the output is in non-descending key order and that no record was lost.
   *
   * <p>NOTE(review): the memory fraction and fan-in arguments passed to the sorter are presumably
   * chosen so that the sort spills and needs an intermediate merge, as the test name says --
   * confirm against the sorter's constructor documentation.
   */
  @Test
  public void testSpillingSortWithIntermediateMerge() {
    try {
      // amount of pairs
      final int PAIRS = 10000000;

      // comparator
      final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

      final TestData.Generator generator =
          new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
      final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, PAIRS);

      // merge iterator
      LOG.debug("Initializing sortmerger...");

      Sorter<Record> merger =
          new UnilateralSortMerger<Record>(
              this.memoryManager,
              this.ioManager,
              source,
              this.parentTask,
              this.pactRecordSerializer,
              this.pactRecordComparator,
              (double) 64 / 78,
              16,
              0.7f);

      // emit data
      LOG.debug("Emitting data...");

      // check order
      MutableObjectIterator<Record> iterator = merger.getIterator();

      LOG.debug("Checking results...");
      // starts at 1 because the first record is read before the loop
      int pairsRead = 1;

      Record rec1 = new Record();
      Record rec2 = new Record();

      Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
      while ((rec2 = iterator.next(rec2)) != null) {
        final Key k1 = rec1.getField(0, TestData.Key.class);
        final Key k2 = rec2.getField(0, TestData.Key.class);
        pairsRead++;

        // each record's key must not be smaller than its predecessor's
        Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

        // swap the two records so the one just read becomes the predecessor and the other
        // object is reused as the read target for the next iteration
        Record tmp = rec1;
        rec1 = rec2;
        k1.setKey(k2.getKey());
        rec2 = tmp;
      }
      Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
      merger.close();
      testSuccess = true;
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  /**
   * Sorts {@code NUM_PAIRS} generated records with random keys and a constant value through a
   * {@code UnilateralSortMerger}, then verifies that keys come back in non-descending order and
   * that exactly {@code NUM_PAIRS} records are returned.
   */
  @Test
  public void testSpillingSort() {
    try {
      // comparator
      final Comparator<TestData.Key> keyOrder = new TestData.KeyComparator();

      final TestData.Generator dataGen =
          new TestData.Generator(
              SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.CONSTANT, VAL);
      final MutableObjectIterator<Record> input =
          new TestData.GeneratorIterator(dataGen, NUM_PAIRS);

      // merge iterator
      LOG.debug("Initializing sortmerger...");

      Sorter<Record> sorter =
          new UnilateralSortMerger<Record>(
              this.memoryManager,
              this.ioManager,
              input,
              this.parentTask,
              this.pactRecordSerializer,
              this.pactRecordComparator,
              (double) 16 / 78,
              64,
              0.7f);

      // emit data
      LOG.debug("Reading and sorting data...");

      // check order
      MutableObjectIterator<Record> sorted = sorter.getIterator();

      LOG.debug("Checking results...");

      Record previous = new Record();
      Record following = new Record();

      // the first record is read up front, hence the counter starts at one
      previous = sorted.next(previous);
      Assert.assertTrue(previous != null);
      int emitted = 1;

      following = sorted.next(following);
      while (following != null) {
        final Key prevKey = previous.getField(0, TestData.Key.class);
        final Key nextKey = following.getField(0, TestData.Key.class);
        emitted++;

        Assert.assertTrue(keyOrder.compare(prevKey, nextKey) <= 0);

        // rotate the two record objects: the newest becomes the predecessor and the older
        // object serves as the reuse target for the next read
        final Record recycled = previous;
        previous = following;
        prevKey.setKey(nextKey.getKey());
        following = recycled;

        following = sorted.next(following);
      }
      Assert.assertTrue(NUM_PAIRS == emitted);

      sorter.close();
      testSuccess = true;
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
Пример #11
0
  /**
   * Runs a {@code DataSourceTask} over a prepared input file that also contains invalid marker
   * lines, and checks that exactly {@code keyCnt * valCnt} records come out, covering
   * {@code keyCnt} distinct keys with {@code valCnt} distinct values each.
   */
  @Test
  public void testDataSourceTask() {
    final int keyCnt = 100;
    final int valCnt = 20;

    this.outList = new ArrayList<Record>();

    // write the input file, including the invalid lines
    try {
      InputFilePreparator.prepareInputFile(
          new UniformRecordGenerator(keyCnt, valCnt, false), this.tempTestPath, true);
    } catch (IOException e1) {
      Assert.fail("Unable to set-up test input file");
    }

    super.initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
    super.addOutput(this.outList);

    DataSourceTask<Record> testTask = new DataSourceTask<>();

    final String inputUri = new File(tempTestPath).toURI().toString();
    super.registerFileInputTask(testTask, MockInputFormat.class, inputUri, "\n");

    try {
      testTask.invoke();
    } catch (Exception e) {
      System.err.println(e);
      Assert.fail("Invoke method caused exception.");
    }

    // total number of emitted records
    final int expectedRecords = keyCnt * valCnt;
    final int actualRecords = this.outList.size();
    Assert.assertTrue(
        "Invalid output size. Expected: " + expectedRecords + " Actual: " + actualRecords,
        actualRecords == expectedRecords);

    // group the distinct values seen per key
    HashMap<Integer, HashSet<Integer>> valuesByKey = new HashMap<>(keyCnt);

    for (Record emitted : this.outList) {
      final int key = emitted.getField(0, IntValue.class).getValue();
      final int val = emitted.getField(1, IntValue.class).getValue();

      HashSet<Integer> seenValues = valuesByKey.get(key);
      if (seenValues == null) {
        seenValues = new HashSet<Integer>();
        valuesByKey.put(key, seenValues);
      }
      seenValues.add(val);
    }

    final int distinctKeys = valuesByKey.size();
    Assert.assertTrue(
        "Invalid key count in out file. Expected: " + keyCnt + " Actual: " + distinctKeys,
        distinctKeys == keyCnt);

    for (Integer mapKey : valuesByKey.keySet()) {
      final int distinctValues = valuesByKey.get(mapKey).size();
      Assert.assertTrue(
          "Invalid value count for key: "
              + mapKey
              + ". Expected: "
              + valCnt
              + " Actual: "
              + distinctValues,
          distinctValues == valCnt);
    }
  }
 /**
  * Appends a copy of the given record to the output collection, so later changes to
  * {@code record} do not affect what was collected.
  */
 @Override
 public void collect(Record record) {
   final Record snapshot = record.createCopy();
   this.output.add(snapshot);
 }
  /**
   * Checks {@code SortMergeCoGroupIterator} against an expected co-grouping computed directly from
   * the two generators.
   *
   * <p>For every key group the iterator returns, all values from both sides must be present in the
   * expected collections for that key; consumed values and keys are removed as they are matched,
   * so the expected map must be empty once the iterator is exhausted.
   */
  @Test
  public void testMerge() {
    try {

      generator1 = new Generator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
      generator2 = new Generator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

      // the readers wrap the very same generator instances that are drained below
      reader1 = new TestData.GeneratorIterator(generator1, INPUT_1_SIZE);
      reader2 = new TestData.GeneratorIterator(generator2, INPUT_2_SIZE);

      // collect expected data
      Map<TestData.Key, Collection<TestData.Value>> expectedValuesMap1 =
          collectData(generator1, INPUT_1_SIZE);
      Map<TestData.Key, Collection<TestData.Value>> expectedValuesMap2 =
          collectData(generator2, INPUT_2_SIZE);
      Map<TestData.Key, List<Collection<TestData.Value>>> expectedCoGroupsMap =
          coGroupValues(expectedValuesMap1, expectedValuesMap2);

      // reset the generators
      // NOTE(review): presumably makes the readers replay the sequence just collected -- confirm
      // against Generator.reset()
      generator1.reset();
      generator2.reset();

      // compare with iterator values
      SortMergeCoGroupIterator<Record, Record> iterator =
          new SortMergeCoGroupIterator<Record, Record>(
              this.reader1,
              this.reader2,
              this.serializer1,
              this.comparator1,
              this.serializer2,
              this.comparator2,
              this.pairComparator);

      iterator.open();

      // reusable key holder, overwritten for each group
      final TestData.Key key = new TestData.Key();
      while (iterator.next()) {
        Iterator<Record> iter1 = iterator.getValues1().iterator();
        Iterator<Record> iter2 = iterator.getValues2().iterator();

        TestData.Value v1 = null;
        TestData.Value v2 = null;

        // take the group's key from the first record of whichever side is non-empty; that
        // record's value is remembered so it can be removed from the expected values below
        if (iter1.hasNext()) {
          Record rec = iter1.next();
          rec.getFieldInto(0, key);
          v1 = rec.getField(1, TestData.Value.class);
        } else if (iter2.hasNext()) {
          Record rec = iter2.next();
          rec.getFieldInto(0, key);
          v2 = rec.getField(1, TestData.Value.class);
        } else {
          Assert.fail("No input on both sides.");
        }

        // assert that matches for this key exist
        Assert.assertTrue("No matches for key " + key, expectedCoGroupsMap.containsKey(key));

        // index 0 holds the expected values of the first input, index 1 those of the second
        Collection<TestData.Value> expValues1 = expectedCoGroupsMap.get(key).get(0);
        Collection<TestData.Value> expValues2 = expectedCoGroupsMap.get(key).get(1);

        // remove the value already consumed while reading the key (result is not asserted here)
        if (v1 != null) {
          expValues1.remove(v1);
        } else {
          expValues2.remove(v2);
        }

        // every remaining value of the first side must be expected, and none may be left over
        while (iter1.hasNext()) {
          Record rec = iter1.next();
          Assert.assertTrue(
              "Value not in expected set of first input",
              expValues1.remove(rec.getField(1, TestData.Value.class)));
        }
        Assert.assertTrue("Expected set of first input not empty", expValues1.isEmpty());

        // same check for the second side
        while (iter2.hasNext()) {
          Record rec = iter2.next();
          Assert.assertTrue(
              "Value not in expected set of second input",
              expValues2.remove(rec.getField(1, TestData.Value.class)));
        }
        Assert.assertTrue("Expected set of second input not empty", expValues2.isEmpty());

        // key fully matched; drop it so leftovers can be detected after the loop
        expectedCoGroupsMap.remove(key);
      }
      iterator.close();

      Assert.assertTrue("Expected key set not empty", expectedCoGroupsMap.isEmpty());
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
  }