@Test
  public void leftJoinTest() throws Exception {
    // Runs the left-join script over three comma-separated inputs and checks, per join key,
    // the two joined columns of alias "data5".
    PigTest test = createPigTestFromString(leftJoinTest);

    this.writeLinesToFile("input1", "1,1", "2,2", "5,5");

    this.writeLinesToFile("input2", "1,10", "3,30", "5,50");

    this.writeLinesToFile("input3", "2,100", "5,500");

    test.runScript();

    List<Tuple> lines = this.getLinesForAlias(test, "data5");

    Assert.assertEquals(3, lines.size());
    for (Tuple t : lines) {
      Integer key = (Integer) t.get(0);
      // Fail with an assertion instead of an NPE from unboxing a null key in the switch.
      Assert.assertNotNull(key);
      switch (key) {
        case 1:
          // Key 1 is asserted to carry 10 in the second column and 0 in the third.
          Assert.assertEquals(10L, t.get(1));
          Assert.assertEquals(0L, t.get(2));
          break;
        case 2:
          // Key 2 is asserted to carry 0 in the second column and 100 in the third.
          Assert.assertEquals(0L, t.get(1));
          Assert.assertEquals(100L, t.get(2));
          break;
        case 5:
          // Key 5 is asserted to carry values in both joined columns.
          Assert.assertEquals(50L, t.get(1));
          Assert.assertEquals(500L, t.get(2));
          break;
        default:
          Assert.fail("Did not expect: " + key);
      }
    }
  }
  /**
   * Runs the lazy int-to-datetime coalesce script and checks the datetime produced for each test
   * case id in alias "data3".
   *
   * @throws Exception if the script or the result retrieval fails
   */
  @Test
  public void coalesceCastIntToDatetimeLazyTest() throws Exception {
    PigTest test = createPigTestFromString(coalesceCastIntToDatetimeLazyTest);

    // Case 1 supplies epoch milliseconds; case 2 leaves the value empty.
    this.writeLinesToFile("input", "1,1375826183000", "2,");

    test.runScript();

    List<Tuple> lines = this.getLinesForAlias(test, "data3");

    Assert.assertEquals(2, lines.size());
    for (Tuple t : lines) {
      Integer testcase = (Integer) t.get(0);
      Assert.assertNotNull(testcase);
      switch (testcase) {
        case 1:
          // Normalize to UTC so the string comparison does not depend on the value's zone.
          Assert.assertEquals(
              "2013-08-06T21:56:23.000Z",
              ((DateTime) t.get(1)).toDateTime(DateTimeZone.UTC).toString());
          break;
        case 2:
          Assert.assertEquals("1970-01-01T00:00:00.000Z", t.get(1).toString());
          break;
        default:
          // The switch is on the test case id, so report the id that was not expected.
          Assert.fail("Did not expect: " + testcase);
      }
    }
  }
  /**
   * Runs the {@code coalesceBagIncompatibleTypeTest} script and reads alias "data3"; the test
   * passes only if a {@link FrontendException} is thrown along the way.
   *
   * @throws Exception the expected {@link FrontendException}, or any other failure
   */
  @Test(expectedExceptions = FrontendException.class)
  public void coalesceBagIncompatibleTypeTest() throws Exception {
    final PigTest pigTest = createPigTestFromString(coalesceBagIncompatibleTypeTest);
    writeLinesToFile("input", "1,1,2L}");

    pigTest.runScript();
    // The result is not inspected; only the exception matters.
    getLinesForAlias(pigTest, "data3");
  }
  /**
   * Feeds two text lines into alias "lines" of {@code /pig/MostSeenStartLetter.pig} and checks
   * that alias "result" yields {@code (t,4)}.
   *
   * @throws IOException if the script resource is missing from the classpath or cannot be read
   * @throws ParseException if the script cannot be parsed
   */
  @Test
  public void testMostSeenLetterScript() throws IOException, ParseException {
    String scriptResource = "/pig/MostSeenStartLetter.pig";
    URL scriptUrl = this.getClass().getResource(scriptResource);
    if (scriptUrl == null) {
      // getResource returns null for a missing resource; fail with the name instead of an NPE.
      throw new java.io.FileNotFoundException(
          "Pig script not found on classpath: " + scriptResource);
    }
    PigTest pigTest = new PigTest(scriptUrl.getPath());

    String[] input = {"input text for the script", "second line for the input text"};
    String[] expected = {"(t,4)"};

    pigTest.assertOutput("lines", input, "result", expected);
  }
Example #5
0
  /**
   * Runs the script with its STORE statement re-enabled and asserts that the resulting
   * "top_3_queries" path can be deleted through the cluster handle.
   */
  @Test
  public void testStore() throws ParseException, IOException {
    test =
        new PigTest(
            PIG_SCRIPT,
            new String[] {
              "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
            });

    // PigUnit strips every STORE and DUMP by default, so undo that for STORE.
    test.unoverride("STORE");
    test.runScript();

    final boolean deleted = cluster.delete(new Path("top_3_queries"));
    TestCase.assertTrue(deleted);
  }
Example #6
0
  /**
   * Replaces the {@code queries_limit} alias with a LIMIT of 2 and checks that only the first
   * two expected rows are produced.
   */
  @Test
  public void testOverride() throws ParseException, IOException {
    final String[] scriptArgs = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };
    final String[] expectedRows = {
      "(yahoo,25)", "(facebook,15)",
    };

    test = new PigTest(PIG_SCRIPT, scriptArgs);
    test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");

    test.assertOutput(expectedRows);
  }
Example #7
0
  /**
   * Supplies tab-separated rows for alias "data" inline and checks the rows produced by alias
   * "queries_limit".
   */
  @Test
  public void testTextInput() throws ParseException, IOException {
    final String[] scriptArgs = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };
    // Inline replacement for the "data" alias: query <TAB> count.
    final String[] dataRows = {
      "yahoo\t10",
      "twitter\t7",
      "facebook\t10",
      "yahoo\t15",
      "facebook\t5",
      "a\t1",
      "b\t2",
      "c\t3",
      "d\t4",
      "e\t5",
    };
    final String[] expectedRows = {
      "(yahoo,25)", "(facebook,15)", "(twitter,7)",
    };

    test = new PigTest(PIG_SCRIPT, scriptArgs);

    test.assertOutput("data", dataRows, "queries_limit", expectedRows);
  }
Example #8
0
  /**
   * Builds the test from a parameter file instead of inline arguments and compares the output
   * with the expected-results file.
   */
  @Test
  public void testArgFiles() throws ParseException, IOException {
    final String[] paramFiles = {"test/data/pigunit/top_queries_params.txt"};
    final File expectedOutput = new File("test/data/pigunit/top_queries_expected_top_3.txt");

    // No inline arguments are passed; only the parameter file is supplied.
    test = new PigTest(PIG_SCRIPT, null, paramFiles);

    test.assertOutput(expectedOutput);
  }
  /**
   * Runs the simple-random-sample script with a sampling probability of 0.3 and asserts that
   * alias "sampled" reports {@code ceil(p * n)} rows, where {@code n} is the number of input
   * rows written by this test.
   *
   * @throws Exception if writing the input, running the script or the assertion fails
   */
  @Test
  public void simpleRandomSampleTest() throws Exception {
    String[] rows = {
      "A1\tB1\t1",
      "A1\tB1\t4",
      "A1\tB3\t4",
      "A1\tB4\t4",
      "A2\tB1\t4",
      "A2\tB2\t4",
      "A3\tB1\t3",
      "A3\tB1\t1",
      "A3\tB3\t77",
      "A4\tB1\t3",
      "A4\tB2\t3",
      "A4\tB3\t59",
      "A4\tB4\t29",
      "A5\tB1\t4",
      "A6\tB2\t3",
      "A6\tB2\t55",
      "A6\tB3\t1",
      "A7\tB1\t39",
      "A7\tB2\t27",
      "A7\tB3\t85",
      "A8\tB1\t4",
      "A8\tB2\t45",
      "A9\tB3\t92",
      "A9\tB3\t0",
      "A9\tB6\t42",
      "A9\tB5\t1",
      "A10\tB1\t7",
      "A10\tB2\t23",
      "A10\tB2\t1",
      "A10\tB2\t31",
      "A10\tB6\t41",
      "A10\tB7\t52",
    };
    writeLinesToFile("input", rows);

    // Derive the population size from the data so it cannot drift from the rows above.
    int n = rows.length;
    double p = 0.3;
    int s = (int) Math.ceil(p * n);
    PigTest test = createPigTestFromString(simpleRandomSampleTest, "SAMPLING_PROBABILITY=" + p);

    test.runScript();

    assertOutput(test, "sampled", "(" + s + ")");
  }
Example #10
0
  /**
   * One-time fixture: obtains the PigUnit cluster handle and passes the top-queries input data
   * file to {@code cluster.update} under the name the tests load it by.
   *
   * @throws IOException if the cluster update fails
   */
  @BeforeClass
  public static void setUpOnce() throws IOException {
    final Path localCopy = new Path("test/data/pigunit/top_queries_input_data.txt");
    final Path clusterCopy = new Path("top_queries_input_data.txt");

    cluster = PigTest.getCluster();
    cluster.update(localCopy, clusterCopy);
  }
  /**
   * Runs the stratified-sample script with a sampling probability of 0.5 and checks the
   * per-group rows reported by alias "sampled".
   *
   * @throws Exception if writing the input, running the script or the assertion fails
   */
  @Test
  public void stratifiedSampleTest() throws Exception {
    // Tab-separated input rows; the first column is the group label seen in the expected output.
    final String[] rows = {
      "A1\tB1\t1",
      "A1\tB1\t4",
      "A1\tB3\t4",
      "A1\tB4\t4",
      "A2\tB1\t4",
      "A2\tB2\t4",
      "A3\tB1\t3",
      "A3\tB1\t1",
      "A3\tB3\t77",
      "A4\tB1\t3",
      "A4\tB2\t3",
      "A4\tB3\t59",
      "A4\tB4\t29",
      "A5\tB1\t4",
      "A6\tB2\t3",
      "A6\tB2\t55",
      "A6\tB3\t1",
      "A7\tB1\t39",
      "A7\tB2\t27",
      "A7\tB3\t85",
      "A8\tB1\t4",
      "A8\tB2\t45",
      "A9\tB3\t92",
      "A9\tB3\t0",
      "A9\tB6\t42",
      "A9\tB5\t1",
      "A10\tB1\t7",
      "A10\tB2\t23",
      "A10\tB2\t1",
      "A10\tB2\t31",
      "A10\tB6\t41",
      "A10\tB7\t52",
    };
    writeLinesToFile("input", rows);

    final double probability = 0.5;
    final PigTest pigTest =
        createPigTestFromString(stratifiedSampleTest, "SAMPLING_PROBABILITY=" + probability);

    pigTest.runScript();

    assertOutput(
        pigTest,
        "sampled",
        "(A1,2)",
        "(A10,3)",
        "(A2,1)",
        "(A3,2)",
        "(A4,2)",
        "(A5,1)",
        "(A6,2)",
        "(A7,2)",
        "(A8,1)",
        "(A9,2)");
  }
Example #12
0
  /**
   * Runs the script with inline arguments and compares its output with the expected-results
   * file.
   */
  @Test
  public void testFileOutput() throws ParseException, IOException {
    final String[] scriptArgs = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };
    final File expectedOutput = new File("test/data/pigunit/top_queries_expected_top_3.txt");

    test = new PigTest(PIG_SCRIPT, scriptArgs);

    test.assertOutput(expectedOutput);
  }
Example #13
0
  /**
   * Runs the {@code transposeBadTypeTest} script and expects a {@code FrontendException}. The
   * bag checks below only execute if no exception is thrown first, in which case the test still
   * fails for lack of the expected exception.
   *
   * @throws Exception the expected {@code FrontendException}, or any other failure
   */
  @Test(expectedExceptions = {org.apache.pig.impl.logicalLayer.FrontendException.class})
  public void transposeBadTypeTest() throws Exception {
    final PigTest pigTest = createPigTestFromString(transposeBadTypeTest);
    writeLinesToFile("input", "1,10,11,12.0", "2,20,21,22.0");
    pigTest.runScript();

    final List<Tuple> rows = getLinesForAlias(pigTest, "data3");
    for (Tuple row : rows) {
      final int caseId = (Integer) row.get(0);
      final DataBag transposed = (DataBag) row.get(1);
      Assert.assertEquals(transposed.size(), 3);

      // Entry k (0-based) is asserted to be keyed "val<k+1>" and to hold caseId * 10 + k.
      int position = 0;
      for (Tuple entry : transposed) {
        final String expectedKey = String.format("val%d", position + 1);
        Assert.assertEquals((String) entry.get(0), expectedKey);

        final int value = (Integer) entry.get(1);
        Assert.assertEquals(value, caseId * 10 + position);
        position++;
      }
    }
  }
Example #14
0
  /**
   * Runs the script with inline arguments and checks its output without naming any input or
   * output alias explicitly.
   */
  @Test
  public void testImplicitNtoN() throws ParseException, IOException {
    final String[] scriptArgs = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };
    final String[] expectedRows = {
      "(yahoo,25)", "(facebook,15)", "(twitter,7)",
    };

    test = new PigTest(PIG_SCRIPT, scriptArgs);

    test.assertOutput(expectedRows);
  }
Example #15
0
  /**
   * Placeholder for running the script against a mocked server and cluster. Currently ignored;
   * both collaborators are passed as {@code null}.
   */
  @Ignore("Not ready yet")
  @Test
  public void testWithMock() throws ParseException, IOException {
    final String[] scriptArgs = {
      "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
    };

    // No mocks exist yet, so typed nulls stand in for them.
    final PigServer serverStub = null;
    final Cluster clusterStub = null;

    test = new PigTest(PIG_SCRIPT, scriptArgs, serverStub, clusterStub);

    final File expectedOutput = new File("data/top_queries_expected_top_3.txt");
    test.assertOutput(expectedOutput);
  }
  // Runs the int-and-double coalesce script. The coalesced column of alias "data4" is asserted
  // as a double (500.0 and 10000.0), i.e. the result is expected to be a double, not a long.
  @Test
  public void coalesceCastIntToDoubleTest() throws Exception {
    PigTest test = createPigTestFromString(coalesceIntAndDoubleTest);

    // Case 1 supplies a value; case 2 leaves it empty.
    this.writeLinesToFile("input", "1,5", "2,");

    test.runScript();

    List<Tuple> lines = this.getLinesForAlias(test, "data4");

    Assert.assertEquals(2, lines.size());
    for (Tuple t : lines) {
      Integer testcase = (Integer) t.get(0);
      // Fail with an assertion instead of an NPE from unboxing a null id in the switch.
      Assert.assertNotNull(testcase);
      switch (testcase) {
        case 1:
          Assert.assertEquals(500.0, t.get(1));
          break;
        case 2:
          Assert.assertEquals(10000.0, t.get(1));
          break;
        default:
          // The switch is on the test case id, so report the id that was not expected.
          Assert.fail("Did not expect: " + testcase);
      }
    }
  }
  /**
   * Runs the int coalesce script over six rows with different combinations of empty columns and
   * checks the coalesced value in alias "data3" for each test case id.
   *
   * @throws Exception if the script or the result retrieval fails
   */
  @Test
  public void coalesceIntTest() throws Exception {
    PigTest test = createPigTestFromString(coalesceIntTest);

    // Row layout: testcase id followed by three optional int columns.
    this.writeLinesToFile("input", "1,1,2,3", "2,,2,3", "3,,,3", "4,,,", "5,1,,3", "6,1,,");

    test.runScript();

    List<Tuple> lines = this.getLinesForAlias(test, "data3");

    Assert.assertEquals(6, lines.size());
    for (Tuple t : lines) {
      Integer testcase = (Integer) t.get(0);
      // Fail with an assertion instead of an NPE from unboxing a null id in the switch.
      Assert.assertNotNull(testcase);
      switch (testcase) {
        case 1:
          Assert.assertEquals(1, t.get(1));
          break;
        case 2:
          Assert.assertEquals(2, t.get(1));
          break;
        case 3:
          Assert.assertEquals(3, t.get(1));
          break;
        case 4:
          // Every value column is empty for this row, so a null result is asserted.
          Assert.assertNull(t.get(1));
          break;
        case 5:
          Assert.assertEquals(1, t.get(1));
          break;
        case 6:
          Assert.assertEquals(1, t.get(1));
          break;
        default:
          // The switch is on the test case id, so report the id that was not expected.
          Assert.fail("Did not expect: " + testcase);
      }
    }
  }
Example #18
0
  /**
   * Builds the test from an inline script and checks the rows of alias "queries_limit", joined
   * with newlines, against the expected text.
   */
  @Test
  public void testGetLastAlias() throws ParseException, IOException {
    final String[] statements = {
      "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
      "queries_group = GROUP data BY query PARALLEL 1;",
      "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
      "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
      "queries_limit = LIMIT queries_ordered 3;",
      "STORE queries_limit INTO 'top_3_queries';",
    };

    test = new PigTest(statements);

    final String expectedText = "(yahoo,25)\n(facebook,15)\n(twitter,7)";
    final String actualText = StringUtils.join(test.getAlias("queries_limit"), "\n");

    TestCase.assertEquals(expectedText, actualText);
  }
Example #19
0
  /** Builds the test from an inline script and checks its output rows. */
  @Test
  public void testInlinePigScript() throws ParseException, IOException {
    final String[] statements = {
      "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
      "queries_group = GROUP data BY query PARALLEL 1;",
      "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
      "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
      "queries_limit = LIMIT queries_ordered 3;",
      "STORE queries_limit INTO 'top_3_queries';",
    };
    final String[] expectedRows = {
      "(yahoo,25)", "(facebook,15)", "(twitter,7)",
    };

    test = new PigTest(statements);

    test.assertOutput(expectedRows);
  }
Example #20
0
  /**
   * Builds the test from an inline script that defines and applies the TOKENIZE function, then
   * checks the three output rows.
   */
  @Test
  public void testWithUdf() throws ParseException, IOException {
    final String[] statements = {
      // "REGISTER myIfNeeded.jar;",
      "DEFINE TOKENIZE TOKENIZE();",
      "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
      "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;",
      "queries_ordered = ORDER queries BY query DESC PARALLEL 1;",
      "queries_limit = LIMIT queries_ordered 3;",
      "STORE queries_limit INTO 'top_3_queries';",
    };
    final String[] expectedRows = {
      "(yahoo,{(yahoo)})", "(yahoo,{(yahoo)})", "(twitter,{(twitter)})",
    };

    test = new PigTest(statements);

    test.assertOutput(expectedRows);
  }
Example #21
0
  /**
   * This is a test for default bootup. PIG-2456
   *
   * <p>Writes a {@code pig.properties} file whose {@code pig.load.default.statements} entry
   * points at a bootup file containing the LOAD statement, then runs a script that omits that
   * LOAD and checks the rows of alias "queries_limit". The three files created here are deleted
   * even when the test fails, so a stale {@code pig.properties} is not left in the working
   * directory.
   *
   * @throws ParseException if the script cannot be parsed
   * @throws IOException if a file cannot be written or the script cannot be run
   */
  @Test
  public void testDefaultBootup() throws ParseException, IOException {
    // Test with properties file
    String pigProps = "pig.properties";
    String bootupPath = "/tmp/.temppigbootup";
    String scriptPath = "/tmp/tempScript";
    File propertyFile = new File(pigProps);
    File bootupFile = new File(bootupPath);
    File scriptFile = new File(scriptPath);

    try {
      writeLinesAndClose(propertyFile, "pig.load.default.statements=" + bootupPath);

      writeLinesAndClose(
          bootupFile,
          "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);");

      String[] script = {
        // The following line is commented as the test creates a bootup file which contains it
        // instead. PigTests (and Pig scripts in general) will read the bootup file to load
        // default statements
        // "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
        "queries_group = GROUP data BY query PARALLEL 1;",
        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
        "queries_limit = LIMIT queries_ordered 3;",
        "STORE queries_limit INTO 'top_3_queries';",
      };
      writeLinesAndClose(scriptFile, script);

      String[] args = {
        "n=3", "reducers=1", "input=top_queries_input_data.txt", "output=top_3_queries",
      };

      // Create a pigunit.pig.PigServer and Cluster to run this test.
      PigServer pig = null;
      if (System.getProperties().containsKey("pigunit.exectype.cluster")) {
        LOG.info("Using cluster mode");
        pig = new PigServer(ExecType.MAPREDUCE);
      } else {
        LOG.info("Using default local mode");
        pig = new PigServer(ExecType.LOCAL);
      }

      final Cluster bootupCluster = new Cluster(pig.getPigContext());

      test = new PigTest(scriptPath, args, pig, bootupCluster);

      String[] output = {
        "(yahoo,25)", "(facebook,15)", "(twitter,7)",
      };

      test.assertOutput("queries_limit", output);
    } finally {
      // Clean up unconditionally; a failed assertion must not leave these files behind.
      propertyFile.delete();
      scriptFile.delete();
      bootupFile.delete();
    }
  }

  /** Writes each line to {@code target}, closing the writer even if a write fails. */
  private static void writeLinesAndClose(File target, String... lines) throws IOException {
    PrintWriter out = new PrintWriter(new FileWriter(target));
    try {
      for (String line : lines) {
        out.println(line);
      }
    } finally {
      out.close();
    }
  }