Пример #1
0
  @Test
  public void testBzipStoreInMultiQuery2() throws Exception {
    String[] inputData = new String[] {"1\t2\r3\t4"};

    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext
        .getProperties()
        .setProperty("output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");

    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs =
        FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    FileStatus stat = fs.getFileStatus(new Path("output2/part-m-00000.bz2"));
    assertTrue(stat.getLen() > 0);

    stat = fs.getFileStatus(new Path("output2.bz2/part-m-00000.bz2"));
    assertTrue(stat.getLen() > 0);
  }
Пример #2
0
  // See PIG-1434
  @Test
  public void testFilteredScalarDollarProj() throws Exception {
    String output = BUILD_TEST_TMP + "table_testFilteredScalarDollarProjDir";
    TestScalarAliases.deleteDirectory(new File(output));
    String[] input = {
      "1\t5\t[state#maine,city#portland]\t{(a),(b)}\t(a,b)", "2\t10\t\t\t", "3\t20\t\t\t"
    };

    // Test the use of scalars in expressions
    String inputPath = BUILD_TEST_TMP + "table_testFilteredScalarDollarProj";
    TestScalarAliases.createLocalInputFile(inputPath, input);
    // Test in script mode
    pigServer.setBatchOn();
    pigServer.registerQuery(
        "A = LOAD '"
            + inputPath
            + "'"
            + " as (a0: long, a1: double, a2 : bytearray, "
            + "a3: bag{ t : tuple(tc : chararray)}, "
            + "a4: tuple(c1 : chararray, c2 : chararray) );");
    pigServer.registerQuery("B = filter A by $1 < 8;");
    pigServer.registerQuery(
        "Y = foreach A generate (a0 * B.$0), (a1 / B.$1), B.$2, B.$2#'state', B.$3, B.a4;");
    pigServer.registerQuery("Store Y into '" + output + "';");
    pigServer.explain("Y", System.err);
    pigServer.executeBatch();
    // Check output
    pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);");
    pigServer.explain("Z", System.err);

    Iterator<Tuple> iter = pigServer.openIterator("Z");

    Tuple t = iter.next();
    assertTrue(t.toString().equals("(1,1.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(2,2.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(3,4.0)"));

    assertFalse(iter.hasNext());

    // Check in non-batch mode
    iter = pigServer.openIterator("Y");

    t = iter.next();
    assertEquals(t.toString(), "(1,1.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))");

    t = iter.next();
    assertEquals(t.toString(), "(2,2.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))");

    t = iter.next();
    assertEquals(t.toString(), "(3,4.0,[state#maine,city#portland],maine,{(a),(b)},(a,b))");

    assertFalse(iter.hasNext());
  }
Пример #3
0
  // See PIG-1434
  @Test
  public void testScalarWithTwoBranches() throws Exception {
    String[] inputA = {"1\t5", "2\t10", "3\t20"};

    String[] inputX = {"pig", "hadoop", "rocks"};

    String output = BUILD_TEST_TMP + "testScalarWithTwoBranchesDir";
    TestScalarAliases.deleteDirectory(new File(output));
    // Test the use of scalars in expressions
    String inputPathA = BUILD_TEST_TMP + "testScalarWithTwoBranchesA";
    TestScalarAliases.createLocalInputFile(inputPathA, inputA);
    String inputPathX = BUILD_TEST_TMP + "testScalarWithTwoBranchesX";
    TestScalarAliases.createLocalInputFile(inputPathX, inputX);
    // Test in script mode
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD '" + inputPathA + "' as (a0: long, a1: double);");
    pigServer.registerQuery("B = group A all;");
    pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;");
    pigServer.registerQuery("X = LOAD '" + inputPathX + "' as (names: chararray);");
    pigServer.registerQuery("Y = foreach X generate names, C.max;");
    pigServer.registerQuery("Store Y into '" + output + "';");
    pigServer.executeBatch();
    // Check output
    pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: chararray, a1: double);");

    Iterator<Tuple> iter = pigServer.openIterator("Z");

    Tuple t = iter.next();
    assertTrue(t.toString().equals("(pig,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(hadoop,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(rocks,20.0)"));

    assertFalse(iter.hasNext());

    // Check in non-batch mode
    iter = pigServer.openIterator("Y");

    t = iter.next();
    assertTrue(t.toString().equals("(pig,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(hadoop,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(rocks,20.0)"));

    assertFalse(iter.hasNext());

    pigServer.getPigContext().getProperties().remove("tez.am.inline.task.execution.max-tasks");
  }
Пример #4
0
  // See PIG-1434
  @Test
  public void testScalarAliasesBatchNobatch() throws Exception {
    String[] input = {"1\t5", "2\t10", "3\t20"};

    String output = BUILD_TEST_TMP + "table_testScalarAliasesDir";
    TestScalarAliases.deleteDirectory(new File(output));
    // Test the use of scalars in expressions
    String inputPath = BUILD_TEST_TMP + "table_testScalarAliasesBatch";
    TestScalarAliases.createLocalInputFile(inputPath, input);
    // Test in script mode
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);");
    pigServer.registerQuery("B = group A all;");
    pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;");
    pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);");
    pigServer.registerQuery("Store Y into '" + output + "';");
    pigServer.executeBatch();
    // Check output
    pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);");

    Iterator<Tuple> iter;
    Tuple t;
    iter = pigServer.openIterator("Z");

    t = iter.next();
    assertTrue(t.toString().equals("(3,0.25)"));

    t = iter.next();
    assertTrue(t.toString().equals("(6,0.5)"));

    t = iter.next();
    assertTrue(t.toString().equals("(9,1.0)"));

    assertFalse(iter.hasNext());

    iter = pigServer.openIterator("Y");

    t = iter.next();
    assertTrue(t.toString().equals("(3,0.25)"));

    t = iter.next();
    assertTrue(t.toString().equals("(6,0.5)"));

    t = iter.next();
    assertTrue(t.toString().equals("(9,1.0)"));

    assertFalse(iter.hasNext());
  }
Пример #5
0
  @Test
  public void testScalarNullValue() throws Exception {
    Storage.Data data = Storage.resetData(pigServer);
    data.set("input", Storage.tuple("a", 1), Storage.tuple("b", 2));

    pigServer.setBatchOn();
    pigServer.registerQuery("A = load 'input' using mock.Storage() as (a:chararray, b:int);");
    pigServer.registerQuery("B = FILTER A by a == 'c';");
    pigServer.registerQuery("C = FOREACH A generate a, b + B.b;");
    pigServer.registerQuery("store C into 'output' using mock.Storage();");

    pigServer.executeBatch();

    List<Tuple> actualResults = data.get("output");
    List<Tuple> expectedResults =
        Util.getTuplesFromConstantTupleStrings(new String[] {"('a', null)", "('b', null)"});
    Util.checkQueryOutputsAfterSort(actualResults.iterator(), expectedResults);
  }
  @Test
  public void testUpdate() throws Exception {
    BasicDBObject obj1 = new BasicDBObject().append("f1", "a").append("f2", "value1");
    BasicDBObject obj2 = new BasicDBObject().append("f1", "b").append("f2", "value2");
    insertData("testUpdate", obj1, obj2);

    String[] input = {"a\tnewValue1\t1", "b\tnewValue2\t2"};
    Util.createLocalInputFile("simple_input", input);

    pigServerLocal = new PigServer(ExecType.LOCAL);
    pigServerLocal.registerQuery(
        "A = LOAD 'simple_input' as (f1:chararray, f2:chararray, f3:int);");
    pigServerLocal.registerQuery(
        String.format(
            "STORE A INTO 'mongodb://localhost:27017/%s.%s' USING com.mongodb.hadoop.pig.MongoUpdateStorage("
                + "  '{f1:\"\\\\$f1\"}',"
                + "  '{\\\\$set:{f2:\"\\\\$f2\", f3:\"\\\\$f3\"}}',"
                + "  'f1:chararray, f2:chararray, f3:int'"
                + ");",
            dbName, "update_simple"));
    pigServerLocal.setBatchOn();
    pigServerLocal.executeBatch();

    MongoClient mc = new MongoClient();
    DBCollection col = mc.getDB(dbName).getCollection("update_simple");

    DBCursor cursor = col.find();

    assertEquals(2, cursor.size());
    DBObject result1 = cursor.next();
    assertEquals("a", result1.get("f1"));
    assertEquals("newValue1", result1.get("f2"));
    assertEquals(1, result1.get("f3"));
    DBObject result2 = cursor.next();
    assertEquals("b", result2.get("f1"));
    assertEquals("newValue2", result2.get("f2"));
    assertEquals(2, result2.get("f3"));
  }
Пример #7
0
  // org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
  @Test
  public void testScriptL1() throws Exception {
    System.out.println("testScriptL1");
    PigServer pigServer = new PigServer(pigContext);
    pigServer.setBatchOn();
    // pigServer.registerJar("/home/kaituo/code/pig3/trunk/pigperf.jar");
    pigServer.registerQuery(
        "A = load '"
            + page_viewsX.toString()
            + "/part-m-00000' using org.apache.pig.test.udf.storefunc.PigPerformanceLoader()  as (user: chararray, action:int, timespent:int, query_term:chararray, ip_addr:long, timestamp:long,estimated_revenue:double, page_info:map[], page_links:bag{t:(p:map[])});");
    // as (user, action, timespent, query_term, ip_addr, timestamp, estimated_revenue, page_info,
    // page_links);");
    pigServer.registerQuery(
        "B = foreach A generate user, action, page_info, flatten(page_links) as page_links;");
    // user, (int)action as action, (map[])page_info as page_info,
    // flatten((bag{tuple(map[])})page_links) as page_links;");
    pigServer.registerQuery(
        "C = foreach B generate user, (action == 1 ? page_info#'a' : page_links#'b') as header;");
    pigServer.registerQuery("D = group C by user parallel 40;");
    pigServer.registerQuery("E = foreach D generate group, COUNT(C) as cnt;");
    Map<Operator, DataBag> derivedData = pigServer.getExamples2("E");

    assertTrue(derivedData != null);
  }
Пример #8
0
  // See PIG-1434
  @Test
  public void testUseScalarMultipleTimes() throws Exception {
    String[] input = {"1\t5", "2\t10", "3\t20"};

    String outputY = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutY";
    TestScalarAliases.deleteDirectory(new File(outputY));
    String outputZ = BUILD_TEST_TMP + "table_testUseScalarMultipleTimesOutZ";
    TestScalarAliases.deleteDirectory(new File(outputZ));
    // Test the use of scalars in expressions
    String inputPath = BUILD_TEST_TMP + "table_testUseScalarMultipleTimes";
    TestScalarAliases.createLocalInputFile(inputPath, input);
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD '" + inputPath + "' as (a0: long, a1: double);");
    pigServer.registerQuery("B = group A all;");
    pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;");
    pigServer.registerQuery("Y = foreach A generate (a0 * C.count), (a1 / C.max);");
    pigServer.registerQuery("Store Y into '" + outputY + "';");
    pigServer.registerQuery("Z = foreach A generate (a1 + C.count), (a0 * C.max);");
    pigServer.registerQuery("Store Z into '" + outputZ + "';");
    // Test Multiquery store
    pigServer.executeBatch();

    // Check output
    pigServer.registerQuery("M = LOAD '" + outputY + "' as (a0: int, a1: double);");

    Iterator<Tuple> iter;
    Tuple t;
    iter = pigServer.openIterator("M");

    t = iter.next();
    assertTrue(t.toString().equals("(3,0.25)"));

    t = iter.next();
    assertTrue(t.toString().equals("(6,0.5)"));

    t = iter.next();
    assertTrue(t.toString().equals("(9,1.0)"));

    assertFalse(iter.hasNext());

    // Check output
    pigServer.registerQuery("N = LOAD '" + outputZ + "' as (a0: double, a1: double);");

    iter = pigServer.openIterator("N");

    t = iter.next();
    assertTrue(t.toString().equals("(8.0,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(13.0,40.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(23.0,60.0)"));

    assertFalse(iter.hasNext());

    // Non batch mode
    iter = pigServer.openIterator("Y");

    t = iter.next();
    assertTrue(t.toString().equals("(3,0.25)"));

    t = iter.next();
    assertTrue(t.toString().equals("(6,0.5)"));

    t = iter.next();
    assertTrue(t.toString().equals("(9,1.0)"));

    assertFalse(iter.hasNext());

    // Check in non-batch mode
    iter = pigServer.openIterator("Z");

    t = iter.next();
    assertTrue(t.toString().equals("(8.0,20.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(13.0,40.0)"));

    t = iter.next();
    assertTrue(t.toString().equals("(23.0,60.0)"));

    assertFalse(iter.hasNext());
  }