Пример #1
0
  /**
   * Verifies that a scalar alias can be referenced inside a FILTER condition (see PIG-1434).
   *
   * <p>The average of the second column (5, 10, 20, 12, 8) is 11, so only the rows whose second
   * field is greater than 11 are expected, in input order.
   */
  @Test
  public void testScalarAliasesFilterClause() throws Exception {
    String[] rows = {"1\t5", "2\t10", "3\t20", "4\t12", "5\t8"};
    String tablePath = BUILD_TEST_TMP + "table_testScalarAliasesFilterClause";
    TestScalarAliases.createLocalInputFile(tablePath, rows);

    // C is the single-row relation holding the average that the filter on A refers to.
    String[] script = {
      "A = LOAD '" + tablePath + "' as (a0, a1);",
      "G = group A all;",
      "C = foreach G generate AVG(A.$1) as average;",
      "Y = filter A by a1 > C.average;"
    };
    for (String statement : script) {
      pigServer.registerQuery(statement);
    }

    Iterator<Tuple> results = pigServer.openIterator("Y");
    String[] expected = {"(3,20)", "(4,12)"};
    for (String row : expected) {
      assertTrue(results.next().toString().equals(row));
    }
    assertFalse(results.hasNext());
  }
Пример #2
0
  /**
   * Verifies that a scalar relation loaded without a schema can be projected positionally with
   * {@code $1} (see PIG-1434).
   *
   * <p>The scalar relation holds the single row (1, 5); for each of the three rows of A the
   * expression {@code 5 / scalar.$1} is expected to print as "1".
   */
  @Test
  public void testScalarWithNoSchemaDollarProj() throws Exception {
    String tablePath = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProj";
    TestScalarAliases.createLocalInputFile(tablePath, new String[] {"1\t5", "2\t10", "3\t20"});
    String scalarPath = BUILD_TEST_TMP + "table_testScalarWithNoSchemaDollarProjScalar";
    TestScalarAliases.createLocalInputFile(scalarPath, new String[] {"1\t5"});

    // Neither relation declares a schema; "scalar" is the one used as a scalar in B.
    String[] script = {
      "A = LOAD '" + tablePath + "';",
      "scalar = LOAD '" + scalarPath + "';",
      "B = foreach A generate 5 / scalar.$1;"
    };
    for (String statement : script) {
      pigServer.registerQuery(statement);
    }

    Iterator<Tuple> results = pigServer.openIterator("B");
    for (int row = 0; row < 3; row++) {
      assertTrue(results.next().get(0).toString().equals("1"));
    }
    assertFalse(results.hasNext());
  }
Пример #3
0
  /**
   * Verifies that loading a directory picks up files in nested sub-directories: the data file is
   * copied to {@code foo/bar/test.dat} on the cluster and then loaded through {@code 'foo'}.
   *
   * <p>The file holds four distinct values, so the skewed self-join is expected to yield four
   * tuples. The local and cluster files are removed even when the test fails.
   *
   * @throws IOException if writing, copying or querying the data fails
   */
  public void testRecursiveFileListing() throws IOException {
    String localInputFile = "test.dat";
    String clusterInputFile = "foo/bar/test.dat";

    try {
      PrintWriter w = new PrintWriter(new FileWriter(localInputFile));
      try {
        w.println("1");
        w.println("2");
        w.println("3");
        w.println("5");
      } finally {
        // Release the file handle even if writing fails.
        w.close();
      }

      Util.copyFromLocalToCluster(cluster, localInputFile, clusterInputFile);

      pigServer.registerQuery("a = load 'foo' as (nums:chararray);");
      pigServer.registerQuery("b = load 'foo' as (nums:chararray);");
      pigServer.registerQuery("d = join a by nums, b by nums USING 'skewed';");

      Iterator<Tuple> iter = pigServer.openIterator("d");
      int count = 0;
      while (iter.hasNext()) {
        iter.next();
        count++;
      }
      Assert.assertEquals(4, count);
    } finally {
      // Clean up in finally so a failed run does not leave data behind for later tests.
      new File(localInputFile).delete();
      Util.deleteFile(cluster, clusterInputFile);
    }
  }
Пример #4
0
  /**
   * Tests that '\n', '\r' and '\r\n' are treated as record delims when using bzip just like they
   * are when using uncompressed text.
   *
   * <p>The same records are written to an uncompressed cluster file and to a local bzip2 file
   * that is copied to the cluster; both are loaded and compared tuple by tuple.
   */
  @Test
  public void testRecordDelims() throws Exception {
    String[] inputData =
        new String[] {
          "1\t2\r3\t4", // '\r' case - this will be split into two tuples
          "5\t6\r", // '\r\n' case
          "7\t8", // '\n' case
          "9\t10\r" // '\r\n' at the end of file
        };

    // bzip compressed input
    File in = File.createTempFile("junit", ".bz2");
    String compressedInputFileName = in.getAbsolutePath();
    in.deleteOnExit();
    String clusterCompressedFilePath = Util.removeColon(compressedInputFileName);

    String unCompressedInputFileName = "testRecordDelims-uncomp.txt";
    Util.createInputFile(cluster, unCompressedInputFileName, inputData);

    try {
      // Each record gets a trailing '\n'. The stream is closed in finally so it is released
      // even when a write fails.
      CBZip2OutputStream cos = new CBZip2OutputStream(new FileOutputStream(in));
      try {
        for (String record : inputData) {
          cos.write((record + "\n").getBytes());
        }
      } finally {
        cos.close();
      }

      Util.copyFromLocalToCluster(cluster, compressedInputFileName, clusterCompressedFilePath);

      // pig script to read uncompressed input
      String script = "a = load '" + unCompressedInputFileName + "';";
      PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
      pig.registerQuery(script);
      Iterator<Tuple> it1 = pig.openIterator("a");

      // pig script to read compressed input
      script = "a = load '" + Util.encodeEscape(clusterCompressedFilePath) + "';";
      pig.registerQuery(script);
      Iterator<Tuple> it2 = pig.openIterator("a");

      while (it1.hasNext()) {
        // Report a short compressed result as an assertion failure rather than letting
        // it2.next() throw.
        Assert.assertTrue(it2.hasNext());
        Tuple t1 = it1.next();
        Tuple t2 = it2.next();
        Assert.assertEquals(t1, t2);
      }

      // The compressed side must not contain extra tuples either.
      Assert.assertFalse(it2.hasNext());

    } finally {
      in.delete();
      Util.deleteFile(cluster, unCompressedInputFileName);
      Util.deleteFile(cluster, clusterCompressedFilePath);
    }
  }
Пример #5
0
  /**
   * Feeds WKT points (geometry id, point id, position, WKT text) through the MakeSegments UDF
   * and compares the flattened output with the expected rows.
   *
   * <p>Each expected row has eight columns; columns 0 and 1 are compared as ints, 2 and 5 as
   * longs and 3, 4, 6 and 7 as doubles. The number of rows compared must equal the number of
   * expected rows.
   */
  public void testShouldWorkWithWKT() throws Exception {
    String[][] pointRows = {
      {"1", "1", "0", "POINT (0.0 0.0)"},
      {"1", "2", "1", "POINT (0.0 3.0)"},
      {"1", "3", "2", "POINT (4.0 5.0)"},
      {"1", "4", "3", "POINT (10.0 0.0)"},
      {"2", "5", "0", "POINT (5.0 6.0)"},
      {"2", "6", "1", "POINT (10.0 3.0)"},
      {"2", "7", "2", "POINT (7.0 13.0)"},
      {"3", "1", "0", "POINT (0.0 0.0)"},
      {"3", "8", "1", "POINT (10.0 10.0)"},
      {"3", "9", "2", "POINT (18.0 5.0)"},
      {"3", "1", "3", "POINT (0.0 0.0)"}
    };
    ArrayList<String[]> data = new ArrayList<String[]>();
    for (String[] row : pointRows) {
      data.add(row);
    }
    // Backslashes in the temp file path are doubled before the path is embedded in the script.
    String datafile = TestHelper.createTempFile(data, "\t").replace("\\", "\\\\");

    StringBuilder query = new StringBuilder();
    query.append("A = LOAD 'file:").append(datafile);
    query.append("' as (geom_id: int, point_id: int, point_pos: int, point);\n");
    query.append("B = ORDER A BY point_pos;");
    query.append("C = GROUP B BY geom_id;");
    query.append("D = FOREACH C GENERATE group, FLATTEN(");
    query.append(MakeSegments.class.getName());
    query.append("(B.point_id, B.point));");

    PigServer pig = new PigServer(LOCAL);
    pig.registerQuery(query.toString());
    Iterator<?> it = pig.openIterator("D");

    String[][] expectedRows = {
      {"1", "0", "1", "0.0", "0.0", "2", "0.0", "3.0"},
      {"1", "1", "2", "0.0", "3.0", "3", "4.0", "5.0"},
      {"1", "2", "3", "4.0", "5.0", "4", "10.0", "0.0"},
      {"2", "0", "5", "5.0", "6.0", "6", "10.0", "3.0"},
      {"2", "1", "6", "10.0", "3.0", "7", "7.0", "13.0"},
      {"3", "0", "1", "0.0", "0.0", "8", "10.0", "10.0"},
      {"3", "1", "8", "10.0", "10.0", "9", "18.0", "5.0"},
      {"3", "2", "9", "18.0", "5.0", "1", "0.0", "0.0"}
    };

    // "count" doubles as the index of the next expected row and the number of rows verified.
    int count = 0;
    while (it.hasNext() && count < expectedRows.length) {
      Tuple tuple = (Tuple) it.next();
      String[] expected = expectedRows[count];
      if (tuple == null) {
        break;
      }
      assertEquals(Integer.parseInt(expected[0]), tuple.get(0));
      assertEquals(Integer.parseInt(expected[1]), tuple.get(1));
      assertEquals(Long.parseLong(expected[2]), tuple.get(2));
      assertEquals(Double.parseDouble(expected[3]), tuple.get(3));
      assertEquals(Double.parseDouble(expected[4]), tuple.get(4));
      assertEquals(Long.parseLong(expected[5]), tuple.get(5));
      assertEquals(Double.parseDouble(expected[6]), tuple.get(6));
      assertEquals(Double.parseDouble(expected[7]), tuple.get(7));
      count++;
    }
    assertEquals(expectedRows.length, count);
  }
Пример #6
0
 /**
  * Checks that a GROUP statement whose input alias is not defined (B is used in its own
  * definition) is rejected with a FrontendException whose message contains "Undefined alias:".
  * The test fails if no such exception is thrown.
  */
 @Test
 public void testErrorMessageUndefinedAliasInGroupByStatement() throws Exception {
   PigServer ps = new PigServer(ExecType.LOCAL);
   ps.registerQuery(
       "A = load 'nosuchfile'  using PigStorage() as (f1:chararray,f2:chararray);");

   boolean rejected = false;
   try {
     ps.registerQuery("B = GROUP B by f1;");
   } catch (FrontendException e) {
     Assert.assertTrue(e.getMessage().contains("Undefined alias:"));
     rejected = true;
   }
   if (!rejected) {
     Assert.fail();
   }
 }
 /**
  * Stores relation A using an int key converter and a null value converter, reloads the stored
  * file with the same converters and hands the reloaded rows to {@code validateIndex}.
  *
  * @throws IOException if a query cannot be registered or executed
  */
 @Test
 public void readWriteNullValuesRead() throws IOException {
   registerLoadQuery();
   // Write to a sibling location so the original input is not overwritten.
   tempFilename += "-2";
   String storeQuery =
       String.format(
           "STORE A INTO 'file:%s' USING %s('-c %s', '-c %s');",
           tempFilename,
           SequenceFileStorage.class.getName(),
           IntWritableConverter.class.getName(),
           NullWritableConverter.class.getName());
   pigServer.registerQuery(storeQuery);

   registerLoadQuery(IntWritableConverter.class, NullWritableConverter.class, null);
   validateIndex(pigServer.openIterator("A"), 2, 0, 0);
 }
Пример #8
0
  /**
   * Runs a multi-query batch that stores the same relation into {@code output2.bz2} and {@code
   * output2} with output compression enabled and the BZip2 codec configured, then checks that
   * both locations contain a non-empty {@code part-m-00000.bz2} file.
   */
  @Test
  public void testBzipStoreInMultiQuery2() throws Exception {
    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, new String[] {"1\t2\r3\t4"});

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    // Compression is switched on through properties, so it applies to both stores.
    pig.getPigContext().getProperties().setProperty("output.compression.enabled", "true");
    pig.getPigContext()
        .getProperties()
        .setProperty("output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");

    pig.setBatchOn();
    String[] script = {
      "a = load '" + inputFileName + "';",
      "store a into 'output2.bz2';",
      "store a into 'output2';"
    };
    for (String statement : script) {
      pig.registerQuery(statement);
    }
    pig.executeBatch();

    FileSystem fs =
        FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    String[] expectedParts = {"output2/part-m-00000.bz2", "output2.bz2/part-m-00000.bz2"};
    for (String part : expectedParts) {
      assertTrue(fs.getFileStatus(new Path(part)).getLen() > 0);
    }
  }
 /**
  * Expects an IOException when a relation whose key has been wrapped in a tuple is stored with
  * IntWritableConverter as the key converter.
  *
  * @throws IOException expected by the test
  */
 @Test(expected = IOException.class)
 public void writeUnsupportedConversion() throws IOException {
   registerLoadQuery();
   // replace the key with a tuple containing the key
   pigServer.registerQuery("A = FOREACH A GENERATE TOTUPLE(key), value;");

   // the following should die because IntWritableConverter doesn't support conversion of Tuple to
   // IntWritable
   String storeQuery =
       String.format(
           "STORE A INTO 'file:%s-2' USING %s('-c %s', '-c %s');",
           tempFilename,
           SequenceFileStorage.class.getName(),
           IntWritableConverter.class.getName(),
           TextConverter.class.getName());
   pigServer.registerQuery(storeQuery);
 }
 /**
  * Loads key and value as byte arrays, stores them with GenericWritableConverter for the key
  * and TextConverter for the value, then reloads and validates. The test passes only if some
  * exception is thrown along the way.
  *
  * @throws IOException if a query cannot be registered or executed
  */
 @Test(expected = Exception.class)
 public void readByteArraysWriteByteArraysWithoutTypeRead() throws IOException {
   String byteArraySchema = "key:bytearray, value:bytearray";
   registerLoadQuery(GenericWritableConverter.class, TextConverter.class, byteArraySchema);

   tempFilename += "-2";
   String storeQuery =
       String.format(
           "STORE A INTO 'file:%s' USING %s('-c %s', '-c %s');",
           tempFilename,
           SequenceFileStorage.class.getName(),
           GenericWritableConverter.class.getName(),
           TextConverter.class.getName());
   pigServer.registerQuery(storeQuery);

   registerLoadQuery();
   validate(pigServer.openIterator("A"));
 }
 /**
  * Stores relation A with TextConverter for both key and value, reloads the result as two
  * chararray columns and validates the reloaded rows.
  *
  * @throws IOException if a query cannot be registered or executed
  */
 @Test
 public void writeTextConversion() throws IOException {
   registerLoadQuery();
   tempFilename += "-2";

   // rely on TextConverter for conversion of int to Text
   String textConverter = TextConverter.class.getName();
   String storeQuery =
       String.format(
           "STORE A INTO 'file:%s' USING %s('-c %s', '-c %s');",
           tempFilename,
           SequenceFileStorage.class.getName(),
           textConverter,
           textConverter);
   pigServer.registerQuery(storeQuery);

   String chararraySchema = "key:chararray, value:chararray";
   registerLoadQuery(TextConverter.class, TextConverter.class, chararraySchema);
   validate(pigServer.openIterator("A"));
 }
Пример #12
0
  /**
   * Loads {@code inputFileName} with a capped split size, stores it to a scratch directory and
   * checks that part files were written, then verifies the record count.
   *
   * @param inputFileName path of the input to load
   * @param expectedCount number of records {@code COUNT_STAR} is expected to report
   * @param splitSize value for {@code mapred.max.split.size}
   * @param loadFuncSpec load function spec used in the load-store script
   * @throws IOException if a file system operation or a query fails
   */
  private void testCount(
      String inputFileName, Long expectedCount, int splitSize, String loadFuncSpec)
      throws IOException {
    String outputFile = "/tmp/bz-output";
    // simple load-store script used to look at the part files produced for the input
    String loadStoreScript =
        String.format(
            "a = load '%s' using %s; store a into '%s';", inputFileName, loadFuncSpec, outputFile);
    String countScript =
        "a = load '" + inputFileName + "';b = group a all;c = foreach b generate COUNT_STAR(a);";

    // Work on a copy of the cluster properties so the split size override stays local.
    Properties props = new Properties();
    props.putAll(cluster.getProperties());
    props.setProperty("mapred.max.split.size", Integer.toString(splitSize));

    PigServer pig = new PigServer(new PigContext(ExecType.MAPREDUCE, props));
    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(props));
    fs.delete(new Path(outputFile), true);
    Util.registerMultiLineQuery(pig, loadStoreScript);

    // Count the part files written by the load-store script; at least one is required.
    // NOTE(review): the earlier comment spoke of "> 1 maps" but the check has always been
    // "> 0" - confirm which was intended.
    int partFiles = 0;
    for (FileStatus status : fs.listStatus(new Path(outputFile))) {
      boolean isPartFile = status.getPath().getName().startsWith("part");
      if (isPartFile) {
        partFiles += 1;
      }
    }
    assertEquals(true, partFiles > 0);

    // verify record count to verify we read bzip data correctly
    Util.registerMultiLineQuery(pig, countScript);
    Iterator<Tuple> counts = pig.openIterator("c");
    Long actualCount = (Long) counts.next().get(0);
    assertEquals(expectedCount, actualCount);
  }
Пример #13
0
  /**
   * Verifies scalar aliases both through a batch STORE and through a direct iterator (see
   * PIG-1434).
   *
   * <p>With three input rows and a maximum of 20 in the second column, every row is expected as
   * (a0 * 3, a1 / 20). The stored output is re-loaded as Z and must match what iterating Y
   * directly returns.
   */
  @Test
  public void testScalarAliasesBatchNobatch() throws Exception {
    String[] rows = {"1\t5", "2\t10", "3\t20"};

    String storeDir = BUILD_TEST_TMP + "table_testScalarAliasesDir";
    TestScalarAliases.deleteDirectory(new File(storeDir));
    String tablePath = BUILD_TEST_TMP + "table_testScalarAliasesBatch";
    TestScalarAliases.createLocalInputFile(tablePath, rows);

    // Batch part: C carries the scalars (count and max) used while computing Y.
    pigServer.setBatchOn();
    String[] batchScript = {
      "A = LOAD '" + tablePath + "' as (a0: long, a1: double);",
      "B = group A all;",
      "C = foreach B generate COUNT(A) as count, MAX(A.$1) as max;",
      "Y = foreach A generate (a0 * C.count), (a1 / C.max);",
      "Store Y into '" + storeDir + "';"
    };
    for (String statement : batchScript) {
      pigServer.registerQuery(statement);
    }
    pigServer.executeBatch();

    // Z reads back what the batch stored.
    pigServer.registerQuery("Z = LOAD '" + storeDir + "' as (a0: int, a1: double);");

    String[] expected = {"(3,0.25)", "(6,0.5)", "(9,1.0)"};
    for (String alias : new String[] {"Z", "Y"}) {
      Iterator<Tuple> results = pigServer.openIterator(alias);
      for (String row : expected) {
        assertTrue(results.next().toString().equals(row));
      }
      assertFalse(results.hasNext());
    }
  }
Пример #14
0
  /**
   * Runs a skewed join with {@code parallel 1} and drains the result; the test fails if any
   * exception is raised while registering or executing the join.
   *
   * @throws IOException if the LOAD statements cannot be registered
   */
  public void testSkewedJoinReducers() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    try {
      DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("C = join A by id, B by id using \"skewed\" parallel 1;");
      Iterator<Tuple> iter = pigServer.openIterator("C");

      // Only successful completion matters here; the tuples themselves are not inspected.
      while (iter.hasNext()) {
        dbfrj.add(iter.next());
      }
    } catch (Exception e) {
      // Include the exception so the failure report says what actually went wrong.
      fail("Should not throw exception, should continue execution: " + e);
    }
  }
 /**
  * Registers a load that uses FixedArgsConstructorIntWritableConverter as the key converter
  * without supplying converter arguments, then tries to read relation A. The test passes only
  * if an exception is thrown.
  *
  * @throws IOException if the load query cannot be registered or executed
  */
 @Test(expected = Exception.class)
 public void readWithMissingWritableConverterArguments() throws IOException {
   String schema = "key: int, value: chararray";
   registerLoadQuery(FixedArgsConstructorIntWritableConverter.class, TextConverter.class, schema);
   validate(pigServer.openIterator("A"));
 }
Пример #16
0
  /**
   * Joins two grouped relations on their group key, once as a skewed join ({@code parallel 5})
   * and once as a default join, and checks that both produce the same non-empty bag.
   *
   * @throws IOException if a query cannot be registered or executed
   */
  public void testSkewedJoinWithGroup() throws IOException {
    String[] setup = {
      "A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);",
      "B = LOAD '" + INPUT_FILE2 + "' as (id, name);",
      "C = GROUP A by id;",
      "D = GROUP B by id;"
    };
    for (String statement : setup) {
      pigServer.registerQuery(statement);
    }

    DataBag skewedResult = BagFactory.getInstance().newDefaultBag();
    DataBag defaultResult = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("E = join C by group, D by group using \"skewed\" parallel 5;");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      skewedResult.add(rows.next());
    }

    // Redefine E as the default join to obtain the reference result.
    pigServer.registerQuery("E = join C by group, D by group;");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      defaultResult.add(rows.next());
    }

    Assert.assertTrue(skewedResult.size() > 0 && defaultResult.size() > 0);
    Assert.assertEquals(true, TestHelper.compareBags(skewedResult, defaultResult));
  }
Пример #17
0
  /**
   * Verifies that a scalar alias can be used inside a JOIN key expression (see PIG-1434).
   *
   * <p>A has three rows, so the key {@code CONCAT('Total', (chararray)C.count)} is "Total3" for
   * every row of A and each of them is expected to join with the "Total3" row of B.
   */
  @Test
  public void testScalarAliasesJoinClause() throws Exception {
    String pathA = BUILD_TEST_TMP + "table_testScalarAliasesJoinClauseA";
    TestScalarAliases.createLocalInputFile(pathA, new String[] {"1\t5", "2\t10", "3\t20"});
    String pathB = BUILD_TEST_TMP + "table_testScalarAliasesJoinClauseB";
    TestScalarAliases.createLocalInputFile(
        pathB, new String[] {"Total3\tthree", "Total2\ttwo", "Total1\tone"});

    String[] script = {
      "A = LOAD '" + pathA + "' as (a0, a1);",
      "G = group A all;",
      "C = foreach G generate COUNT(A) as count;",
      "B = LOAD '" + pathB + "' as (b0:chararray, b1:chararray);",
      "Y = join A by CONCAT('Total', (chararray)C.count), B by $0;"
    };
    for (String statement : script) {
      pigServer.registerQuery(statement);
    }

    Iterator<Tuple> results = pigServer.openIterator("Y");
    String[] expectedRows = {"(1,5,Total3,three)", "(2,10,Total3,three)", "(3,20,Total3,three)"};

    // The helper is given Y's schema, translated from the schema reported by the server.
    Util.checkQueryOutputsAfterSortRecursive(
        results,
        expectedRows,
        org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("Y")));
  }
Пример #18
0
 /**
  * Checks that a SPLIT whose second branch negates a UDF comparison with {@code NOT (...)} is
  * accepted: registering the statement must not raise a FrontendException.
  */
 @Test
 public void testSplitWithNotEvalCondition() throws Exception {
   PigServer ps = new PigServer(ExecType.LOCAL);
   ps.registerQuery(
       "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');");
   ps.registerQuery("a = load 'nosuchfile'  using PigStorage() as (source : chararray);");

   String splitStatement =
       "SPLIT a INTO a1 IF (minelogs(source) > 0 ), a2 IF (NOT (minelogs(source)>0));";
   try {
     ps.registerQuery(splitStatement);
   } catch (FrontendException e) {
     Assert.fail(e.getMessage());
   }
 }
Пример #19
0
  /**
   * Tests the end-to-end writing and reading of a BZip file.
   *
   * <p>A local bzip2 file containing i and -i for i in 1..99 is loaded, the positive values are
   * stored to a {@code .bz2} output on the cluster, and that output is read back both as a raw
   * bzip2 stream and through Pig. Streams are closed and files removed even if the test fails.
   */
  @Test
  public void testBzipInPig() throws Exception {
    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    File in = File.createTempFile("junit", ".bz2");
    in.deleteOnExit();

    // Only the generated name is needed for the output; the file itself is removed right away.
    File out = File.createTempFile("junit", ".bz2");
    out.delete();
    String clusterOutput = Util.removeColon(out.getAbsolutePath());

    try {
      CBZip2OutputStream cos = new CBZip2OutputStream(new FileOutputStream(in));
      try {
        for (int i = 1; i < 100; i++) {
          cos.write((i + "\n" + (-i) + "\n").getBytes());
        }
      } finally {
        cos.close();
      }

      pig.registerQuery(
          "AA = load '"
              + Util.generateURI(Util.encodeEscape(in.getAbsolutePath()), pig.getPigContext())
              + "';");
      pig.registerQuery("A = foreach (group (filter AA by $0 > 0) all) generate flatten($1);");
      pig.registerQuery("store A into '" + Util.encodeEscape(clusterOutput) + "';");
      FileSystem fs =
          FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
      FSDataInputStream is = fs.open(new Path(clusterOutput + "/part-r-00000.bz2"));
      // NOTE(review): "out" was deleted above, so out.length() evaluates to 0 here - confirm
      // that 0 is the intended third constructor argument.
      CBZip2InputStream cis = new CBZip2InputStream(is, -1, out.length());
      try {
        // Just a sanity check, to make sure it was a bzip file; we
        // will do the value verification later
        assertEquals(100, cis.read(new byte[100]));
      } finally {
        cis.close();
      }

      pig.registerQuery("B = load '" + Util.encodeEscape(clusterOutput) + "';");

      Iterator<Tuple> i = pig.openIterator("B");
      HashMap<Integer, Integer> map = new HashMap<Integer, Integer>();
      while (i.hasNext()) {
        Integer val = DataType.toInteger(i.next().get(0));
        map.put(val, val);
      }

      // Exactly the values 1..99 must have survived the filter.
      assertEquals(Integer.valueOf(99), Integer.valueOf(map.size()));
      for (int j = 1; j < 100; j++) {
        assertEquals(Integer.valueOf(j), map.get(j));
      }
    } finally {
      in.delete();
      Util.deleteFile(cluster, clusterOutput);
    }
  }
 /**
  * Replaces the value of the pair with key 2 by null, stores the relation with TextConverter
  * as the value converter, and validates the reloaded data against one pair fewer than usual.
  *
  * @throws IOException if a query cannot be registered or executed
  */
 @Test
 public void readWriteUnexpectedNullValuesRead() throws IOException {
   registerLoadQuery();
   tempFilename += "-2";

   // swap last value with null; this pair should not be stored
   pigServer.registerQuery("A = FOREACH A GENERATE key, (key == 2 ? null : value) AS value;");

   String storeQuery =
       String.format(
           "STORE A INTO 'file:%s' USING %s('-c %s', '-c %s');",
           tempFilename,
           SequenceFileStorage.class.getName(),
           IntWritableConverter.class.getName(),
           TextConverter.class.getName());
   pigServer.registerQuery(storeQuery);

   registerLoadQuery();
   // validation against expected pairs will succeed, with expected number of pairs one less than
   // usual (the last pair wasn't stored due to null value)
   int expectedPairs = DATA.length - 1;
   validate(pigServer.openIterator("A"), expectedPairs);
 }
 /**
  * Loads with int and text converters but without an explicit schema (null) and checks the
  * schema reported for A: field 0 is "key" of type INTEGER, field 1 is "value" of type
  * CHARARRAY.
  *
  * @throws IOException if the load query or the schema lookup fails
  */
 @Test
 public void readWithoutSchemaTestSchema() throws IOException {
   registerLoadQuery(IntWritableConverter.class, TextConverter.class, null);
   Schema schema = pigServer.dumpSchema("A");
   Assert.assertNotNull(schema);

   Schema.FieldSchema keyField = schema.getField(0);
   Assert.assertEquals("key", keyField.alias);
   Assert.assertEquals(DataType.INTEGER, keyField.type);

   Schema.FieldSchema valueField = schema.getField(1);
   Assert.assertEquals("value", valueField.alias);
   Assert.assertEquals(DataType.CHARARRAY, valueField.type);
 }
Пример #22
0
 /**
  * Runs InputSchemaUDF over a grouped relation on a mini cluster and checks that every result
  * tuple carries the input schema string {@code {a: {(f1: chararray,f2: int)}}}.
  *
  * <p>The cluster is shut down in a finally block so a failing assertion or query does not
  * leave it running.
  *
  * @throws IOException if the input cannot be created or the script fails
  */
 public void testSchemaSerialization() throws IOException {
   MiniCluster cluster = MiniCluster.buildCluster();
   try {
     PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
     String inputFileName = "testSchemaSerialization-input.txt";
     String[] inputData = new String[] {"foo\t1", "hello\t2"};
     Util.createInputFile(cluster, inputFileName, inputData);
     String script =
         "a = load '"
             + inputFileName
             + "' as (f1:chararray, f2:int);"
             + " b = group a all; c = foreach b generate org.apache.pig.test.InputSchemaUDF(a);";
     Util.registerMultiLineQuery(pigServer, script);
     Iterator<Tuple> it = pigServer.openIterator("c");
     while (it.hasNext()) {
       Tuple t = it.next();
       Assert.assertEquals("{a: {(f1: chararray,f2: int)}}", t.get(0));
     }
   } finally {
     cluster.shutDown();
   }
 }
Пример #23
0
  /**
   * Stores a skewed join run with {@code parallel 7} and inspects the seven part files to see
   * how the join keys were distributed over them.
   *
   * <p>For every part file the first column of each line is mapped to a row index with {@code
   * key / 100 - 1} (presumably the ids are 100, 200 and 300 - verify against the input data).
   * The test passes when more than three (key, part file) combinations are non-empty.
   *
   * @throws IOException if a query, a copy from the cluster or reading a part file fails
   */
  public void testSkewedJoinKeyPartition() throws IOException {
    try {
      Util.deleteFile(cluster, "skewedjoin");
    } catch (Exception e) {
      // it is ok if directory not exist
    }

    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");

    pigServer.registerQuery("E = join A by id, B by id using \"skewed\" parallel 7;");
    pigServer.store("E", "skewedjoin");

    int[][] lineCount = new int[3][7];

    new File("skewedjoin").mkdir();
    // check how many times a key appear in each part- file
    for (int i = 0; i < 7; i++) {
      String partFile = "skewedjoin/part-r-0000" + i;
      Util.copyFromClusterToLocal(cluster, partFile, partFile);

      BufferedReader reader = new BufferedReader(new FileReader(partFile));
      try {
        String line = null;
        while ((line = reader.readLine()) != null) {
          String[] cols = line.split("\t");
          int key = Integer.parseInt(cols[0]) / 100 - 1;
          lineCount[key][i]++;
        }
      } finally {
        // Close each reader so seven file handles are not left open.
        reader.close();
      }
    }

    int fc = 0;
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 7; j++) {
        if (lineCount[i][j] > 0) {
          fc++;
        }
      }
    }
    // atleast one key should be a skewed key
    // check atleast one key should appear in more than 1 part- file
    assertTrue(fc > 3);
  }
Пример #24
0
  /**
   * Negative test for scalar aliases (see PIG-1434): referencing the whole relation C instead
   * of a projection of it must fail with an IOException whose message contains "Invalid scalar
   * projection: C".
   */
  @Test
  public void testScalarAliasesGrammarNegative() throws Exception {
    String tablePath = BUILD_TEST_TMP + "table_testScalarAliasesGrammar";
    TestScalarAliases.createLocalInputFile(tablePath, new String[] {"1\t5", "2\t10", "3\t20"});

    try {
      String[] script = {
        "A = LOAD '" + tablePath + "' as (a0: long, a1: double);",
        "B = group A all;",
        "C = foreach B generate COUNT(A);",
        // Only projections of C are supported
        "Y = foreach A generate C;"
      };
      for (String statement : script) {
        pigServer.registerQuery(statement);
      }
      pigServer.openIterator("Y");
      // Control should not reach here
      fail("Scalar projections are only supported");
    } catch (IOException expected) {
      assertTrue(expected.getMessage().contains("Invalid scalar projection: C"));
    }
  }
Пример #25
0
  /**
   * Checks that a skewed join over three relations is rejected: registering or executing the
   * join must raise an exception, otherwise the test fails.
   *
   * @throws IOException if the LOAD statements cannot be registered
   */
  public void testSkewedJoin3Way() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    pigServer.registerQuery("C = LOAD '" + INPUT_FILE3 + "' as (id, name);");

    boolean rejected = false;
    try {
      DataBag joined = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("D = join A by id, B by id, C by id using \"skewed\" parallel 5;");
      for (Iterator<Tuple> rows = pigServer.openIterator("D"); rows.hasNext(); ) {
        joined.add(rows.next());
      }
    } catch (Exception e) {
      // Any exception counts as the expected rejection.
      rejected = true;
    }

    if (!rejected) {
      fail("Should throw exception, do not support 3 way join");
    }
  }
Пример #26
0
  /**
   * Runs a skewed self-join over INPUT_FILE5 and drains the result; any exception is printed
   * and reported as a failure.
   *
   * @throws IOException if the LOAD statements cannot be registered
   */
  public void testSkewedJoinNullKeys() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    try {
      DataBag joined = BagFactory.getInstance().newDefaultBag();
      pigServer.registerQuery("C = join A by id, B by id using \"skewed\";");
      // Only successful completion matters; the joined tuples are not inspected.
      for (Iterator<Tuple> rows = pigServer.openIterator("C"); rows.hasNext(); ) {
        joined.add(rows.next());
      }
    } catch (Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
      fail("Should support null keys in skewed join");
    }
  }
Пример #27
0
  /**
   * Runs a skewed join with {@code parallel 300} and {@code pig.skewedjoin.reduce.maxtuple} set
   * to 2, and checks that it yields the same bag as the default join.
   *
   * @throws IOException if a query cannot be registered or executed
   */
  public void testSkewedJoinManyReducers() throws IOException {
    pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "2");
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE6 + "' as (id,name);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE7 + "' as (id,name);");

    DataBag skewedResult = BagFactory.getInstance().newDefaultBag();
    DataBag defaultResult = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("E = join A by id, B by id using \"skewed\" parallel 300;");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      skewedResult.add(rows.next());
    }

    // Redefine E as the default join to obtain the reference result.
    pigServer.registerQuery("E = join A by id, B by id;");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      defaultResult.add(rows.next());
    }

    Assert.assertEquals(skewedResult.size(), defaultResult.size());
    Assert.assertEquals(true, TestHelper.compareBags(skewedResult, defaultResult));
  }
Пример #28
0
  /**
   * Verifies that a scalar can be taken from a relation produced by LIMIT (see PIG-1636).
   *
   * <p>The second column sums to 15 (1 + 2 + 3 + 4 + 5), so every row is expected as (a0, a1 *
   * 15.0), in input order.
   */
  @Test
  public void testScalarAliasesLimit() throws Exception {
    String[] rows = {"a\t1", "b\t2", "c\t3", "a\t4", "c\t5"};
    String tablePath = BUILD_TEST_TMP + "table_testScalarAliasesLimit";
    TestScalarAliases.createLocalInputFile(tablePath, rows);

    // C1 is the limited relation whose "total" field is used as a scalar in Y.
    String[] script = {
      "A = LOAD '" + tablePath + "' as (a0:chararray, a1: int);",
      "G = group A all;",
      "C = foreach G generate SUM(A.$1) as total;",
      "C1 = limit C 1;",
      "Y = foreach A generate a0, a1 * (double)C1.total;"
    };
    for (String statement : script) {
      pigServer.registerQuery(statement);
    }

    Iterator<Tuple> results = pigServer.openIterator("Y");
    String[] expected = {"(a,15.0)", "(b,30.0)", "(c,45.0)", "(a,60.0)", "(c,75.0)"};
    for (String row : expected) {
      assertTrue(results.next().toString().equals(row));
    }
    assertFalse(results.hasNext());
  }
Пример #29
0
  /**
   * Replaces {@code pigServer} with a server built from the plain cluster properties and checks
   * that a skewed join on the composite key (id, name) yields the same non-empty bag as the
   * default join.
   *
   * @throws IOException if the server cannot be created or the LOAD statements fail
   */
  public void testSkewedJoinWithNoProperties() throws IOException {
    pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    try {
      DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
      DataBag dbshj = BagFactory.getInstance().newDefaultBag();
      {
        pigServer.registerQuery(
            "C = join A by (id, name), B by (id, name) using \"skewed\" parallel 5;");
        Iterator<Tuple> iter = pigServer.openIterator("C");

        while (iter.hasNext()) {
          dbfrj.add(iter.next());
        }
      }
      {
        pigServer.registerQuery("E = join A by(id, name), B by (id, name);");
        Iterator<Tuple> iter = pigServer.openIterator("E");

        while (iter.hasNext()) {
          dbshj.add(iter.next());
        }
      }
      Assert.assertTrue(dbfrj.size() > 0 && dbshj.size() > 0);
      Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));

    } catch (Exception e) {
      // getMessage() may be null; report the exception itself so its type is visible too.
      fail("Skewed join without properties failed: " + e);
    }
  }
Пример #30
0
  /**
   * Regression test for PIG-1048: a skewed join over inputs filtered down to the single key 400
   * must yield the same bag as the default join.
   *
   * @throws IOException if a query cannot be registered or executed
   */
  public void testSkewedJoinOneValue() throws IOException {
    String[] setup = {
      "A = LOAD '" + INPUT_FILE3 + "' as (id,name);",
      "B = LOAD '" + INPUT_FILE3 + "' as (id,name);",
      // Filter key with a single value
      "C = FILTER A by id == 400;",
      "D = FILTER B by id == 400;"
    };
    for (String statement : setup) {
      pigServer.registerQuery(statement);
    }

    DataBag skewedResult = BagFactory.getInstance().newDefaultBag();
    DataBag defaultResult = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("E = join C by id, D by id using \"skewed\";");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      skewedResult.add(rows.next());
    }

    // Redefine E as the default join to obtain the reference result.
    pigServer.registerQuery("E = join C by id, D by id;");
    for (Iterator<Tuple> rows = pigServer.openIterator("E"); rows.hasNext(); ) {
      defaultResult.add(rows.next());
    }

    Assert.assertEquals(skewedResult.size(), defaultResult.size());
    Assert.assertEquals(true, TestHelper.compareBags(skewedResult, defaultResult));
  }