Ejemplo n.º 1
0
  /**
   * Regression test for PIG-1714.
   *
   * <p>Runs a script that enables BZip2 output compression and stores the same relation twice,
   * once into {@code output3.bz2} and once into {@code output3}, then asserts that both locations
   * contain a non-empty {@code part-m-00000.bz2} file.
   *
   * @throws Exception if creating the input, running the script, or inspecting the output fails
   */
  @Test
  public void testBzipStoreInMultiQuery3() throws Exception {
    String[] inputData = new String[] {"1\t2\r3\t4"};

    String inputFileName = "input3.txt";
    Util.createInputFile(cluster, inputFileName, inputData);

    // Both the old (mapred.*) and new (mapreduce.*) property names are set.
    String inputScript =
        "set mapred.output.compress true\n"
            + "set mapreduce.output.fileoutputformat.compress true\n"
            + "set mapred.output.compression.codec org.apache.hadoop.io.compress.BZip2Codec\n"
            + "set mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.BZip2Codec\n"
            + "a = load '"
            + inputFileName
            + "';\n"
            + "store a into 'output3.bz2';\n"
            + "store a into 'output3';";

    // Write the script to a local file; close the writer even if the write fails.
    String inputScriptName = "script3.txt";
    PrintWriter pw = new PrintWriter(new FileWriter(inputScriptName));
    try {
      pw.println(inputScript);
    } finally {
      pw.close();
    }

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    // Close the script stream once the script has been registered so the file handle
    // is not leaked, whether or not registration succeeds.
    FileInputStream fis = new FileInputStream(inputScriptName);
    try {
      pig.registerScript(fis);
    } finally {
      fis.close();
    }

    FileSystem fs =
        FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    // The store without a .bz2 suffix must still be compressed.
    FileStatus stat = fs.getFileStatus(new Path("output3/part-m-00000.bz2"));
    assertTrue(stat.getLen() > 0);

    stat = fs.getFileStatus(new Path("output3.bz2/part-m-00000.bz2"));
    assertTrue(stat.getLen() > 0);
  }
Ejemplo n.º 2
0
  /**
   * Runs the {@code formatAndSortLocal.pg} Pig script in local execution mode.
   *
   * <p>The contents of {@code tmp} are moved from {@code fs} into a freshly created local
   * temporary directory, the script is run against that copy, and the results are either printed
   * to standard output (when {@code out} is {@code "-"}) or copied back to {@code fs}. A shutdown
   * hook is registered to delete the local temporary directory. On any failure while running the
   * script or printing results the stack trace is printed and the JVM exits with status 1.
   *
   * @param params script parameters; {@code "tmpdir"} and {@code "out"} are added by this method
   * @param out destination path for the results, or {@code "-"} to print them to standard output
   * @param tmp path on {@code fs} that is copied to the local temp directory and removed from
   *     {@code fs}
   * @param quiet passed through to {@code logConsole}
   * @param silent passed through to {@code logConsole} and {@code dOpts}
   * @param conf configuration that is modified in place and handed to the {@code PigServer}
   * @param queue_name value for {@code mapred.job.queue.name}
   * @param additional_jars value for {@code pig.additional.jars}
   * @param pig_tmp file whose path is used as {@code pig.logfile}
   * @param D_options options forwarded to {@code dOpts}
   * @param PIG_DIR directory containing {@code formatAndSortLocal.pg}
   * @param fs file system used to fetch {@code tmp} and to receive the final output
   * @throws IllegalArgumentException declared by the original signature
   * @throws IOException if copying between {@code fs} and the local file system fails
   */
  public void runPigLocal(
      Map<String, String> params,
      String out,
      String tmp,
      final boolean quiet,
      final boolean silent,
      Configuration conf,
      String queue_name,
      String additional_jars,
      File pig_tmp,
      ArrayList<String> D_options,
      String PIG_DIR,
      FileSystem fs)
      throws IllegalArgumentException, IOException {
    // Create temp file on local to hold data to sort
    final File local_tmp = Files.createTempDir();
    local_tmp.deleteOnExit();

    // deleteOnExit() only removes the (empty) directory itself, so a hook does the recursive
    // cleanup via delete(...).
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                new Runnable() {
                  @Override
                  public void run() {
                    try {
                      logConsole(quiet, silent, warn, "Deleting tmp files in local tmp");
                      delete(local_tmp);
                    } catch (IOException e) {
                      // Best effort: the JVM is already shutting down, so just report it.
                      e.printStackTrace();
                    }
                  }
                }));

    // Set input parameter for pig job
    params.put("tmpdir", local_tmp.toString() + "/" + tmp);

    // Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
      params.put("out", local_tmp + "/" + tmp + "/final");
      pigout = local_tmp + "/" + tmp + "/final";
    } else {
      // The script writes under the local temp dir; pigout is the final destination on fs.
      params.put("out", local_tmp + "/" + StringEscapeUtils.escapeJava(out));
      pigout = StringEscapeUtils.escapeJava(out);
    }

    // Copy the tmp folder from HDFS to the local tmp directory, and delete the remote folder
    fs.copyToLocalFile(true, new Path(tmp), new Path(local_tmp + "/" + tmp));

    try {
      logConsole(quiet, silent, info, "Running PIG Command");
      conf.set("mapred.job.queue.name", queue_name);
      conf.set("pig.additional.jars", additional_jars);
      conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
      conf.set("pig.logfile", pig_tmp.toString());
      conf.set("hadoopversion", "23");
      // PIG temp directory set to be able to delete all temp files/directories
      conf.set("pig.temp.dir", local_tmp.getAbsolutePath());

      // Setting output separator for logdriver
      String DEFAULT_OUTPUT_SEPARATOR = "\t";
      Charset UTF_8 = Charset.forName("UTF-8");
      String outputSeparator =
          conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
      byte[] bytes = outputSeparator.getBytes(UTF_8);
      if (bytes.length != 1) {
        System.err.println(
            ";******************** The output separator must be a single byte in UTF-8. ******************** ");
        System.exit(1);
      }
      // The separator is stored as the decimal value of its single UTF-8 byte.
      conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

      dOpts(D_options, silent, out, conf);

      PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
      UserGroupInformation.setConfiguration(new Configuration(false));
      pigServer.registerScript(PIG_DIR + "/formatAndSortLocal.pg", params);
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");

    if (out.equals("-")) {
      System.out.println(";#################### DATA RESULTS ####################");
      try {
        File results = new File(pigout);
        String[] resultList = results.list();
        if (resultList == null) {
          // File.list() returns null when the path is not a directory or cannot be read;
          // fail with a clear message instead of a NullPointerException.
          throw new IOException("Unable to list PIG output directory: " + pigout);
        }

        // Find the files in the directory, open and printout results
        for (int i = 0; i < resultList.length; i++) {
          // Skip checksum side files; only the part-* data files hold results.
          if (resultList[i].contains("part-") && !resultList[i].contains(".crc")) {
            BufferedReader br =
                new BufferedReader(new FileReader(new File(pigout + "/" + resultList[i])));
            try {
              String line = br.readLine();
              while (line != null) {
                System.out.println(line);
                line = br.readLine();
              }
            } finally {
              // Release the file handle even if reading fails part-way through.
              br.close();
            }
          }
        }
        System.out.println(";#################### END OF RESULTS ####################");
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
    } else {
      fs.copyFromLocalFile(
          new Path(local_tmp + "/" + StringEscapeUtils.escapeJava(out)), new Path(pigout));
      System.out.println(
          ";#################### Done. Search results are in " + pigout + " ####################");
    }
  }
Ejemplo n.º 3
0
  /**
   * Runs the {@code formatAndSort.pg} Pig script in MapReduce execution mode.
   *
   * <p>When {@code out} is {@code "-"} the script writes to {@code tmp + "/final"} and the
   * resulting {@code part-*} files are read from {@code fs} and printed to standard output;
   * otherwise the script writes to {@code out} and only a completion message is printed. On any
   * failure while running the script or printing results the stack trace is printed and the JVM
   * exits with status 1.
   *
   * @param params script parameters; {@code "tmpdir"} and {@code "out"} are added by this method
   * @param out destination path for the results, or {@code "-"} to print them to standard output
   * @param tmp temporary directory path, also used as {@code pig.temp.dir}
   * @param quiet passed through to {@code logConsole}
   * @param silent passed through to {@code logConsole} and {@code dOpts}
   * @param conf configuration that is modified in place and handed to the {@code PigServer}
   * @param queue_name value for {@code mapred.job.queue.name}
   * @param additional_jars value for {@code pig.additional.jars}
   * @param pig_tmp file whose path is used as {@code pig.logfile}
   * @param D_options options forwarded to {@code dOpts}
   * @param PIG_DIR directory containing {@code formatAndSort.pg}
   * @param fs file system from which the result files are read
   */
  public void runPigRemote(
      Map<String, String> params,
      String out,
      String tmp,
      boolean quiet,
      boolean silent,
      Configuration conf,
      String queue_name,
      String additional_jars,
      File pig_tmp,
      ArrayList<String> D_options,
      String PIG_DIR,
      FileSystem fs) {
    // Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    // Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
      params.put("out", tmp + "/final");
      pigout = tmp + "/final";
    } else {
      params.put("out", StringEscapeUtils.escapeJava(out));
      pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
      logConsole(quiet, silent, info, "Running PIG Command");
      conf.set("mapred.job.queue.name", queue_name);
      conf.set("pig.additional.jars", additional_jars);
      conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
      conf.set("pig.logfile", pig_tmp.toString());
      conf.set("hadoopversion", "23");
      // PIG temp directory set to be able to delete all temp files/directories
      conf.set("pig.temp.dir", tmp);

      // Setting output separator for logdriver
      String DEFAULT_OUTPUT_SEPARATOR = "\t";
      Charset UTF_8 = Charset.forName("UTF-8");
      String outputSeparator =
          conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
      byte[] bytes = outputSeparator.getBytes(UTF_8);
      if (bytes.length != 1) {
        logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
        System.exit(1);
      }
      // The separator is stored as the decimal value of its single UTF-8 byte.
      conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

      dOpts(D_options, silent, out, conf);

      PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
      pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
      System.out.println(";#################### DATA RESULTS ####################");
      try {
        // Create filter to find files with the results from PIG job
        PathFilter filter =
            new PathFilter() {
              @Override
              public boolean accept(Path file) {
                return file.getName().contains("part-");
              }
            };

        // Find the files in the directory, open and printout results
        FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
        for (int i = 0; i < status.length; i++) {
          BufferedReader br =
              new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
          try {
            String line = br.readLine();
            while (line != null) {
              System.out.println(line);
              line = br.readLine();
            }
          } finally {
            // Closing the reader also closes the underlying file system stream; without
            // this, one stream per part file stays open.
            br.close();
          }
        }
        System.out.println(";#################### END OF RESULTS ####################");
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
    } else {
      System.out.println(
          ";#################### Done. Search results are in " + pigout + " ####################");
    }
  }