// See PIG-1714 @Test public void testBzipStoreInMultiQuery3() throws Exception { String[] inputData = new String[] {"1\t2\r3\t4"}; String inputFileName = "input3.txt"; Util.createInputFile(cluster, inputFileName, inputData); String inputScript = "set mapred.output.compress true\n" + "set mapreduce.output.fileoutputformat.compress true\n" + "set mapred.output.compression.codec org.apache.hadoop.io.compress.BZip2Codec\n" + "set mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.BZip2Codec\n" + "a = load '" + inputFileName + "';\n" + "store a into 'output3.bz2';\n" + "store a into 'output3';"; String inputScriptName = "script3.txt"; PrintWriter pw = new PrintWriter(new FileWriter(inputScriptName)); pw.println(inputScript); pw.close(); PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); FileInputStream fis = new FileInputStream(inputScriptName); pig.registerScript(fis); FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties())); FileStatus stat = fs.getFileStatus(new Path("output3/part-m-00000.bz2")); assertTrue(stat.getLen() > 0); stat = fs.getFileStatus(new Path("output3.bz2/part-m-00000.bz2")); assertTrue(stat.getLen() > 0); }
// Run Pig Locally public void runPigLocal( Map<String, String> params, String out, String tmp, final boolean quiet, final boolean silent, Configuration conf, String queue_name, String additional_jars, File pig_tmp, ArrayList<String> D_options, String PIG_DIR, FileSystem fs) throws IllegalArgumentException, IOException { // Create temp file on local to hold data to sort final File local_tmp = Files.createTempDir(); local_tmp.deleteOnExit(); Runtime.getRuntime() .addShutdownHook( new Thread( new Runnable() { @Override public void run() { try { logConsole(quiet, silent, warn, "Deleting tmp files in local tmp"); delete(local_tmp); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } })); // Set input parameter for pig job params.put("tmpdir", local_tmp.toString() + "/" + tmp); // Check for an out of '-', meaning write to stdout String pigout; if (out.equals("-")) { params.put("out", local_tmp + "/" + tmp + "/final"); pigout = local_tmp + "/" + tmp + "/final"; } else { params.put("out", local_tmp + "/" + StringEscapeUtils.escapeJava(out)); pigout = StringEscapeUtils.escapeJava(out); } // Copy the tmp folder from HDFS to the local tmp directory, and delete the remote folder fs.copyToLocalFile(true, new Path(tmp), new Path(local_tmp + "/" + tmp)); try { logConsole(quiet, silent, info, "Running PIG Command"); conf.set("mapred.job.queue.name", queue_name); conf.set("pig.additional.jars", additional_jars); conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000)); conf.set("pig.logfile", pig_tmp.toString()); conf.set("hadoopversion", "23"); // PIG temp directory set to be able to delete all temp files/directories conf.set("pig.temp.dir", local_tmp.getAbsolutePath()); // Setting output separator for logdriver String DEFAULT_OUTPUT_SEPARATOR = "\t"; Charset UTF_8 = Charset.forName("UTF-8"); String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = 
outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { System.err.println( ";******************** The output separator must be a single byte in UTF-8. ******************** "); System.exit(1); } conf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); dOpts(D_options, silent, out, conf); PigServer pigServer = new PigServer(ExecType.LOCAL, conf); UserGroupInformation.setConfiguration(new Configuration(false)); pigServer.registerScript(PIG_DIR + "/formatAndSortLocal.pg", params); } catch (Exception e) { e.printStackTrace(); System.exit(1); } logConsole(quiet, silent, warn, "PIG Job Completed."); if (out.equals("-")) { System.out.println(";#################### DATA RESULTS ####################"); try { File results = new File(pigout); String[] resultList = results.list(); // Find the files in the directory, open and printout results for (int i = 0; i < resultList.length; i++) { if (resultList[i].contains("part-") && !resultList[i].contains(".crc")) { BufferedReader br = new BufferedReader(new FileReader(new File(pigout + "/" + resultList[i]))); String line; line = br.readLine(); while (line != null) { System.out.println(line); line = br.readLine(); } br.close(); } } System.out.println(";#################### END OF RESULTS ####################"); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } else { fs.copyFromLocalFile( new Path(local_tmp + "/" + StringEscapeUtils.escapeJava(out)), new Path(pigout)); System.out.println( ";#################### Done. Search results are in " + pigout + " ####################"); } }
// Run Pig Remotely
/**
 * Runs the {@code formatAndSort.pg} script with a MapReduce-mode {@code PigServer} and then
 * either prints the part files found under {@code tmp + "/final"} on {@code fs} to stdout
 * (when {@code out} is {@code "-"}) or reports where the results were written.
 *
 * <p>The process is terminated with {@code System.exit(1)} if the output separator is not a
 * single UTF-8 byte, if the Pig run throws, or if the results cannot be read.
 *
 * @param params script parameters; this method adds the {@code tmpdir} and {@code out} entries
 * @param out output location, or {@code "-"} to print the results to stdout
 * @param tmp temp folder passed to the script and used as {@code pig.temp.dir}
 * @param quiet forwarded to {@code logConsole}
 * @param silent forwarded to {@code logConsole} and {@code dOpts}
 * @param conf configuration handed to the {@code PigServer}; modified by this method
 * @param queue_name value stored under {@code mapred.job.queue.name}
 * @param additional_jars value stored under {@code pig.additional.jars}
 * @param pig_tmp file whose path is stored under {@code pig.logfile}
 * @param D_options options forwarded to {@code dOpts}
 * @param PIG_DIR directory that contains {@code formatAndSort.pg}
 * @param fs file system the results are read from when printing to stdout
 */
public void runPigRemote(
    Map<String, String> params,
    String out,
    String tmp,
    boolean quiet,
    boolean silent,
    Configuration conf,
    String queue_name,
    String additional_jars,
    File pig_tmp,
    ArrayList<String> D_options,
    String PIG_DIR,
    FileSystem fs) {
  // Set input parameter for pig job - calling Pig directly
  params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

  // Check for an out of '-', meaning write to stdout
  String pigout;
  if (out.equals("-")) {
    params.put("out", tmp + "/final");
    pigout = tmp + "/final";
  } else {
    params.put("out", StringEscapeUtils.escapeJava(out));
    pigout = StringEscapeUtils.escapeJava(out);
  }

  try {
    logConsole(quiet, silent, info, "Running PIG Command");
    conf.set("mapred.job.queue.name", queue_name);
    conf.set("pig.additional.jars", additional_jars);
    conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
    conf.set("pig.logfile", pig_tmp.toString());
    conf.set("hadoopversion", "23");
    // PIG temp directory set to be able to delete all temp files/directories
    conf.set("pig.temp.dir", tmp);

    // Setting output separator for logdriver
    String DEFAULT_OUTPUT_SEPARATOR = "\t";
    Charset UTF_8 = Charset.forName("UTF-8");
    String outputSeparator =
        conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
    byte[] bytes = outputSeparator.getBytes(UTF_8);
    if (bytes.length != 1) {
      logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
      System.exit(1);
    }
    // The separator is stored back as the decimal value of its single byte.
    conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

    dOpts(D_options, silent, out, conf);

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
    pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(1);
  }

  logConsole(quiet, silent, warn, "PIG Job Completed.");

  if (out.equals("-")) {
    System.out.println(";#################### DATA RESULTS ####################");
    try {
      // Create filter to find files with the results from PIG job
      PathFilter filter =
          new PathFilter() {
            public boolean accept(Path file) {
              return file.getName().contains("part-");
            }
          };

      // Find the files in the directory, open and printout results
      FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
      for (int i = 0; i < status.length; i++) {
        BufferedReader br =
            new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
        try {
          String line = br.readLine();
          while (line != null) {
            System.out.println(line);
            line = br.readLine();
          }
        } finally {
          // Closing the reader also closes the stream opened on fs, so part files do not
          // each leave an open stream behind.
          br.close();
        }
      }
      System.out.println(";#################### END OF RESULTS ####################");
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(1);
    }
  } else {
    System.out.println(
        ";#################### Done. Search results are in "
            + pigout
            + " ####################");
  }
}