private void addDependencies(HashSet<byte[]> deps, CommandData data) { for (byte[] dep : data.dependencies) { deps.add(dep); } for (byte[] dep : data.runbundles) { deps.add(dep); } }
/** Generates the transitive closure of the Class-Path attribute for the specified jar file. */ List<String> getJarPath(String jar) throws IOException { List<String> files = new ArrayList<String>(); files.add(jar); jarPaths.add(jar); // take out the current path String path = jar.substring(0, Math.max(0, jar.lastIndexOf('/') + 1)); // class path attribute will give us jar file name with // '/' as separators, so we need to change them to the // appropriate one before we open the jar file. JarFile rf = new JarFile(jar.replace('/', File.separatorChar)); if (rf != null) { Manifest man = rf.getManifest(); if (man != null) { Attributes attr = man.getMainAttributes(); if (attr != null) { String value = attr.getValue(Attributes.Name.CLASS_PATH); if (value != null) { StringTokenizer st = new StringTokenizer(value); while (st.hasMoreTokens()) { String ajar = st.nextToken(); if (!ajar.endsWith("/")) { // it is a jar file ajar = path.concat(ajar); /* check on cyclic dependency */ if (!jarPaths.contains(ajar)) { files.addAll(getJarPath(ajar)); } } } } } } } rf.close(); return files; }
/** Creates a patch from the two passed in files, writing the result to <code>os</code>. */ public static void createPatch(String oldPath, String newPath, OutputStream os, boolean minimal) throws IOException { JarFile2 oldJar = new JarFile2(oldPath); JarFile2 newJar = new JarFile2(newPath); try { Iterator entries; HashMap moved = new HashMap(); HashSet visited = new HashSet(); HashSet implicit = new HashSet(); HashSet moveSrc = new HashSet(); HashSet newEntries = new HashSet(); // FIRST PASS // Go through the entries in new jar and // determine which files are candidates for implicit moves // ( files that has the same filename and same content in old.jar // and new.jar ) // and for files that cannot be implicitly moved, we will either // find out whether it is moved or new (modified) entries = newJar.getJarEntries(); if (entries != null) { while (entries.hasNext()) { JarEntry newEntry = (JarEntry) entries.next(); String newname = newEntry.getName(); // Return best match of contents, will return a name match if possible String oldname = oldJar.getBestMatch(newJar, newEntry); if (oldname == null) { // New or modified entry if (_debug) { System.out.println("NEW: " + newname); } newEntries.add(newname); } else { // Content already exist - need to do a move // Should do implicit move? Yes, if names are the same, and // no move command already exist from oldJar if (oldname.equals(newname) && !moveSrc.contains(oldname)) { if (_debug) { System.out.println(newname + " added to implicit set!"); } implicit.add(newname); } else { // The 1.0.1/1.0 JarDiffPatcher cannot handle // multiple MOVE command with same src. // The work around here is if we are going to generate // a MOVE command with duplicate src, we will // instead add the target as a new file. This way // the jardiff can be applied by 1.0.1/1.0 // JarDiffPatcher also. if (!minimal && (implicit.contains(oldname) || moveSrc.contains(oldname))) { // generate non-minimal jardiff // for backward compatibility if (_debug) { System.out.println("NEW: " + newname); } newEntries.add(newname); } else { // Use newname as key, since they are unique if (_debug) { System.err.println("moved.put " + newname + " " + oldname); } moved.put(newname, oldname); moveSrc.add(oldname); } // Check if this disables an implicit 'move <oldname> <oldname>' if (implicit.contains(oldname) && minimal) { if (_debug) { System.err.println("implicit.remove " + oldname); System.err.println("moved.put " + oldname + " " + oldname); } implicit.remove(oldname); moved.put(oldname, oldname); moveSrc.add(oldname); } } } } } // if (entries != null) // SECOND PASS: <deleted files> = <oldjarnames> - <implicitmoves> - // <source of move commands> - <new or modified entries> ArrayList deleted = new ArrayList(); entries = oldJar.getJarEntries(); if (entries != null) { while (entries.hasNext()) { JarEntry oldEntry = (JarEntry) entries.next(); String oldName = oldEntry.getName(); if (!implicit.contains(oldName) && !moveSrc.contains(oldName) && !newEntries.contains(oldName)) { if (_debug) { System.err.println("deleted.add " + oldName); } deleted.add(oldName); } } } // DEBUG if (_debug) { // DEBUG: print out moved map entries = moved.keySet().iterator(); if (entries != null) { System.out.println("MOVED MAP!!!"); while (entries.hasNext()) { String newName = (String) entries.next(); String oldName = (String) moved.get(newName); System.out.println("key is " + newName + " value is " + oldName); } } // DEBUG: print out IMOVE map entries = implicit.iterator(); if (entries != null) { System.out.println("IMOVE MAP!!!"); while (entries.hasNext()) { String newName = (String) entries.next(); System.out.println("key is " + newName); } } } JarOutputStream jos = new JarOutputStream(os); // Write out all the MOVEs and REMOVEs createIndex(jos, deleted, moved); // Put in New and Modified entries entries = newEntries.iterator(); if (entries != null) { while (entries.hasNext()) { String newName = (String) entries.next(); if (_debug) { System.out.println("New File: " + newName); } writeEntry(jos, newJar.getEntryByName(newName), newJar); } } jos.finish(); jos.close(); } catch (IOException ioE) { throw ioE; } finally { try { oldJar.getJarFile().close(); } catch (IOException e1) { // ignore } try { newJar.getJarFile().close(); } catch (IOException e1) { // ignore } } // finally }
/** * The Main-Class for the Pig Jar that will provide a shell and setup a classpath appropriate for * executing Jar files. * * @param args -jar can be used to add additional jar files (colon separated). - will start a * shell. -e will execute the rest of the command line as if it was input to the shell. * @throws IOException */ public static void main(String args[]) { int rc = 1; Properties properties = new Properties(); PropertiesUtil.loadPropertiesFromFile(properties); boolean verbose = false; boolean gruntCalled = false; String logFileName = null; try { BufferedReader pin = null; boolean debug = false; boolean dryrun = false; ArrayList<String> params = new ArrayList<String>(); ArrayList<String> paramFiles = new ArrayList<String>(); HashSet<String> optimizerRules = new HashSet<String>(); CmdLineParser opts = new CmdLineParser(args); opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('b', "brief", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('c', "cluster", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('d', "debug", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('e', "execute", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('f', "file", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('h', "help", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('i', "version", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('j', "jar", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('l', "logfile", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('m', "param_file", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('o', "hod", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('p', "param", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('r', "dryrun", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('t', "optimizer_off", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('v', "verbose", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('w', "warning", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('x', "exectype", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('F', "stop_on_failure", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('M', "no_multiquery", CmdLineParser.ValueExpected.NOT_ACCEPTED); ExecMode mode = ExecMode.UNKNOWN; String file = null; ExecType execType = ExecType.MAPREDUCE; String execTypeString = properties.getProperty("exectype"); if (execTypeString != null && execTypeString.length() > 0) { execType = PigServer.parseExecType(execTypeString); } String cluster = "local"; String clusterConfigured = properties.getProperty("cluster"); if (clusterConfigured != null && clusterConfigured.length() > 0) { cluster = clusterConfigured; } // by default warning aggregation is on properties.setProperty("aggregate.warning", "" + true); // by default multiquery optimization is on properties.setProperty("opt.multiquery", "" + true); // by default we keep going on error on the backend properties.setProperty("stop.on.failure", "" + false); char opt; while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) { switch (opt) { case '4': String log4jconf = opts.getValStr(); if (log4jconf != null) { properties.setProperty(LOG4J_CONF, log4jconf); } break; case 'b': properties.setProperty(BRIEF, "true"); break; case 'c': // Needed away to specify the cluster to run the MR job on // Bug 831708 - fixed String clusterParameter = opts.getValStr(); if (clusterParameter != null && clusterParameter.length() > 0) { cluster = clusterParameter; } break; case 'd': String logLevel = opts.getValStr(); if (logLevel != null) { properties.setProperty(DEBUG, logLevel); } debug = true; break; case 'e': mode = ExecMode.STRING; break; case 'f': mode = ExecMode.FILE; file = opts.getValStr(); break; case 'F': properties.setProperty("stop.on.failure", "" + true); break; case 'h': usage(); return; case 'i': System.out.println(getVersionString()); return; case 'j': String jarsString = opts.getValStr(); if (jarsString != null) { properties.setProperty(JAR, jarsString); } break; case 'l': // call to method that validates the path to the log file // and sets up the file to store the client side log file String logFileParameter = opts.getValStr(); if (logFileParameter != null && logFileParameter.length() > 0) { logFileName = validateLogFile(logFileParameter, null); } else { logFileName = validateLogFile(logFileName, null); } properties.setProperty("pig.logfile", logFileName); break; case 'm': paramFiles.add(opts.getValStr()); break; case 'M': // turns off multiquery optimization properties.setProperty("opt.multiquery", "" + false); break; case 'o': // TODO sgroschupf using system properties is always a very bad idea String gateway = System.getProperty("ssh.gateway"); if (gateway == null || gateway.length() == 0) { properties.setProperty("hod.server", "local"); } else { properties.setProperty("hod.server", System.getProperty("ssh.gateway")); } break; case 'p': String val = opts.getValStr(); params.add(opts.getValStr()); break; case 'r': // currently only used for parameter substitution // will be extended in the future dryrun = true; break; case 't': optimizerRules.add(opts.getValStr()); break; case 'v': properties.setProperty(VERBOSE, "" + true); verbose = true; break; case 'w': properties.setProperty("aggregate.warning", "" + false); break; case 'x': try { execType = PigServer.parseExecType(opts.getValStr()); } catch (IOException e) { throw new RuntimeException("ERROR: Unrecognized exectype.", e); } break; default: { Character cc = new Character(opt); throw new AssertionError("Unhandled option " + cc.toString()); } } } // configure logging configureLog4J(properties); // create the context with the parameter PigContext pigContext = new PigContext(execType, properties); if (logFileName == null) { logFileName = validateLogFile(null, null); } pigContext.getProperties().setProperty("pig.logfile", logFileName); if (optimizerRules.size() > 0) { pigContext .getProperties() .setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules)); } LogicalPlanBuilder.classloader = pigContext.createCl(null); // construct the parameter substitution preprocessor Grunt grunt = null; BufferedReader in; String substFile = null; switch (mode) { case FILE: { // Run, using the provided file as a pig file in = new BufferedReader(new FileReader(file)); // run parameter substitution preprocessor first substFile = file + ".substituted"; pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun); if (dryrun) { log.info("Dry run completed. Substituted pig script is at " + substFile); return; } logFileName = validateLogFile(logFileName, file); pigContext.getProperties().setProperty("pig.logfile", logFileName); // Set job name based on name of the script pigContext .getProperties() .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(file).getName()); if (!debug) { new File(substFile).deleteOnExit(); } grunt = new Grunt(pin, pigContext); gruntCalled = true; int results[] = grunt.exec(); rc = getReturnCodeForStats(results); return; } case STRING: { // Gather up all the remaining arguments into a string and pass them into // grunt. StringBuffer sb = new StringBuffer(); String remainders[] = opts.getRemainingArgs(); for (int i = 0; i < remainders.length; i++) { if (i != 0) sb.append(' '); sb.append(remainders[i]); } in = new BufferedReader(new StringReader(sb.toString())); grunt = new Grunt(in, pigContext); gruntCalled = true; int results[] = grunt.exec(); rc = getReturnCodeForStats(results); return; } default: break; } // If we're here, we don't know yet what they want. They may have just // given us a jar to execute, they might have given us a pig script to // execute, or they might have given us a dash (or nothing) which means to // run grunt interactive. String remainders[] = opts.getRemainingArgs(); if (remainders == null) { // Interactive mode = ExecMode.SHELL; ConsoleReader reader = new ConsoleReader(System.in, new OutputStreamWriter(System.out)); reader.setDefaultPrompt("grunt> "); final String HISTORYFILE = ".pig_history"; String historyFile = System.getProperty("user.home") + File.separator + HISTORYFILE; reader.setHistory(new History(new File(historyFile))); ConsoleReaderInputStream inputStream = new ConsoleReaderInputStream(reader); grunt = new Grunt(new BufferedReader(new InputStreamReader(inputStream)), pigContext); grunt.setConsoleReader(reader); gruntCalled = true; grunt.run(); rc = 0; return; } else { // They have a pig script they want us to run. if (remainders.length > 1) { throw new RuntimeException( "You can only run one pig script " + "at a time from the command line."); } mode = ExecMode.FILE; in = new BufferedReader(new FileReader(remainders[0])); // run parameter substitution preprocessor first substFile = remainders[0] + ".substituted"; pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun); if (dryrun) { log.info("Dry run completed. Substituted pig script is at " + substFile); return; } logFileName = validateLogFile(logFileName, remainders[0]); pigContext.getProperties().setProperty("pig.logfile", logFileName); if (!debug) { new File(substFile).deleteOnExit(); } // Set job name based on name of the script pigContext .getProperties() .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(remainders[0]).getName()); grunt = new Grunt(pin, pigContext); gruntCalled = true; int[] results = grunt.exec(); rc = getReturnCodeForStats(results); return; } // Per Utkarsh and Chris invocation of jar file via pig depricated. } catch (ParseException e) { usage(); rc = 2; } catch (NumberFormatException e) { usage(); rc = 2; } catch (PigException pe) { if (pe.retriable()) { rc = 1; } else { rc = 2; } if (!gruntCalled) { LogUtils.writeLog(pe, logFileName, log, verbose); } } catch (Throwable e) { rc = 2; if (!gruntCalled) { LogUtils.writeLog(e, logFileName, log, verbose); } } finally { // clear temp files FileLocalizer.deleteTempFiles(); PerformanceTimerFactory.getPerfTimerFactory().dumpTimers(); System.exit(rc); } }