/**
 * Records which columns the caller requires and publishes that selection through the UDF
 * context.
 *
 * <p>When the list carries fields, a boolean mask sized to the highest requested index is built
 * (entries with index {@code -1} are skipped), stored in {@code requiredFields}, serialized and
 * saved in this class's UDF properties under {@code REQUIRED_FIELDS_SIGNATURE}.
 *
 * @param requiredFieldList the requested projection; may be {@code null}
 * @return {@code null} when {@code requiredFieldList} is {@code null}; otherwise a response
 *     constructed with {@code true}
 * @throws FrontendException declared by the signature; not thrown directly by this body
 * @throws RuntimeException if the field mask cannot be serialized; the original failure is
 *     attached as the cause
 */
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
      throws FrontendException {
    if (requiredFieldList == null) {
      return null;
    }
    if (requiredFieldList.getFields() != null) {
      // First pass: find the highest index so the mask can be sized.
      int lastColumn = -1;
      for (RequiredField rf : requiredFieldList.getFields()) {
        if (rf.getIndex() > lastColumn) {
          lastColumn = rf.getIndex();
        }
      }
      // Second pass: flag every requested column; index -1 is skipped.
      requiredFields = new boolean[lastColumn + 1];
      for (RequiredField rf : requiredFieldList.getFields()) {
        if (rf.getIndex() != -1) {
          requiredFields[rf.getIndex()] = true;
        }
      }
      Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
      try {
        p.setProperty(REQUIRED_FIELDS_SIGNATURE, ObjectSerializer.serialize(requiredFields));
      } catch (Exception e) {
        // Keep the underlying exception so the real reason for the failure is not lost.
        throw new RuntimeException("Cannot serialize mRequiredColumns", e);
      }
    }

    return new RequiredFieldResponse(true);
  }
Пример #2
0
  /**
   * Registers a single load operator as the only Pig input on a job configuration.
   *
   * <p>Adapted from JobControlCompiler. TODO: refactor so both places share one implementation.
   *
   * @param physicalPlan plan that is asked for the successors of {@code poLoad}
   * @param poLoad load operator that supplies the file spec, signature and limit
   * @param jobConf configuration that receives the serialized {@code pig.inputs}, {@code
   *     pig.inpTargets}, {@code pig.inpSignatures} and {@code pig.inpLimits} entries
   * @return the {@code jobConf} instance that was passed in
   * @throws java.io.IOException declared by the signature
   */
  private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf)
      throws IOException {

    // This part appears to be unnecessary (note carried over from the original author); it is
    // kept because the load function's setLocation may have effects not visible here.
    Job scratchJob = new Job(jobConf);
    poLoad.getLoadFunc().setLocation(poLoad.getLFile().getFileName(), scratchJob);

    // Adapted from JobControlCompiler: remember the input file spec.
    ArrayList<FileSpec> inputSpecs = new ArrayList<FileSpec>();
    inputSpecs.add(poLoad.getLFile());

    ArrayList<List<OperatorKey>> targetsPerInput = Lists.newArrayList();
    ArrayList<String> signatures = Lists.newArrayList();
    ArrayList<Long> limits = Lists.newArrayList();

    // Collect the keys of the operators that consume tuples read from this input.
    List<PhysicalOperator> consumers = physicalPlan.getSuccessors(poLoad);
    List<OperatorKey> consumerKeys = Lists.newArrayList();
    if (consumers != null) {
      for (PhysicalOperator consumer : consumers) {
        consumerKeys.add(consumer.getOperatorKey());
      }
    }

    targetsPerInput.add(consumerKeys);
    signatures.add(poLoad.getSignature());
    limits.add(poLoad.getLimit());

    // Each list holds exactly one entry, all describing the same single input.
    jobConf.set("pig.inputs", ObjectSerializer.serialize(inputSpecs));
    jobConf.set("pig.inpTargets", ObjectSerializer.serialize(targetsPerInput));
    jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(signatures));
    jobConf.set("pig.inpLimits", ObjectSerializer.serialize(limits));

    return jobConf;
  }
Пример #3
0
 /**
  * Returns the resource schema for the given location, building it on first use.
  *
  * <p>If a schema is already held in {@code schema} it is returned as is. Otherwise the Phoenix
  * Pig configuration is initialized from the job's configuration, the schema is obtained from
  * {@code PhoenixPigSchemaUtil}, kept in {@code schema}, and its serialized form is stored in the
  * UDF context under {@code RESOURCE_SCHEMA_SIGNATURE}.
  *
  * @param location location string handed to the configuration initializer
  * @param job job whose configuration is used
  * @return the cached or newly generated schema
  * @throws IOException declared by the signature
  */
 @Override
 public ResourceSchema getSchema(String location, Job job) throws IOException {
   if (this.schema == null) {
     this.initializePhoenixPigConfiguration(location, job.getConfiguration());
     this.schema = PhoenixPigSchemaUtil.getResourceSchema(this.config);
     if (LOG.isDebugEnabled()) {
       final String message =
           String.format(
               "Resource Schema generated for location [%s] is [%s]", location, schema.toString());
       LOG.debug(message);
     }
     this.storeInUDFContext(
         this.contextSignature, RESOURCE_SCHEMA_SIGNATURE, ObjectSerializer.serialize(schema));
   }
   return this.schema;
 }
Пример #4
0
  /**
   * The Main-Class for the Pig Jar that will provide a shell and setup a classpath appropriate for
   * executing Jar files.
   *
   * <p>The method never returns normally: the {@code finally} block deletes temporary files,
   * dumps the performance timers and calls {@link System#exit(int)} with the computed return
   * code. The code is 0 for help, version, a completed dry run and an interactive session; the
   * value of {@code getReturnCodeForStats} after a script or command string has been executed; 1
   * for a retriable {@code PigException}; and 2 for option parse errors, non-retriable
   * {@code PigException}s and any other {@code Throwable}.
   *
   * @param args -jar can be used to add additional jar files (colon separated). - will start a
   *     shell. -e will execute the rest of the command line as if it was input to the shell.
   */
  public static void main(String args[]) {
    // Pessimistic default; every successful path below overwrites it.
    int rc = 1;
    Properties properties = new Properties();
    PropertiesUtil.loadPropertiesFromFile(properties);

    boolean verbose = false;
    boolean gruntCalled = false;
    String logFileName = null;

    try {
      BufferedReader pin = null;
      boolean debug = false;
      boolean dryrun = false;
      ArrayList<String> params = new ArrayList<String>();
      ArrayList<String> paramFiles = new ArrayList<String>();
      HashSet<String> optimizerRules = new HashSet<String>();

      CmdLineParser opts = new CmdLineParser(args);
      opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('b', "brief", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('c', "cluster", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('d', "debug", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('e', "execute", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('f', "file", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('h', "help", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('i', "version", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('j', "jar", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('l', "logfile", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('m', "param_file", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('o', "hod", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('p', "param", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('r', "dryrun", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('t', "optimizer_off", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('v', "verbose", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('w', "warning", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('x', "exectype", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('F', "stop_on_failure", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('M', "no_multiquery", CmdLineParser.ValueExpected.NOT_ACCEPTED);

      ExecMode mode = ExecMode.UNKNOWN;
      String file = null;
      ExecType execType = ExecType.MAPREDUCE;
      String execTypeString = properties.getProperty("exectype");
      if (execTypeString != null && execTypeString.length() > 0) {
        execType = PigServer.parseExecType(execTypeString);
      }
      String cluster = "local";
      String clusterConfigured = properties.getProperty("cluster");
      if (clusterConfigured != null && clusterConfigured.length() > 0) {
        cluster = clusterConfigured;
      }

      // by default warning aggregation is on
      properties.setProperty("aggregate.warning", "" + true);

      // by default multiquery optimization is on
      properties.setProperty("opt.multiquery", "" + true);

      // by default we keep going on error on the backend
      properties.setProperty("stop.on.failure", "" + false);

      char opt;
      while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) {
        switch (opt) {
          case '4':
            String log4jconf = opts.getValStr();
            if (log4jconf != null) {
              properties.setProperty(LOG4J_CONF, log4jconf);
            }
            break;

          case 'b':
            properties.setProperty(BRIEF, "true");
            break;

          case 'c':
            // Needed a way to specify the cluster to run the MR job on
            // Bug 831708 - fixed
            String clusterParameter = opts.getValStr();
            if (clusterParameter != null && clusterParameter.length() > 0) {
              cluster = clusterParameter;
            }
            break;

          case 'd':
            String logLevel = opts.getValStr();
            if (logLevel != null) {
              properties.setProperty(DEBUG, logLevel);
            }
            debug = true;
            break;

          case 'e':
            mode = ExecMode.STRING;
            break;

          case 'f':
            mode = ExecMode.FILE;
            file = opts.getValStr();
            break;

          case 'F':
            properties.setProperty("stop.on.failure", "" + true);
            break;

          case 'h':
            usage();
            // Printing help on request is a success, not a failure.
            rc = 0;
            return;

          case 'i':
            System.out.println(getVersionString());
            // Printing the version on request is a success, not a failure.
            rc = 0;
            return;

          case 'j':
            String jarsString = opts.getValStr();
            if (jarsString != null) {
              properties.setProperty(JAR, jarsString);
            }
            break;

          case 'l':
            // call to method that validates the path to the log file
            // and sets up the file to store the client side log file
            String logFileParameter = opts.getValStr();
            if (logFileParameter != null && logFileParameter.length() > 0) {
              logFileName = validateLogFile(logFileParameter, null);
            } else {
              logFileName = validateLogFile(logFileName, null);
            }
            properties.setProperty("pig.logfile", logFileName);
            break;

          case 'm':
            paramFiles.add(opts.getValStr());
            break;

          case 'M':
            // turns off multiquery optimization
            properties.setProperty("opt.multiquery", "" + false);
            break;

          case 'o':
            // TODO sgroschupf using system properties is always a very bad idea
            String gateway = System.getProperty("ssh.gateway");
            if (gateway == null || gateway.length() == 0) {
              properties.setProperty("hod.server", "local");
            } else {
              properties.setProperty("hod.server", gateway);
            }
            break;

          case 'p':
            params.add(opts.getValStr());
            break;

          case 'r':
            // currently only used for parameter substitution
            // will be extended in the future
            dryrun = true;
            break;

          case 't':
            optimizerRules.add(opts.getValStr());
            break;

          case 'v':
            properties.setProperty(VERBOSE, "" + true);
            verbose = true;
            break;

          case 'w':
            properties.setProperty("aggregate.warning", "" + false);
            break;

          case 'x':
            try {
              execType = PigServer.parseExecType(opts.getValStr());
            } catch (IOException e) {
              throw new RuntimeException("ERROR: Unrecognized exectype.", e);
            }
            break;
          default:
            // A registered option without a case above is a programming error.
            throw new AssertionError("Unhandled option " + opt);
        }
      }
      // configure logging
      configureLog4J(properties);
      // create the context with the parameter
      PigContext pigContext = new PigContext(execType, properties);

      if (logFileName == null) {
        logFileName = validateLogFile(null, null);
      }

      pigContext.getProperties().setProperty("pig.logfile", logFileName);

      if (optimizerRules.size() > 0) {
        pigContext
            .getProperties()
            .setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
      }

      LogicalPlanBuilder.classloader = pigContext.createCl(null);

      // construct the parameter substitution preprocessor
      Grunt grunt = null;
      BufferedReader in;
      String substFile = null;
      switch (mode) {
        case FILE:
          {
            // Run, using the provided file as a pig file
            in = new BufferedReader(new FileReader(file));

            // run parameter substitution preprocessor first
            substFile = file + ".substituted";
            pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun);
            if (dryrun) {
              log.info("Dry run completed. Substituted pig script is at " + substFile);
              // A completed dry run is a success.
              rc = 0;
              return;
            }

            logFileName = validateLogFile(logFileName, file);
            pigContext.getProperties().setProperty("pig.logfile", logFileName);

            // Set job name based on name of the script
            pigContext
                .getProperties()
                .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(file).getName());

            // Keep the substituted script around only when debugging.
            if (!debug) {
              new File(substFile).deleteOnExit();
            }

            grunt = new Grunt(pin, pigContext);
            gruntCalled = true;
            int results[] = grunt.exec();
            rc = getReturnCodeForStats(results);
            return;
          }

        case STRING:
          {
            // Gather up all the remaining arguments into a string and pass them into
            // grunt.
            StringBuilder sb = new StringBuilder();
            String remainders[] = opts.getRemainingArgs();
            for (int i = 0; i < remainders.length; i++) {
              if (i != 0) {
                sb.append(' ');
              }
              sb.append(remainders[i]);
            }
            in = new BufferedReader(new StringReader(sb.toString()));
            grunt = new Grunt(in, pigContext);
            gruntCalled = true;
            int results[] = grunt.exec();
            rc = getReturnCodeForStats(results);
            return;
          }

        default:
          break;
      }

      // If we're here, we don't know yet what they want.  They may have just
      // given us a jar to execute, they might have given us a pig script to
      // execute, or they might have given us a dash (or nothing) which means to
      // run grunt interactive.
      String remainders[] = opts.getRemainingArgs();
      if (remainders == null) {
        // Interactive
        mode = ExecMode.SHELL;
        ConsoleReader reader = new ConsoleReader(System.in, new OutputStreamWriter(System.out));
        reader.setDefaultPrompt("grunt> ");
        final String HISTORYFILE = ".pig_history";
        String historyFile = System.getProperty("user.home") + File.separator + HISTORYFILE;
        reader.setHistory(new History(new File(historyFile)));
        ConsoleReaderInputStream inputStream = new ConsoleReaderInputStream(reader);
        grunt = new Grunt(new BufferedReader(new InputStreamReader(inputStream)), pigContext);
        grunt.setConsoleReader(reader);
        gruntCalled = true;
        grunt.run();
        rc = 0;
        return;
      } else {
        // They have a pig script they want us to run.
        if (remainders.length > 1) {
          throw new RuntimeException(
              "You can only run one pig script " + "at a time from the command line.");
        }
        mode = ExecMode.FILE;
        in = new BufferedReader(new FileReader(remainders[0]));

        // run parameter substitution preprocessor first
        substFile = remainders[0] + ".substituted";
        pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun);
        if (dryrun) {
          log.info("Dry run completed. Substituted pig script is at " + substFile);
          // A completed dry run is a success.
          rc = 0;
          return;
        }

        logFileName = validateLogFile(logFileName, remainders[0]);
        pigContext.getProperties().setProperty("pig.logfile", logFileName);

        // Keep the substituted script around only when debugging.
        if (!debug) {
          new File(substFile).deleteOnExit();
        }

        // Set job name based on name of the script
        pigContext
            .getProperties()
            .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(remainders[0]).getName());

        grunt = new Grunt(pin, pigContext);
        gruntCalled = true;
        int[] results = grunt.exec();
        rc = getReturnCodeForStats(results);
        return;
      }

      // Per Utkarsh and Chris invocation of jar file via pig deprecated.
    } catch (ParseException e) {
      usage();
      rc = 2;
    } catch (NumberFormatException e) {
      usage();
      rc = 2;
    } catch (PigException pe) {
      if (pe.retriable()) {
        rc = 1;
      } else {
        rc = 2;
      }

      // Write the log here only when Grunt was never started.
      if (!gruntCalled) {
        LogUtils.writeLog(pe, logFileName, log, verbose);
      }
    } catch (Throwable e) {
      rc = 2;
      if (!gruntCalled) {
        LogUtils.writeLog(e, logFileName, log, verbose);
      }
    } finally {
      // clear temp files
      FileLocalizer.deleteTempFiles();
      PerformanceTimerFactory.getPerfTimerFactory().dumpTimers();
      // Runs on every path, including the early returns above.
      System.exit(rc);
    }
  }