Java PigContext.getProperties Beispiele, org.apache.pig.impl.PigContext.getProperties Java Beispiele

Beispiel #1

0

Datei anzeigen

Datei: TestBZip.java Projekt: kidaak/Hadoop-MapReduce-1

  /**
   * Stores the same relation to two locations in one batch while BZip2 output compression is
   * switched on, then checks that each location holds a non-empty {@code part-m-00000.bz2} file.
   */
  @Test
  public void testBzipStoreInMultiQuery2() throws Exception {
    final String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, new String[] {"1\t2\r3\t4"});

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    // Turn on compressed output and select the BZip2 codec for every store.
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext
        .getProperties()
        .setProperty("output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");

    // Both stores are registered in batch mode and executed together.
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pigContext.getProperties()));

    // The part file carries the .bz2 suffix whether or not the store path itself ends in .bz2.
    String[] expectedPartFiles = {"output2/part-m-00000.bz2", "output2.bz2/part-m-00000.bz2"};
    for (String partFile : expectedPartFiles) {
      FileStatus partStatus = fs.getFileStatus(new Path(partFile));
      assertTrue(partStatus.getLen() > 0);
    }
  }

Beispiel #2

0

Datei anzeigen

Datei: TezJobCompiler.java Projekt: vladistan/pig

 /**
  * Builds the {@link TezJob} for one node of a Tez plan container.
  *
  * <p>Local resources are gathered from the plan container and from the node's operator plan.
  * Files named in {@code pig.streaming.ship.files} are registered with the
  * {@code TezResourceManager}, and files named in {@code pig.streaming.cache.files} are passed to
  * {@code addCacheResources}. The DAG is then built and, when the Tez {@code CallerContext} class
  * is on the classpath, a caller context is attached to it via reflection.
  *
  * @param tezPlanNode node whose operator plan is turned into a DAG
  * @param planContainer container supplying the shared local resources
  * @return the job wrapping the freshly built DAG
  * @throws JobCreationException (error code 2017) wrapping any exception raised while building
  */
 private TezJob getJob(TezPlanContainerNode tezPlanNode, TezPlanContainer planContainer)
     throws JobCreationException {
   try {
     Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
     localResources.putAll(planContainer.getLocalResources());
     TezOperPlan tezPlan = tezPlanNode.getTezOperPlan();
     localResources.putAll(tezPlan.getExtraResources());

     // Comma-separated list of local files to ship with streaming commands.
     String shipFiles = pigContext.getProperties().getProperty("pig.streaming.ship.files");
     if (shipFiles != null) {
       for (String file : shipFiles.split(",")) {
         TezResourceManager.getInstance().addTezResource(new File(file.trim()).toURI());
       }
     }

     // Comma-separated list of files to add as cache resources.
     String cacheFiles = pigContext.getProperties().getProperty("pig.streaming.cache.files");
     if (cacheFiles != null) {
       addCacheResources(cacheFiles.split(","));
     }

     // Only the resource names are logged, so iterate over the keys.
     for (String resourceName : localResources.keySet()) {
       log.info("Local resource: " + resourceName);
     }

     DAG tezDag = buildDAG(tezPlanNode, localResources);
     tezDag.setDAGInfo(createDagInfo(TezScriptState.get().getScript()));

     // Set the Tez caller context. This is done through reflection because the API is only
     // available since Tez 0.8.1; the direct equivalent would be:
     //   CallerContext context = CallerContext.create(ATSService.CallerContext,
     //       ATSService.getPigAuditId(pigContext), ATSService.EntityType, "");
     //   tezDag.setCallerContext(context);
     Class<?> callerContextClass = null;
     try {
       callerContextClass = Class.forName("org.apache.tez.client.CallerContext");
     } catch (ClassNotFoundException ignored) {
       // Pre-Tez 0.8.1: the class does not exist, so skip setting the CallerContext.
     }
     if (callerContextClass != null) {
       Method createMethod =
           callerContextClass.getMethod(
               "create", String.class, String.class, String.class, String.class);
       // Static factory method, hence the null receiver.
       Object context =
           createMethod.invoke(
               null,
               PigATSClient.CALLER_CONTEXT,
               PigATSClient.getPigAuditId(pigContext),
               PigATSClient.ENTITY_TYPE,
               "");
       Method dagSetCallerContext =
           tezDag.getClass().getMethod("setCallerContext", context.getClass());
       dagSetCallerContext.invoke(tezDag, context);
     }

     log.info("Total estimated parallelism is " + tezPlan.getEstimatedTotalParallelism());
     return new TezJob(tezConf, tezDag, localResources, tezPlan);
   } catch (Exception e) {
     // Any failure is reported as an internal bug, with the original cause preserved.
     int errCode = 2017;
     String msg = "Internal error creating job configuration.";
     throw new JobCreationException(msg, errCode, PigException.BUG, e);
   }
 }

Beispiel #3

0

Datei anzeigen

Datei: FetchPOStoreImpl.java Projekt: nfouka/hadoop_single_node

  /**
   * Prepares the store function of {@code store} for writing.
   *
   * <p>A job context and a task attempt context are created from the Pig properties, the store
   * location is set on both, the output specs are checked, the output committer is set up for the
   * job and the task, and a record writer is obtained and handed to the store function.
   *
   * @param store the store operator whose store function is prepared
   * @return the store function of {@code store}, ready to write
   * @throws IOException if the output format fails, or if the thread is interrupted; in the
   *     latter case the interrupt status is restored before the exception is thrown
   */
  @Override
  public StoreFuncInterface createStoreFunc(POStore store) throws IOException {

    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    StoreFuncInterface storeFunc = store.getStoreFunc();
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());

    OutputFormat<?, ?> outputFormat = storeFunc.getOutputFormat();
    PigOutputFormat.setLocation(jc, store);
    context = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(context, store);

    try {
      outputFormat.checkOutputSpecs(jc);

      outputCommitter = outputFormat.getOutputCommitter(context);
      outputCommitter.setupJob(jc);
      outputCommitter.setupTask(context);
      writer = outputFormat.getRecordWriter(context);
    } catch (InterruptedException e) {
      // Wrapping would otherwise hide the interruption from callers up the stack.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
    storeFunc.prepareToWrite(writer);
    return storeFunc;
  }

Beispiel #4

0

Datei anzeigen

Datei: ConstantCalculator.java Projekt: nfouka/hadoop_single_node

 /**
  * Applies the time zone configured under {@code pig.datetime.default.tz} as the Joda-Time
  * default, remembering the previous default in {@code currentDTZ}. Does nothing when the
  * property is unset or empty.
  */
 private void setDefaultTimeZone() {
   final String configuredZoneId = pc.getProperties().getProperty("pig.datetime.default.tz");
   if (configuredZoneId == null || configuredZoneId.isEmpty()) {
     return;
   }
   // Save the current default first so it can be put back later.
   currentDTZ = DateTimeZone.getDefault();
   DateTimeZone.setDefault(DateTimeZone.forID(configuredZoneId));
 }

Beispiel #5

0

Datei anzeigen

Datei: PigGenericMapReduce.java Projekt: hrishikeshvganu/spork

    /**
     * Reduce entry point: attaches the key and its tuples to the package operator and pushes the
     * packaged output onwards through {@code processOnePackageOutput}.
     *
     * <p>The first invocation also performs one-time setup: it caches the context as the output
     * collector, wires up the reporters and the logger, and, unless running in the illustrator,
     * sets up every store.
     */
    @Override
    protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

      if (!initialized) {
        initialized = true;

        // Keep the collector around: runPipeline() uses it and may also be reached from close().
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning =
            "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter statusReporter = PigStatusReporter.getInstance();
        statusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger hadoopLogger = PigHadoopLogger.getInstance();
        hadoopLogger.setReporter(statusReporter);
        hadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(hadoopLogger);

        if (!inIllustrator) {
          for (POStore store : stores) {
            store.setStoreImpl(new MapReducePOStoreImpl(context));
            store.setUp();
          }
        }
      }

      // Decide on the packager type before attaching the input, as the original ordering did.
      final boolean joinOptimized = pack.getPkgr() instanceof JoinPackager;
      pack.attachInput(key, tupIter.iterator());

      if (!joinOptimized) {
        // Join is not optimized, so the package yields a single tuple for this key.
        processOnePackageOutput(context);
        return;
      }

      // With an optimized join, POPackage and POForeach are combined and one key can yield many
      // tuples; keep pulling until processOnePackageOutput() signals that it is done.
      boolean finished;
      do {
        finished = processOnePackageOutput(context);
      } while (!finished);
    }

Beispiel #6

0

Datei anzeigen

Datei: SchemaTupleFrontend.java Projekt: nfouka/hadoop_single_node

  /**
   * Generates all registered schema tuple classes, hands them to the generation helper for
   * shipping, and then mirrors the resulting {@code GENERATED_CLASSES_KEY} and
   * {@code LOCAL_CODE_DIR} settings from {@code conf} into the Pig properties.
   *
   * <p>Call this once code generation has been requested and the generated code has to be made
   * available to the mappers and reducers. If nothing was registered, the method only logs a
   * debug message.
   *
   * @param pigContext context whose properties receive the two settings
   * @param conf configuration the helper works on and from which the settings are read back
   */
  public static void copyAllGeneratedToDistributedCache(PigContext pigContext, Configuration conf) {
    if (stf == null) {
      LOG.debug("Nothing registered to generate.");
      return;
    }

    SchemaTupleFrontendGenHelper genHelper = new SchemaTupleFrontendGenHelper(pigContext, conf);
    genHelper.generateAll(stf.getSchemasToGenerate());
    genHelper.internalCopyAllGeneratedToDistributedCache();

    // Keep the Pig properties in sync with conf: copy a value that is present, drop one that
    // is absent.
    Properties pigProperties = pigContext.getProperties();
    for (String key : new String[] {GENERATED_CLASSES_KEY, LOCAL_CODE_DIR}) {
      String confValue = conf.get(key);
      if (confValue == null) {
        pigProperties.remove(key);
      } else {
        pigProperties.setProperty(key, confValue);
      }
    }
  }

Beispiel #7

0

Datei anzeigen

Datei: TestJobSubmission.java Projekt: scr/pig

  /**
   * Checks the requested and effective reducer counts of ORDER BY plans when reducer estimation
   * is configured with 100 bytes per reducer and a maximum of 10 reducers.
   *
   * <p>Scenarios covered, in order: a plain ORDER BY (two jobs), ORDER BY with an explicit
   * PARALLEL clause, ORDER BY over an HBase load (no estimation), and an ORDER BY preceded by a
   * projection (three jobs). The test is skipped unless the cluster runs a MapReduce exec type.
   */
  @Test
  public void testReducerNumEstimationForOrderBy() throws Exception {
    // Skip the test for Tez. Tez uses a different mechanism.
    // The equivalent test is in TestTezAutoParallelism.
    Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
    // Configure the estimation: 100 bytes per reducer, at most 10 reducers.
    pc.getProperties().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getProperties().setProperty("pig.exec.reducers.max", "10");

    String query = "a = load '/passwd';" + "b = order a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);

    MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, query);

    assertEquals(2, mrPlan.size());

    // The first job uses a single reducer for the sampling.
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    // Simulate the first job having run so estimation kicks in.
    MapReduceOper sort = mrPlan.getLeaves().get(0);
    jcc.updateMROpPlan(jobControl.getReadyJobs());
    FileLocalizer.create(sort.getQuantFile(), pc);
    jobControl = jcc.compile(mrPlan, query);

    sort = mrPlan.getLeaves().get(0);
    // Expected estimate: local passwd file size / 100 bytes per reducer, rounded up and capped
    // at 10; these constants mirror the two properties set at the top of the test.
    long reducer =
        Math.min(
            (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
            10);
    assertEquals(reducer, sort.getRequestedParallelism());

    // The second job estimates reducers.
    Util.assertParallelValues(
        -1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());

    // Use the PARALLEL keyword; it overrides the estimated reducer number.
    query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);

    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);
    assertEquals(2, sort.getRequestedParallelism());

    // The estimation does not take effect when it applies to non-DFS inputs or to files that do
    // not exist, such as HBase.
    query =
        "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
            + "b = order a by $0 ;"
            + "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);

    // The requested parallelism is -1 if users set neither default_parallel nor PARALLEL
    // and the estimation does not take effect. The MR framework will finally set it to 1.
    assertEquals(-1, sort.getRequestedParallelism());

    // Test ORDER BY with three jobs (after optimization).
    query =
        "a = load '/passwd';"
            + "b = foreach a generate $0, $1, $2;"
            + "c = order b by $0;"
            + "store c into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(3, mrPlan.size());

    // Simulate the first 2 jobs having run so estimation kicks in.
    sort = mrPlan.getLeaves().get(0);
    FileLocalizer.create(sort.getQuantFile(), pc);

    jobControl = jcc.compile(mrPlan, query);
    // Copy the local passwd file to the location that the sort job's first map-plan root loads.
    Util.copyFromLocalToCluster(
        cluster,
        "test/org/apache/pig/test/data/passwd",
        ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());

    // The first job is just a foreach with projection, a mapper-only job, so the estimate is
    // ignored.
    Util.assertParallelValues(-1, -1, -1, 0, jobControl.getWaitingJobs().get(0).getJobConf());

    jcc.updateMROpPlan(jobControl.getReadyJobs());
    jobControl = jcc.compile(mrPlan, query);
    jcc.updateMROpPlan(jobControl.getReadyJobs());

    // The second job is a sampler, which requests and gets 1 reducer.
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    jobControl = jcc.compile(mrPlan, query);
    sort = mrPlan.getLeaves().get(0);
    assertEquals(reducer, sort.getRequestedParallelism());

    // The third job is the order, which uses the estimated number of reducers.
    Util.assertParallelValues(
        -1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
  }

Beispiel #8

0

Datei anzeigen

Datei: TestJobSubmission.java Projekt: scr/pig

  /**
   * Checks the reducer counts of GROUP BY plans when reducer estimation is configured with 100
   * bytes per reducer and a maximum of 10 reducers.
   *
   * <p>Scenarios covered, in order: a plain GROUP BY (estimate applies), GROUP BY with an explicit
   * PARALLEL clause (estimate overridden), and GROUP BY over an HBase table (no estimate). A mini
   * ZooKeeper cluster and a mini HBase cluster are started for the test and are shut down in a
   * {@code finally} block so that a failing assertion does not leave them running. The test is
   * skipped unless the cluster runs a MapReduce exec type.
   */
  @Test
  public void testReducerNumEstimation() throws Exception {
    // Skip the test for Tez. Tez uses a different mechanism.
    // The equivalent test is in TestTezAutoParallelism.
    Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));

    Configuration conf = HBaseConfiguration.create(new Configuration());
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    try {
      util.startMiniHBaseCluster(1, 1);

      String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
      PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
      PhysicalPlan pp = Util.buildPp(ps, query);
      MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

      // Use the estimation: 100 bytes per reducer, at most 10 reducers.
      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");
      pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      JobControlCompiler jcc = new JobControlCompiler(pc, conf);
      JobControl jc = jcc.compile(mrPlan, "Test");
      Job job = jc.getWaitingJobs().get(0);
      // Expected estimate: local passwd file size / 100 bytes per reducer, rounded up and
      // capped at 10; these constants mirror the two properties set above.
      long reducer =
          Math.min(
              (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
              10);

      Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

      // Use the PARALLEL keyword; it overrides the estimated reducer number.
      query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
      pp = Util.buildPp(ps, query);
      mrPlan = Util.buildMRPlan(pp, pc);

      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");
      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      jcc = new JobControlCompiler(pc, conf);
      jc = jcc.compile(mrPlan, "Test");
      job = jc.getWaitingJobs().get(0);

      Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

      final byte[] columnFamily = Bytes.toBytes("pig");
      util.createTable(Bytes.toBytesBinary("test_table"), columnFamily);

      // The estimation does not take effect when it applies to non-DFS inputs or to files that
      // do not exist, such as HBase.
      query =
          "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
              + "b = group a by $0 ;"
              + "store b into 'output';";
      pp = Util.buildPp(ps, query);
      mrPlan = Util.buildMRPlan(pp, pc);

      pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
      pc.getConf().setProperty("pig.exec.reducers.max", "10");

      ConfigurationValidator.validatePigProperties(pc.getProperties());
      conf = ConfigurationUtil.toConfiguration(pc.getProperties());
      jcc = new JobControlCompiler(pc, conf);
      jc = jcc.compile(mrPlan, "Test");
      job = jc.getWaitingJobs().get(0);

      Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

      util.deleteTable(Bytes.toBytesBinary("test_table"));
    } finally {
      try {
        // In HBase 0.90.1 and above util.shutdownMiniHBaseCluster() could be used here instead.
        MiniHBaseCluster hbc = util.getHBaseCluster();
        if (hbc != null) {
          hbc.shutdown();
          hbc.join();
        }
      } finally {
        // ZooKeeper was started first, so it is stopped last, even if HBase shutdown fails.
        util.shutdownMiniZKCluster();
      }
    }
  }

Beispiel #9

0

Datei anzeigen

Datei: Main.java Projekt: rjurney/Cloud-Stenography

  /**
   * The Main-Class for the Pig Jar that will provide a shell and setup a classpath appropriate for
   * executing Jar files.
   *
   * <p>The method parses the command line, builds a {@code PigContext} from the resulting
   * properties and then runs Grunt in one of three ways: on a script file ({@code -f} or a single
   * remaining argument), on the remaining arguments joined into one string ({@code -e}), or
   * interactively when no arguments remain.
   *
   * <p>The method never returns normally: the {@code finally} block deletes temp files, dumps the
   * performance timers and calls {@code System.exit}. The exit code starts at 1, is set from the
   * Grunt results for script/string execution, to 0 after an interactive session, and to 2 on
   * parse errors, non-retriable Pig exceptions and any other throwable.
   * NOTE(review): {@code -h}, {@code -i} and a dry run return without changing the exit code, so
   * they exit with 1 - confirm whether that is intended.
   *
   * @param args -jar can be used to add additional jar files (colon separated). - will start a
   *     shell. -e will execute the rest of the command line as if it was input to the shell.
   */
  public static void main(String[] args) {
    int rc = 1;
    Properties properties = new Properties();
    PropertiesUtil.loadPropertiesFromFile(properties);

    boolean verbose = false;
    boolean gruntCalled = false;
    String logFileName = null;

    try {
      BufferedReader pin = null;
      boolean debug = false;
      boolean dryrun = false;
      // Concrete collection types are kept because they are passed to helpers whose signatures
      // are not visible here (runParamPreprocessor, ObjectSerializer.serialize).
      ArrayList<String> params = new ArrayList<String>();
      ArrayList<String> paramFiles = new ArrayList<String>();
      HashSet<String> optimizerRules = new HashSet<String>();

      CmdLineParser opts = new CmdLineParser(args);
      opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('b', "brief", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('c', "cluster", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('d', "debug", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('e', "execute", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('f', "file", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('h', "help", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('i', "version", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('j', "jar", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('l', "logfile", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('m', "param_file", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('o', "hod", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('p', "param", CmdLineParser.ValueExpected.OPTIONAL);
      opts.registerOpt('r', "dryrun", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('t', "optimizer_off", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('v', "verbose", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('w', "warning", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('x', "exectype", CmdLineParser.ValueExpected.REQUIRED);
      opts.registerOpt('F', "stop_on_failure", CmdLineParser.ValueExpected.NOT_ACCEPTED);
      opts.registerOpt('M', "no_multiquery", CmdLineParser.ValueExpected.NOT_ACCEPTED);

      ExecMode mode = ExecMode.UNKNOWN;
      String file = null;

      // Defaults come from the properties file and may be overridden on the command line.
      ExecType execType = ExecType.MAPREDUCE;
      String execTypeString = properties.getProperty("exectype");
      if (execTypeString != null && execTypeString.length() > 0) {
        execType = PigServer.parseExecType(execTypeString);
      }
      String cluster = "local";
      String clusterConfigured = properties.getProperty("cluster");
      if (clusterConfigured != null && clusterConfigured.length() > 0) {
        cluster = clusterConfigured;
      }

      // by default warning aggregation is on
      properties.setProperty("aggregate.warning", "true");

      // by default multiquery optimization is on
      properties.setProperty("opt.multiquery", "true");

      // by default we keep going on error on the backend
      properties.setProperty("stop.on.failure", "false");

      char opt;
      while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) {
        switch (opt) {
          case '4':
            String log4jconf = opts.getValStr();
            if (log4jconf != null) {
              properties.setProperty(LOG4J_CONF, log4jconf);
            }
            break;

          case 'b':
            properties.setProperty(BRIEF, "true");
            break;

          case 'c':
            // Needed a way to specify the cluster to run the MR job on
            // Bug 831708 - fixed
            String clusterParameter = opts.getValStr();
            if (clusterParameter != null && clusterParameter.length() > 0) {
              cluster = clusterParameter;
            }
            break;

          case 'd':
            String logLevel = opts.getValStr();
            if (logLevel != null) {
              properties.setProperty(DEBUG, logLevel);
            }
            debug = true;
            break;

          case 'e':
            mode = ExecMode.STRING;
            break;

          case 'f':
            mode = ExecMode.FILE;
            file = opts.getValStr();
            break;

          case 'F':
            properties.setProperty("stop.on.failure", "true");
            break;

          case 'h':
            usage();
            return;

          case 'i':
            System.out.println(getVersionString());
            return;

          case 'j':
            String jarsString = opts.getValStr();
            if (jarsString != null) {
              properties.setProperty(JAR, jarsString);
            }
            break;

          case 'l':
            // call to method that validates the path to the log file
            // and sets up the file to store the client side log file
            String logFileParameter = opts.getValStr();
            if (logFileParameter != null && logFileParameter.length() > 0) {
              logFileName = validateLogFile(logFileParameter, null);
            } else {
              logFileName = validateLogFile(logFileName, null);
            }
            properties.setProperty("pig.logfile", logFileName);
            break;

          case 'm':
            paramFiles.add(opts.getValStr());
            break;

          case 'M':
            // turns off multiquery optimization
            properties.setProperty("opt.multiquery", "false");
            break;

          case 'o':
            // TODO sgroschupf using system properties is always a very bad idea
            String gateway = System.getProperty("ssh.gateway");
            if (gateway == null || gateway.length() == 0) {
              properties.setProperty("hod.server", "local");
            } else {
              properties.setProperty("hod.server", gateway);
            }
            break;

          case 'p':
            params.add(opts.getValStr());
            break;

          case 'r':
            // currently only used for parameter substitution
            // will be extended in the future
            dryrun = true;
            break;

          case 't':
            optimizerRules.add(opts.getValStr());
            break;

          case 'v':
            properties.setProperty(VERBOSE, "true");
            verbose = true;
            break;

          case 'w':
            properties.setProperty("aggregate.warning", "false");
            break;

          case 'x':
            try {
              execType = PigServer.parseExecType(opts.getValStr());
            } catch (IOException e) {
              throw new RuntimeException("ERROR: Unrecognized exectype.", e);
            }
            break;

          default:
            // Every registered option must have a case above.
            throw new AssertionError("Unhandled option " + opt);
        }
      }
      // configure logging
      configureLog4J(properties);
      // create the context with the parameter
      PigContext pigContext = new PigContext(execType, properties);

      if (logFileName == null) {
        logFileName = validateLogFile(null, null);
      }

      pigContext.getProperties().setProperty("pig.logfile", logFileName);

      if (optimizerRules.size() > 0) {
        pigContext
            .getProperties()
            .setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
      }

      LogicalPlanBuilder.classloader = pigContext.createCl(null);

      // construct the parameter substitution preprocessor
      Grunt grunt = null;
      BufferedReader in;
      String substFile = null;
      switch (mode) {
        case FILE:
          {
            // Run, using the provided file as a pig file
            in = new BufferedReader(new FileReader(file));

            // run parameter substitution preprocessor first
            substFile = file + ".substituted";
            pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun);
            if (dryrun) {
              log.info("Dry run completed. Substituted pig script is at " + substFile);
              return;
            }

            logFileName = validateLogFile(logFileName, file);
            pigContext.getProperties().setProperty("pig.logfile", logFileName);

            // Set job name based on name of the script
            pigContext
                .getProperties()
                .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(file).getName());

            // The substituted script is kept only when debugging.
            if (!debug) {
              new File(substFile).deleteOnExit();
            }

            grunt = new Grunt(pin, pigContext);
            gruntCalled = true;
            int[] results = grunt.exec();
            rc = getReturnCodeForStats(results);
            return;
          }

        case STRING:
          {
            // Gather up all the remaining arguments into a string and pass them into
            // grunt.
            StringBuilder sb = new StringBuilder();
            String[] remainders = opts.getRemainingArgs();
            for (int i = 0; i < remainders.length; i++) {
              if (i != 0) {
                sb.append(' ');
              }
              sb.append(remainders[i]);
            }
            in = new BufferedReader(new StringReader(sb.toString()));
            grunt = new Grunt(in, pigContext);
            gruntCalled = true;
            int[] results = grunt.exec();
            rc = getReturnCodeForStats(results);
            return;
          }

        default:
          break;
      }

      // If we're here, we don't know yet what they want.  They may have just
      // given us a jar to execute, they might have given us a pig script to
      // execute, or they might have given us a dash (or nothing) which means to
      // run grunt interactive.
      String[] remainders = opts.getRemainingArgs();
      if (remainders == null) {
        // Interactive
        mode = ExecMode.SHELL;
        ConsoleReader reader = new ConsoleReader(System.in, new OutputStreamWriter(System.out));
        reader.setDefaultPrompt("grunt> ");
        final String HISTORYFILE = ".pig_history";
        String historyFile = System.getProperty("user.home") + File.separator + HISTORYFILE;
        reader.setHistory(new History(new File(historyFile)));
        ConsoleReaderInputStream inputStream = new ConsoleReaderInputStream(reader);
        grunt = new Grunt(new BufferedReader(new InputStreamReader(inputStream)), pigContext);
        grunt.setConsoleReader(reader);
        gruntCalled = true;
        grunt.run();
        rc = 0;
        return;
      } else {
        // They have a pig script they want us to run.
        if (remainders.length > 1) {
          throw new RuntimeException(
              "You can only run one pig script " + "at a time from the command line.");
        }
        mode = ExecMode.FILE;
        in = new BufferedReader(new FileReader(remainders[0]));

        // run parameter substitution preprocessor first
        substFile = remainders[0] + ".substituted";
        pin = runParamPreprocessor(in, params, paramFiles, substFile, debug || dryrun);
        if (dryrun) {
          log.info("Dry run completed. Substituted pig script is at " + substFile);
          return;
        }

        logFileName = validateLogFile(logFileName, remainders[0]);
        pigContext.getProperties().setProperty("pig.logfile", logFileName);

        // The substituted script is kept only when debugging.
        if (!debug) {
          new File(substFile).deleteOnExit();
        }

        // Set job name based on name of the script
        pigContext
            .getProperties()
            .setProperty(PigContext.JOB_NAME, "PigLatin:" + new File(remainders[0]).getName());

        grunt = new Grunt(pin, pigContext);
        gruntCalled = true;
        int[] results = grunt.exec();
        rc = getReturnCodeForStats(results);
        return;
      }

      // Per Utkarsh and Chris, invocation of a jar file via pig is deprecated.
    } catch (ParseException e) {
      usage();
      rc = 2;
    } catch (NumberFormatException e) {
      usage();
      rc = 2;
    } catch (PigException pe) {
      if (pe.retriable()) {
        rc = 1;
      } else {
        rc = 2;
      }

      // Once Grunt has been started the error is not written to the log here.
      if (!gruntCalled) {
        LogUtils.writeLog(pe, logFileName, log, verbose);
      }
    } catch (Throwable e) {
      rc = 2;
      if (!gruntCalled) {
        LogUtils.writeLog(e, logFileName, log, verbose);
      }
    } finally {
      // clear temp files
      FileLocalizer.deleteTempFiles();
      PerformanceTimerFactory.getPerfTimerFactory().dumpTimers();
      System.exit(rc);
    }
  }

Beispiel #10

0

Datei anzeigen

Datei: ConstantCalculator.java Projekt: nfouka/hadoop_single_node

      /**
       * Replaces {@code op} with a {@link ConstantExpression} when its value can be computed now.
       *
       * <p>Two cases are handled. If {@code op} has successors and all of them are constant
       * expressions, the expression tree rooted at {@code op} is moved into its own plan,
       * translated to a physical plan and evaluated. If {@code op} has no successors and is a
       * {@link UserFuncExpression}, the UDF is executed with a {@code null} input. A UDF that
       * does not allow compile-time calculation is left untouched, as is any other expression.
       *
       * <p>During evaluation the Pig properties are installed as the UDF job conf and the
       * configured default time zone is applied; both are undone in {@code finally} blocks so
       * that a failing evaluation does not leave them changed.
       *
       * @param op the expression to fold
       * @throws FrontendException wrapping any {@code ExecException} or {@code IOException}
       *     raised while evaluating
       */
      @Override
      protected void execute(LogicalExpression op) throws FrontendException {
        if (op instanceof UserFuncExpression) {
          UserFuncExpression udf = (UserFuncExpression) op;
          if (!udf.getEvalFunc().allowCompileTimeCalculation()) {
            return;
          }
        }
        boolean valSet = false;
        Object val = null;
        if (currentWalker.getPlan().getSuccessors(op) != null) {
          // If has successors and all successors are constant, calculate the constant
          for (Operator succ : currentWalker.getPlan().getSuccessors(op)) {
            if (!(succ instanceof ConstantExpression)) {
              return;
            }
          }
          // All successors are constant, calculate the value
          OperatorPlan expLogicalPlan = new LogicalExpressionPlan();
          ((BaseOperatorPlan) currentWalker.getPlan())
              .moveTree(op, (BaseOperatorPlan) expLogicalPlan);
          PhysicalPlan expPhysicalPlan = new PhysicalPlan();
          Map<Operator, PhysicalOperator> logToPhyMap = new HashMap<Operator, PhysicalOperator>();
          PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(expLogicalPlan);

          // Save the old walker and use childWalker as current Walker
          pushWalker(childWalker);
          ExpToPhyTranslationVisitor expTranslationVisitor =
              new ExpToPhyTranslationVisitor(
                  expLogicalPlan, childWalker, currentOp, expPhysicalPlan, logToPhyMap);
          expTranslationVisitor.visit();
          popWalker();
          PhysicalOperator root = expPhysicalPlan.getLeaves().get(0);
          try {
            UDFContext.getUDFContext()
                .addJobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), true));
            try {
              PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
              PhysicalOperator.setPigLogger(pigHadoopLogger);
              setDefaultTimeZone();
              try {
                val = root.getNext(root.getResultType()).result;
              } finally {
                // The default time zone is global state; restore it even on failure.
                restoreDefaultTimeZone();
              }
            } finally {
              UDFContext.getUDFContext().addJobConf(null);
            }
          } catch (ExecException e) {
            throw new FrontendException(e);
          }
          valSet = true;
        } else if (op instanceof UserFuncExpression) {
          // If solo UDF, calculate UDF
          UserFuncExpression udf = (UserFuncExpression) op;
          try {
            UDFContext.getUDFContext()
                .addJobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), true));
            try {
              setDefaultTimeZone();
              try {
                val = udf.getEvalFunc().exec(null);
              } finally {
                // The default time zone is global state; restore it even on failure.
                restoreDefaultTimeZone();
              }
            } finally {
              UDFContext.getUDFContext().addJobConf(null);
            }
          } catch (IOException e) {
            throw new FrontendException(e);
          }
          valSet = true;
        }
        if (valSet) {
          // Swap the evaluated expression for a constant carrying the computed value.
          ConstantExpression constantExpr;
          constantExpr = new ConstantExpression(currentWalker.getPlan(), val);
          constantExpr.inheritSchema(op);
          currentWalker.getPlan().replace(op, constantExpr);
        }
      }