Example #1
  /**
   * This is similar in spirit to Hive's own SetProcessor.
   */
  public List<ConfigVariable> get_default_configuration(boolean includeHadoop) throws TException {
    // Reads the properties of a freshly constructed HiveConf and walks over them to build the
    // list of ConfigVariable objects that is returned.
    HiveConf conf = new HiveConf(BeeswaxServiceImpl.class);
    Properties p;
    // includeHadoop == true: every property known to the conf; otherwise only the ones the conf
    // reports as changed.
    if (includeHadoop) {
      p = conf.getAllProperties();
    } else {
      p = conf.getChangedProperties();
    List<ConfigVariable> ret = new ArrayList<ConfigVariable>();
    for (Entry<Object, Object> e : p.entrySet()) {
      // Keys and values are cast to String without an instanceof check.
      String key = (String) e.getKey();
      String value = (String) e.getValue();

      ConfigVariable cv = new ConfigVariable();
      // NOTE(review): nothing visible here populates cv from key/value or adds it to ret —
      // presumably those statements are missing from this excerpt; confirm against the full file.
    return ret;
Example #2
/** Implementation of BeeswaxService interface. */
public class BeeswaxServiceImpl implements BeeswaxService.Iface {
  // We wrap each query in its own little Runnable, then use the executor to run them.
  private ExecutorService executor;

  // runningQueries keeps track of the concurrently running queries, with the query id as key.
  private ConcurrentHashMap<String, RunningQueryState> runningQueries;

  // Prefix of the Desktop callback URL; notifyDone() appends the query id to it.
  private String notifyUrl;

  /** Mapping between configuration variable names and descriptions. */
  private ConfigDescriptions configDescriptions = ConfigDescriptions.get();

  // Both durations are in milliseconds; the evicter thread created in the constructor compares
  // them against System.currentTimeMillis() values. The products are evaluated in int
  // arithmetic before widening to long (both fit in an int).
  private static final long RUNNING_QUERY_LIFETIME = 7 * 24 * 60 * 60 * 1000; // 1 week
  private static final long EVICTION_INTERVAL = 3 * 60 * 60 * 1000; // 3 hours
  // Path part of notifyUrl; the constructor prepends protocol, host and port.
  private static final String NOTIFY_URL_BASE = "/beeswax/query_cb/done/";

  // Class-wide logger, named after this class.
  private static Logger LOG = Logger.getLogger(BeeswaxServiceImpl.class.getName());

  /**
   * State of a single query. To be read and modified while holding a lock on the state object.
   *
   * <p>Essentially, this annotates a Driver with a SessionState object. These go together, but
   * SessionState is accessed via a thread-local, so manual management has to happen.
   * [...] is also valid.
   */
  private class RunningQueryState {
    // Current lifecycle state; a new instance starts out as CREATED.
    private QueryState state = QueryState.CREATED;
    // Thread local used by Hive quite a bit.
    private SessionState sessionState;
    // First failure recorded by saveException(); fetch() rethrows it in the EXCEPTION state.
    private Throwable exception;
    // Hive driver, created in initialize().
    private Driver driver;
    // In-memory captures of the session's err/out streams, wired up in initialize().
    private ByteArrayOutputStream errStream = new ByteArrayOutputStream();
    private ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    // Row offset reported for the next batch; advanced by materializeResults().
    private long startRow = 0;
    // Null until initialize() creates it.
    private HiveConf hiveConf = null;
    // The Thrift query this state was created for.
    private final Query query;
    // Epoch millis of creation or of the latest fetch(); the evicter reads it via getAtime().
    private long atime = 0;
    // May be null; initialize() checks for that before asking it for an output stream.
    private LogContext logContext;
    /** The client handle, if applicable */
    private QueryHandle handle = null;

    /**
     * Create an instance with the given query and LogContext.
     *
     * @param query The Beeswax Thrift Query object.
     * @param logContext The context to associate with.
     */
    public RunningQueryState(Query query, LogContext logContext) {
      this.query = query;
      // Construction counts as the first access; fetch() refreshes this timestamp later.
      this.atime = System.currentTimeMillis();
      this.logContext = logContext;

    public String toString() {
      // Human-readable summary used in log messages: optional handle id, the access time, the
      // current state and the query text.
      return "RunningQueryState"
          // The id is left out while no handle has been attached.
          + (handle == null ? "" : " id: " + handle.id)
          + " started "
          // Formats atime with a new default-pattern SimpleDateFormat on every call.
          + new SimpleDateFormat().format(new Date(atime))
          + "; (state "
          + state
          + "); query: "
          + query.query;

    // Returns the stored access time (epoch millis, as set from System.currentTimeMillis()).
    public long getAtime() {
      return this.atime;

    // Returns the HiveConf field; it is null until initialize() has assigned it.
    public HiveConf getHiveConf() {
      return hiveConf;

    // Attaches the client handle to this state.
    public void setQueryHandle(QueryHandle handle) {
      this.handle = handle;

    // Returns the attached client handle, or null when none has been set.
    public QueryHandle getQueryHandle() {
      return handle;

    // Decodes everything captured in outStream so far as UTF-8 text.
    public String getInfoStreamAsString() {
      try {
        return outStream.toString("UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Rethrown unchecked so callers need no checked-exception handling.
        throw new RuntimeException(e);

    // Decodes everything captured in errStream so far as UTF-8 text.
    public String getErrorStreamAsString() {
      try {
        return errStream.toString("UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Rethrown unchecked so callers need no checked-exception handling.
        throw new RuntimeException(e);

    public synchronized void compile() throws BeeswaxException {
      // Moves the state to COMPILED while holding this object's monitor.
      // NOTE(review): no call to assertState() or checkedCompile() is visible before this
      // assignment — presumably dropped from this excerpt; confirm against the full file.
      state = QueryState.COMPILED;

    private void assertState(QueryState expected) {
      // Guard for state transitions: fails with IllegalStateException naming both the expected
      // and the actual state when they differ (compared by reference with !=).
      if (state != expected) {
        throw new IllegalStateException(
            String.format("Expected %s, but state is: %s", expected, state));

    /**
     * Set the state to EXCEPTION and remember the Throwable that is causing it. Then rethrow the
     * exception.
     */
    private <T extends Throwable> void throwException(T ex) throws T {
      throw ex;

    private void checkedCompile() throws BeeswaxException {
      // Two phases: first run every configuration command attached to the query, then hand the
      // query text to the driver for compilation.
      // Run through configuration commands
      for (String cmd : query.configuration) {
        // This is pretty whacky; SET and ADD get treated differently
        // than CREATE TEMPORARY FUNCTION...  The trimming logic
        // here is borrowed from CliDriver; oy.
        String cmd_trimmed = cmd.trim();
        String[] tokens = cmd_trimmed.split("\\s+");
        // cmd1 is the command with its first token (the verb) removed.
        String cmd1 = cmd_trimmed.substring(tokens[0].length()).trim();
        // The first token selects the processor.
        // NOTE(review): the result is used without a null check — confirm the factory never
        // returns null for an unknown verb.
        CommandProcessor p = CommandProcessorFactory.get(tokens[0]);
        int res;
        if (p instanceof Driver) {
          // A Driver gets the untrimmed original command, verb included.
          res = p.run(cmd).getResponseCode();
        } else {
          // Any other processor gets only the arguments after the verb.
          res = p.run(cmd1).getResponseCode();
        if (res != 0) {
          // Non-zero response code: fail with whatever was captured on the error stream.
          throwException(new RuntimeException(getErrorStreamAsString()));

      // Note that driver.compile() talks to HDFS, so it's
      // not as quick as one might think.
      int compileRes = driver.compile(query.query);
      if (compileRes != 0) {
            // NOTE(review): this exception is constructed but the enclosing call is not visible
            // in this excerpt — presumably it is passed to throwException(...); confirm.
            new BeeswaxException(getErrorStreamAsString(), this.logContext.getName(), this.handle));

    /**
     * Separate from constructor, because initialize() may need to be called in a separate thread.
     */
    synchronized void initialize() {
      // Builds the per-query Hive environment: configuration, scratch directory, driver,
      // session state and output streams; finishes by moving the state to INITIALIZED.
      this.hiveConf = new HiveConf(Driver.class);

      // Update configuration with user/group info.
      if (query.hadoop_user == null) {
        throw new RuntimeException("User must be specified.");

      // Update scratch dir (to have one per user)
      // NOTE(review): hadoop_user is concatenated into a filesystem path as-is — confirm it is
      // validated upstream (path separators, "..").
      File scratchDir = new File("/tmp/hive-beeswax-" + query.hadoop_user);
      hiveConf.set(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.getPath());
      // Create the temporary directory if necessary.
      // If mapred.job.tracker is set to local, this is used by MapRedTask.
      if (!scratchDir.isDirectory()) {
        // Warn when the path exists as a non-directory or when mkdirs() fails; not fatal.
        if (scratchDir.exists() || !scratchDir.mkdirs()) {
          LOG.warn("Could not create tmp dir:" + scratchDir);

      driver = new Driver(hiveConf);
      ClassLoader loader = hiveConf.getClassLoader();
      String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
      if (StringUtils.isNotBlank(auxJars)) {
        try {
          // auxJars is a comma-separated list; each entry is added to the class path.
          loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
        } catch (Exception e) {
          // Best effort: a failure is logged and initialization continues.
          LOG.error("Failed to add jars to class loader: " + auxJars, e);
      // NOTE(review): nothing visible here installs the extended loader (on hiveConf or the
      // thread) — presumably that statement is missing from this excerpt; confirm.
      SessionState.start(hiveConf); // this is thread-local
      this.sessionState = SessionState.get();

      // If this work has a LogContext, associate the children output to the logContext
      OutputStream lcOutStream = null;
      if (this.logContext != null) lcOutStream = this.logContext.getOutputStream();
      // NOTE(review): lcOutStream stays null without a LogContext and is still handed to
      // TeeOutputStream below — confirm that class tolerates a null branch.

      // A copy of everything goes to the LogContext.
      // In addition, stderr goes to errStream for error reporting.
      // Note that child output is explicitly tee to System.{out,err},
      // otherwise it'll be swallowed by outStream.
      this.sessionState.out = new PrintStream(new TeeOutputStream(lcOutStream, this.outStream));
      this.sessionState.err = new PrintStream(new TeeOutputStream(lcOutStream, this.errStream));
      this.sessionState.childOut =
          new PrintStream(new TeeOutputStream(System.out, sessionState.out));
      this.sessionState.childErr =
          new PrintStream(new TeeOutputStream(System.err, sessionState.err));

      this.state = QueryState.INITIALIZED;

    /**
     * Executes query. Updates state. (QueryState variable can be polled.)
     *
     * @throws BeeswaxException if the driver reports a non-zero return code
     */
    public void execute() throws BeeswaxException {
      // The monitor is taken only around the state changes, not around driver.execute(), so
      // other synchronized members can run while the driver is working.
      synchronized (this) {
        state = QueryState.RUNNING;
      int ret = driver.execute();
      try {
        synchronized (this) {
          // A zero return code from the driver means success.
          if (ret == 0) {
            state = QueryState.FINISHED;
          } else {
                // NOTE(review): the statement that wraps this exception and its remaining
                // constructor arguments are not visible in this excerpt — confirm.
                new BeeswaxException(
                    "Driver returned: " + ret + ".  Errors: " + getErrorStreamAsString(),
      } finally {
        // NOTE(review): the body of this finally block is not visible in this excerpt.

    // NOTE(review): the body of bringUp() is not visible in this excerpt, so its effect cannot
    // be documented from here — confirm against the full file.
    public void bringUp() {

    private void materializeResults(Results r, boolean startOver) throws IOException {
      // Fills r with the next batch from the driver: has_more, start_row and a fresh, empty
      // column list. startOver == true re-initializes the fetch task and resets the row offset.
      if (driver.getPlan().getFetchTask() == null) {
        // This query is never going to return anything.
        r.has_more = false;
        // NOTE(review): no early return is visible after this assignment; as written the code
        // below still runs when there is no fetch task — confirm against the full file.

      if (startOver) {
        // This is totally inappropriately reaching into internals.
        driver.getPlan().getFetchTask().initialize(hiveConf, driver.getPlan(), null);
        startRow = 0;

      ArrayList<String> v = new ArrayList<String>();
      // The driver appends rows to v and its boolean result is stored as has_more.
      r.has_more = driver.getResults(v);
      // start_row is the offset of this batch; the offset then advances by the batch size.
      r.start_row = startRow;
      startRow += v.size();
      // NOTE(review): nothing visible here copies v into r — confirm.

      r.setColumns(new ArrayList<String>());
      try {
        for (FieldSchema f : driver.getSchema().getFieldSchemas()) {
          // NOTE(review): the loop body is not visible — presumably each field name is added
          // to the column list; confirm.
      } catch (Exception e) {
        // An empty partitioned table may not have table description
        LOG.error("Error getting column names of results.", e);

    /** Get the result schema and misc metadata, in the context of SELECT. */
    public synchronized ResultsMetadata getResultMetadata() {
      Schema schema = null;
      try {
        schema = driver.getSchema();
      } catch (Exception ex) {
        LOG.error("Error getting schema for query: " + query.query, ex);

      FetchWork work = getFetchWork();
      TableDesc desc = work.getTblDesc();
      String tabledir = null;
      String tablename = null;
      String sep = null;
      if (work != null) {
        tabledir = work.getTblDir();
      if (desc != null) {
        sep =
                .getProperty(Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode);
        tablename = desc.getTableName();
      return new ResultsMetadata(schema, tabledir, tablename, sep);

    /**
     * Get the FetchWork. Only SELECTs have them.
     *
     * <p>Returns null when the driver has no plan or the plan has no fetch task. A fetch task
     * that is found is (re-)initialized with this state's HiveConf before its work is returned.
     */
    private synchronized FetchWork getFetchWork() {
      QueryPlan plan = driver.getPlan();
      FetchTask fetchTask = null;
      if (plan != null) {
        fetchTask = plan.getFetchTask();
        if (fetchTask != null) {
          // Same initialize(...) call that materializeResults() uses when starting over.
          fetchTask.initialize(hiveConf, plan, null);

      // Reached with fetchTask == null both when there is no plan and when the plan has no
      // fetch task.
      if (fetchTask == null) {
        return null;

      FetchWork work = fetchTask.getWork();
      return work;

    public synchronized QueryExplanation explain() throws BeeswaxException {
      // Turns the query into an EXPLAIN statement, executes it through the driver and returns
      // the collected output as a QueryExplanation. Note that this.query is mutated in place.
      // By manipulating the query, this will make errors harder to find.
      query.query = "EXPLAIN " + query.query;
      // NOTE(review): no initialize/compile step is visible between the rewrite above and
      // driver.execute() — presumably dropped from this excerpt; confirm.

      int ret;
      // A non-zero driver return code is treated as failure.
      if (0 != (ret = driver.execute())) {
        throwException(new RuntimeException("Failed to execute: EXPLAIN " + ret));
      StringBuilder sb = new StringBuilder();
      ArrayList<String> v = new ArrayList<String>();
      try {
        // Drain the driver batch by batch until it reports no more results.
        while (driver.getResults(v)) {
          for (String s : v) {
            // NOTE(review): the loop body is not visible — presumably each line is appended to
            // sb (and v is cleared between batches); confirm.
      } catch (IOException e) {
        throwException(new RuntimeException(e));
      } finally {
        // Don't let folks re-use the state object.
        state = QueryState.FINISHED;
      return new QueryExplanation(sb.toString());

    public Results fetch(boolean fromBeginning) throws BeeswaxException {
      this.atime = System.currentTimeMillis();
      Results r = new Results();
      // Only one person can access a running query at a time.
      synchronized (this) {
        switch (state) {
          case RUNNING:
            r.ready = false;
          case FINISHED:
            r.ready = true;
            try {
              materializeResults(r, fromBeginning);
            } catch (IOException e) {
              throw new BeeswaxException(e.toString(), logContext.getName(), handle);
          case EXCEPTION:
            if (exception instanceof BeeswaxException) {
              throw (BeeswaxException) exception;
            } else {
              throw new BeeswaxException(exception.toString(), logContext.getName(), handle);
      return r;

    /**
     * Store the first exception we see.
     *
     * <p>Switches the state to EXCEPTION and remembers {@code t}; once the state is EXCEPTION,
     * later calls leave both untouched.
     */
    private void saveException(Throwable t) {
      synchronized (this) {
        // Already failed: keep the original cause.
        if (state != QueryState.EXCEPTION) {
          state = QueryState.EXCEPTION;
          exception = t;

    /**
     * Submits this query to the given executor to be run.
     *
     * @param executor The executor that will run the query.
     * @param lc The log context passed along by the caller.
     */
    void submitTo(ExecutorService executor, final LogContext lc) throws java.io.IOException {
      // Captures the caller's UserGroupInformation here, on the submitting thread.
      // NOTE(review): presumably the worker runs the PrivilegedAction below through it; the
      // doAs call is not visible in this excerpt — confirm.
      final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

      // Alias so the anonymous classes below can refer to this state object.
      final RunningQueryState state = this;
          // NOTE(review): the executor call that receives this Runnable is not visible.
          new Runnable() {
            public void run() {
                  new PrivilegedAction<Void>() {
                    public Void run() {
                      try {
                        try {
                          // NOTE(review): the statements that run the query are not visible in
                          // this excerpt.
                        } catch (HiveException ex) {
                          // Rethrown unchecked so the outer catch below handles it too.
                          throw new RuntimeException(ex);
                      } catch (Throwable t) {
                        // Last line of defence on the worker thread: log, never propagate.
                        LOG.error("Exception while processing query", t);
                      return null;

  /**
   * Notify Desktop that this query has finished, by sending a GET request to a specific URL. We
   * expect that Desktop view to always return HTTP_OK, so that we know the notification has been
   * delivered to the right view. (We don't care whether the notification handling fails or
   * succeeds.)
   */
  void notifyDone(RunningQueryState state) {
    QueryHandle handle = state.getQueryHandle();
    if (handle == null) {
      LOG.error("Finished execution of a query without a handle: " + state.toString());

    String urlString = notifyUrl + handle.id;

    try {
      URL url = new URL(urlString);
      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
      if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
        throw new IOException("Desktop returns error: " + conn.getResponseMessage());

      LOG.debug("Notified query done at " + url);
    } catch (IOException ioe) {
      LOG.error("Error when notifying Desktop at " + urlString, ioe);

  /**
   * Create a new BeeswaxServiceImpl.
   *
   * @param dtHost The Hue host (ip or hostname).
   * @param dtPort The port Desktop runs on.
   * @param dtHttps Whether Desktop is running https.
   */
  public BeeswaxServiceImpl(String dtHost, int dtPort, boolean dtHttps) {
    // Sets up the executor and the query registry, derives the Desktop callback URL and builds
    // the evicter thread.
    this.executor = Executors.newCachedThreadPool(new NamingThreadFactory("Beeswax-%d"));
    this.runningQueries = new ConcurrentHashMap<String, RunningQueryState>();

    String protocol;
    if (dtHttps) {
      protocol = "https";
      try {
        // Disable SSL verification. HUE cert may be signed by untrusted CA.
        // NOTE(review): this trusts any certificate presented to the callback connection —
        // confirm that is acceptable for the deployment.
        SSLContext sslcontext = SSLContext.getInstance("SSL");
            // NOTE(review): the call receiving these arguments (presumably sslcontext.init)
            // and the statement installing the context are not visible in this excerpt.
            null, new DummyX509TrustManager[] {new DummyX509TrustManager()}, new SecureRandom());
      } catch (NoSuchAlgorithmException ex) {
        // Not fatal: construction continues after logging a warning.
        LOG.warn("Failed to disable SSL certificate check " + ex);
      } catch (KeyManagementException ex) {
        LOG.warn("Failed to disable SSL certificate check " + ex);
      // NOTE(review): the statement that installs this verifier is not visible here.
      DummyHostnameVerifier dummy = new DummyHostnameVerifier();
    } else {
      protocol = "http";
    this.notifyUrl = protocol + "://" + dtHost + ":" + dtPort + NOTIFY_URL_BASE;

    // A daemon thread that periodically evict stale RunningQueryState objects
    Thread evicter =
        new Thread(
            new Runnable() {
              public void run() {
                while (true) {
                  long now = System.currentTimeMillis();
                  for (Map.Entry<String, RunningQueryState> entry : runningQueries.entrySet()) {
                    RunningQueryState rqState = entry.getValue();
                    // Stale = not accessed for longer than RUNNING_QUERY_LIFETIME.
                    if (rqState.getAtime() + RUNNING_QUERY_LIFETIME < now) {
                      String id = entry.getKey();
                      // NOTE(review): the statement that actually removes the entry is not
                      // visible in this excerpt.
                      LOG.debug("Removed " + rqState.toString());
                      Thread.yield(); // be nice


                  // Wait until EVICTION_INTERVAL has passed since this sweep started.
                  long wakeup = now + EVICTION_INTERVAL;
                  while (System.currentTimeMillis() < wakeup) {
                    try {
                      // NOTE(review): the sleep call is not visible in this excerpt.
                    } catch (InterruptedException e) {
                      // NOTE(review): the handler body is not visible; the surrounding loop
                      // re-checks the wake-up time after an interrupt.

  private <T> T doWithState(RunningQueryState state, PrivilegedExceptionAction<T> action)
      throws BeeswaxException {
    try {
      UserGroupInformation ugi;
      if (UserGroupInformation.isSecurityEnabled())
        ugi =
                state.query.hadoop_user, UserGroupInformation.getLoginUser());
      else {
        ugi = UserGroupInformation.createRemoteUser(state.query.hadoop_user);
      return ugi.doAs(action);
    } catch (UndeclaredThrowableException e) {
      if (e.getUndeclaredThrowable() instanceof PrivilegedActionException) {
        Throwable bwe = e.getUndeclaredThrowable().getCause();
        if (bwe instanceof BeeswaxException) {
          LOG.error("Caught BeeswaxException", (BeeswaxException) bwe);
          throw (BeeswaxException) bwe;
      LOG.error("Caught unexpected exception.", e);
      throw new BeeswaxException(e.getMessage(), state.handle.log_context, state.handle);
    } catch (IOException e) {
      LOG.error("Caught IOException", e);
      throw new BeeswaxException(e.getMessage(), state.handle.log_context, state.handle);
    } catch (InterruptedException e) {
      LOG.error("Caught InterruptedException", e);
      throw new BeeswaxException(e.getMessage(), state.handle.log_context, state.handle);

  /**
   * Submit a query and return a handle (QueryHandle). The query runs asynchronously. Queries can be
   * long-lasting, so we push the execution into a new state. Compiling happens in the current
   * context so we report errors early.
   */
  public QueryHandle query(final Query query) throws BeeswaxException {
    // First, create an id and reset the LogContext
    // The same UUID string is passed as both constructor arguments of the handle.
    String uuid = UUID.randomUUID().toString();
    final QueryHandle handle = new QueryHandle(uuid, uuid);
    final LogContext lc = LogContext.registerCurrentThread(handle.log_context);

    // Make an administrative record
    final RunningQueryState state = new RunningQueryState(query, lc);
    try {
      return doWithState(
          // NOTE(review): doWithState takes (state, action); the state argument is not visible
          // in this excerpt — confirm.
          new PrivilegedExceptionAction<QueryHandle>() {
            public QueryHandle run() throws Exception {
              // Register before compiling so the query can be looked up by id right away.
              runningQueries.put(handle.id, state);
              // All kinds of things can go wrong when we compile it. So catch all.
              try {
                // NOTE(review): the statements of this try block (presumably attaching the
                // handle, initializing and compiling the state) are not visible — confirm.
              } catch (BeeswaxException perr) {
                throw perr;
              } catch (Throwable t) {
                // Anything else is reported to the client as a BeeswaxException for this handle.
                throw new BeeswaxException(t.toString(), handle.log_context, handle);
              // Now spin off the query.
              state.submitTo(executor, lc);
              return handle;
    } catch (BeeswaxException e) {
      throw e;

  /**
   * Verify that the handle data is not null.
   *
   * <p>Rejects a null handle as well as a handle whose id or log context is null; each case is
   * logged and reported as QueryNotFoundException.
   */
  private void validateHandle(QueryHandle handle) throws QueryNotFoundException {
    if (handle == null) {
      LOG.error("Encountered null QueryHandle");
      throw new QueryNotFoundException();
    // Both members are required.
    if (handle.id == null || handle.log_context == null) {
      LOG.error("Invalid QueryHandle: id " + handle.id + "; log context " + handle.log_context);
      throw new QueryNotFoundException();

  /** Returns the argument unchanged. */
  public String echo(String s) throws TException {
    return s;

  /**
   * Get the query plan for a query.
   *
   * <p>Runs synchronously under a freshly generated log context name; no QueryHandle is created
   * for the temporary state.
   */
  public QueryExplanation explain(final Query query) throws BeeswaxException, TException {
    final String contextName = UUID.randomUUID().toString();
    LogContext lc = LogContext.registerCurrentThread(contextName);
    // This state never gets a handle; errors are reported with contextName and a null handle.
    final RunningQueryState state = new RunningQueryState(query, lc);
    try {
      return doWithState(
          // NOTE(review): doWithState takes (state, action); the state argument is not visible
          // in this excerpt — confirm.
          new PrivilegedExceptionAction<QueryExplanation>() {
            public QueryExplanation run() throws Exception {
              QueryExplanation exp;
              // All kinds of things can go wrong when we compile it. So catch all.
              try {
                exp = state.explain();
              } catch (BeeswaxException perr) {
                throw perr;
              } catch (Throwable t) {
                throw new BeeswaxException(t.toString(), contextName, null);
              // On success, we remove the LogContext
              // NOTE(review): the removal statement itself is not visible in this excerpt.
              return exp;
    } catch (BeeswaxException e) {
      throw e;

  /**
   * Get the results of a query. This is non-blocking. Caller should check Results.ready to
   * determine if the results are in yet.
   *
   * @param handle The handle from query()
   * @param fromBeginning If true, rewind to the first row. Otherwise fetch from last position.
   */
  public Results fetch(final QueryHandle handle, final boolean fromBeginning)
      throws QueryNotFoundException, BeeswaxException {
    // Looks the query up by handle id and delegates to RunningQueryState.fetch() inside
    // doWithState.
    // NOTE(review): handle is dereferenced without validateHandle(handle) being visible here —
    // a null handle or id would fail with NullPointerException; confirm against the full file.
    final RunningQueryState state = runningQueries.get(handle.id);
    try {
      return doWithState(
          // NOTE(review): doWithState reads state.query before running the action, so an
          // unknown id (state == null) fails there, before the check below is reached — confirm.
          new PrivilegedExceptionAction<Results>() {
            public Results run() throws Exception {
              if (state == null) {
                throw new QueryNotFoundException();
              return state.fetch(fromBeginning);
    } catch (BeeswaxException e) {
      throw e;

  /** Returns the contents of an in-memory buffer decoded as UTF-8, for a default HiveConf. */
  public String dump_config() throws TException {
    HiveConf c = new HiveConf();
    ByteArrayOutputStream b = new ByteArrayOutputStream();
    try {
      // NOTE(review): nothing visible here writes c into b — presumably the statement that
      // serializes the configuration is missing from this excerpt; as shown, the result is
      // always the empty string.
      return new String(b.toByteArray(), "UTF-8");
    } catch (IOException e) {
      // Reported to the Thrift caller as a TException wrapping the cause.
      throw new TException(e);

  /**
   * Get the state of the query.
   *
   * @param handle The handle from query()
   */
  public QueryState get_state(final QueryHandle handle) throws QueryNotFoundException {
    RunningQueryState state = runningQueries.get(handle.id);
    if (state == null) {
      throw new QueryNotFoundException();
    return state.state;

  /**
   * Get the results metadata.
   *
   * @param handle The handle from query()
   */
  public ResultsMetadata get_results_metadata(final QueryHandle handle)
      throws QueryNotFoundException {
    // Looks the query up by handle id and delegates to RunningQueryState.getResultMetadata()
    // inside doWithState.
    // NOTE(review): handle is dereferenced without validateHandle(handle) being visible here —
    // confirm against the full file.
    final RunningQueryState state = runningQueries.get(handle.id);
    try {
      return doWithState(
          // NOTE(review): doWithState reads state.query before running the action, so an
          // unknown id (state == null) fails there, before the check below is reached — confirm.
          new PrivilegedExceptionAction<ResultsMetadata>() {
            public ResultsMetadata run() throws Exception {
              if (state == null) {
                throw new QueryNotFoundException();
              return state.getResultMetadata();
    } catch (BeeswaxException e) {
      // This method does not declare BeeswaxException, so any such failure is logged and
      // reported to the caller as QueryNotFoundException.
      LOG.error("Caught BeeswaxException.", e);
      throw new QueryNotFoundException();

  /**
   * Get the log messages related to the context.
   *
   * @param contextName The log context name
   * @return The log message as a string.
   */
  public String get_log(String contextName) throws QueryNotFoundException, TException {
    // Named flag for the second argument of LogContext.getByName: look up only, never create.
    final boolean DONT_CREATE = false;
    if (contextName == null) {
      throw new QueryNotFoundException();
    LogContext lc = LogContext.getByName(contextName, DONT_CREATE);
    // An unknown context name is reported the same way as a null one.
    if (lc == null) throw new QueryNotFoundException();
    return lc.readLog();

  /**
   * This is similar in spirit to Hive's own SetProcessor.
   */
  public List<ConfigVariable> get_default_configuration(boolean includeHadoop) throws TException {
    HiveConf conf = new HiveConf(BeeswaxServiceImpl.class);
    Properties p;
    if (includeHadoop) {
      p = conf.getAllProperties();
    } else {
      p = conf.getChangedProperties();
    List<ConfigVariable> ret = new ArrayList<ConfigVariable>();
    for (Entry<Object, Object> e : p.entrySet()) {
      String key = (String) e.getKey();
      String value = (String) e.getValue();

      ConfigVariable cv = new ConfigVariable();
    return ret;