public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method, String payload) { // http is not used. // payload is not used. boolean result = false; Map formItems = null; try { formItems = getFormItems(curi); } catch (AttributeNotFoundException e1) { logger.severe("Failed get of form items for " + curi); } if (formItems == null || formItems.size() <= 0) { try { logger.severe("No form items for " + method.getURI()); } catch (URIException e) { logger.severe("No form items and exception getting uri: " + e.getMessage()); } return result; } NameValuePair[] data = new NameValuePair[formItems.size()]; int index = 0; String key = null; for (Iterator i = formItems.keySet().iterator(); i.hasNext(); ) { key = (String) i.next(); data[index++] = new NameValuePair(key, (String) formItems.get(key)); } if (method instanceof PostMethod) { ((PostMethod) method).setRequestBody(data); result = true; } else if (method instanceof GetMethod) { // Append these values to the query string. // Get current query string, then add data, then get it again // only this time its our data only... then append. HttpMethodBase hmb = (HttpMethodBase) method; String currentQuery = hmb.getQueryString(); hmb.setQueryString(data); String newQuery = hmb.getQueryString(); hmb.setQueryString(((currentQuery != null) ? currentQuery : "") + "&" + newQuery); result = true; } else { logger.severe("Unknown method type: " + method); } return result; }
/** * @param since last modified time to use * @param req * @param url if null, ignored * @param redirCount number of redirs we've done */ public static HttpData getDataOnce( HttpServletRequest req, HttpServletResponse res, long since, String surl, int redirCount, int timeout) throws IOException, HttpException, DataSourceException, MalformedURLException { HttpMethodBase request = null; HostConfiguration hcfg = new HostConfiguration(); /* [todo hqm 2006-02-01] Anyone know why this code was here? It is setting the mime type to something which just confuses the DHTML parser. if (res != null) { res.setContentType("application/x-www-form-urlencoded;charset=UTF-8"); } */ try { // TODO: [2002-01-09 bloch] cope with cache-control // response headers (no-store, no-cache, must-revalidate, // proxy-revalidate). if (surl == null) { surl = getURL(req); } if (surl == null || surl.equals("")) { throw new MalformedURLException( /* (non-Javadoc) * @i18n.test * @org-mes="url is empty or null" */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-312")); } String reqType = ""; String headers = ""; if (req != null) { reqType = req.getParameter("reqtype"); headers = req.getParameter("headers"); } boolean isPost = false; mLogger.debug("reqtype = " + reqType); if (reqType != null && reqType.equals("POST")) { request = new LZPostMethod(); request.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8"); isPost = true; mLogger.debug("setting POST req method"); } else if (reqType != null && reqType.equals("PUT")) { request = new LZPutMethod(); // todo [hqm 2007] treat PUT like POST? isPost = true; mLogger.debug("setting PUT req method"); } else if (reqType != null && reqType.equals("DELETE")) { request = new LZDeleteMethod(); mLogger.debug("setting DELETE req method"); } else { mLogger.debug("setting GET (default) req method"); request = new LZGetMethod(); } request.setHttp11(mUseHttp11); // Proxy the request headers if (req != null) { LZHttpUtils.proxyRequestHeaders(req, request); } // Set headers from query string if (headers != null && headers.length() > 0) { StringTokenizer st = new StringTokenizer(headers, "\n"); while (st.hasMoreTokens()) { String h = st.nextToken(); int i = h.indexOf(":"); if (i > -1) { String n = h.substring(0, i); String v = h.substring(i + 2, h.length()); request.setRequestHeader(n, v); mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="setting header " + p[0] + "=" + p[1] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-359", new Object[] {n, v})); } } } mLogger.debug("Parsing url"); URI uri = LZHttpUtils.newURI(surl); try { hcfg.setHost(uri); } catch (Exception e) { throw new MalformedURLException( /* (non-Javadoc) * @i18n.test * @org-mes="can't form uri from " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-376", new Object[] {surl})); } // This gets us the url-encoded (escaped) path and query string String path = uri.getEscapedPath(); String query = uri.getEscapedQuery(); mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="encoded path: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-389", new Object[] {path})); mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="encoded query: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-397", new Object[] {query})); // This call takes a decoded (unescaped) path request.setPath(path); boolean hasQuery = (query != null && query.length() > 0); String rawcontent = null; // Newer rawpost protocol puts lzpostbody as a separate // top level query arg in the request. rawcontent = req.getParameter("lzpostbody"); if (isPost) { // Older rawpost protocol put the "lzpostbody" arg // embedded in the "url" args's query args if (rawcontent == null && hasQuery) { rawcontent = findQueryArg("lzpostbody", query); } if (rawcontent != null) { // Get the unescaped query string ((EntityEnclosingMethod) request).setRequestBody(rawcontent); } else if (hasQuery) { StringTokenizer st = new StringTokenizer(query, "&"); while (st.hasMoreTokens()) { String it = st.nextToken(); int i = it.indexOf("="); if (i > 0) { String n = it.substring(0, i); String v = it.substring(i + 1, it.length()); // POST encodes values during request ((PostMethod) request).addParameter(n, URLDecoder.decode(v, "UTF-8")); } else { mLogger.warn( /* (non-Javadoc) * @i18n.test * @org-mes="ignoring bad token (missing '=' char) in query string: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-429", new Object[] {it})); } } } } else { // This call takes an encoded (escaped) query string request.setQueryString(query); } // Put in the If-Modified-Since headers if (since != -1) { String lms = LZHttpUtils.getDateString(since); request.setRequestHeader(LZHttpUtils.IF_MODIFIED_SINCE, lms); mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="proxying lms: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-450", new Object[] {lms})); } mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="setting up http client" */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-460")); HttpClient htc = null; if (mConnectionMgr != null) { htc = new HttpClient(mConnectionMgr); } else { htc = new HttpClient(); } htc.setHostConfiguration(hcfg); // This is the data timeout mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="timeout set to " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-478", new Object[] {new Integer(timeout)})); htc.setTimeout(timeout); // Set connection timeout the same htc.setConnectionTimeout(mConnectionTimeout); // Set timeout for getting a connection htc.setHttpConnectionFactoryTimeout(mConnectionPoolTimeout); // TODO: [2003-03-05 bloch] this should be more configurable (per app?) if (!isPost) { request.setFollowRedirects(mFollowRedirects > 0); } long t1 = System.currentTimeMillis(); mLogger.debug("starting remote request"); int rc = htc.executeMethod(hcfg, request); String status = HttpStatus.getStatusText(rc); if (status == null) { status = "" + rc; } mLogger.debug( /* (non-Javadoc) * @i18n.test * @org-mes="remote response status: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-504", new Object[] {status})); HttpData data = null; if (isRedirect(rc) && mFollowRedirects > redirCount) { String loc = request.getResponseHeader("Location").toString(); String hostURI = loc.substring(loc.indexOf(": ") + 2, loc.length()); mLogger.info( /* (non-Javadoc) * @i18n.test * @org-mes="Following URL from redirect: " + p[0] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-517", new Object[] {hostURI})); long t2 = System.currentTimeMillis(); if (timeout > 0) { timeout -= (t2 - t1); if (timeout < 0) { throw new InterruptedIOException( /* (non-Javadoc) * @i18n.test * @org-mes=p[0] + " timed out after redirecting to " + p[1] */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-529", new Object[] {surl, loc})); } } data = getDataOnce(req, res, since, hostURI, redirCount++, timeout); } else { data = new HttpData(request, rc); } if (req != null && res != null) { // proxy response headers LZHttpUtils.proxyResponseHeaders(request, res, req.isSecure()); } return data; } catch (ConnectTimeoutException ce) { // Transduce to an InterrupedIOException, since lps takes these to be timeouts. if (request != null) { request.releaseConnection(); } throw new InterruptedIOException( /* (non-Javadoc) * @i18n.test * @org-mes="connecting to " + p[0] + ":" + p[1] + " timed out beyond " + p[2] + " msecs." */ org.openlaszlo.i18n.LaszloMessages.getMessage( HTTPDataSource.class.getName(), "051018-557", new Object[] { hcfg.getHost(), new Integer(hcfg.getPort()), new Integer(mConnectionTimeout) })); } catch (HttpRecoverableException hre) { if (request != null) { request.releaseConnection(); } throw hre; } catch (HttpException e) { if (request != null) { request.releaseConnection(); } throw e; } catch (IOException ie) { if (request != null) { request.releaseConnection(); } throw ie; } catch (RuntimeException e) { if (request != null) { request.releaseConnection(); } throw e; } }