/**
 * Returns the top-most renderer context reachable through the parent chain.
 *
 * @return this context when it has no parent; otherwise whatever the parent reports as its top
 */
public HtmlRendererContext getTop() {
  final HtmlRendererContext parent = this.parentRcontext;
  // Delegate to the parent rather than walking the chain here, so a parent
  // implementation that overrides getTop() keeps control of the answer.
  return parent == null ? this : parent.getTop();
}
/** * Implements simple navigation with incremental rendering, and target processing, including frame * lookup. Should be overridden to allow for more robust browser navigation. * * <p><b>Notes:</b> * * <ul> * <li>Encoding ISO-8859-1 assumed always. * <li>Caching is not implemented. * <li>Cookies are not implemented. * <li>Incremental rendering is not optimized for ignorable document change notifications. * <li>Other HTTP features are not implemented. * </ul> */ public void navigate(final URL href, String target) { // This method implements simple incremental rendering. if (target != null) { HtmlRendererContext topCtx = getTop(); HTMLCollection frames = topCtx.getFrames(); if (frames != null) { org.w3c.dom.Node frame = frames.namedItem(target); if (frame instanceof FrameNode) { BrowserFrame bframe = ((FrameNode) frame).getBrowserFrame(); if (bframe == null) { throw new IllegalStateException("Frame node without a BrowserFrame instance: " + frame); } if (bframe.getHtmlRendererContext() != this) { bframe.loadURL(href); return; } } } target = target.trim().toLowerCase(); if ("_top".equals(target)) { getTop().navigate(href, null); return; } else if ("_parent".equals(target)) { HtmlRendererContext parent = getParent(); if (parent != null) { parent.navigate(href, null); return; } } else if ("_blank".equals(target)) { this.open(href.toExternalForm(), "cobra.blank", "", false); return; } else { // fall through } } URL urlForLoading; if (href.getProtocol().equals("file")) { // Remove query so it works. 
try { urlForLoading = new URL(href.getProtocol(), href.getHost(), href.getPort(), href.getPath()); } catch (java.net.MalformedURLException throwable) { this.warn("malformed", throwable); urlForLoading = href; } } else { urlForLoading = href; } final URL finalURLForLoading = urlForLoading; CobraUserAgent uaContext = (CobraUserAgent) this.getUserAgentContext(); URI requestURI = null; try { requestURI = urlForLoading.toURI(); } catch (URISyntaxException e) { logger.warning("navigate(): Error in converting URL to URI for uri=" + urlForLoading); } int transactionId = -1; boolean wasRequested = false; String username = ""; AbstractHttpTransaction rawTransaction = null; final long time0 = System.currentTimeMillis(); if (requestURI != null) { // try { // // username = ""; // //TODO: // //username = // uaContext.getScan().getTransactionRecord().getTransaction(uaContext.getId()).getUsername(); // // if(username != null && !username.equalsIgnoreCase("")) { // transactionId = // uaContext.getScan().getTransactionRecord().getTransactionId("GET", // finalURLForLoading.toString(), username); // // wasRequested = // uaContext.getScan().getTransactionRecord().isUriRequested("GET", // finalURLForLoading.toString(), username, false); // } else { // transactionId = // uaContext.getScan().getTransactionRecord().getTransactionId("GET", // finalURLForLoading.toString(), null); // // wasRequested = // uaContext.getScan().getTransactionRecord().isUriRequested("GET", // finalURLForLoading.toString(), null, false); // } // /* * Possibility of 4 cases Case 1: wasRequested = True, transactionId * > 0 This means we can fetch the request from Cache/DB Case 2: * wasRequested = False, transactionId < 0 This means the request is * new. So we have to take care of it here and also add it for * spidering Case 3: wasRequested = False, transactionId > 0 This is * a Race condition. Very uncertain, unpredicatble and with low * probability. 
Case 4: wasRequested = True, transactionId < 0 This * is also a Race condition. This state happens when spidering added * the URL and before the transaction was saved to DB. Cache DB * add/update/refresh happens just after the transaction is saved to * DB */ // if (transactionId == -1 && !wasRequested) { // /* // * Solves Case 2 If the transaction is not in cache and if // * it was not requested before, add it to the requester // * queue for spidering // */ // rawTransaction = // HttpTransactionFactory.createTransaction(uaContext.getScan(), // "GET", requestURI, // uaContext.getId(), TransactionSource.COBRA); // // Debug.debug("Debug: raw ID: " + rawTransaction.getId() + // // " rawUri: " + rawTransaction.getMethod() + " " + // // rawTransaction.getAbsoluteUriString() + // // " Reason_ren: No cache"); // TransactionSource referenceTransactionSource = // uaContext.getScan().getTransactionRecord() // .getTransaction(uaContext.getId()).getSource(); // if (referenceTransactionSource != TransactionSource.TEST) { // uaContext.getScan().getRequesterQueue().addSpiderRequest(rawTransaction, // false, // "Javascript: Asynchronous Requests "); // } // // } else if (transactionId > 0) { // /* // * Solves Case 1 and Case 3 If the request is found in // * cache, then fetch it and continue // */ // rawTransaction = // uaContext.getScan().getTransactionRecord().getTransaction(transactionId); // // Debug.debug("Debug: raw ID: " + rawTransaction.getId() + // // " rawUri: " + rawTransaction.getMethod() + " " // // +rawTransaction.getAbsoluteUriString() + // // " Reason_ren: Cache hit"); // // } else if (transactionId == -1 && wasRequested) { // /* // * Solves Case 4 1. Wait to complete the processing by // * requesterThread? or 2. Duplicate request Currently // * duplicating. 
// */ // rawTransaction = // HttpTransactionFactory.createTransaction(uaContext.getScan(), // "GET", requestURI, // uaContext.getId(), TransactionSource.COBRA); // // Debug.debug("Debug: raw ID: " + rawTransaction.getId() + // // " rawUri: " + rawTransaction.getMethod() + " " + // // rawTransaction.getAbsoluteUriString() + // // " Reason_ren: No cache, but requested"); // } // // if (rawTransaction != null) { // synchronized (rawTransaction) { // // /* // * Solves uncertain conditions // */ // // if (!rawTransaction.isResponsePresent() || // !rawTransaction.isSuccessfullExecution()) { // // /* // * This should solve uncertain conditions like - // * Transaction from cache/DB might not have response // * - A newly created transaction which is not // * executed // */ // // rawTransaction.execute("CobraHttpRequest", false); // // } else { // // // /* // * Load it from cache // */ // // } // } // } // // } catch (Exception e) { // // TODO: handle exception // } // } final AbstractHttpTransaction finalTransaction = rawTransaction; // Make request asynchronously. new Thread() { @Override public void run() { try { URL uri = href; logger.info("process(): Loading URI=[" + uri + "]."); if (finalTransaction != null) { // try { // sourceCode = null; // long time1 = System.currentTimeMillis(); // byte[] input = finalTransaction.getResponseBody(); // if (input != null) { // ByteArrayInputStream inputStream = new // ByteArrayInputStream(input); // InputSource inputSource = new // InputSourceImpl(inputStream, // finalTransaction.getURI() // , finalTransaction.getContentType().getCharset()); // // DocumentBuilderImpl builder = new // DocumentBuilderImpl(SimpleHtmlRendererContext.this // .getUserAgentContext(), // SimpleHtmlRendererContext.this); // // if (inputSource != null) { // HTMLDocumentImpl document = (HTMLDocumentImpl) // builder.createDocument(inputSource); // // Now start loading. 
// document.load(); // long time2 = System.currentTimeMillis(); // logger.info("Parsed URI=[" + uri + // "]: Parse elapsed: " + (time2 - time1) // + " ms. Fetched from Cache/DB: " + (time1 - time0) + // " ms."); // sourceCode = inputStream.toString(); // } // // } // } finally { // // } } else { /* * The last ditch effort using java libraries */ // Using potentially different URL for loading. URLConnection connection = finalURLForLoading.openConnection(); connection.setRequestProperty("User-Agent", getUserAgentContext().getUserAgent()); connection.setRequestProperty("Cookie", ""); if (connection instanceof HttpURLConnection) { HttpURLConnection hc = (HttpURLConnection) connection; hc.setInstanceFollowRedirects(true); int responseCode = hc.getResponseCode(); logger.info("process(): HTTP response code: " + responseCode); } InputStream in = connection.getInputStream(); try { sourceCode = null; long time1 = System.currentTimeMillis(); RecordedInputStream rin = new RecordedInputStream(in, 8192); InputStream bin = new BufferedInputStream(rin, 8192); // HtmlParserContext pcontext = // createParserContext(uri); // SimpleUserAgentContext ucontext = new // SimpleUserAgentContext(); DocumentBuilderImpl builder = new DocumentBuilderImpl( SimpleHtmlRendererContext.this.getUserAgentContext(), SimpleHtmlRendererContext.this); String actualURI = uri.toExternalForm(); // Only create document, don't parse. HTMLDocumentImpl document = (HTMLDocumentImpl) builder.createDocument(new InputSourceImpl(bin, actualURI, "ISO-8859-1")); // Set document in HtmlPanel. Safe to call outside // GUI // thread. // SimpleHtmlRendererContext.this.htmlPanel.setDocument(document, // SimpleHtmlRendererContext.this); // Now start loading. document.load(); long time2 = System.currentTimeMillis(); logger.info( "Parsed URI=[" + uri + "]: Parse elapsed: " + (time2 - time1) + " ms. 
Connection elapsed: " + (time1 - time0) + " ms."); sourceCode = rin.getString("ISO-8859-1"); } finally { in.close(); } } } catch (Exception err) { SimpleHtmlRendererContext.this.error( "navigate(): Error loading or parsing request.", err); } } }.start(); }
/**
 * Constructs a SimpleHtmlRendererContext that is a child of another
 * <code>{@link HtmlRendererContext}</code>.
 *
 * <p>The user agent context is taken from the parent. When there is no parent it is left
 * {@code null}.
 *
 * @param parentRcontext The parent's renderer context; may be {@code null}.
 */
public SimpleHtmlRendererContext(final HtmlRendererContext parentRcontext) {
  this.parentRcontext = parentRcontext;
  if (parentRcontext == null) {
    this.bcontext = null;
  } else {
    this.bcontext = parentRcontext.getUserAgentContext();
  }
}