private void getImgLinks(HTML dequeuedHtml) { // TODO Auto-generated method stub String[] allValidExtension = m_ConfigData.getImageExtensions(); StringBuilder extenstionAddToRegex = new StringBuilder(); extenstionAddToRegex.append("("); for (int i = 0; i < allValidExtension.length; i++) { if (i == allValidExtension.length - 1) { extenstionAddToRegex.append(allValidExtension[i].toLowerCase() + "|"); extenstionAddToRegex.append(allValidExtension[i].toUpperCase() + ")"); } else { extenstionAddToRegex.append(allValidExtension[i].toLowerCase() + "|"); extenstionAddToRegex.append(allValidExtension[i].toUpperCase() + "|"); } } String regex = "<(img|IMG)\\s+(src|SRC)=\"(.*?\\." + extenstionAddToRegex.toString() + ")\""; System.out.println("Regex for image is " + regex); Pattern pattern = Pattern.compile(regex); Matcher m = pattern.matcher(dequeuedHtml.GetBody()); System.out.println("fetching image links"); while (m.find()) { String link = m.group(3); System.out.println("enqueue to Downloader: " + link); if (!m_AllTraversedLinks.Exists(link)) { // m_DownloadQueue.enqueue(link); m_DownloadQueue.enqueue(link); // m_AllTraversedLinks.enqueue(link); System.out.println("Analyzer: adding image link: " + link); } } System.out.println("Done with fetching image links"); }
/**
 * Checks that the {@code map} field and the {@code queue} field report the
 * same size.
 *
 * <p>If {@code queue} is a {@code SynchronizedQueue}, its own
 * {@code assertConsistency()} is invoked before the sizes are compared.
 *
 * @throws AssertionError if the two sizes differ
 */
public void assertConsistency() {
    if (queue instanceof SynchronizedQueue) {
        ((SynchronizedQueue) queue).assertConsistency();
    }

    // The map is sampled before the queue, same as the comparison order in the message.
    final int entriesInMap = map.size();
    final int entriesInQueue = queue.size();
    if (entriesInMap == entriesInQueue) {
        return;
    }

    final String problem = "The map size is " + entriesInMap
            + " is different from the queue size " + entriesInQueue;
    throw new AssertionError(problem);
}
private void getUrlLinks(HTML dequeuedHtml) { // String regex = "<(a|A) (href|HREF)=\"(http|HTTP)://" + m_Domain + // "/(.*?)\">"; String regex = "<(a|A)\\s+(href|HREF)=\"(?!#)(.*?)\">"; // TODO: do HTTP // prefix means // its external? Pattern pattern = Pattern.compile(regex); Matcher m = pattern.matcher(dequeuedHtml.GetBody()); System.out.println("fetching links"); while (m.find()) { String link = m.group(3); System.out.println("enqueue to Downloader: " + link); if (!m_AllTraversedLinks.Exists(link)) { // m_DownloadQueue.enqueue(link); m_DownloadQueue.enqueue(link); // m_AllTraversedLinks.enqueue(link); System.out.println("Analyzer: adding link: " + link); } } System.out.println("Done with fetching links"); }
@Override public void run() { while (ThreadManager.isRunning) { // m_DownloadQueue.getSize() !=0 || // m_AnalyzeQueue.getSize() != 0) { // TODO: think about // this condition // HTML dequeuedHtml = m_AnalyzeQueue.dequeue(); ThreadManager.updateStateAnalyzer(true); HTML dequeuedHtml = m_AnalyzeQueue.dequeue(); if (dequeuedHtml != null) { ThreadManager.updateStateAnalyzer(false); System.out.println("Analyzer: dequeued: \n Header:\n" + dequeuedHtml.GetHeader()); try { String contentTypeOfHtml = getContentTypeFromHeader( dequeuedHtml.GetHeader()); // dequeuedHtml.GetContentType().toLowerCase(); if (contentTypeOfHtml.contains(IMAGE)) { // in case of image // handle case of image System.out.println("Analzyer: it is an image"); // TODO: check extension supported // handleCaseOfImage(dequeuedHtml); HandleHTML(dequeuedHtml, HTML.TypeOfHTML.IMAGE); } else if (contentTypeOfHtml.contains(VIDEO)) { // in case System.out.println("Analzyer: it is an video"); // of // Video // handle case of video } else if (contentTypeOfHtml.contains(DOCUMENT)) { // in // TODO: what is the command for document? <a href? <doc // src=? // case System.out.println("Analzyer: it is an document"); // // // of // Document // handle case of document } else if (contentTypeOfHtml.contains("text")) { HandleHTML(dequeuedHtml, HTML.TypeOfHTML.TEXT); // int contentLenght = // extractContentLength(dequeuedHtml); // TODO: should // be done in different method using enum // if (contentLenght != -1) { // synchronized (m_ResultWrapper) { // m_ResultWrapper.m_PageAggregator.m_NumOfPages++; // m_ResultWrapper.m_PageAggregator.m_TotalSizeOfPagesInBytes+= // contentLenght; // } // } // System.out.println("(Data of page was aggregated) content is text"); // getImgLinks(dequeuedHtml); // getUrlLinks(dequeuedHtml); // TODO: can links have // HTTP:// ? 
} else { System.out.println("Downloader: Error in content type of " + dequeuedHtml.toString()); } } catch (Exception e) { System.out.println("Couldnt get content type from: " + dequeuedHtml.GetHeader()); System.out.println(e.getMessage()); e.printStackTrace(); } } } System.out.println("Analyzer Finished!"); // m_DownloadQueue.unregisterProducer(); }
private void processRequest() throws Exception { while (true) { Socket socket = socketRequestsQueue.take(); DataOutputStream socketOutputStream = new DataOutputStream(socket.getOutputStream()); HtmlRequest htmlRequest = readRequest(socket); // If the request is empty than the socket was closed on the other side if (htmlRequest.equals(null)) { try { socket.close(); } catch (Exception e) { System.out.println("Error on trying to close socket on empty request: " + e.toString()); } continue; } HtmlResponse responseToClient; if (!htmlRequest.isLegalRequest) { // The request format is illegal responseToClient = respond400(htmlRequest); } else if (!legalRequestType(htmlRequest)) { // The request method is unimplemented responseToClient = respond501(htmlRequest); } else if (directRequestToResultPages(htmlRequest)) { responseToClient = respond403(htmlRequest); } else { if (!htmlRequest.type.equals("TRACE") && !htmlRequest.type.equals("POST")) { boolean isFileLegal = false; try { isFileLegal = checkIfRequestedFileLegal(htmlRequest); } catch (IOException e) { responseToClient = respond500(htmlRequest); } if (!isFileLegal) { responseToClient = respond404(htmlRequest); } else { responseToClient = respond200(htmlRequest); } } else { responseToClient = respond200(htmlRequest); } } try { // Send the status line. socketOutputStream.writeBytes(responseToClient.getStatusLine()); // Send the content type line. socketOutputStream.writeBytes(responseToClient.getContentType()); // Send content length. if (!htmlRequest.isChunked) { socketOutputStream.writeBytes(responseToClient.getContentLengthLine()); } if (htmlRequest.isChunked) { socketOutputStream.writeBytes(responseToClient.getTransferEncoding()); } // Send a blank line to indicate the end of the header lines. socketOutputStream.writeBytes(CRLF); } catch (Exception e) { System.out.println("Writing the header caused an error" + e.toString()); } // Send the content of the HTTP. 
if (!htmlRequest.type.equals("HEAD")) { sendEntityBodyToClient(socketOutputStream, responseToClient, htmlRequest.isChunked); } // Close streams and socket. try { socketOutputStream.close(); socket.close(); } catch (Exception e) { System.out.println("closing the socket caused an error"); } } }
/**
 * Checks that {@code equals} is reflexive: a freshly created
 * {@code SynchronizedQueue} must be equal to itself.
 */
@Test
public void testEquals() {
    final SynchronizedQueue<Object> subject = new SynchronizedQueue<Object>();
    final boolean equalToItself = subject.equals(subject);
    assertTrue(equalToItself);
}