/**
   * Tests whether the page parser can pull prices, URLs, and titles from a page with valid
   * results, and whether a page with no results can be parsed without throwing.
   *
   * @throws ParserConfigurationException if parsing the valid-results fixture fails with it
   * @throws SAXException if parsing the valid-results fixture fails with it
   * @throws IOException if a fixture file cannot be opened, read, or closed
   */
  @Test
  public void testPageParser() throws ParserConfigurationException, SAXException, IOException {

    // Test a page that has valid results. The fixture stream is closed in a finally block so
    // the file handle is released even when parsing throws.
    List<Ad> adsFromValidPage;
    InputStream validResultsInputStream =
        new FileInputStream(new File(PROJECT_PATH + TEST_VALID_XML_PATH));
    try {
      adsFromValidPage = PageParser.fromInputStream(validResultsInputStream);
    } finally {
      validResultsInputStream.close();
    }

    // Test a few prices.
    assertEquals(1500000, adsFromValidPage.get(0).getPriceInCents());
    assertEquals(119900000, adsFromValidPage.get(10).getPriceInCents());
    assertEquals(209800000, adsFromValidPage.get(24).getPriceInCents());

    // Test a title.
    assertEquals(
        "One Bluxome Street #316 OPEN HOUSE TODAY 1-4:00 PM 2Bed/1.5 Bath (SOMA / south beach) $1199000 2bd 1361sqft",
        adsFromValidPage.get(10).getDcTitle());

    // Test a URL.
    assertEquals(
        "http://sfbay.craigslist.org/sfc/reb/3963411826.html", adsFromValidPage.get(11).getURL());

    // Test whether a page that has no results can be parsed at all; the parsed list itself is
    // not inspected, only the absence of an exception matters here.
    InputStream noResultsFileStream =
        new FileInputStream(new File(PROJECT_PATH + TEST_NO_RESULTS_XML_PATH));
    try {
      PageParser.fromInputStream(noResultsFileStream);
    } catch (Exception e) {
      // fail() only carries a message, so print the trace to keep the root cause visible.
      e.printStackTrace();
      fail("Could not parse a page with no results");
    } finally {
      noResultsFileStream.close();
    }
  }
示例#2
0
 /**
  * Handles a single HTTP request read from this connection and writes the response to
  * {@code output}.
  *
  * <p>Only request lines starting with {@code "GET "} or {@code "POST "} are handled; for any
  * other request line nothing is written. The requested file is resolved through
  * {@code getAllowedFile} and {@code processRequest}, and the reply is one of:
  * <ul>
  *   <li>{@code 304 Not Modified} when caching is enabled and the {@code ifModifiedSince} value
  *       equals the server start time,</li>
  *   <li>{@code 404 Not Found} when the server has no such file,</li>
  *   <li>a chunked {@code 200 OK} when a {@code .jsp} page is requested within a session that
  *       holds a {@code "chunks"} iterator and {@code SysProperties.CONSOLE_STREAM} is set,</li>
  *   <li>a plain {@code 200 OK} with {@code Content-Length} otherwise.</li>
  * </ul>
  *
  * <p>All protocol text (status line, headers, chunk framing) is encoded with
  * {@code Constants.UTF8} instead of the platform default charset, so the bytes on the wire do
  * not depend on the JVM's locale settings.
  *
  * @return {@code true} when {@code processRequest} returns an empty file name (treated as an
  *     asynchronous request); otherwise the keep-alive flag returned by {@code parseHeader()},
  *     or {@code false} if the request line was neither a GET nor a POST
  * @throws IOException if reading the request or writing the response fails
  */
 @SuppressWarnings("unchecked")
 private boolean process() throws IOException {
   boolean keepAlive = false;
   String head = readHeaderLine();
   if (head.startsWith("GET ") || head.startsWith("POST ")) {
     // The requested path is the text between the first '/' and the last space of the
     // request line; anything malformed falls back to the empty path.
     int begin = head.indexOf('/'), end = head.lastIndexOf(' ');
     String file;
     if (begin < 0 || end < begin) {
       file = "";
     } else {
       file = head.substring(begin + 1, end).trim();
     }
     trace(head + ": " + file);
     file = getAllowedFile(file);
     attributes = new Properties();
     int paramIndex = file.indexOf("?");
     session = null;
     if (paramIndex >= 0) {
       // Split off the query string, parse it into attributes and look up the session by id.
       String attrib = file.substring(paramIndex + 1);
       parseAttributes(attrib);
       String sessionId = attributes.getProperty("jsessionid");
       file = file.substring(0, paramIndex);
       session = server.getSession(sessionId);
     }
     keepAlive = parseHeader();
     String hostAddr = socket.getInetAddress().getHostAddress();
     file = processRequest(file, hostAddr);
     if (file.length() == 0) {
       // asynchronous request
       return true;
     }
     String message;
     byte[] bytes;
     if (cache && ifModifiedSince != null && ifModifiedSince.equals(server.getStartDateTime())) {
       bytes = null;
       message = "HTTP/1.1 304 Not Modified\r\n";
     } else {
       bytes = server.getFile(file);
       if (bytes == null) {
         bytes = ("File not found: " + file).getBytes(Constants.UTF8);
         message = "HTTP/1.0 404 Not Found\r\n";
         // Declare the body length: the connection may be kept open after this response,
         // and without it the client cannot tell where the error text ends.
         message += "Content-Length: " + bytes.length + "\r\n";
       } else {
         if (session != null && file.endsWith(".jsp")) {
           String page = new String(bytes, Constants.UTF8);
           if (SysProperties.CONSOLE_STREAM) {
             // The "chunks" entry is removed from the session map, so it is streamed once.
             Iterator<String> it = (Iterator<String>) session.map.remove("chunks");
             if (it != null) {
               message = "HTTP/1.1 200 OK\r\n";
               message += "Content-Type: " + mimeType + "\r\n";
               message += "Cache-Control: no-cache\r\n";
               message += "Transfer-Encoding: chunked\r\n";
               message += "\r\n";
               trace(message);
               output.write(message.getBytes(Constants.UTF8));
               byte[] crlf = "\r\n".getBytes(Constants.UTF8);
               while (it.hasNext()) {
                 String s = it.next();
                 s = PageParser.parse(s, session.map);
                 bytes = s.getBytes(Constants.UTF8);
                 if (bytes.length == 0) {
                   // A zero-length chunk marks the end of a chunked body, so skip empty parts.
                   continue;
                 }
                 // Chunk framing: hex size, CRLF, payload, CRLF.
                 output.write(Integer.toHexString(bytes.length).getBytes(Constants.UTF8));
                 output.write(crlf);
                 output.write(bytes);
                 output.write(crlf);
                 output.flush();
               }
               // Terminating zero-length chunk followed by the empty trailer.
               output.write("0\r\n\r\n".getBytes(Constants.UTF8));
               output.flush();
               return keepAlive;
             }
           }
           page = PageParser.parse(page, session.map);
           bytes = page.getBytes(Constants.UTF8);
         }
         message = "HTTP/1.1 200 OK\r\n";
         message += "Content-Type: " + mimeType + "\r\n";
         if (!cache) {
           message += "Cache-Control: no-cache\r\n";
         } else {
           message += "Cache-Control: max-age=10\r\n";
           message += "Last-Modified: " + server.getStartDateTime() + "\r\n";
         }
         message += "Content-Length: " + bytes.length + "\r\n";
       }
     }
     message += "\r\n";
     trace(message);
     output.write(message.getBytes(Constants.UTF8));
     if (bytes != null) {
       output.write(bytes);
     }
     output.flush();
   }
   return keepAlive;
 }
 /**
  * Opens the stats XML fixture and builds the {@code Stats} object from the ads parsed out of
  * it, storing both in the {@code fileStream} and {@code stats} fields.
  *
  * <p>Declared {@code public} because the JUnit 4 runner requires {@code @Before} methods to be
  * {@code public void} and reports an initialization error otherwise.
  *
  * @throws SAXException if parsing the fixture fails with it
  * @throws IOException if the fixture file cannot be opened or read
  * @throws ParserConfigurationException if parsing the fixture fails with it
  */
 @Before
 public void setUp() throws SAXException, IOException, ParserConfigurationException {
   fileStream = new FileInputStream(new File(TestPageParser.PROJECT_PATH + TEST_STATS_XML));
   stats = new Stats(PageParser.fromInputStream(fileStream));
 }