@Override protected String doInBackground(Void... params) { String html = ""; try { Connection.Response loginForm; loginForm = Jsoup.connect("https://ta.yrdsb.ca/yrdsb/").method(Connection.Method.GET).execute(); // Login to page using user/pass entered in MainActivity Document document = Jsoup.connect("https://ta.yrdsb.ca/yrdsb/") .data("cookieexists", "false") .data("username", MainActivity.usernameString) .data("password", MainActivity.passwordString) .data("submit", "Login") .cookies(loginForm.cookies()) .post(); // Convert document into string for easier processing html = document.toString(); Document doc = Jsoup.parse(html); // for (int x = 0; x < doc.select("[width=85%], [border=0], [cellspacing=0], // [cellpadding=5]").size(); x++){ System.out.println( doc.select("[width=85%], [border=0], [cellspacing=0], [cellpadding=5]") .select("tr") .size()); // System.out.println(doc.select("[width=85%], [border=0], [cellspacing=0], // [cellpadding=5]").select("tr").get(5)); // } // Prepare array to store grades grades = new ArrayList<>(); courses = new ArrayList<>(); // Regex to search html string for grades, then add to array Pattern p = Pattern.compile("current mark\\s?=\\s?(\\d+\\.?\\d*)"); Matcher m = p.matcher(html); while (m.find()) { grades.add(new Double(m.group(1))); } Pattern p1 = Pattern.compile("([a-zA-Z]{3}[0-9]{1}[a-zA-Z]{1}[0-9]{1})"); Matcher m1 = p1.matcher(html); while (m1.find()) { courses.add(new String(m1.group(1))); } } catch (IOException e) { e.printStackTrace(); } return html; }
// set up url, method, header, cookies private void setupFromConnection(HttpURLConnection conn, Connection.Response previousResponse) throws IOException { method = Connection.Method.valueOf(conn.getRequestMethod()); url = conn.getURL(); statusCode = conn.getResponseCode(); statusMessage = conn.getResponseMessage(); contentType = conn.getContentType(); Map<String, List<String>> resHeaders = conn.getHeaderFields(); processResponseHeaders(resHeaders); // if from a redirect, map previous response cookies into this response if (previousResponse != null) { for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { if (!hasCookie(prevCookie.getKey())) cookie(prevCookie.getKey(), prevCookie.getValue()); } } }
/**
 * Executes a Jsoup request and stores the cookies of its response in {@code this.cookies}.
 *
 * <p>The target URL is printed to standard output before anything else happens. When
 * {@code props.getMode()} is {@code Mode.TEST}, the request is not sent.
 *
 * @param connection Jsoup connection object
 * @param method HTTP method to set on the connection; when null the connection's method is left
 *     as it is
 * @return Jsoup Connection.Response object, or null in test mode or when the request fails with
 *     an IOException
 */
public Connection.Response execute(Connection connection, Connection.Method method) {
    if (method != null) {
        connection.method(method);
    }

    try {
        System.out.println("Calling " + connection.request().url());
        if (props.getMode() == Mode.TEST) {
            return null;
        }

        Connection.Response result = connection.execute();
        this.cookies.putAll(result.cookies());
        return result;
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
        return null;
    }
}
private void jButton1ActionPerformed( java.awt.event.ActionEvent evt) { // GEN-FIRST:event_jButton1ActionPerformed Connection.Response res = null; String url_country; String prefix; try { url_search = url_search + jTextField_search.getText() + "\""; res = Jsoup.connect(url_search).method(Method.GET).timeout(20000).execute(); doc = res.parse(); switch (jComboBox_country.getSelectedIndex()) { case 0: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=RU&label=%20Russian%20Federation&filterType=countrynational&searchType=ALL&checked=true"; prefix = "/ru"; break; case 1: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=BR&label=Brazil&filterType=country&searchType=ALL&checked=true"; prefix = "/br"; break; case 2: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=CN&label=China&filterType=country&searchType=ALL&checked=true"; prefix = "/cn"; break; case 3: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=ZA&label=South%20Africa&filterType=country&searchType=ALL&checked=true"; prefix = "/za"; break; case 4: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=IN&label=India&filterType=country&searchType=ALL&checked=true"; prefix = "/in"; break; default: url_country = "http://ru.kompass.com/en/searchCompanies/facet?value=RU&label=%20Russian%20Federation&filterType=countrynational&searchType=ALL&checked=true"; prefix = "/ru"; } doc = Jsoup.connect(url_country) .cookies(res.cookies()) .userAgent( "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36") .timeout(20000) .get(); Elements sheet = doc.select("#paginatorDivId > li > a"); for (int i = 0; i < sheet.size(); i++) { sheet_urls[i] = "http://ru.kompass.com" + sheet.get(i).attr("href"); jTextArea_sheets.append(sheet_urls[i] + "\n"); } jTextArea_company.setText(""); count_urls = 0; int count_sheets = sheet.size(); jLabel_sheets.setText(Integer.toString(count_sheets)); for (int s = 0; s < 
count_sheets; s++) { doc = Jsoup.connect(sheet_urls[s]) .cookies(res.cookies()) .userAgent( "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36") .timeout(20000) .get(); Elements links = doc.select("div.details > h2 > a[href]"); for (int i = 0; i < links.size(); i++) { String buf_str = links.get(i).attr("href"); urls[count_urls] = "http://ru.kompass.com" + buf_str.substring(buf_str.indexOf(prefix, 0)); jTextArea_company.append(urls[count_urls] + "\n"); count_urls++; } jLabel_name.setText(doc.title()); jLabel1.setText(String.valueOf(count_urls)); } } catch (IOException ex) { Logger.getLogger(main_window.class.getName()).log(Level.SEVERE, null, ex); } } // GEN-LAST:event_jButton1ActionPerformed