public static void main(String[] args) { if (args.length == 0) { System.out.println("You must specify the idRun you want to relaunch"); } try { String idRun = args[0]; DataBaseManagement.instantiante_connection(); DataBaseManagement.reopen(idRun); String idCheck = DataBaseManagement.get_check_id(idRun); // getting the target associated with the group // ResultSet target_resultSet = DataBaseManagement.search_target(idGroup); // while (target_resultSet.next()) { // target=target_resultSet.getString("name"); // idTarget=target_resultSet.getString("idTarget"); // System.out.println("target: " + target); // System.out.println("idTarget: " + idTarget); // } String target = "wwww.cdiscount.com"; String idTarget = "1"; // ResultSet missing_keyword_resultSet = DataBaseManagement.search_missing_keywords(idCheck); while (missing_keyword_resultSet.next()) { String idKeyword = missing_keyword_resultSet.getString("idKeyword"); String keyword_name = missing_keyword_resultSet.getString("name"); // System.out.println("idKeyword: " + idKeyword); System.out.println("Launching keyword: " + keyword_name); // asynchronous launch // GoogleSearchSaveTask beep=new GoogleSearchSaveTask(checkId, idTarget, // idKeyword,keyword_name); // Synchronous launch but waiting after RankInfo loc_info = ranking_keyword(keyword_name, target); DataBaseManagement.insertKeyword( idCheck, idTarget, idKeyword, loc_info.getPosition(), loc_info.getUrl()); } // closing the run by inserting a stopping date ! DataBaseManagement.close_current_run(); } catch (SQLException e) { e.printStackTrace(); } finally { DataBaseManagement.close(); } }
@Override public void run() { System.out.println( "ReminderTask is completed by Java timer :" + keyword_to_search + idCheck + idTarget + idKeyword); timer.cancel(); // Not necessary because we call System.exit // System.exit(0); //Stops the AWT thread (and everything else) RankInfo loc_info = ranking_keyword(keyword_to_search); try { DataBaseManagement.insertKeyword( idCheck, idTarget, idKeyword, loc_info.getPosition(), loc_info.getUrl()); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("Error when database saving " + keyword_to_search); } }
public static void main(String[] args) { System.setProperty("http.agent", ""); // we here add just a single user agent user_agents.add( "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)"); try { DataBaseManagement.instantiante_connection(); int counter = DataBaseManagement.check_alive_run(); if (counter >= 1) { System.out.println("Cron : Fatal, another job is running"); return; } java.sql.Date current_Date = new java.sql.Date(System.currentTimeMillis()); long generated_run_id = DataBaseManagement.insertRunById(current_Date); String runId = Long.toString(generated_run_id); System.out.println("Generated run id for the cron job" + runId); // getting our groups ResultSet group_resultSet = DataBaseManagement.search_group(args); // we here loop over each group while (group_resultSet.next()) { String idGroup = group_resultSet.getString("idGroup"); String name = group_resultSet.getString("name"); String module = group_resultSet.getString("module"); String options = group_resultSet.getString("options"); System.out.println("idGroup: " + idGroup); System.out.println("module: " + module); System.out.println("name: " + name); System.out.println("options: " + options); // Getting the targeted site to SERP // for the moment we only target cdiscount ! ResultSet target_resultSet = DataBaseManagement.search_target(idGroup); String target = null; String idTarget = null; while (target_resultSet.next()) { target = target_resultSet.getString("name"); idTarget = target_resultSet.getString("idTarget"); System.out.println("target: " + target); System.out.println("idTarget: " + idTarget); } // inserting a check row in the check table long idCheck = DataBaseManagement.insertCheckById( current_Date, idGroup, Long.toString(generated_run_id)); String checkId = Long.toString(idCheck); System.out.println("Generated check id for the cron job" + checkId); // select the keywords to update ResultSet keyword_resultSet = DataBaseManagement.search_keywords(idGroup); while (keyword_resultSet.next()) { String idKeyword = keyword_resultSet.getString("idKeyword"); String keyword_name = keyword_resultSet.getString("name"); // System.out.println("idKeyword: " + idKeyword); System.out.println("Launching keyword: " + keyword_name); // asynchronous launch // GoogleSearchSaveTask beep=new GoogleSearchSaveTask(checkId, idTarget, // idKeyword,keyword_name); // Synchronous launch but waiting after RankInfo loc_info = proxy_ranking_keyword(keyword_name, target); DataBaseManagement.insertKeyword( checkId, idTarget, idKeyword, loc_info.getPosition(), loc_info.getUrl()); } // closing the run by inserting a stopping date ! DataBaseManagement.close_current_run(); } } catch (SQLException e) { e.printStackTrace(); } finally { DataBaseManagement.close(); } }
public static RankInfo proxy_ranking_keyword(String keyword, String targe_name) { RankInfo info = new RankInfo(); keyword = keyword.replace(" ", "%20"); info.setKeyword(keyword); // we here fetch up to three paginations int nb_depth = 3; long startTimeMs = System.currentTimeMillis(); org.jsoup.nodes.Document doc; int depth = 0; int nb_results = 0; int my_rank = 30; String my_url = ""; boolean found = false; while (depth < nb_depth && !found) { try { // we wait between x and xx seconds Thread.sleep(randInt(min_number_of_wait_times, max_number_of_wait_times) * 1000); System.out.println("Fetching a new page"); String constructed_url = "https://www.google.fr/search?q=" + keyword + "&start=" + Integer.toString(depth * 10); // we here use our properly configured squid proxy on port 3128 on localhost Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("localhost", 3128)); URL url = new URL(constructed_url); HttpURLConnection connection = (HttpURLConnection) url.openConnection(proxy); connection.setConnectTimeout(1000000000); String randomAgent = randomUserAgent(); connection.setRequestProperty("User-Agent", randomAgent); connection.connect(); InputStreamReader in = new InputStreamReader((InputStream) connection.getContent()); BufferedReader buff = new BufferedReader(in); StringBuilder builder = new StringBuilder(); String line; do { line = buff.readLine(); builder.append(line); } while (line != null); String pageString = builder.toString(); connection.disconnect(); doc = Jsoup.parse(pageString); Elements serps = doc.select("h3[class=r]"); for (Element serp : serps) { Element link = serp.getElementsByTag("a").first(); if (link != null) { String linkref = link.attr("href"); if (linkref.startsWith("/url?q=") || linkref.startsWith("http://")) { nb_results++; if (linkref.startsWith("/url?q=")) { linkref = linkref.substring(7, linkref.indexOf("&")); } else { if (linkref.indexOf("&") != -1) { linkref = linkref.substring(0, linkref.indexOf("&")); } } } if (linkref.contains(targe_name) && !found) { my_rank = nb_results; my_url = linkref; found = true; } } } if (nb_results == 0) { System.out.println("Warning captcha"); } depth++; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } long taskTimeMs = System.currentTimeMillis() - startTimeMs; // System.out.println(taskTimeMs); info.setPosition(my_rank); info.setUrl(my_url); if (nb_results == 0) { System.out.println("Warning captcha"); } else { System.out.println("Number of links read in the pages : " + nb_results); } System.out.println("My rank : " + my_rank + " for keyword : " + keyword); System.out.println("My URL : " + my_url + " for keyword : " + keyword); return info; }
public static RankInfo ranking_keyword(String keyword, String targe_name) { RankInfo info = new RankInfo(); info.setKeyword(keyword); // we here fetch up to five paginations int nb_depth = 5; long startTimeMs = System.currentTimeMillis(); org.jsoup.nodes.Document doc; int depth = 0; int nb_results = 0; int my_rank = 50; String my_url = ""; boolean found = false; while (depth < nb_depth && !found) { try { // we wait between 30 and 70 seconds Thread.sleep(randInt(30, 50) * 1000); System.out.println("Fetching a new page"); doc = Jsoup.connect( "https://www.google.fr/search?q=" + keyword + "&start=" + Integer.toString(depth * 10)) .userAgent( "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)") .referrer("accounterlive.com") .ignoreHttpErrors(true) .timeout(0) .get(); Elements serps = doc.select("h3[class=r]"); for (Element serp : serps) { Element link = serp.getElementsByTag("a").first(); String linkref = link.attr("href"); if (linkref.startsWith("/url?q=")) { nb_results++; linkref = linkref.substring(7, linkref.indexOf("&")); } if (linkref.contains(targe_name)) { my_rank = nb_results; my_url = linkref; found = true; } // System.out.println("Link ref: "+linkref); // System.out.println("Title: "+serp.text()); } if (nb_results == 0) { System.out.println("Warning captcha"); } depth++; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } long taskTimeMs = System.currentTimeMillis() - startTimeMs; // System.out.println(taskTimeMs); info.setPosition(my_rank); info.setUrl(my_url); if (nb_results == 0) { System.out.println("Warning captcha"); } else { System.out.println("Number of links : " + nb_results); } System.out.println("My rank : " + my_rank + " for keyword : " + keyword); System.out.println("My URL : " + my_url + " for keyword : " + keyword); return info; }