Example #1
0
  /**
   * Relaunches an existing run for the keywords that are still missing.
   *
   * <p>The run id is read from {@code args[0]}. The run is reopened, its check id is looked up,
   * every keyword returned by {@code DataBaseManagement.search_missing_keywords} is ranked
   * synchronously with {@code ranking_keyword} and stored with {@code insertKeyword}, and the
   * run is finally closed again.
   *
   * @param args command-line arguments; {@code args[0]} must be the id of the run to relaunch
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.out.println("You must specify the idRun you want to relaunch");
      // Nothing can be relaunched without a run id; stop here instead of failing on args[0].
      return;
    }
    try {
      String idRun = args[0];
      DataBaseManagement.instantiante_connection();
      DataBaseManagement.reopen(idRun);
      String idCheck = DataBaseManagement.get_check_id(idRun);

      // The target is hard-coded here rather than looked up from the group.
      // The host must be spelled exactly as it appears in result links, otherwise
      // ranking_keyword never finds a match.
      String target = "www.cdiscount.com";
      String idTarget = "1";

      ResultSet missing_keyword_resultSet = DataBaseManagement.search_missing_keywords(idCheck);
      while (missing_keyword_resultSet.next()) {
        String idKeyword = missing_keyword_resultSet.getString("idKeyword");
        String keyword_name = missing_keyword_resultSet.getString("name");
        System.out.println("Launching keyword: " + keyword_name);

        // Synchronous launch: each keyword is ranked and saved before the next one starts.
        RankInfo loc_info = ranking_keyword(keyword_name, target);
        DataBaseManagement.insertKeyword(
            idCheck, idTarget, idKeyword, loc_info.getPosition(), loc_info.getUrl());
      }

      // closing the run by inserting a stopping date !
      DataBaseManagement.close_current_run();
    } catch (SQLException e) {
      e.printStackTrace();
    } finally {
      DataBaseManagement.close();
    }
  }
 /**
  * Announces the task on standard output, cancels the timer, ranks {@code keyword_to_search}
  * and stores the resulting position and URL through {@code DataBaseManagement.insertKeyword}.
  *
  * <p>A {@link SQLException} raised while saving is reported on the console and not rethrown.
  */
 @Override
 public void run() {
   String completionMessage =
       "ReminderTask is completed by Java timer :"
           + keyword_to_search
           + idCheck
           + idTarget
           + idKeyword;
   System.out.println(completionMessage);

   // The timer is cancelled explicitly; no System.exit call follows in this method.
   timer.cancel();

   RankInfo rankInfo = ranking_keyword(keyword_to_search);
   try {
     DataBaseManagement.insertKeyword(
         idCheck, idTarget, idKeyword, rankInfo.getPosition(), rankInfo.getUrl());
   } catch (SQLException saveFailure) {
     saveFailure.printStackTrace();
     System.out.println("Error when database saving " + keyword_to_search);
   }
 }
  /**
   * Cron entry point: creates a new run and ranks every keyword of every selected group.
   *
   * <p>The method aborts immediately when {@code DataBaseManagement.check_alive_run()} reports
   * at least one run still alive. Otherwise it inserts a run row, iterates over the groups
   * returned by {@code search_group(args)}, inserts one check row per group, ranks each keyword
   * of the group through {@code proxy_ranking_keyword} and stores the result. The run is closed
   * once, after all groups have been processed.
   *
   * @param args forwarded unchanged to {@code DataBaseManagement.search_group}
   */
  public static void main(String[] args) {
    System.setProperty("http.agent", "");
    // we here add just a single  user agent
    user_agents.add(
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB;     rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)");

    try {
      DataBaseManagement.instantiante_connection();
      int counter = DataBaseManagement.check_alive_run();
      if (counter >= 1) {
        System.out.println("Cron : Fatal, another job is running");
        return;
      }
      java.sql.Date current_Date = new java.sql.Date(System.currentTimeMillis());
      long generated_run_id = DataBaseManagement.insertRunById(current_Date);
      String runId = Long.toString(generated_run_id);
      System.out.println("Generated run id for the cron job" + runId);

      // getting our groups
      ResultSet group_resultSet = DataBaseManagement.search_group(args);

      // we here loop over each group
      while (group_resultSet.next()) {
        String idGroup = group_resultSet.getString("idGroup");
        String name = group_resultSet.getString("name");
        String module = group_resultSet.getString("module");
        String options = group_resultSet.getString("options");
        System.out.println("idGroup: " + idGroup);
        System.out.println("module: " + module);
        System.out.println("name: " + name);
        System.out.println("options: " + options);

        // Getting the targeted site to SERP. When several rows are returned the last one wins.
        ResultSet target_resultSet = DataBaseManagement.search_target(idGroup);
        String target = null;
        String idTarget = null;
        while (target_resultSet.next()) {
          target = target_resultSet.getString("name");
          idTarget = target_resultSet.getString("idTarget");
          System.out.println("target: " + target);
          System.out.println("idTarget: " + idTarget);
        }

        // inserting a check row in the check table
        long idCheck = DataBaseManagement.insertCheckById(current_Date, idGroup, runId);
        String checkId = Long.toString(idCheck);
        System.out.println("Generated check id for the cron job" + checkId);

        if (target == null) {
          // Ranking against a null target would fail inside proxy_ranking_keyword as soon as
          // a result link is compared with it, aborting the remaining groups as well.
          System.out.println("No target found for group " + idGroup + ", skipping its keywords");
          continue;
        }

        // select the keywords to update
        ResultSet keyword_resultSet = DataBaseManagement.search_keywords(idGroup);
        while (keyword_resultSet.next()) {
          String idKeyword = keyword_resultSet.getString("idKeyword");
          String keyword_name = keyword_resultSet.getString("name");
          System.out.println("Launching keyword: " + keyword_name);

          // Synchronous launch: each keyword is ranked and saved before the next one starts.
          RankInfo loc_info = proxy_ranking_keyword(keyword_name, target);
          DataBaseManagement.insertKeyword(
              checkId, idTarget, idKeyword, loc_info.getPosition(), loc_info.getUrl());
        }
      }

      // closing the run by inserting a stopping date !
      // Done once after the loop so the run is not closed while later groups are still being
      // processed, and so that a run with no group at all does not stay open.
      DataBaseManagement.close_current_run();
    } catch (SQLException e) {
      e.printStackTrace();
    } finally {
      DataBaseManagement.close();
    }
  }
  /**
   * Looks for {@code targe_name} in up to three Google result pages fetched through the local
   * HTTP proxy on port 3128.
   *
   * <p>Before each request the method sleeps for a random number of seconds drawn from {@code
   * randInt(min_number_of_wait_times, max_number_of_wait_times)}. Links are read from {@code
   * h3[class=r]} elements; those starting with {@code /url?q=} or {@code http://} are counted
   * and reduced to their bare URL. The search stops at the first link containing {@code
   * targe_name} or once all pages have been read. If the thread is interrupted the interrupt
   * flag is restored and the search stops early.
   *
   * @param keyword search keyword; spaces are replaced by {@code %20} before use
   * @param targe_name text to look for in each result link
   * @return a {@code RankInfo} holding the (space-escaped) keyword, the number of counted links
   *     at the time of the first match (30 when nothing matched) and the matching URL (empty
   *     when nothing matched)
   */
  public static RankInfo proxy_ranking_keyword(String keyword, String targe_name) {
    RankInfo info = new RankInfo();
    keyword = keyword.replace(" ", "%20");
    info.setKeyword(keyword);
    // we here fetch up to three paginations
    int nb_depth = 3;
    int depth = 0;
    int nb_results = 0;
    int my_rank = 30;
    String my_url = "";
    boolean found = false;
    while (depth < nb_depth && !found) {
      try {
        // we wait between x and xx seconds
        Thread.sleep(randInt(min_number_of_wait_times, max_number_of_wait_times) * 1000);
        System.out.println("Fetching a new page");
        String constructed_url =
            "https://www.google.fr/search?q=" + keyword + "&start=" + Integer.toString(depth * 10);
        // we here use our properly configured squid proxy on port 3128 on localhost
        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("localhost", 3128));
        URL url = new URL(constructed_url);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection(proxy);
        StringBuilder builder = new StringBuilder();
        try {
          connection.setConnectTimeout(1000000000);
          String randomAgent = randomUserAgent();
          connection.setRequestProperty("User-Agent", randomAgent);
          connection.connect();
          BufferedReader buff =
              new BufferedReader(new InputStreamReader(connection.getInputStream()));
          try {
            String line;
            // Stop on the null end-of-stream marker before appending, and keep the line breaks
            // so that tokens split across lines are not glued together.
            while ((line = buff.readLine()) != null) {
              builder.append(line).append('\n');
            }
          } finally {
            buff.close();
          }
        } finally {
          // Release the connection even when reading fails.
          connection.disconnect();
        }

        org.jsoup.nodes.Document doc = Jsoup.parse(builder.toString());
        Elements serps = doc.select("h3[class=r]");
        for (Element serp : serps) {
          Element link = serp.getElementsByTag("a").first();
          if (link != null) {
            String linkref = link.attr("href");
            if (linkref.startsWith("/url?q=") || linkref.startsWith("http://")) {
              nb_results++;
              int amp = linkref.indexOf("&");
              if (linkref.startsWith("/url?q=")) {
                // Drop the "/url?q=" prefix and any trailing parameters; a link without '&'
                // is kept up to its end instead of failing on a negative index.
                linkref = amp != -1 ? linkref.substring(7, amp) : linkref.substring(7);
              } else if (amp != -1) {
                linkref = linkref.substring(0, amp);
              }
            }
            if (linkref.contains(targe_name) && !found) {
              my_rank = nb_results;
              my_url = linkref;
              found = true;
            }
          }
        }
        if (nb_results == 0) {
          System.out.println("Warning captcha");
        }
        depth++;
      } catch (IOException e) {
        // NOTE(review): depth is not advanced here, so the same page is fetched again after
        // the next wait for as long as the failure persists.
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
        // Restore the flag and stop: continuing would make every following sleep fail at once.
        Thread.currentThread().interrupt();
        break;
      }
    }
    info.setPosition(my_rank);
    info.setUrl(my_url);
    if (nb_results == 0) {
      System.out.println("Warning captcha");
    } else {
      System.out.println("Number of links read in the pages : " + nb_results);
    }
    System.out.println("My rank : " + my_rank + " for keyword : " + keyword);
    System.out.println("My URL : " + my_url + " for keyword : " + keyword);
    return info;
  }
Example #5
0
 /**
  * Looks for {@code targe_name} in up to five Google result pages fetched directly with Jsoup.
  *
  * <p>Before each request the method sleeps for a random number of seconds drawn from {@code
  * randInt(30, 50)}. Links are read from {@code h3[class=r]} elements; those starting with
  * {@code /url?q=} are counted and reduced to their bare URL. The search stops after the page
  * containing the first link that contains {@code targe_name}, or once all pages have been
  * read. If the thread is interrupted the interrupt flag is restored and the search stops early.
  *
  * @param keyword search keyword, inserted as-is into the query string
  * @param targe_name text to look for in each result link
  * @return a {@code RankInfo} holding the keyword, the number of counted links at the time of
  *     the first match (50 when nothing matched) and the matching URL (empty when nothing
  *     matched)
  */
 public static RankInfo ranking_keyword(String keyword, String targe_name) {
   RankInfo info = new RankInfo();
   info.setKeyword(keyword);
   // we here fetch up to five paginations
   int nb_depth = 5;
   org.jsoup.nodes.Document doc;
   int depth = 0;
   int nb_results = 0;
   int my_rank = 50;
   String my_url = "";
   boolean found = false;
   while (depth < nb_depth && !found) {
     try {
       // we wait a random number of seconds given by randInt(30, 50)
       Thread.sleep(randInt(30, 50) * 1000);
       System.out.println("Fetching a new page");
       doc =
           Jsoup.connect(
                   "https://www.google.fr/search?q="
                       + keyword
                       + "&start="
                       + Integer.toString(depth * 10))
               .userAgent(
                   "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB;     rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)")
               .referrer("accounterlive.com")
               .ignoreHttpErrors(true)
               .timeout(0)
               .get();
       Elements serps = doc.select("h3[class=r]");
       for (Element serp : serps) {
         Element link = serp.getElementsByTag("a").first();
         if (link == null) {
           // A heading without an anchor carries no result link.
           continue;
         }
         String linkref = link.attr("href");
         if (linkref.startsWith("/url?q=")) {
           nb_results++;
           // Drop the "/url?q=" prefix and any trailing parameters; a link without '&' is
           // kept up to its end instead of failing on a negative index.
           int amp = linkref.indexOf("&");
           linkref = amp != -1 ? linkref.substring(7, amp) : linkref.substring(7);
         }
         // Only the first match counts; later matches on the same page must not overwrite it.
         if (!found && linkref.contains(targe_name)) {
           my_rank = nb_results;
           my_url = linkref;
           found = true;
         }
       }
       if (nb_results == 0) {
         System.out.println("Warning captcha");
       }
       depth++;
     } catch (IOException e) {
       // NOTE(review): depth is not advanced here, so the same page is fetched again after
       // the next wait for as long as the failure persists.
       e.printStackTrace();
     } catch (InterruptedException e) {
       e.printStackTrace();
       // Restore the flag and stop: continuing would make every following sleep fail at once.
       Thread.currentThread().interrupt();
       break;
     }
   }
   info.setPosition(my_rank);
   info.setUrl(my_url);
   if (nb_results == 0) {
     System.out.println("Warning captcha");
   } else {
     System.out.println("Number of links : " + nb_results);
   }
   System.out.println("My rank : " + my_rank + " for keyword : " + keyword);
   System.out.println("My URL : " + my_url + " for keyword : " + keyword);
   return info;
 }