/**
   * Fetches the body of {@code url} with a direct (non-proxied) HTTP GET.
   *
   * <p>The request is attempted at most twice: if the first attempt throws, the failure is logged
   * and the request is issued once more. Every attempt releases its own connection, including
   * when the {@code GetMethod} could not be constructed.
   *
   * @param url Url to fetch the pagesource for
   * @param followRedirects value passed to {@code GetMethod.setFollowRedirects}
   * @param doAuthentication value passed to {@code GetMethod.setDoAuthentication}
   * @return the response body, or {@code null} when both attempts fail
   */
  public String getPageSourceWithoutProxy(
      String url, boolean followRedirects, boolean doAuthentication) {

    String pageSouce = null;
    HttpClient httpClient = new HttpClient();
    // One initial attempt plus a single retry, matching the original behaviour.
    for (int attempt = 0; attempt < 2; attempt++) {
      GetMethod getMethod = null;
      try {
        getMethod = new GetMethod(url);
        getMethod.setFollowRedirects(followRedirects);
        getMethod.setDoAuthentication(doAuthentication);
        httpClient.executeMethod(getMethod);
        pageSouce = getMethod.getResponseBodyAsString();
        break;
      } catch (Exception e) {
        l.error(e + "  " + e.getMessage() + "exception occured for url " + url);
      } finally {
        // getMethod is still null when the constructor itself rejected the url.
        if (getMethod != null) {
          getMethod.releaseConnection();
        }
      }
    }
    return pageSouce;
  }
Пример #2
0
 /**
  * Translates an exception into the message and status code carried by {@code response}.
  *
  * <p>Database-related exceptions map to {@code ERR_CODE_DB_ERROR}, a {@code
  * NullPointerException} to {@code ERR_CODE_UNKNOWN}, a {@code ConstException} to its own code,
  * and an {@code IllegalThreadStateException} to {@code ERR_CODE_INVALID_THREAD_STOP}. Anything
  * else is delegated to the three-argument overload with {@code ERR_CODE_UNKNOWN}.
  *
  * @param ex the exception to report
  * @param response the response object to populate
  */
 public void setErrResponse(Exception ex, Response response) {
   final boolean databaseFailure =
       ex instanceof java.sql.SQLException
           || ex instanceof com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException
           || ex instanceof org.springframework.jdbc.BadSqlGrammarException
           || ex instanceof org.springframework.dao.InvalidDataAccessApiUsageException
           || ex instanceof org.springframework.dao.DataAccessException
           || ex instanceof org.springframework.web.util.NestedServletException
           || ex
               instanceof
               com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException;

   if (databaseFailure) {
     response.setResponseMsg(ex.getMessage());
     response.setResponseStatus(ConstException.ERR_CODE_DB_ERROR);
     return;
   }

   if (ex instanceof java.lang.NullPointerException) {
     response.setResponseMsg(ex.getMessage());
     response.setResponseStatus(ConstException.ERR_CODE_UNKNOWN);
     return;
   }

   if (ex instanceof ConstException) {
     // A ConstException carries its own status code.
     ConstException coded = (ConstException) ex;
     response.setResponseMsg(coded.getMessage());
     response.setResponseStatus(coded.getCode());
     return;
   }

   if (ex instanceof java.lang.IllegalThreadStateException) {
     response.setResponseMsg(ex.getMessage());
     response.setResponseStatus(ConstException.ERR_CODE_INVALID_THREAD_STOP);
     return;
   }

   // Unrecognised exception type: let the overload decide how to report it.
   setErrResponse(ex, response, ConstException.ERR_CODE_UNKNOWN);
 }
  /**
   * Returns the jsonhub store key, resolving and caching it in {@code storeKey} on first use.
   *
   * <p>When the field is still {@code null}, the key is looked up in {@code KeyStorage} under the
   * name {@code "jsonhub-store-key"}. If no entry exists, {@code register()} is called, the
   * {@code "storeKey"} value of its result is cached, and a new {@code Store} entry is added to
   * {@code KeyStorage}.
   *
   * @return the store key
   * @throws RuntimeException if lookup or registration fails; the original exception is attached
   *     as the cause
   */
  private String getStoreKey() {
    try {
      // NOTE(review): these println calls write the key itself to stdout — confirm this is
      // debug output that is acceptable in production.
      System.out.println("CURRENT STORE KEY VALUE: " + storeKey);

      if (storeKey == null) {
        Store store = KeyStorage.INSTANCE.getByName("jsonhub-store-key");

        if (store != null) {
          this.storeKey = store.getKey();

          System.out.println("OBJDB STORE KEY VALUE: " + storeKey);

        } else {
          JSONObject jshubkey = register();

          this.storeKey = jshubkey.getString("storeKey");

          System.out.println("REGISTRATION STORE KEY VALUE: " + storeKey);

          store = new Store();
          store.setName("jsonhub-store-key");
          store.setKey(storeKey);

          KeyStorage.INSTANCE.add(store);
        }
      }

      return storeKey;
    } catch (Exception e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new RuntimeException(
          "could not get jsonhub store key! \n Reason: \n" + e.getMessage(), e);
    }
  }
 /**
  * Makes an HTTP GET request to the given url and returns the response body.
  *
  * <p>Socket and connection-manager timeouts are both 40 seconds. Requests whose url contains
  * {@code "linkedin.com/countserv"} get additional browser-like headers. Any exception is logged
  * and swallowed, in which case the empty string is returned. The connection is always released.
  *
  * @param urlToFetch url to be connected
  * @param region not used by this method
  * @return the response body, or {@code ""} when the request fails
  * @throws IOException declared for callers; the current body catches every exception itself
  */
 public String makeRequest(String urlToFetch, String region) throws IOException {
   String responseBody = "";
   GetMethod httpget = null;
   try {
     HttpClientParams clientParams = new HttpClientParams();
     clientParams.setSoTimeout(40000);
     clientParams.setConnectionManagerTimeout(40000);
     HttpClient httpclient = new HttpClient(clientParams);
     httpget = new GetMethod(urlToFetch);
     if (urlToFetch.contains("linkedin.com/countserv")) {
       // NOTE(review): hard-coded session cookie; presumably captured from a browser — confirm
       // it is still valid and consider moving it to configuration.
       httpget.addRequestHeader("Host", "www.linkedin.com");
       httpget.addRequestHeader(
           "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:20.0) Gecko/20100101 Firefox/20.0");
       httpget.addRequestHeader(
           "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
       httpget.addRequestHeader(
           "Cookie",
           "X-LI-IDC=C1; bcookie=\"v=2&618ed56d-275f-4dcd-86cd-6a44c8421879\"; bscookie=\"v=1&201305170926410ddaf2a1-e238-4bf6-8a0a-ce8a3df25934AQFZX82SfErJuFQKMJV49JfQTvewhfzh\"; X-LI-IDC=C1");
     }
     httpclient.executeMethod(httpget);
     responseBody = httpget.getResponseBodyAsString();
   } catch (Exception e) {
     l.error(e + "  " + e.getMessage() + " url " + urlToFetch);
   } finally {
     // Return the connection to its manager even when the request failed.
     if (httpget != null) {
       httpget.releaseConnection();
     }
   }
   return responseBody;
 }
 /**
  * Posts a JSON-RPC {@code pos.plusones.get} request to the Google Plus endpoint for the given
  * url and returns the raw response text.
  *
  * <p>The request carries a developer key in the query string and a set of browser-like headers.
  * The response is read line by line and the lines are concatenated without line terminators.
  * Any exception is logged and swallowed; whatever was read up to that point is returned.
  *
  * <p>NOTE(review): the API key, authorization hash and cookies are hard-coded in this method;
  * they look like captured credentials and should be moved out of source control.
  *
  * @param url to fetch the data for
  * @param objProxyDao not used by this method
  * @return the response text, or {@code ""} when nothing could be read
  */
 public String getPostSourceGoogle(String url, ProxyDao objProxyDao) {
   StringBuilder source = new StringBuilder();
   org.apache.http.client.HttpClient httpClient = new DefaultHttpClient();
   try {
     HttpPost request =
         new HttpPost(
             "https://clients6.google.com/rpc?key=AIzaSyCKSbrvQasunBoV16zDH9R33D88CeLr9gQ");
     request.addHeader("Host", "clients6.google.com");
     request.addHeader(
         "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0");
     request.addHeader(
         "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
     request.addHeader("Accept-Language", "en-US,en;q=0.5");
     request.addHeader("X-Goog-AuthUser", "0");
     request.addHeader("Authorization", "SAPISIDHASH 8b80e6cb4f438e869e8f6cc227fca5cf8856c6ab");
     request.addHeader(
         "X-ClientDetails",
         "appVersion=5.0%20(Windows)&platform=Win32&userAgent=Mozilla%2F5.0%20(Windows%20NT%206.1%3B%20rv%3A18.0)%20Gecko%2F20100101%20Firefox%2F18.0");
     request.addHeader("Content-Type", "application/json; charset=UTF-8");
     request.addHeader("X-JavaScript-User-Agent", "google-api-javascript-client/1.0.0-alpha");
     request.addHeader("X-Origin", "https://plusone.google.com");
     request.addHeader("X-Referer", "https://plusone.google.com");
     request.addHeader("X-Goog-Encode-Response-If-Executable", "base64");
     request.addHeader(
         "Cookie",
         "PREF=ID=a426d8fb1e2750ee:U=6af15e4f74c74ed0:FF=0:LD=en:TM=1361159234:LM=1361169098:S=IFSJimDkqaNmvAhe; NID=67=shTVymtcFTEolwk0opYlWFkokW8qk9CqIGlSKZNTb6yJd684LQ5aJoSk5bMrQdp29SaMHo5sDCH48dxea7HgWXoPvBIdKd3xT7f3Pm9QsY3tgvscweUNp6FVTCGMJoypsV5OniwmQun0eVPUeFtauZbS; SID=DQAAAMIAAADKWgTA8SrvfpQcPBYOvtZMfeRs8gskEeTPpe4tGoK58rkzZNsIW0d0EawqI-0PmrxWF4Je7CbcWOdh0IaJtspvsAl2IaTftD28of_srBD-19rOobp9BRDsFNgro5V6NSKozVNTOl8cmFJUdBlUZ5iU5miI9X8lixN9kvjbydirkOKsb_ptkLELKHv0ZruwADrDNQm4-80T0accTOvBTT5MI_JP3kT9bjrvP3EOlE1dd71Va2VhpjPze_J5akbj3iQQA6ENor-1r9dN2G48UVGe; HSID=AJ3vWp905JFQ4Fsm4; SSID=ALbvnRrBXqNVAquz1; APISID=hJVIUcaXfkppC-OW/A6YIudTulrIg4f-HT; SAPISID=3EZqx7bM9IesGpqO/ARcqGVQeQoWyxxxnL");
     // Escape backslashes and double quotes so the url cannot break out of its JSON string.
     String jsonUrl = String.valueOf(url).replace("\\", "\\\\").replace("\"", "\\\"");
     // Encode the body as UTF-8 to agree with the Content-Type header sent above.
     StringEntity params =
         new StringEntity(
             "[{\"method\":\"pos.plusones.get\",\"id\":\"p\",\"params\":{\"nolog\":true,\"id\":\""
                 + jsonUrl
                 + "\",\"source\":\"widget\",\"userId\":\"@viewer\",\"groupId\":\"@self\"},\"jsonrpc\":\"2.0\",\"key\":\"p\",\"apiVersion\":\"v1\"}]",
             "UTF-8");
     request.setEntity(params);
     HttpResponse response = httpClient.execute(request);
     // NOTE(review): the response is decoded with the platform default charset — confirm
     // whether it should be read as UTF-8.
     BufferedReader rd =
         new BufferedReader(new InputStreamReader(response.getEntity().getContent()));
     try {
       String line;
       while ((line = rd.readLine()) != null) {
         source.append(line);
       }
     } finally {
       rd.close();
     }
   } catch (Exception ex) {
     l.error(ex + "  " + ex.getMessage() + " Exception occured for url " + url);
   } finally {
     httpClient.getConnectionManager().shutdown();
   }
   return source.toString();
 }
Пример #6
0
 /**
  * Generates a static HTML page from a template url.
  *
  * <p>Fetches {@code url} with an HTTP GET, passes the body through {@code formatPage}, and
  * writes the result with {@code writeHtml}. A non-200 status or any exception is logged at
  * fatal level and reported through the return value.
  *
  * @param url the url whose rendered output becomes the static page
  * @param htmlFileName the name handed to {@code writeHtml} for the generated file
  * @return {@code true} when the page was fetched and written, {@code false} otherwise
  */
 public boolean createHtmlPage(String url, String htmlFileName) {
   boolean status = false;
   int statusCode = 0;
   try {
     // Create an HttpClient instance to act as a simulated browser.
     httpClient = new HttpClient();
     // Charset HttpClient uses for content when the response does not declare one.
     httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "gbk");
     // Create the GET method instance.
     getMethod = new GetMethod(url);
     // Use the default recovery policy, which retries automatically (3 times) on failure.
     getMethod
         .getParams()
         .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
     // Declare the charset used for GET parameters so Chinese parameters are passed correctly.
     getMethod.addRequestHeader("Content-Type", "text/html;charset=gbk");
     // Execute the GET; 200 means success, any other status is treated as an error.
     statusCode = httpClient.executeMethod(getMethod);
     if (statusCode != 200) {
       logger.fatal("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "时出错!");
     } else {
       // Read the rendered result.
       sb = new StringBuffer();
       in = getMethod.getResponseBodyAsStream();
       // Decode with the response charset (which falls back to the configured content
       // charset) instead of the platform default.
       br = new BufferedReader(new InputStreamReader(in, getMethod.getResponseCharSet()));
       try {
         while ((line = br.readLine()) != null) {
           sb.append(line).append("\n");
         }
       } finally {
         // Close the reader even when reading fails part-way through.
         br.close();
       }
       page = sb.toString();
       // Replace relative paths with absolute ones so page resources still resolve.
       page = formatPage(page);
       // Write the result to the target static HTML file.
       writeHtml(htmlFileName, page);
       status = true;
     }
   } catch (Exception ex) {
     logger.fatal("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "时出错:" + ex.getMessage());
   } finally {
     // Release the http connection; getMethod may be null if construction failed.
     if (getMethod != null) {
       getMethod.releaseConnection();
     }
   }
   return status;
 }
    /**
     * Builds an {@link SSLContext} whose trust manager accepts every certificate chain.
     *
     * <p>WARNING: the installed trust manager performs no validation at all, so connections made
     * with this context are open to man-in-the-middle attacks. NOTE(review): confirm this is
     * only used where that is acceptable (e.g. test environments).
     *
     * @return an initialised context that trusts all client and server certificates
     * @throws HttpClientError if the context cannot be created or initialised
     */
    private static SSLContext createEasySSLContext() {
      try {
        TrustManager[] trustAllCerts =
            new TrustManager[] {
              new X509TrustManager() {
                public X509Certificate[] getAcceptedIssuers() {
                  // The X509TrustManager contract requires a non-null, possibly empty, array.
                  return new X509Certificate[0];
                }

                // Intentionally empty: every client certificate chain is accepted.
                public void checkClientTrusted(X509Certificate[] certs, String authType) {}

                // Intentionally empty: every server certificate chain is accepted.
                public void checkServerTrusted(X509Certificate[] certs, String authType) {}
              }
            };

        // "TLS" is the standard protocol name; "SSL" is a legacy name.
        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, trustAllCerts, null);
        return context;
      } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        HttpClientError error = new HttpClientError(e.toString());
        // Attach the original exception so its stack trace survives the rethrow.
        error.initCause(e);
        throw error;
      }
    }
  /**
   * Fetches {@code url} with an HTTP GET routed through the proxy currently held in {@code
   * ProxyDao.objProxyData} and returns a delimited status record.
   *
   * <p>The record has the form {@code status@@@@exception@@@@page@@@@url}:
   *
   * <ul>
   *   <li>{@code status} is the HTTP status code, or {@code 0} when no response was received;
   *   <li>{@code exception} is a single space on success, otherwise the exception message;
   *   <li>{@code page} is the response body, {@code "<PROXY ERROR>"} for a 4xx/5xx status, or a
   *       single space when nothing was fetched.
   * </ul>
   *
   * <p>For urls containing {@code "bing.com"} a market cookie derived from {@code region} is
   * added. Redirect following and authentication are always enabled.
   *
   * @param url Url to fetch the pagesource for
   * @param followRedirect currently ignored; redirects are always followed
   * @param doAuthentication currently ignored; authentication is always enabled
   * @param region the local region of the url (e.g. {@code "co.uk"}); selects the bing market
   * @param useErrsy not used by this method
   * @param google not used by this method
   * @return the {@code @@@@}-delimited record described above
   */
  public String getPageSourceWithProxy(
      String url,
      boolean followRedirect,
      boolean doAuthentication,
      String region,
      Boolean useErrsy,
      String google) {

    String page = " ";
    String exception = " ";
    int i = 0;

    HttpClientParams clientParams = new HttpClientParams();
    clientParams.setSoTimeout(40000);
    clientParams.setConnectionManagerTimeout(40000);
    HttpClient httpclient = new HttpClient(clientParams);

    try {
      ProxyData objProxyData = ProxyDao.objProxyData;
      if (objProxyData == null) {
        // No proxy available: report an empty record instead of going out unproxied.
        return i + "@@@@" + exception + "@@@@" + page + "@@@@" + url;
      }
      httpclient
          .getHostConfiguration()
          .setProxy(objProxyData.getIPAddress(), objProxyData.getPortNo());
    } catch (Exception e) {
      return i + "@@@@" + exception + "@@@@" + page + "@@@@" + url;
    }

    GetMethod getmethod = null;
    try {
      getmethod = new GetMethod(url);
      getmethod.addRequestHeader(
          "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0");
      if (url.contains("bing.com")) {
        getmethod.addRequestHeader("Cookie", bingMarketCookie(region));
      }
      getmethod.setFollowRedirects(true);
      getmethod.setDoAuthentication(true);
      httpclient.getParams().setAuthenticationPreemptive(true);
      httpclient.setState(new HttpState());

      i = httpclient.executeMethod(getmethod);
      if (i / 100 == 4 || i / 100 == 5) {
        page = "<PROXY ERROR>";
      } else {
        page = getmethod.getResponseBodyAsString();
      }
    } catch (Exception ex) {
      // Timeouts, socket errors and everything else are reported the same way.
      exception = ex.getMessage();
      l.error(ex + "  " + ex.getMessage() + "Exception occured for url" + url);
    } finally {
      // getmethod is still null when the constructor itself rejected the url.
      if (getmethod != null) {
        getmethod.releaseConnection();
      }
    }
    return i + "@@@@" + exception + "@@@@" + page + "@@@@" + url;
  }

  /** Returns the bing Cookie header value for a region; unknown or null regions use en-US. */
  private String bingMarketCookie(String region) {
    final String[][] marketsByRegion = {
      {"co.uk", "en-GB"},
      {"com.sg", "en-SG"},
      {"com.au", "en-AU"},
      {"co.in", "en-IN"},
      {"ca", "en-CA"},
      {"com.ph", "en-PH"},
      {"com.my", "en-WW"},
      {"it", "en-IT"},
    };
    String market = "en-US";
    for (String[] entry : marketsByRegion) {
      if (entry[0].equalsIgnoreCase(region)) {
        market = entry[1];
        break;
      }
    }
    return "_FP=mkt=" + market + ";SRCHHPGUSR=NEWWND=0&NRSLT=50&SRCHLANG=&AS=1;";
  }
  /**
   * Fetches the page source of {@code url} through {@code getPageSourceWithProxy}, repeating the
   * request until an acceptable page is obtained.
   *
   * <p>Each result is split on {@code "@@@@"}; the second field is the exception text and the
   * third is the page. A result is rejected, and the request repeated, when the exception field
   * is anything other than a single space, when the page is empty or a single space, or when the
   * page contains one of the known proxy-error, not-found, forbidden or captcha phrases.
   *
   * <p>NOTE(review): there is no upper bound on the number of attempts, so this method does not
   * return while every fetch keeps being rejected.
   *
   * @param url Url to fetch the pagesource for; it is first passed through {@code
   *     ReplaceAllHtmlTags.replaceUrlsFrenchCanCode}
   * @param region the local region of a given url
   * @param google forwarded unchanged to {@code getPageSourceWithProxy}
   * @return the first page that passes all checks
   */
  public String getSource(String url, String region, String google) {
    final String target = ReplaceAllHtmlTags.replaceUrlsFrenchCanCode(url);
    String body;
    do {
      String[] fields =
          getPageSourceWithProxy(target, true, true, region, false, google).split("@@@@");
      // A usable record has a page field and an exception field of exactly one space.
      boolean fetchedCleanly = fields.length >= 3 && fields[1].equals(" ");
      body = fetchedCleanly ? fields[2] : "";
    } while (body.equals("") || isRejectedPage(body));
    return body;
  }

  /** Tells whether a fetched page is a known error, block or captcha page. */
  private boolean isRejectedPage(String page) {
    final String[] markers = {
      "407 Proxy Authentication Required",
      "<span class=\"uiButtonText\">Rejestracja</span></a><span class=\"signup_box_content\">",
      "<title>ERROR: The requested URL could not be retrieved</title>",
      "404 Not Found",
      "The page cannot be found",
      "<p>HTTP Error 404. The requested resource is not found.</p>",
      "Error 404: Not Found",
      "HTTP Status 404 -",
      "403 Access Denied",
      "<h1>ERROR</h1>",
      "400 Bad request",
      "403 Forbidden",
      "<title>Site not allowed - PacketIP</title>",
      "502 Bad Gateway",
      "but your computer or network may be sending automated queries",
      "m.baidu.com",
      "<PROXY ERROR>",
      "Pardon the interruption",
    };
    for (String marker : markers) {
      if (page.contains(marker)) {
        return true;
      }
    }
    // The captcha page is only recognised when both fragments are present.
    if (page.contains("To continue, please type the characters below:<br>")
        && page.contains("id=\"captcha\"")) {
      return true;
    }
    return page.equals(" ");
  }