/**
 * Walks the result pages served under {@code istexApiUrl} and hands the record ids of each
 * page to {@link #processRecords(List)}.
 *
 * <p>The first request asks for all documents ({@code q=*}) with a page size of 5000 starting
 * at offset 0. Each response is parsed as JSON: the {@code "id"} of every entry in
 * {@code "hits"} is collected, and the {@code "nextPageURI"} field gives the next request.
 * The walk ends when a response has no {@code "nextPageURI"}.
 *
 * <p>Failure handling:
 * <ul>
 *   <li>If a page cannot be fetched or parsed, the error is logged and the harvest stops
 *       (retrying the same request unconditionally could loop forever).</li>
 *   <li>If {@code processRecords} fails for a page, the error is logged and the harvest
 *       continues with the next page.</li>
 * </ul>
 *
 * @return the ids collected from the last page that was requested (possibly empty, never
 *         {@code null}); ids of earlier pages are not retained, only processed
 */
public List<String> harvest() {
  String size = "5000"; // max size we can get.
  String request = istexApiUrl + "/" + "?q=*&size=" + size + "&from=0";
  List<String> records = new ArrayList<String>();
  int count = 0;

  while (request != null) {
    // A fresh list per page: only the current page's ids are kept in memory.
    records = new ArrayList<String>();
    String nextRequest;

    try {
      logger.info(request);
      HttpURLConnection urlConn = (HttpURLConnection) new URL(request).openConnection();
      urlConn.setDoInput(true);
      urlConn.setRequestMethod("GET");

      String json;
      InputStream in = urlConn.getInputStream();
      try {
        json = Utilities.convertStreamToString(in);
      } finally {
        // Always release the response stream, even when reading fails.
        in.close();
      }

      JSONObject jsonObject = (JSONObject) new JSONParser().parse(json);
      nextRequest = (String) jsonObject.get("nextPageURI");

      JSONArray hits = (JSONArray) jsonObject.get("hits");
      if (hits != null) {
        Iterator<?> it = hits.iterator();
        while (it.hasNext()) {
          JSONObject hit = (JSONObject) it.next();
          records.add((String) hit.get("id"));
        }
      }
    } catch (Exception e) {
      // Broad catch on purpose: network, JSON parsing and cast failures all mean the page is
      // unusable, and without it the next page URI is unknown, so the walk cannot continue.
      logger.error("Could not fetch or parse " + request + ", stopping the harvest.", e);
      break;
    }

    count += records.size();

    try {
      processRecords(records);
    } catch (Exception e) {
      // Best effort: a failure while processing one page must not prevent the next pages
      // from being harvested.
      logger.error("Could not process all records returned by " + request, e);
    }

    request = nextRequest;
  }

  logger.info(" count :" + count);
  return records;
}
 /**
  * Downloads the TEI full text of each given record and passes it to
  * {@code mm.insertExternalTeiDocument}.
  *
  * <p>For every id, a GET request is sent to {@code istexApiUrl/<id>/fulltext/tei} and the
  * response stream is handed, unread, to {@code mm.insertExternalTeiDocument} together with
  * the id, the source label {@code "istex"}, the collection
  * {@code MongoCollectionsInterface.ISTEX_TEIS} and {@code date}. The first failing record
  * aborts the method; the remaining ids are not processed.
  *
  * @param records ids of the records to download; {@code null} is treated as empty
  * @throws MalformedURLException if a request URL built from an id is not a valid URL
  * @throws ProtocolException if the request method cannot be set on the connection
  * @throws IOException if a connection cannot be opened or its response cannot be read
  */
 public void processRecords(List<String> records)
     throws MalformedURLException, ProtocolException, IOException {
   if (records == null) {
     return;
   }
   System.out.println(records.size());
   for (String id : records) {
     String request = istexApiUrl + "/" + id + "/fulltext/tei";
     HttpURLConnection urlConn = (HttpURLConnection) new URL(request).openConnection();
     System.out.println(" id :" + id);
     if (urlConn != null) {
       urlConn.setDoInput(true);
       urlConn.setRequestMethod("GET");
       InputStream in = urlConn.getInputStream();
       try {
         // The stream must reach the insert call untouched: reading it into a String first
         // would leave nothing for insertExternalTeiDocument to consume.
         mm.insertExternalTeiDocument(in, id, "istex", MongoCollectionsInterface.ISTEX_TEIS, date);
       } finally {
         // Release the response stream even when the insert fails.
         in.close();
       }
     }
   }
 }