예제 #1
0
  public void trapException(String output) throws CrowdFlowerException {

    try {
      JSONObject error = new JSONObject(output);

      if (error.has("error")) {
        throw new CrowdFlowerException(error.get("error").toString());
      }

    } catch (JSONException e) {
      // ignore
    }
  }
  public void doProcess(HttpServletRequest req, HttpServletResponse res, boolean isPost) {
    StringBuffer bodyContent = null;
    OutputStream out = null;
    PrintWriter writer = null;
    String serviceKey = null;

    try {
      BufferedReader in = req.getReader();
      String line = null;
      while ((line = in.readLine()) != null) {
        if (bodyContent == null) bodyContent = new StringBuffer();
        bodyContent.append(line);
      }
    } catch (Exception e) {
    }
    try {
      if (requireSession) {
        // check to see if there was a session created for this request
        // if not assume it was from another domain and blow up
        // Wrap this to prevent Portlet exeptions
        HttpSession session = req.getSession(false);
        if (session == null) {
          res.setStatus(HttpServletResponse.SC_FORBIDDEN);
          return;
        }
      }
      serviceKey = req.getParameter("id");
      // only to preven regressions - Remove before 1.0
      if (serviceKey == null) serviceKey = req.getParameter("key");
      // check if the services have been loaded or if they need to be reloaded
      if (services == null || configUpdated()) {
        getServices(res);
      }
      String urlString = null;
      String xslURLString = null;
      String userName = null;
      String password = null;
      String format = "json";
      String callback = req.getParameter("callback");
      String urlParams = req.getParameter("urlparams");
      String countString = req.getParameter("count");
      // encode the url to prevent spaces from being passed along
      if (urlParams != null) {
        urlParams = urlParams.replace(' ', '+');
      }

      try {
        if (services.has(serviceKey)) {
          JSONObject service = services.getJSONObject(serviceKey);
          // default to the service default if no url parameters are specified
          if (urlParams == null && service.has("defaultURLParams")) {
            urlParams = service.getString("defaultURLParams");
          }
          String serviceURL = service.getString("url");
          // build the URL
          if (urlParams != null && serviceURL.indexOf("?") == -1) {
            serviceURL += "?";
          } else if (urlParams != null) {
            serviceURL += "&";
          }
          String apikey = "";
          if (service.has("username")) userName = service.getString("username");
          if (service.has("password")) password = service.getString("password");
          if (service.has("apikey")) apikey = service.getString("apikey");
          urlString = serviceURL + apikey;
          if (urlParams != null) urlString += "&" + urlParams;
          if (service.has("xslStyleSheet")) {
            xslURLString = service.getString("xslStyleSheet");
          }
        }
        // code for passing the url directly through instead of using configuration file
        else if (req.getParameter("url") != null) {
          String serviceURL = req.getParameter("url");
          // build the URL
          if (urlParams != null && serviceURL.indexOf("?") == -1) {
            serviceURL += "?";
          } else if (urlParams != null) {
            serviceURL += "&";
          }
          urlString = serviceURL;
          if (urlParams != null) urlString += urlParams;
        } else {
          writer = res.getWriter();
          if (serviceKey == null)
            writer.write("XmlHttpProxyServlet Error: id parameter specifying serivce required.");
          else
            writer.write(
                "XmlHttpProxyServlet Error : service for id '" + serviceKey + "' not  found.");
          writer.flush();
          return;
        }
      } catch (Exception ex) {
        getLogger().severe("XmlHttpProxyServlet Error loading service: " + ex);
      }

      Map paramsMap = new HashMap();
      paramsMap.put("format", format);
      // do not allow for xdomain unless the context level setting is enabled.
      if (callback != null && allowXDomain) {
        paramsMap.put("callback", callback);
      }
      if (countString != null) {
        paramsMap.put("count", countString);
      }

      InputStream xslInputStream = null;

      if (urlString == null) {
        writer = res.getWriter();
        writer.write(
            "XmlHttpProxyServlet parameters:  id[Required] urlparams[Optional] format[Optional] callback[Optional]");
        writer.flush();
        return;
      }
      // default to JSON
      res.setContentType(responseContentType);
      out = res.getOutputStream();
      // get the stream for the xsl stylesheet
      if (xslURLString != null) {
        // check the web root for the resource
        URL xslURL = null;
        xslURL = ctx.getResource(resourcesDir + "xsl/" + xslURLString);
        // if not in the web root check the classpath
        if (xslURL == null) {
          xslURL =
              XmlHttpProxyServlet.class.getResource(classpathResourcesDir + "xsl/" + xslURLString);
        }
        if (xslURL != null) {
          xslInputStream = xslURL.openStream();
        } else {
          String message =
              "Could not locate the XSL stylesheet provided for service id "
                  + serviceKey
                  + ". Please check the XMLHttpProxy configuration.";
          getLogger().severe(message);
          try {
            out.write(message.getBytes());
            out.flush();
            return;
          } catch (java.io.IOException iox) {
          }
        }
      }
      if (!isPost) {
        xhp.doGet(urlString, out, xslInputStream, paramsMap, userName, password);
      } else {
        if (bodyContent == null)
          getLogger()
              .info(
                  "XmlHttpProxyServlet attempting to post to url "
                      + urlString
                      + " with no body content");
        xhp.doPost(
            urlString,
            out,
            xslInputStream,
            paramsMap,
            bodyContent.toString(),
            req.getContentType(),
            userName,
            password);
      }
    } catch (Exception iox) {
      iox.printStackTrace();
      getLogger().severe("XmlHttpProxyServlet: caught " + iox);
      try {
        writer = res.getWriter();
        writer.write(iox.toString());
        writer.flush();
      } catch (java.io.IOException ix) {
        ix.printStackTrace();
      }
      return;
    } finally {
      try {
        if (out != null) out.close();
        if (writer != null) writer.close();
      } catch (java.io.IOException iox) {
      }
    }
  }
예제 #3
0
  /**
   * Parses a text file of JSONs into an array of tweets
   *
   * @param textFile The textfile containing the JSONs
   */
  public parseJSON() {

    // get folder paths
    String currentDir = System.getProperty("user.dir");
    String root = currentDir;
    String textFile = root + "/tweet_input/tweets.txt";
    String outputFolderF1 = root + "/tweet_output/f1.txt";
    String outputFolderF2 = root + "/tweet_output/f2.txt";

    int numTweets = 0;
    int numGoodTweets = 0;
    hashtagEdges hashEdges = new hashtagEdges();

    // try objects
    try {
      // create a reader object for tweet's textfile and read-in first line
      BufferedReader reader = new BufferedReader(new FileReader(textFile));
      String currentJSON = reader.readLine();
      // System.out.println(textFile);

      // initate a writer object with the outputFolder name
      PrintWriter writerF1 = new PrintWriter(outputFolderF1, "UTF-8");
      PrintWriter writerF2 = new PrintWriter(outputFolderF2, "UTF-8");

      // create an array list to save parsedTweets and declare problem variables
      List<parsedTweet> tweetList = new ArrayList<parsedTweet>();
      String tweetText, tweetTime;
      JSONObject objJSON;
      String[] hashArray;
      parsedTweet tweetObj;
      float currentAverage;

      while (currentJSON != null) {
        try {
          objJSON = new JSONObject(currentJSON);
          // if the JSON has a text and time stamp, process it
          numTweets = numTweets + 1;
          if (objJSON.has("created_at") && objJSON.has("text")) {

            // get timestamp and text from JSON object
            tweetTime = objJSON.getString("created_at");
            tweetText = objJSON.getString("text");

            // create a tweet object and add it to folder
            tweetObj = new parsedTweet(tweetTime, tweetText, currentJSON);
            tweetList.add(tweetObj);

            // update hashObjet
            hashArray = tweetObj.hashtags.toArray(new String[tweetObj.hashtags.size()]);
            currentAverage = hashEdges.updateEdges(hashArray, tweetTime);

            // write correctly-formated cleen-tweet to output folder
            writerF1.println(tweetObj.formatTweet());
            numGoodTweets = numGoodTweets + 1;
            writerF2.format("%.2f%n", currentAverage);
          }

        } catch (Exception e) {
          System.out.println("Error in parseJSON - 1");
          e.printStackTrace();
        }

        // read next line (which has the next JSON object)
        currentJSON = reader.readLine();
      }
      writerF1.close();
      writerF2.close();
    } catch (Exception e) {

      System.out.println("Error in parseJSON - 2");
      e.printStackTrace();
    }
  }
예제 #4
0
  public List<Tweet> readFile(String fileLocation, int limit, String fromTweetId) {

    long startTime = System.currentTimeMillis();

    int errors = 0;
    int added = 0;
    int ignored = 0;
    int skipped = 0;

    List<Tweet> tweets = new ArrayList<Tweet>();

    try {

      // Read gzFile
      InputStream inputStream = new GZIPInputStream(new FileInputStream(fileLocation));
      Reader decoder = new InputStreamReader(inputStream);
      BufferedReader br = new BufferedReader(decoder);

      String status;

      while ((status = br.readLine()) != null) {

        // Ignore garbage and log output lines, all statuses start with JSON bracket
        if (!status.equals("") && status.charAt(0) == '{') {
          try {
            JSONObject jsonObject = new JSONObject(status);

            // We use created_at as an indicator that this is a Tweet.
            if (jsonObject.has("created_at")) {

              Status statusObject = TwitterObjectFactory.createStatus(status);

              Tweet tweet = this.getTweetObjectFromStatus(statusObject);

              if (fromTweetId != null) {

                if (fromTweetId.equals(tweet.getId())) {
                  this.log.write("StatusFileReader - Scanner pickup from " + fromTweetId);
                  fromTweetId = null;
                } else {
                  skipped++;
                }

                continue;
              }

              added++;
              tweets.add(tweet);

              if (limit > 0 && added >= limit) {
                break;
              }

            } else {
              ignored++;
            }

          } catch (JSONException e) {
            this.log.write(
                "Exception in StatusFileReader: Json Parse Failure on: "
                    + status
                    + ", "
                    + e.getMessage());
          }
        } else {
          ignored++;
        }
      }

      br.close();
      decoder.close();
      inputStream.close();

    } catch (Exception e) {
      this.log.write(
          "Exception in StatusFileReader: Error Reading File: "
              + e.getClass().getName()
              + ": "
              + e.getMessage());
    }

    long runTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - startTime);

    double ops = ((double) added / (double) Math.max(runTimeSeconds, 1));

    this.log.write(
        "StatusFileReader - "
            + fileLocation
            + " processed in "
            + runTimeSeconds
            + "s. "
            + added
            + " ok / "
            + errors
            + " errors / "
            + ignored
            + " ignored / "
            + skipped
            + " skipped. "
            + ops
            + " ops. Limit: "
            + limit
            + ", Fetch: "
            + tweets.size());

    return tweets;
  }