예제 #1
1
 // ----This function gets as a parameter a list of terms----
 // ---------the function returns a list of tweet ids from collection search_results-----------
 // ----------------------------------------------------------------------------------------
 /**
  * Collects the ids of the stored tweets that are still inside the time frame for the given
  * search terms, and prunes expired entries from each term's document on the way.
  *
  * <p>For every term the document with a matching {@code searchword} is fetched. Its
  * {@code tweets} field is read as a comma separated list of {@code id,time} pairs. Pairs whose
  * time is at least {@code now - frame_time} are kept and their ids returned; the remaining
  * pairs are dropped. The document is then saved with the pruned list and a fresh
  * {@code last_update}, even when nothing was kept.
  *
  * <p>Terms that have no document, or whose document has no {@code tweets} field, are logged
  * and skipped.
  *
  * @param search_terms the terms to look up
  * @return tweet ids still inside the time frame, in the order they were encountered
  * @throws NumberFormatException if a stored time value is not a valid long
  */
 public LinkedList<String> get_tweets(LinkedList<String> search_terms) {
   log4j.info("starting function get_tweets");
   LinkedList<String> result = new LinkedList<String>();
   long curr_time = System.currentTimeMillis();
   long min_time = curr_time - this.frame_time; // time below min_time will be ignored
   int count_all = 0; // tweets counter over all terms

   for (String term : search_terms) {
     DBObject st = new BasicDBObject();
     st.put("searchword", term);
     DBObject obj = this.collsr.findOne(st); // look for the relevant document
     Object stored = (obj == null) ? null : obj.get("tweets");
     if (stored == null) {
       // explicit check instead of catching NullPointerException
       log4j.info("search_term: " + term + ", is not in collection search_results");
       continue;
     }

     // even indexes are tweet ids, odd indexes are their times
     String[] tweets_plus_time = stored.toString().split(",");
     StringBuilder kept = new StringBuilder(); // replacement for the 'tweets' field
     int count = 0;
     for (int i = 0; i + 1 < tweets_plus_time.length; i += 2) {
       String tweet_id = tweets_plus_time[i];
       String tweet_time = tweets_plus_time[i + 1];
       if (Long.parseLong(tweet_time) >= min_time) { // tweet time is within the time frame
         result.add(tweet_id);
         count++;
         if (kept.length() > 0) {
           kept.append(','); // separator only between pairs, never leading
         }
         kept.append(tweet_id).append(',').append(tweet_time);
       }
     }

     count_all += count;
     log4j.info(count + " tweets for term: " + term);
     obj.put("tweets", kept.toString()); // replace 'tweets' field
     obj.put("last_update", System.currentTimeMillis()); // update time of update
     collsr.save(obj);
   }

   log4j.info("over_all there are " + count_all + " tweets to compare!!!");
   log4j.info("ending function get_tweets");
   return result;
 }
예제 #2
1
  // ----This function gets a search term and a tweet id ----
  // ----the function adds the tweet id and the time of search to the collection search_results--
  /**
   * Appends {@code tweet_id} to the {@code tweets} list of the document whose
   * {@code searchword} matches, creating that document when none exists.
   *
   * <p>The {@code tweets} field is a comma separated list read in pairs, where the second value
   * of each pair is parsed as a time in milliseconds. When the document's {@code last_update} is
   * older than {@code now - frame_time}, or is missing, pairs older than that limit are dropped
   * before appending and {@code last_update} is refreshed. Otherwise the value is appended to
   * the list as it is.
   *
   * <p>The separating comma is written only when the existing list is non-empty, so the stored
   * value never starts with a comma.
   *
   * <p>NOTE(review): {@code tweet_id} is appended verbatim. Since the list is read in pairs,
   * callers presumably pass {@code "id,time"} - confirm against the call sites.
   *
   * @param searchword the search term that identifies the document
   * @param tweet_id the value to append to the document's {@code tweets} field
   * @throws NumberFormatException if a stored time or {@code last_update} is not a valid long
   */
  public void SearchResultId(String searchword, String tweet_id) {
    log4j.info(
        "starting function SearchResultId with parameters: searchword = "
            + searchword
            + ", tweet_id"
            + tweet_id);
    DBObject searchobj = new BasicDBObject();
    searchobj.put("searchword", searchword);
    DBObject obj = this.collsr.findOne(searchobj); // get document if exists
    long now = System.currentTimeMillis();

    if (obj == null) {
      // there is no document yet, creating one
      searchobj.put("tweets", tweet_id);
      searchobj.put("last_update", now);
      this.collsr.save(searchobj);
      log4j.info("ending function SearchResultId");
      return;
    }

    long min_time = now - this.frame_time; // minimum time to keep in document
    Object stored_tweets = obj.get("tweets");
    String existing = (stored_tweets == null) ? "" : stored_tweets.toString();
    Object last_update = obj.get("last_update");
    // a document without last_update is treated as stale rather than duplicated
    boolean stale = last_update == null || Long.parseLong(last_update.toString()) < min_time;

    if (stale) {
      // last updated before minimum time - checking each tweet
      String[] tweets = existing.split(",");
      StringBuilder kept = new StringBuilder();
      for (int i = 1; i < tweets.length; i += 2) { // odd indexes hold the times
        if (Long.parseLong(tweets[i]) >= min_time) {
          // tweet stays in document
          if (kept.length() > 0) {
            kept.append(',');
          }
          kept.append(tweets[i - 1]).append(',').append(tweets[i]);
        }
      }
      existing = kept.toString();
      obj.put("last_update", now);
    }

    // avoid a leading comma when nothing is stored: it would shift every pair for readers
    obj.put("tweets", existing.isEmpty() ? tweet_id : existing + "," + tweet_id);
    this.collsr.save(obj);
    log4j.info("ending function SearchResultId");
  }
예제 #3
0
 /**
  * Sums the per-slot counters stored in a user's rate document over the requested number of
  * hours.
  *
  * <p>Slots are read backwards starting at {@code current_slot_index}, wrapping around the
  * ring of {@code num_of_slots} slots. The number of slots read is the requested time divided
  * by {@code slot_time_millis}, capped at {@code num_of_slots}, so no slot is read twice and a
  * request longer than the ring only covers one full ring.
  *
  * <p>NOTE(review): the starting slot is this object's {@code current_slot_index}, not the
  * {@code current_slot} value stored in the user's document - confirm they are kept in sync.
  *
  * @param UserId id of the user whose document is queried
  * @param numofhours length of the requested window in hours
  * @return {@code -1} when the user has no document; {@code 0} when the document's
  *     {@code current_slot_start_time_millis} is older than the start of the requested window;
  *     otherwise the sum of the selected slot counters
  */
 public long GetRateTimeFrame(Long UserId, Long numofhours) {
   this.log4j.info("=================================================================");
   this.log4j.info(
       "getting rate for user id: " + UserId + " within the last " + numofhours + " hours");
   long diff = numofhours * 60 * 60 * 1000; // hours to millis
   BasicDBObject docline = new BasicDBObject();
   docline.put("user_id", UserId); // querying to find the right userId
   DBObject doc = this.collrate.findOne(docline);
   if (doc == null) { // there is no document for the user
     this.log4j.error("user id : " + UserId + " does not exist");
     return -1L;
   }

   long currstart = Long.parseLong(doc.get("current_slot_start_time_millis").toString());
   if (System.currentTimeMillis() - diff > currstart) {
     // the user's latest slot started before the requested window
     this.log4j.info("result is 0");
     return 0;
   }

   double backslots = diff / this.slot_time_millis;
   if (backslots > this.num_of_slots) {
     this.log4j.info(
         "you requested longer time than the time frame, the result will be only for the previous timeframe");
   }

   long result = 0;
   // both bounds must hold: stop after the requested slots AND never go past one full ring
   for (int i = 0; i < backslots && i < this.num_of_slots; i++) {
     int slot = (int) ((this.current_slot_index - i + this.num_of_slots) % this.num_of_slots);
     result += Long.parseLong(doc.get("slot" + slot).toString());
   }
   this.log4j.info("result is " + result);
   return result;
 }
예제 #4
0
  // ----This function gets a user id and inserts/updates its counter with time slots handling----
  // ----------------------------------------------------------------------------------------
  /**
   * Counts one occurrence for a user in the current time slot of the user's rate document.
   *
   * <p>When the user has no document, one is inserted with every slot set to 0 except the
   * current slot, which is set to 1. Otherwise the time since the document's
   * {@code current_slot_start_time_millis}, measured in slots, selects one of three updates:
   *
   * <ul>
   *   <li>less than one slot: the current slot counter is incremented;
   *   <li>less than {@code num_of_slots}: the current slot is set to 1, slots going backwards
   *       from it are set to 0, and the document's slot index and start time are refreshed;
   *   <li>otherwise (or when the start time is missing): every slot is reset, the current one
   *       to 1 and the rest to 0.
   * </ul>
   *
   * <p>All three updates are written back to the collection.
   *
   * @param user_id id of the user being counted
   * @param user_name name of the user, stored on insert and used in log messages
   * @param max_time_frame_hours only logged; not used by the computation
   * @throws MongoException declared for failures of the database calls
   */
  public void rate_user(long user_id, String user_name, double max_time_frame_hours)
      throws MongoException {
    log4j.info(
        "starting function rate user for : user_name = "
            + user_name
            + ", user_id = "
            + user_id
            + ", max_time_frame_hours = "
            + max_time_frame_hours);
    BasicDBObject objterm = new BasicDBObject();
    objterm.put("user_id", user_id);
    DBObject term = this.collrate.findOne(objterm); // get user's document if exists

    if (term == null) { // no document in collection for this user
      // creating document for user
      log4j.info(user_name + " is not yet in collection , inserting it");
      DBObject newline = new BasicDBObject();
      newline.put("user_id", user_id);
      newline.put("user_name", user_name);
      newline.put("current_slot", current_slot_index);
      newline.put("current_slot_start_time_millis", current_slot_start_time);
      for (int j = 0; j < num_of_slots; j++) {
        newline.put("slot" + j, j == current_slot_index ? 1 : 0);
      }
      this.collrate.insert(newline);
      log4j.info("end rate_user");
      return;
    }

    // slots elapsed since the document was last moved to a new slot; a document without a
    // start time is handled like one whose whole time frame has expired
    Object last_start = term.get("current_slot_start_time_millis");
    double delta =
        (last_start == null)
            ? Double.POSITIVE_INFINITY
            : (System.currentTimeMillis() - ((Number) last_start).longValue())
                / this.slot_time_millis;

    if (delta < 1) { // user was last updated in current slot
      // updating counter for current slot
      String slot_key = "slot" + this.current_slot_index;
      Object counted = term.get(slot_key);
      int so_far = (counted == null) ? 0 : Integer.parseInt(counted.toString());
      term.put(slot_key, so_far + 1);
      log4j.info(
          "updating counter in current slot for userid: "
              + user_id
              + " user_name : "
              + user_name);
    } else if (delta < this.num_of_slots) {
      // updating current slot to 1 and go as much as needed backwards and updating to zero
      // NOTE(review): for a fractional delta this loop also reaches the slot the user was last
      // counted in - confirm whether clearing that slot is intended.
      for (long h = 0; h < delta; h++) {
        long slot = (long) ((long) (this.current_slot_index + num_of_slots - h) % num_of_slots);
        term.put("slot" + slot, h == 0 ? 1 : 0);
      }
      term.put("current_slot", this.current_slot_index);
      term.put("current_slot_start_time_millis", this.current_slot_start_time);
      log4j.info("updating all slots needed: " + user_id + " user_name : " + user_name);
    } else { // time frame has finished since last updated
      // updating all slots to zero except current slot to 1
      term.put("current_slot", current_slot_index);
      term.put("current_slot_start_time_millis", current_slot_start_time);
      for (int j = 0; j < num_of_slots; j++) {
        term.put("slot" + j, j == current_slot_index ? 1 : 0);
      }
    }

    // persist in every case, including the full reset
    this.collrate.update(objterm, term);
    log4j.info("end rate_user");
  }
예제 #5
0
  // ----This function gets search terms and inserts/updates counters with time slots handling----
  // ---------the function makes sure the data is always up to date-----------
  // ----------------------------------------------------------------------------------------
  /**
   * Updates the search term counters and the tree node of {@code query} from one tweet text.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Load the current slot index and start time from the slot collection. If more than
   *       {@code slot_time_millis} has passed since that start time, advance to the next slot,
   *       save the new slot information, and for every search term document subtract the value
   *       held in the re-used slot from {@code over_all} before setting that slot to 0.
   *   <li>Find the tree node whose {@code parent} is {@code query} (after replacing
   *       {@code %40}/{@code %23} with {@code @}/{@code #}); an existing node is saved with
   *       {@code in_process = 1} while the tweet is processed.
   *   <li>For every word of {@code text} starting with {@code @} or {@code #}: cut it at the
   *       first delimiter, append it to the node's {@code son} list unless it equals
   *       {@code query}, and either increment the term's {@code over_all} and current slot
   *       counters or insert a new document for it.
   *   <li>Save the node with {@code in_process = 0} if it already existed or if at least one
   *       {@code @}/{@code #} word was seen.
   * </ol>
   *
   * @param text the tweet text, split on single spaces
   * @param num_of_slots number of slots in the ring; used for the slot index wrap-around and
   *     for creating the slots of new documents
   * @param max_time_frame_hours only logged; not used by the computation
   * @param query the search query that produced the tweet; used as the tree node parent
   * @throws MongoException declared for failures of the database calls
   */
  @SuppressWarnings("deprecation")
  public void update_search_terms(
      String text, double num_of_slots, double max_time_frame_hours, String query)
      throws MongoException {
    log4j.info(
        "starting function update_search_terms, num_of_slots = "
            + num_of_slots
            + ", max_time_frame_hours = "
            + max_time_frame_hours
            + ", query = "
            + query);
    String[] textarray = text.split(" "); // split tweet text into a words array
    log4j.info("split tweet text into a word array");
    BasicDBObject objterm = new BasicDBObject();

    log4j.info("starting function update_search_terms");
    DBObject curr_slot = this.collslot.findOne(); // get current time slot information
    this.current_slot_start_time =
        (long) (Double.parseDouble((curr_slot.get("slot_start_time").toString())));
    Date resultdate = new Date(this.current_slot_start_time);
    log4j.info("current_slot_start_time is : " + resultdate.toLocaleString());
    this.current_slot_index = Integer.parseInt(curr_slot.get("current_slot").toString());
    log4j.info("current time slot is  : " + this.current_slot_index);
    long difference = System.currentTimeMillis() - this.current_slot_start_time;

    if (difference > this.slot_time_millis) { // starting a new time slot
      // update current slot information
      this.current_slot_start_time += (long) this.slot_time_millis;
      this.current_slot_index = (int) ((this.current_slot_index + 1) % num_of_slots);
      log4j.info("new slot time has come, new slot is slot number " + this.current_slot_index);
      curr_slot.put("current_slot", this.current_slot_index);
      curr_slot.put("slot_start_time", this.current_slot_start_time);
      curr_slot.put(
          "slot_start_time_string", new Date(this.current_slot_start_time).toLocaleString());
      log4j.info("updating new current slot time and number in db");
      this.collslot.save(curr_slot);

      String reused_slot = "slot" + this.current_slot_index;
      DBCursor terms =
          this.collsearch.find(); // get all search_terms documents to update new slot to zero
      while (terms.hasNext()) {
        DBObject term = terms.next();
        if (term.get("search_term") == null) {
          continue;
        }
        Object over_all = term.get("over_all");
        if (over_all == null) {
          // nothing to reduce; leave the document untouched
          log4j.info("no over_all counter for search term: " + term.get("search_term"));
          continue;
        }
        // read the expiring count BEFORE the slot is cleared, otherwise 0 is subtracted
        Object expiring = term.get(reused_slot);
        int expired = (expiring == null) ? 0 : Integer.parseInt(expiring.toString());
        term.put("over_all", Integer.parseInt(over_all.toString()) - expired);
        term.put(reused_slot, 0);
        term.put("current_slot", this.current_slot_index);
        this.collsearch.save(term);
      }
    }

    // start looking for new search terms in text
    log4j.info("going over the tweet text");
    query = query.replaceAll("%40", "@"); // utf-8 code of @
    query = query.replaceAll("%23", "#"); // utf-8 code of #
    DBObject parent_lookup = new BasicDBObject();
    parent_lookup.put("parent", query);
    // check if there is a document for parent in tree_nodes collection
    DBObject nodes = colltree.findOne(parent_lookup);
    boolean node_existed = nodes != null;
    if (node_existed) {
      nodes.put("in_process", 1); // mark as busy
      this.colltree.save(nodes);
    } else {
      nodes = new BasicDBObject();
      nodes.put("son", "no");
      nodes.put("parent", query);
    }

    boolean tagged_word_seen = false;
    for (String word : textarray) { // loop over the words of the tweet
      String thisterm = word.trim(); // cut white spaces
      if (!thisterm.startsWith("@") && !thisterm.startsWith("#")) {
        continue;
      }
      tagged_word_seen = true;
      thisterm = thisterm.split("[:,., ,;,\n]")[0]; // keep the part before the first delimiter
      thisterm = thisterm.replaceAll("%40", "@");
      thisterm = thisterm.replaceAll("%23", "#");
      if (thisterm.length() <= 1) {
        continue;
      }

      log4j.info("search word: " + thisterm);
      objterm.put("search_term", thisterm); // object to find the search word in collection
      log4j.info("inserting tree nodes to mongodb");
      if (!query.equals(thisterm)) { // compare by value: query and search term not equal
        Object sons = nodes.get("son");
        if (sons == null || "no".equals(sons.toString())) { // no son recorded yet
          nodes.put("son", thisterm);
        } else {
          nodes.put("son", sons.toString() + "," + thisterm);
        }
      }

      DBObject term = this.collsearch.findOne(objterm); // get document of search_term if exists
      if (term == null) { // there is no document for search term in collection
        // creating a new document
        log4j.info(thisterm + " is not yet in collection , inserting it");
        DBObject newline = new BasicDBObject();
        newline.put("search_term", thisterm);
        newline.put("over_all", 1);
        newline.put("max_id", 0);
        newline.put("current_slot", current_slot_index);
        newline.put("current_slot_start_time_millis", current_slot_start_time);
        // creating all slots for document: current slot = 1, the others = 0
        for (int j = 0; j < num_of_slots; j++) {
          newline.put("slot" + j, j == current_slot_index ? 1 : 0);
        }
        this.collsearch.insert(newline);
      } else {
        // update current slot and over_all for existing document; missing counters count as 0
        String slot_key = "slot" + this.current_slot_index;
        Object total = term.get("over_all");
        Object in_slot = term.get(slot_key);
        term.put("over_all", (total == null ? 0 : Integer.parseInt(total.toString())) + 1);
        term.put(slot_key, (in_slot == null ? 0 : Integer.parseInt(in_slot.toString())) + 1);
        log4j.info("updating counter in current slot for word: " + thisterm);
        this.collsearch.update(objterm, term);
      }
    }

    // Release the busy flag even when the tweet had no @/# words. A node that did not exist
    // and received no words is still not created.
    if (node_existed || tagged_word_seen) {
      nodes.put("in_process", 0); // update tree_nodes document as not busy
      this.colltree.save(nodes);
    }
    log4j.info("end update_search_terms");
  }