// ----This function gets as a parameter a list of terms---- // ---------the function returns a list of tweet ids from collection search_results----------- // ---------------------------------------------------------------------------------------- public LinkedList<String> get_tweets(LinkedList<String> search_terms) { log4j.info("starting function get_tweets"); LinkedList<String> result = new LinkedList<String>(); Iterator<String> terms = search_terms.iterator(); long curr_time = System.currentTimeMillis(); long min_time = curr_time - this.frame_time; // time below min_time will be ignored int count_all = 0; // tweets counter while (terms.hasNext()) { int count = 0; String term = terms.next(); DBObject st = new BasicDBObject(); try { st.put("searchword", term); DBObject obj = this.collsr.findOne(st); // look for the relevant document String[] tweets_plus_time = obj.get("tweets") .toString() .split(","); // make an array, even indexes are tweet_id's and odd indexes are their // time String new_string = ""; // the string to replace eventually the current field 'tweets' in the document for (int i = 0; i < tweets_plus_time.length - 1; i += 2) // go over the tweet ids from the document { if (Long.parseLong(tweets_plus_time[i + 1]) >= min_time) // tweet time is within the time frame { result.add(tweets_plus_time[i]); // add tweet id to result count++; if (new_string == "") // add tweet information without leading comma { new_string += tweets_plus_time[i] + "," + tweets_plus_time[i + 1]; // count++; } else // add tweet information with leading comma { new_string += "," + tweets_plus_time[i] + "," + tweets_plus_time[i + 1]; } } } count_all += count; log4j.info(count + " tweets for term: " + term); obj.put("tweets", new_string); // replace 'tweets' field obj.put("last_update", System.currentTimeMillis()); // update time of update collsr.save(obj); } catch (NullPointerException e) { log4j.info("search_term: " + term + ", is not in collection search_results"); } } log4j.info("over_all there are " + count_all + " tweets to compare!!!"); log4j.info("ending function get_tweets"); return result; }
// ----This function getting search term and tweet id ---- // ----the function adding the tweet id and the time of search to the collection search_results-- public void SearchResultId(String searchword, String tweet_id) { log4j.info( "starting function SearchResultId with parameters: searchword = " + searchword + ", tweet_id" + tweet_id); try { DBObject searchobj = new BasicDBObject(); searchobj.put("searchword", searchword); DBObject obj = this.collsr.findOne(searchobj); // get document if exists long min_time = System.currentTimeMillis() - this.frame_time; // minimum time to keep in document if (Long.parseLong(obj.get("last_update").toString()) < min_time) { // last updated before minimum time - checking each tweet String[] tweets = obj.get("tweets").toString().split(","); String new_string = ""; for (int i = 1; i < tweets.length; i += 2) // going over all existing tweets in document { if (Long.parseLong(tweets[i]) >= min_time) { // tweet stays in document if (new_string == "") { // no leading comma new_string += tweets[i - 1] + "," + tweets[i]; } else { // leading comma new_string += "," + tweets[i - 1] + "," + tweets[i]; } } } obj.put("tweets", new_string + "," + tweet_id); obj.put("last_update", System.currentTimeMillis()); // obj.put("in_process", 0); this.collsr.save(obj); } else { // last updated after minimum time - just adding tweet obj.put("tweets", obj.get("tweets").toString() + "," + tweet_id); this.collsr.save(obj); } } catch (NullPointerException e) { // there is no document yet, creating one DBObject searchobj = new BasicDBObject(); searchobj.put("searchword", searchword); searchobj.put("tweets", tweet_id); searchobj.put("last_update", System.currentTimeMillis()); this.collsr.save(searchobj); } log4j.info("ending function SearchResultId"); }
public long GetRateTimeFrame(Long UserId, Long numofhours) { this.log4j.info("================================================================="); this.log4j.info( "getting rate for user id: " + UserId + " within the last " + numofhours + " hours"); long diff = numofhours * 60 * 60 * 1000; // hours to millis BasicDBObject docline = new BasicDBObject(); docline.put("user_id", UserId); // querying to find the right userId DBObject doc = this.collrate.findOne(docline); if (doc == null) // there is no document for the user { this.log4j.error("user id : " + UserId + " does not exist"); return -1L; } else // document exists { long result = 0; long currstart = Long.parseLong(doc.get("current_slot_start_time_millis").toString()); if (System.currentTimeMillis() - diff > currstart) { this.log4j.info("result is 0"); return 0; } else { double backslots = diff / this.slot_time_millis; if (backslots > this.num_of_slots) { this.log4j.info( "you requested longer time than the time frame, the result will be only for the previous timeframe"); } for (int i = 0; i < backslots || i < this.num_of_slots; i++) { int slot = (int) ((this.current_slot_index - i + this.num_of_slots) % this.num_of_slots); result += Long.parseLong(doc.get("slot" + slot).toString()); } this.log4j.info("result is " + result); return result; } } }
// ----This function getting user id and inserts/updates counter with time slots handling---- // ---------------------------------------------------------------------------------------- public void rate_user(long user_id, String user_name, double max_time_frame_hours) throws MongoException { BasicDBObject objterm = new BasicDBObject(); DBObject objtoupd = new BasicDBObject(); DBObject update = new BasicDBObject(); log4j.info( "starting function rate user for : user_name = " + user_name + ", user_id = " + user_id + ", max_time_frame_hours = " + max_time_frame_hours); try { objterm.put("user_id", user_id); DBObject term = this.collrate.findOne(objterm); // get user's document if exists int previous_slot = (Integer) term.get("current_slot"); // get last slot updated in user's document double delta = (System.currentTimeMillis() - (Long) term.get("current_slot_start_time_millis")) / this.slot_time_millis; if (delta < 1) { // user was last updated in current slot // updating counter for current slot term.put( "slot" + current_slot_index, Integer.parseInt(term.get("slot" + this.current_slot_index).toString()) + 1); log4j.info( "updating counter in current slot for userid: " + user_id + " user_name : " + user_name); this.collrate.update(objterm, term); } else if (delta < this.num_of_slots) { // updating current slot to 1 and go as much as needed backwards and updating to zero for (long h = 0; h < delta; h++) { long slot = (long) ((long) (this.current_slot_index + num_of_slots - h) % num_of_slots); if (h == 0) { term.put("slot" + slot, 1); // current slot } else { term.put("slot" + slot, 0); // other slots since last updated } } term.put("current_slot", this.current_slot_index); term.put("current_slot_start_time_millis", this.current_slot_start_time); log4j.info("updating all slots needed: " + user_id + " user_name : " + user_name); this.collrate.update(objterm, term); } else { // time frame has finished since last updated // updating all slots to zero except current slot to 1 term.put("current_slot", current_slot_index); term.put("current_slot_start_time_millis", current_slot_start_time); for (int j = 0; j < num_of_slots; j++) { if (j == current_slot_index) { term.put("slot" + current_slot_index, 1); } else { term.put("slot" + j, 0); } } } } catch (NullPointerException e) { // no document in collection for this user // creating document for user log4j.info(user_name + " is not yet in collection , inserting it"); DBObject newline = new BasicDBObject(); newline.put("user_id", user_id); newline.put("user_name", user_name); newline.put("current_slot", current_slot_index); newline.put("current_slot_start_time_millis", current_slot_start_time); for (int j = 0; j < num_of_slots; j++) { if (j == current_slot_index) { newline.put("slot" + current_slot_index, 1); } else { newline.put("slot" + j, 0); } } this.collrate.insert(newline); } log4j.info("end rate_user"); }
// ----This function getting search terms and inserts/updates counter with time slots handling---- // ---------the function making shore the data is always up to date----------- // ---------------------------------------------------------------------------------------- @SuppressWarnings("deprecation") public void update_search_terms( String text, double num_of_slots, double max_time_frame_hours, String query) throws MongoException { // long starttime = System.currentTimeMillis(); log4j.info( "starting function update_search_terms, num_of_slots = " + num_of_slots + ", max_time_frame_hours = " + max_time_frame_hours + ", query = " + query); String[] textarray = text.split(" "); // split tweet text into a words array log4j.info("split tweet text into a word array"); BasicDBObject objterm = new BasicDBObject(); DBObject objtoupd = new BasicDBObject(); DBObject update = new BasicDBObject(); DBObject curr_slot = new BasicDBObject(); log4j.info("starting function update_search_terms"); curr_slot = this.collslot.findOne(); // get current time slot information this.current_slot_start_time = (long) (Double.parseDouble((curr_slot.get("slot_start_time").toString()))); Date resultdate = new Date(this.current_slot_start_time); log4j.info("current_slot_start_time is : " + resultdate.toLocaleString()); this.current_slot_index = Integer.parseInt(curr_slot.get("current_slot").toString()); log4j.info("current time slot is : " + this.current_slot_index); long difference = System.currentTimeMillis() - this.current_slot_start_time; if (difference > this.slot_time_millis) { // starting a new time slot // update current slot information this.current_slot_start_time += (long) this.slot_time_millis; this.current_slot_index = (int) ((this.current_slot_index + 1) % num_of_slots); log4j.info("new slot time has come, new slot is slot number " + this.current_slot_index); curr_slot.put("current_slot", this.current_slot_index); curr_slot.put("slot_start_time", this.current_slot_start_time); curr_slot.put( "slot_start_time_string", new Date(this.current_slot_start_time).toLocaleString()); log4j.info("updating new current slot time and number in db"); this.collslot.save(curr_slot); DBCursor terms = this.collsearch.find(); // get all search_terms documents to update new slot to zero while (terms.hasNext()) { try { // update new slot to zero and reducing from over_all the old data in all documents DBObject term = terms.next(); if (term.get("search_term") != null) { objtoupd.put("search_term", term.get("search_term")); term.put("slot" + this.current_slot_index, 0); term.put("current_slot", this.current_slot_index); term.put( "over_all", Integer.parseInt(term.get("over_all").toString()) - Integer.parseInt(term.get("slot" + this.current_slot_index).toString())); this.collsearch.save(term); } } catch (NullPointerException e) { e.printStackTrace(); log4j.info(e); } } } // start looking for new search terms in text log4j.info("going over the tweet text"); query = query.replaceAll("%40", "@"); // utf-8 code of @ query = query.replaceAll("%23", "#"); // utf-8 code of # DBObject nodes = new BasicDBObject(); nodes.put("parent", query); nodes = colltree.findOne(nodes); // check if there is a document for parent in tree_nodes collection if (nodes == null) // there is no document in tree_nodes { nodes = new BasicDBObject(); nodes.put("son", "no"); nodes.put("parent", query); } else // there is document in tree_nodes { nodes.put("in_process", 1); // mark as busy this.colltree.save(nodes); // nodes.put("son", nodes.get("son").toString() + ""); } for (int i = 0; i < textarray.length; i++) { // loop over the words of the tweet if (textarray[i].trim().startsWith("@") || textarray[i].trim().startsWith("#")) { String thisterm = textarray[i].trim(); // cut white spaces String[] no_ddot = thisterm.split("[:,., ,;,\n]"); thisterm = no_ddot[0]; thisterm = thisterm.replaceAll("%40", "@"); thisterm = thisterm.replaceAll("%23", "#"); if (thisterm.length() > 1) { log4j.info("search word: " + thisterm); objterm.put("search_term", thisterm); // object to find the search word in collection log4j.info("inserting tree nodes to mongodb"); if (String.valueOf(query) != String.valueOf(thisterm)) { // query and search term not equal if (nodes.get("son").toString() == "no") // no document in collection yet { nodes.put("son", thisterm); } else // there is document in collection { nodes.put("son", nodes.get("son").toString() + "," + thisterm); } // nodes.put("son", thisterm); // neo4j.addNode(query, thisterm, log4j); } // objtoupd = collsearch.findOne(objterm); // find the search word in collection try { DBObject term = this.collsearch.findOne(objterm); // get document os search_term if exists // update current slot and over_all for existing document term.put("over_all", Integer.parseInt(term.get("over_all").toString()) + 1); term.put( "slot" + current_slot_index, Integer.parseInt(term.get("slot" + this.current_slot_index).toString()) + 1); // term.put("current_slot_start_time_millis", current_slot_start_time); log4j.info("updating counter in current slot for word: " + thisterm); this.collsearch.update(objterm, term); } catch (NullPointerException e) { // there is no document for search term in collection // creating a new document log4j.info(thisterm + " is not yet in collection , inserting it"); DBObject newline = new BasicDBObject(); newline.put("search_term", thisterm); newline.put("over_all", 1); newline.put("max_id", 0); newline.put("current_slot", current_slot_index); newline.put("current_slot_start_time_millis", current_slot_start_time); // creating all slots for document for (int j = 0; j < num_of_slots; j++) { if (j == current_slot_index) { newline.put("slot" + current_slot_index, 1); // current slot = 1 } else { newline.put("slot" + j, 0); // non current slot = 0 } } this.collsearch.insert(newline); } } nodes.put("in_process", 0); // update tree_nodes document as not busy this.colltree.save(nodes); } } log4j.info("end update_search_terms"); }