/**
 * Parses every user-agent string in the given array and prints a multi-line report of its
 * properties to standard output. Each report is followed by two blank lines.
 *
 * <p>The report lists, in order: the parsed user agent itself, its browser, the browser version,
 * the operating system name, the browser type name, the operating system manufacturer name, the
 * device type name and the operating system group.
 *
 * @param userAgentArr the raw user-agent strings to parse and describe
 */
public static void printPropertyOfUserAgent(String[] userAgentArr) {
  for (String rawUserAgent : userAgentArr) {
    UserAgent parsed = UserAgent.parseUserAgentString(rawUserAgent);

    // Assemble the whole report first so that it is emitted with a single println call.
    StringBuilder report = new StringBuilder();
    report.append("the user agent is:\t").append(parsed);
    report.append("\n the browser is:\t").append(parsed.getBrowser());
    report.append("\n the browser's version is:\t").append(parsed.getBrowserVersion());
    report.append("\n operating system is:\t").append(parsed.getOperatingSystem().getName());
    report.append("\n the browser type is:\t")
        .append(parsed.getBrowser().getBrowserType().getName());
    report.append("\n the os manifacture is:\t")
        .append(parsed.getOperatingSystem().getManufacturer().getName());
    report.append("\n the device type is:\t")
        .append(parsed.getOperatingSystem().getDeviceType().getName());
    report.append("\n the group is:\t").append(parsed.getOperatingSystem().getGroup());

    System.out.println(report.toString());

    // Two empty lines separate consecutive reports.
    System.out.println();
    System.out.println();
  }
}
/** * Parses the input row String into a Java object. For performance reasons this works in-place * updating the fields within this SnowPlowEventStruct, rather than creating a new one. * * @param row The raw String containing the row contents * @return true if row was successfully parsed, false otherwise * @throws SerDeException For any exception during parsing */ public boolean updateByParsing(String row) throws SerDeException { // First we reset the object's fields nullify(); // We have to handle any header rows if (row.startsWith("#Version:") || row.startsWith("#Fields:")) { return false; // Empty row will be discarded by Hive } final Matcher m = cfRegex.matcher(row); // 0. First check our row is kosher // -> is row from a CloudFront log? Will throw an exception if not if (!m.matches()) throw new SerDeException("Row does not match expected CloudFront regexp pattern"); // -> was generated by sp.js? If not (e.g. favicon.ico request), then silently return final String object = m.group(8); final String querystring = m.group(12); if (!(object.startsWith("/ice.png") || object.equals("/i") || object.startsWith("/i?")) || isNullField(querystring)) { // Also works if Forward Query String = yes return false; } // 1. Now we retrieve the fields which get directly passed through this.dt = m.group(1); this.tm = m.group(2); // CloudFront date format matches Hive's this.user_ipaddress = m.group(5); // 2. Now grab the user agent final String ua = m.group(11); try { this.useragent = decodeSafeString(ua); } catch (Exception e) { getLog().warn(e.getClass().getSimpleName() + " on { useragent: " + ua + " }"); } // 3. Next we dis-assemble the user agent... final UserAgent userAgent = UserAgent.parseUserAgentString(ua); // -> browser fields final Browser b = userAgent.getBrowser(); this.br_name = b.getName(); this.br_family = b.getGroup().getName(); final Version v = userAgent.getBrowserVersion(); this.br_version = (v == null) ? 
null : v.getVersion(); this.br_type = b.getBrowserType().getName(); this.br_renderengine = b.getRenderingEngine().toString(); // -> OS-related fields final OperatingSystem os = userAgent.getOperatingSystem(); this.os_name = os.getName(); this.os_family = os.getGroup().getName(); this.os_manufacturer = os.getManufacturer().getName(); // -> device/hardware-related fields this.dvce_type = os.getDeviceType().getName(); this.dvce_ismobile = os.isMobileDevice(); this.dvce_ismobile_bt = (byte) (this.dvce_ismobile ? 1 : 0); // 4. Now for the versioning (tracker versioning is handled below) this.v_collector = collectorVersion; this.v_etl = "serde-" + ProjectSettings.VERSION; // 5. Now we generate the event ID this.event_id = generateEventId(); // 6. Now we dis-assemble the querystring. String qsUrl = null; // We use this in the block below, and afterwards List<NameValuePair> params = null; // We re-use this for efficiency try { params = URLEncodedUtils.parse(URI.create("http://localhost/?" + querystring), cfEncoding); // For performance, don't convert to a map, just loop through and match to our variables as we // go for (NameValuePair pair : params) { final String name = pair.getName(); final String value = pair.getValue(); final QuerystringFields field; try { field = QuerystringFields.valueOf( name.toUpperCase()); // Java pre-7 can't switch on a string, so hash the string } catch (IllegalArgumentException e) { getLog().warn("Unexpected params { " + name + ": " + value + " }"); continue; } try { switch (field) { // Common fields case E: this.event = asEventType(value); break; case IP: this.user_ipaddress = value; break; case AID: this.app_id = value; break; case P: this.platform = value; break; case TID: this.txn_id = value; break; case UID: this.user_id = value; break; case FP: this.user_fingerprint = value; break; case VID: this.visit_id = Integer.parseInt(value); break; case TSTAMP: // Replace our timestamp fields with the client's timestamp String[] timestamp = 
value.split(" "); this.dt = timestamp[0]; this.tm = timestamp[1]; break; case TV: this.v_tracker = value; break; case LANG: this.br_lang = value; break; case F_PDF: if ((this.br_features_pdf = stringToByte(value)) == 1) this.br_features.add("pdf"); break; case F_FLA: if ((this.br_features_flash = stringToByte(value)) == 1) this.br_features.add("fla"); break; case F_JAVA: if ((this.br_features_java = stringToByte(value)) == 1) this.br_features.add("java"); break; case F_DIR: if ((this.br_features_director = stringToByte(value)) == 1) this.br_features.add("dir"); break; case F_QT: if ((this.br_features_quicktime = stringToByte(value)) == 1) this.br_features.add("qt"); break; case F_REALP: if ((this.br_features_realplayer = stringToByte(value)) == 1) this.br_features.add("realp"); break; case F_WMA: if ((this.br_features_windowsmedia = stringToByte(value)) == 1) this.br_features.add("wma"); break; case F_GEARS: if ((this.br_features_gears = stringToByte(value)) == 1) this.br_features.add("gears"); break; case F_AG: if ((this.br_features_silverlight = stringToByte(value)) == 1) this.br_features.add("ag"); break; case COOKIE: this.br_cookies = stringToBoolean(value); this.br_cookies_bt = (byte) (this.br_cookies ? 
1 : 0); break; case RES: String[] resolution = value.split("x"); if (resolution.length != 2) throw new Exception("Couldn't parse res field"); this.dvce_screenwidth = Integer.parseInt(resolution[0]); this.dvce_screenheight = Integer.parseInt(resolution[1]); break; case CD: this.br_colordepth = value; break; case TZ: this.os_timezone = decodeSafeString(value); break; case REFR: this.page_referrer = decodeSafeString(value); break; case URL: qsUrl = pair.getValue(); // We might use this later for the page URL break; // Page-view only case PAGE: this.page_title = decodeSafeString(value); break; // Event only case EV_CA: this.ev_category = decodeSafeString(value); break; case EV_AC: this.ev_action = decodeSafeString(value); break; case EV_LA: this.ev_label = decodeSafeString(value); break; case EV_PR: this.ev_property = decodeSafeString(value); break; case EV_VA: this.ev_value = decodeSafeString(value); break; // Ecommerce case TR_ID: this.tr_orderid = decodeSafeString(value); break; case TR_AF: this.tr_affiliation = decodeSafeString(value); break; case TR_TT: this.tr_total = decodeSafeString(value); break; case TR_TX: this.tr_tax = decodeSafeString(value); break; case TR_SH: this.tr_shipping = decodeSafeString(value); break; case TR_CI: this.tr_city = decodeSafeString(value); break; case TR_ST: this.tr_state = decodeSafeString(value); break; case TR_CO: this.tr_country = decodeSafeString(value); break; case TI_ID: this.ti_orderid = decodeSafeString(value); break; case TI_SK: this.ti_sku = decodeSafeString(value); break; case TI_NA: this.ti_name = decodeSafeString(value); break; case TI_CA: this.ti_category = decodeSafeString(value); break; case TI_PR: this.ti_price = decodeSafeString(value); break; case TI_QU: this.ti_quantity = decodeSafeString(value); break; } } catch (Exception e) { getLog().warn(e.getClass().getSimpleName() + " on { " + name + ": " + value + " }"); } } } catch (IllegalArgumentException e) { getLog().warn("Corrupted querystring { " + querystring + " 
}"); } // 7. Choose the page_url final String cfUrl = m.group(10); if (!isNullField(cfUrl)) { // CloudFront didn't provide the URL as cs(Referer) this.page_url = cfUrl; // The CloudFront cs(Referer) URL } else { try { this.page_url = decodeSafeString( qsUrl); // Use the decoded querystring URL. Might be null (returned as null) } catch (Exception e) { getLog().warn(e.getClass().getSimpleName() + " on { qsUrl: " + qsUrl + " }"); } } // 8. Finally handle the marketing fields in the page_url // Re-use params to avoid creating another object if (this.page_url != null) { params = null; try { params = URLEncodedUtils.parse(URI.create(this.page_url), "UTF-8"); } catch (IllegalArgumentException e) { getLog().warn("Couldn't parse page_url: " + page_url); } if (params != null) { // For performance, don't convert to a map, just loop through and match to our variables as // we go for (NameValuePair pair : params) { final String name = pair.getName(); final String value = pair.getValue(); final MarketingFields field; try { field = MarketingFields.valueOf( name.toUpperCase()); // Java pre-7 can't switch on a string, so hash the string } catch (IllegalArgumentException e) { // Do nothing: non-marketing related querystring fields are not an issue. continue; } try { switch (field) { // Marketing fields case UTM_MEDIUM: this.mkt_medium = decodeSafeString(value); break; case UTM_SOURCE: this.mkt_source = decodeSafeString(value); break; case UTM_TERM: this.mkt_term = decodeSafeString(value); break; case UTM_CONTENT: this.mkt_content = decodeSafeString(value); break; case UTM_CAMPAIGN: this.mkt_campaign = decodeSafeString(value); break; } } catch (Exception e) { getLog().warn(e.getClass().getSimpleName() + " on { " + name + ": " + value + " }"); } } } } return true; // Successfully updated the row. }