private static void fetch_magasin_info(String magasin_to_analyse) { // getting the URLs infos for each rayon PreparedStatement field_pst; try { field_pst = con.prepareStatement( "SELECT NB_ATTRIBUTES,ATTRIBUTES,URL,MAGASIN,RAYON,PRODUIT,PAGE_TYPE,CDISCOUNT_VENDOR FROM CRAWL_RESULTS WHERE MAGASIN='" + magasin_to_analyse + "'"); System.out.println("I am requesting the database, please wait a few seconds"); ResultSet field_rs = field_pst.executeQuery(); while (field_rs.next()) { URLContentInfo url_info = new URLContentInfo(); int nb_attributes = field_rs.getInt(1); String attributes = field_rs.getString(2); System.out.println("Adding attributes " + attributes); String my_url = field_rs.getString(3); String my_magasin = field_rs.getString(4); String my_rayon = field_rs.getString(5); String my_produit = field_rs.getString(6); String my_page_type = field_rs.getString(7); boolean isCdiscountVendor = field_rs.getBoolean(8); url_info.setNb_attributes(nb_attributes); url_info.setAttributes(attributes); url_info.setUrl(my_url); url_info.setMagasin(my_magasin); url_info.setRayon(my_rayon); url_info.setProduit(my_produit); url_info.setPageType(my_page_type); url_info.setCdiscountVendor(isCdiscountVendor); magasins_datas.add(url_info); } } catch (SQLException e) { // TODO Auto-generated catch block System.out.println("Database trouble with the magasin :" + magasin_to_analyse); e.printStackTrace(); } }
private static void analyse_magasin_per_category( String magasin_to_analyse, String output_directory) { // Map to store the counter of product occurence for a certain category Map<String, Integer> category_counter = new HashMap<String, Integer>(); // Map to store for each attribut of a category its own counter Map<String, Map<String, Integer>> attributs_count_inside_category_map = new HashMap<String, Map<String, Integer>>(); System.out.println("Assessing " + magasins_datas.size() + " URLs"); // Looping over the collected datas for the magasin for (URLContentInfo rayon_info : magasins_datas) { String attributes_listing = rayon_info.getAttributes(); String checkType = rayon_info.getPageType(); // we handle only the <Fiche product> if ("FicheProduit".equals(checkType)) { // we make sure that attributes were found for our <Fiche product> if (attributes_listing.contains("|||")) { // we parse the found attribute Map<String, String> arguments_map = parse_arguments(attributes_listing); // we locate the <Category> value in the list of our arguments String category_value = arguments_map.get(categoryString); if (category_value == null) { category_value = unknownCategory; } System.out.println("Adding a product to the category : " + category_value); Integer counter = category_counter.get(category_value); if (counter == null) { counter = new Integer(1); category_counter.put(category_value, counter); } else { counter = counter + 1; category_counter.put(category_value, counter); } System.out.println( "Incrementing counter for each attribute inside the following category : " + category_value); Map<String, Integer> attributs_count_inside_category = attributs_count_inside_category_map.get(category_value); if (attributs_count_inside_category == null) { attributs_count_inside_category = new HashMap<String, Integer>(); attributs_count_inside_category_map.put( category_value, attributs_count_inside_category); } // iterating over every attribut found Iterator<Map.Entry<String, String>> arg_it = arguments_map.entrySet().iterator(); while (arg_it.hasNext()) { Map.Entry<String, String> pairs = (Map.Entry<String, String>) arg_it.next(); // we are here just interested by our argument naming String argument_name = pairs.getKey(); Integer arg_counter = attributs_count_inside_category.get(argument_name); if (arg_counter == null) { arg_counter = new Integer(1); attributs_count_inside_category.put(argument_name, arg_counter); } else { arg_counter = arg_counter + 1; attributs_count_inside_category.put(argument_name, arg_counter); } } } } } // to do : save the results for the rayon System.out.println( "Saving the results for magasin : " + magasin_to_analyse + " as a csv file in : " + output_directory); savingDataArguments( category_counter, attributs_count_inside_category_map, magasin_to_analyse, output_directory); }