public static void main(String[] args) throws IOException { try { String type = "rawWords"; Utility u = new Utility(type); String folder = u.returnFolderName(); Double avg_doc_length = u.returnAvgLength(); Map<String, DocBean> docCatBean = u.getDocCat(); Map<String, TokenCatalogBean> tokenCatBean = u.getTokenCat(); System.out.println("Avg Doc Length:: " + avg_doc_length); System.out.println("DocCatBean Size:: " + docCatBean.size()); System.out.println("Token Cat Bean:: " + tokenCatBean.size()); System.out.println("Doc Bean:::: " + u.getDocBean().size()); long vocabSize = tokenCatBean.size(); System.out.println("Vocab Size ::" + vocabSize); System.out.println("Avg Doc Length:::: " + avg_doc_length); /* Method to read the query file */ String query_file_path = "C:/Users/Nitin/NEU/Summer Sem/IR/Data/Assign 1/AP89_DATA/AP_DATA/query_desc.51-100.short.txt"; /* This will be later replaced by path from the config file */ GetFinalQueries qu = new GetFinalQueries(); List<String> queries = qu.readQueryFile(query_file_path); /* * Method to get stopwords from the file and append the common words * from query file */ /* Changed the split regex from space to space and hypen */ List<String> stop_words_final = qu.getStopWords(); /* * List<String> stop_words_final = new ArrayList<String>(); * * for (int i = 0; i < stop_words_custom.length; i++) { * stop_words_final.add(stop_words_custom[i]); } */ /* * Method to remove stopwords from query and just get the final * query */ /* * Iterating the queries one by one. Each Query is a list of String * (Query Words) */ List<List<String>> final_query = new ArrayList<List<String>>(); final_query = qu.getFinalQueryList(queries, stop_words_final); /* * for (List<String> query : final_query) { * System.out.println("___________________________"); for (String q * : query) { System.out.println(q); } * System.out.println("___________________________"); } */ List<List<String>> resultOkapi = new ArrayList<List<String>>(); /* for (String query : queries) { */ for (List<String> query : final_query) { Map<String, String> queryTFList = new HashMap<String, String>(); Map<String, Double> rankTerm = new HashMap<String, Double>(); String querynum = null; System.out.println("Query Minus stop words"); System.out.println("======================="); // System.out.println(query.get(0)); /* * For every word in a query calculates the okapif value and * sums it up */ querynum = query.get(0).replace(".", ""); // System.out.println("Query Numm::: "+ querynum); System.out.println("Query Size::" + query.size()); for (int i = 1; i < query.size(); i++) { /* Method to calculate tfs for each term in query */ String w = query.get(i).toLowerCase(); Map<String, Integer> tfMap = new HashMap<String, Integer>(); // System.out.println("Calculating for Word::: " + q); tfMap = laplaceSmoothing( w.replaceAll("[,\"()]", ""), avg_doc_length, tokenCatBean, docCatBean, u, vocabSize, folder); System.out.println( "Size of TF Results:: " + tfMap.size() + "for :" + w.replaceAll("[,\"()]", "")); for (Map.Entry<String, Integer> term : tfMap.entrySet()) { if (queryTFList.get(term.getKey()) == null) { queryTFList.put(term.getKey(), term.getValue().toString()); } else { queryTFList.put( term.getKey(), queryTFList.get(term.getKey()) + " " + term.getValue().toString()); // System.out.println("TF:: "+queryTFList.get(term.getKey())); } } } System.out.println("Final DOc List Size::: " + queryTFList.size()); System.out.println("Calculating Laplace Smoothing Score for each ::::::: "); for (Map.Entry<String, String> d : queryTFList.entrySet()) { double docLen = getDocLength(d.getKey(), docCatBean); rankTerm.put( d.getKey(), laplacePerTerm(d.getValue(), docLen, avg_doc_length, vocabSize, query.size())); } /* Method to Sort Hashmap based on the value */ SortMap sm = new SortMap(); LinkedHashMap<String, Double> sortedRanks = (LinkedHashMap<String, Double>) sm.getSortedRankMap(rankTerm); int j = 1; List<String> queryResults = new ArrayList<String>(); for (Entry<String, Double> term : sortedRanks.entrySet()) { if (j <= 1000) { String toWrite = querynum + " " + "Q0" + " " + term.getKey() + " " + j + " " + term.getValue() + " " + "EXP"; // System.out.println(toWrite); queryResults.add(toWrite); } else { // bw.newLine(); break; } j++; } resultOkapi.add(queryResults); } WriteFile w = new WriteFile(); w.writeToFile(resultOkapi, "Laplace-1.txt", type); // node.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }