@Override public void constructIndex() throws IOException { String corpusFile = _options._corpusPrefix + "/"; System.out.println("Construct index from: " + corpusFile); chooseFiles cf = new chooseFiles(_options); int times = cf.writeTimes(); System.out.println(times); FileOps filewriter = new FileOps(_options._indexPrefix + "/"); for (int i = 0; i < times; i++) { Vector<String> files = cf.loadFile(i); for (String name : files) { String filepath = corpusFile + name; File file = new File(filepath); time = new Date().getTime(); String content = ProcessHtml.process(file); totalTime += new Date().getTime() - time; if (content != null) processDocument(content, name); } // System.out.println("Times here : " + i); // System.out.println("processes:" + totalTime); String name = "temp" + i + ".txt"; Map<String, String> content = new HashMap<String, String>(); for (String term : _index.keySet()) { StringBuilder builder = new StringBuilder(); for (Integer x : _index.get(term)) { builder.append(x + "|"); } builder.deleteCharAt(builder.length() - 1); content.put(term, builder.toString()); } filewriter.write(name, content); _index.clear(); _terms.clear(); } String corpus_statistics = _options._indexPrefix + "/" + "statistics"; BufferedWriter outsta = new BufferedWriter(new FileWriter(corpus_statistics)); // the first line in the corpus_statistics is the number of docs in the corpus outsta.write(_numDocs + "\n"); outsta.write(String.valueOf(_totalTermFrequency) + "\n"); outsta.close(); String[] files = new String[times]; for (int count = 0; count < times; count++) { files[count] = "temp" + count + ".txt"; } filewriter.merge(files, "index.txt", "|"); }
public void generateDataExchangeFiles() { long startTime = System.currentTimeMillis(); if (CmlOptions.SHOW_LOGS) System.out.print("- Generating SQLite DB from EPha drug interactions csv... "); extractInteractionMap(); // Save interactions to DB try { // Create Sqlite database createDB(); // CSV file String csv_file = ""; // int intercnt = 0; for (Map.Entry<String, ArrayList<String>> entry : m_drug_interactions_map.entrySet()) { String key = entry.getKey().toUpperCase(); ArrayList<String> value = entry.getValue(); System.out.println(++intercnt + ": " + key + " interacts with " + value.size() + " meds"); /* key: ATC1 inter0: Name1 inter1: ATC2 inter2: Name2 inter3: Info inter4: Mechanismus inter5: Effekt inter6: Massnahmen inter7: Grad */ /* Risikoklassen ------------- A: Keine Massnahmen notwendig (grün) B: Vorsichtsmassnahmen empfohlen (gelb) C: Regelmässige Überwachung (orange) D: Kombination vermeiden (pinky) X: Kontraindiziert (hellrot) 0: Keine Angaben (grau) */ for (String s : value) { String[] inter = s.split("\\|\\|"); String risk_class = ""; if (inter[7].equals("A")) risk_class = "Keine Massnahmen notwendig"; else if (inter[7].equals("B")) risk_class = "Vorsichtsmassnahmen empfohlen"; else if (inter[7].equals("C")) risk_class = "Regelmässige Überwachung"; else if (inter[7].equals("D")) risk_class = "Kombination vermeiden"; else if (inter[7].equals("X")) risk_class = "Kontraindiziert"; else if (inter[7].equals("0")) risk_class = "Keine Angaben"; String para_class = "paragraph" + inter[7]; String html_content = "<div>" + "<div class=\"" + para_class + "\" id=\"" + key + "-" + inter[1] + "\">" + "<div class=\"absTitle\">" + key + " [" + inter[0] + "] → " + inter[1] + " [" + inter[2] + "]</div></div>" + "<p class=\"spacing2\">" + "<i>Risikoklasse:</i> " + risk_class + " (" + inter[7] + ")</p>" + "<p class=\"spacing2\">" + "<i>Möglicher Effekt:</i> " + inter[3] + "</p>" + "<p class=\"spacing2\">" + "<i>Mechanismus:</i> " + inter[4] + "</p>" + "<p class=\"spacing2\">" + "<i>Empfohlene Massnahmen:</i> " + inter[6] + "</p></div>"; // Add to sqlite database addDB(key, inter[0], inter[1], inter[2], html_content); // Add to csv file csv_file += key + "||" + inter[1] + "||" + html_content + "\n"; // Add to atc code - html map String atc1_atc2_key = key + "-" + inter[1]; if (atc1_atc2_key != null) { ArrayList<String> interaction_html = m_atc1_atc2_html_map.get(atc1_atc2_key); if (interaction_html == null) interaction_html = new ArrayList<String>(); interaction_html.add(html_content); m_atc1_atc2_html_map.put(atc1_atc2_key, interaction_html); } } // Assume batch size of 20 if (batch_cnt > 20) { conn.setAutoCommit(false); prep.executeBatch(); conn.setAutoCommit(true); batch_cnt = 0; } batch_cnt++; } // Add the rest conn.setAutoCommit(false); prep.executeBatch(); conn.setAutoCommit(true); // Compress stat.executeUpdate("VACUUM;"); // Write CSV to file FileOps.writeToFile( csv_file, Constants.DIR_OUTPUT, "drug_interactions_csv_" + CmlOptions.DB_LANGUAGE + ".csv"); // Zip file FileOps.zipToFile( Constants.DIR_OUTPUT, "drug_interactions_csv_" + CmlOptions.DB_LANGUAGE + ".csv"); // Generate report if (CmlOptions.GENERATE_REPORTS == true) { // Create interactions error report file InteractionsReport interactions_errors = new InteractionsReport( Constants.FILE_INTERACTIONS_REPORT, CmlOptions.DB_LANGUAGE, "html"); String report_style_str = FileOps.readCSSfromFile(Constants.FILE_REPORT_CSS_BASE + ".css"); interactions_errors.addStyleSheet(report_style_str); if (CmlOptions.DB_LANGUAGE.equals("de")) interactions_errors.addHtmlHeader( "Schweizer Arzneimittel-Kompendium", Constants.FI_DB_VERSION); else if (CmlOptions.DB_LANGUAGE.equals("fr")) interactions_errors.addHtmlHeader( "Compendium des Médicaments Suisse", Constants.FI_DB_VERSION); interactions_errors.append(interactions_errors.treemapToHtmlTable(m_atc1_atc2_html_map)); interactions_errors.writeHtmlToFile(); interactions_errors.getBWriter().close(); } } catch (IOException e) { e.printStackTrace(); } catch (SQLException e) { System.out.println("SQLException!"); } catch (ClassNotFoundException e) { System.out.println("ClassNotFoundException!"); } long stopTime = System.currentTimeMillis(); if (CmlOptions.SHOW_LOGS) { System.out.println( "processed " + m_drug_interactions_map.size() + " drug interactions in " + (stopTime - startTime) / 1000.0f + " sec"); } }