/**
   * Builds the index in batches and merges the per-batch files into {@code index.txt}.
   *
   * <p>For each of the {@code cf.writeTimes()} batches, every file returned by
   * {@code cf.loadFile(i)} is run through {@code ProcessHtml.process} and, when that yields
   * non-null content, through {@code processDocument}. The in-memory {@code _index} is then
   * serialized as {@code term -> "id|id|..."} into {@code temp<i>.txt} and cleared together with
   * {@code _terms}. After all batches, a {@code statistics} file holding {@code _numDocs} and
   * {@code _totalTermFrequency} (one per line) is written and the temp files are merged.
   *
   * @throws IOException declared by the overridden method; raised here at least by the
   *     statistics writer (what the project helpers throw is not visible from this method)
   */
  @Override
  public void constructIndex() throws IOException {
    String corpusFile = _options._corpusPrefix + "/";
    System.out.println("Construct index from: " + corpusFile);

    chooseFiles cf = new chooseFiles(_options);
    int times = cf.writeTimes();
    System.out.println(times);
    FileOps filewriter = new FileOps(_options._indexPrefix + "/");
    for (int i = 0; i < times; i++) {
      Vector<String> files = cf.loadFile(i);
      for (String name : files) {
        File file = new File(corpusFile + name);
        // Accumulate the wall-clock time spent in HTML processing into totalTime.
        time = System.currentTimeMillis();
        String content = ProcessHtml.process(file);
        totalTime += System.currentTimeMillis() - time;
        if (content != null) {
          processDocument(content, name);
        }
      }

      // Serialize this batch's postings, one "|"-separated string per term.
      String tempName = "temp" + i + ".txt";
      Map<String, String> postings = new HashMap<String, String>();
      for (String term : _index.keySet()) {
        StringBuilder builder = new StringBuilder();
        for (Integer x : _index.get(term)) {
          // Separator goes *before* every element but the first, so an empty posting list
          // yields "" instead of failing on a trailing-separator removal.
          if (builder.length() > 0) {
            builder.append('|');
          }
          builder.append(x);
        }
        postings.put(term, builder.toString());
      }

      filewriter.write(tempName, postings);
      // Drop the in-memory state before the next batch is processed.
      _index.clear();
      _terms.clear();
    }

    String corpus_statistics = _options._indexPrefix + "/" + "statistics";
    BufferedWriter outsta = new BufferedWriter(new FileWriter(corpus_statistics));
    try {
      // the first line in the corpus_statistics is the number of docs in the corpus
      outsta.write(_numDocs + "\n");
      outsta.write(String.valueOf(_totalTermFrequency) + "\n");
    } finally {
      // Close even when a write fails so the file handle is not leaked.
      outsta.close();
    }

    String[] tempFiles = new String[times];
    for (int count = 0; count < times; count++) {
      tempFiles[count] = "temp" + count + ".txt";
    }
    filewriter.merge(tempFiles, "index.txt", "|");
  }
Exemplo n.º 2
0
  /**
   * Exports the drug-interaction map to a SQLite database, a zipped CSV file and, optionally,
   * an HTML report.
   *
   * <p>Steps, in order: {@code extractInteractionMap()} is called, {@code createDB()} is called,
   * then every record in {@code m_drug_interactions_map} is rendered to an HTML snippet and passed
   * to {@code addDB}, appended to the CSV text and collected in {@code m_atc1_atc2_html_map}
   * under the key {@code ATC1-ATC2}. Pending statements on {@code prep} are executed in batches,
   * the database is vacuumed, the CSV is written and zipped, and when
   * {@code CmlOptions.GENERATE_REPORTS} is set an interactions report is produced.
   *
   * <p>{@code IOException}, {@code SQLException} and {@code ClassNotFoundException} are caught
   * and printed; they are not propagated to the caller.
   */
  public void generateDataExchangeFiles() {

    long startTime = System.currentTimeMillis();
    if (CmlOptions.SHOW_LOGS) {
      System.out.print("- Generating SQLite DB from EPha drug interactions csv... ");
    }

    extractInteractionMap();

    // Save interactions to DB
    try {
      // Create Sqlite database
      createDB();
      // CSV file content; a StringBuilder avoids re-copying the whole text on every record.
      StringBuilder csv_file = new StringBuilder();
      int intercnt = 0;
      for (Map.Entry<String, ArrayList<String>> entry : m_drug_interactions_map.entrySet()) {
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
        // (e.g. "i" must become "I", not the dotted Turkish capital).
        String key = entry.getKey().toUpperCase(java.util.Locale.ROOT);
        ArrayList<String> value = entry.getValue();
        System.out.println(++intercnt + ": " + key + " interacts with " + value.size() + " meds");

        /*
        	Record layout after splitting on "||":
        	key: ATC1
        	inter0: Name1
        	inter1: ATC2
        	inter2: Name2
        	inter3: Info
        	inter4: Mechanism
        	inter5: Effect
        	inter6: Measures
        	inter7: Grade
        */
        for (String s : value) {
          String[] inter = s.split("\\|\\|");
          // split() drops trailing empty fields, so a record with an empty grade (or any
          // truncated record) has fewer than 8 elements; skip it instead of aborting the run.
          if (inter.length < 8) {
            System.err.println(
                "Skipping malformed interaction record for "
                    + key
                    + ": expected 8 fields but found "
                    + inter.length);
            continue;
          }

          String risk_class = riskClassLabel(inter[7]);
          String html_content = buildInteractionHtml(key, inter, risk_class);
          // Add to sqlite database
          addDB(key, inter[0], inter[1], inter[2], html_content);
          // Add to csv file
          csv_file
              .append(key)
              .append("||")
              .append(inter[1])
              .append("||")
              .append(html_content)
              .append("\n");
          // Add to atc code - html map
          String atc1_atc2_key = key + "-" + inter[1];
          ArrayList<String> interaction_html = m_atc1_atc2_html_map.get(atc1_atc2_key);
          if (interaction_html == null) {
            interaction_html = new ArrayList<String>();
            m_atc1_atc2_html_map.put(atc1_atc2_key, interaction_html);
          }
          interaction_html.add(html_content);
        }
        // Assume batch size of 20; the counter advances once per map entry, not per record.
        if (batch_cnt > 20) {
          executePendingBatch();
          batch_cnt = 0;
        }
        batch_cnt++;
      }
      // Add the rest
      executePendingBatch();
      // Compress
      stat.executeUpdate("VACUUM;");

      // Write CSV to file
      FileOps.writeToFile(
          csv_file.toString(),
          Constants.DIR_OUTPUT,
          "drug_interactions_csv_" + CmlOptions.DB_LANGUAGE + ".csv");
      // Zip file
      FileOps.zipToFile(
          Constants.DIR_OUTPUT, "drug_interactions_csv_" + CmlOptions.DB_LANGUAGE + ".csv");
      // Generate report
      if (CmlOptions.GENERATE_REPORTS) {
        // Create interactions error report file
        InteractionsReport interactions_errors =
            new InteractionsReport(
                Constants.FILE_INTERACTIONS_REPORT, CmlOptions.DB_LANGUAGE, "html");
        String report_style_str = FileOps.readCSSfromFile(Constants.FILE_REPORT_CSS_BASE + ".css");
        interactions_errors.addStyleSheet(report_style_str);
        if (CmlOptions.DB_LANGUAGE.equals("de")) {
          interactions_errors.addHtmlHeader(
              "Schweizer Arzneimittel-Kompendium", Constants.FI_DB_VERSION);
        } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
          interactions_errors.addHtmlHeader(
              "Compendium des Médicaments Suisse", Constants.FI_DB_VERSION);
        }
        interactions_errors.append(interactions_errors.treemapToHtmlTable(m_atc1_atc2_html_map));
        interactions_errors.writeHtmlToFile();
        interactions_errors.getBWriter().close();
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (SQLException e) {
      System.out.println("SQLException!");
      // Print the cause as well; the bare marker above gives no hint of what failed.
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      System.out.println("ClassNotFoundException!");
      e.printStackTrace();
    }

    long stopTime = System.currentTimeMillis();
    if (CmlOptions.SHOW_LOGS) {
      System.out.println(
          "processed "
              + m_drug_interactions_map.size()
              + " drug interactions in "
              + (stopTime - startTime) / 1000.0f
              + " sec");
    }
  }

  /**
   * Executes the statements batched on {@code prep}, with auto-commit switched off for the
   * duration of the call and switched back on afterwards.
   *
   * @throws SQLException if toggling auto-commit or executing the batch fails
   */
  private void executePendingBatch() throws SQLException {
    conn.setAutoCommit(false);
    prep.executeBatch();
    conn.setAutoCommit(true);
  }

  /**
   * Maps a risk grade to its German label as shown in the generated HTML.
   *
   * <p>Risk classes: A = no measures necessary, B = precautions recommended, C = regular
   * monitoring, D = avoid combination, X = contraindicated, 0 = no information.
   *
   * @param grade the grade field of an interaction record
   * @return the label for the grade, or an empty string for any other value
   */
  private static String riskClassLabel(String grade) {
    if (grade.equals("A")) {
      return "Keine Massnahmen notwendig";
    } else if (grade.equals("B")) {
      return "Vorsichtsmassnahmen empfohlen";
    } else if (grade.equals("C")) {
      return "Regelmässige Überwachung";
    } else if (grade.equals("D")) {
      return "Kombination vermeiden";
    } else if (grade.equals("X")) {
      return "Kontraindiziert";
    } else if (grade.equals("0")) {
      return "Keine Angaben";
    }
    return "";
  }

  /**
   * Renders one interaction record as an HTML snippet.
   *
   * @param key upper-cased ATC code of the first drug
   * @param inter the record fields; indices 0-4, 6 and 7 are read
   * @param riskClass label corresponding to {@code inter[7]}
   * @return the HTML for this interaction
   */
  private static String buildInteractionHtml(String key, String[] inter, String riskClass) {
    String para_class = "paragraph" + inter[7];
    // NOTE(review): the "Möglicher Effekt" paragraph shows inter[3] (listed as Info in the
    // record layout) and inter[5] (Effect) is never used — confirm this is intended.
    return "<div>"
        + "<div class=\""
        + para_class
        + "\" id=\""
        + key
        + "-"
        + inter[1]
        + "\">"
        + "<div class=\"absTitle\">"
        + key
        + " ["
        + inter[0]
        + "] &rarr; "
        + inter[1]
        + " ["
        + inter[2]
        + "]</div></div>"
        + "<p class=\"spacing2\">"
        + "<i>Risikoklasse:</i> "
        + riskClass
        + " ("
        + inter[7]
        + ")</p>"
        + "<p class=\"spacing2\">"
        + "<i>Möglicher Effekt:</i> "
        + inter[3]
        + "</p>"
        + "<p class=\"spacing2\">"
        + "<i>Mechanismus:</i> "
        + inter[4]
        + "</p>"
        + "<p class=\"spacing2\">"
        + "<i>Empfohlene Massnahmen:</i> "
        + inter[6]
        + "</p></div>";
  }