Example #1
0
  /**
   * Smoke test for {@code Encodings.getEncodings()}: the returned map must not be null and its key
   * set must be iterable without throwing.
   */
  public void testGetEncodings() {
    System.out.println("getEncodings");

    Map<?, ?> result = Encodings.getEncodings();
    if (null == result) {
      // Fail with an explicit message instead of an anonymous NullPointerException below.
      throw new AssertionError("Encodings.getEncodings() returned null");
    }

    // Walk every key so that any failure while iterating the map surfaces as a test failure.
    for (Object key : result.keySet()) {
      // Intentionally empty: printing each key is too noisy for a regular test run.
    }
  }
Example #2
0
  /**
   * Timing run for {@code Encodings.FromTo}: converts a short string from "UTF8" to "ISO8859_1" and
   * back again 300000 times and prints the elapsed wall-clock time in seconds.
   */
  public void testFromTo() {
    System.out.println("FromTo");

    final String utf8 = "UTF8";
    final String isoLatin1 = "ISO8859_1";
    final int roundTrips = 300000;

    final long startedAt = System.currentTimeMillis();

    String converted = "text";
    int done = 0;
    while (done < roundTrips) {
      // One round trip: there ...
      converted = Encodings.FromTo(converted, utf8, isoLatin1);
      // ... and back again.
      converted = Encodings.FromTo(converted, isoLatin1, utf8);
      done++;
    }

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    final float elapsedSec = elapsedMillis / 1000f; // milliseconds -> seconds

    System.out.println("FromTo() total time: " + elapsedSec + "sec.");
  }
  /**
   * Counts the translation entries per target language by walking every record of the table
   * 'translation' in the database of the parsed Wiktionary.<br>
   * <br>
   * SELECT id,lang_pos_id,meaning_summary FROM translation;
   *
   * <p>For each record the translation entries are loaded and the counter of each entry's language
   * is incremented. Records whose lang_pos_id cannot be resolved are skipped and reported as
   * "Unknown" in the summary printed to standard output. A record that belongs to a language other
   * than the native one (or Translingual) is reported on standard output but still counted.
   *
   * <p>If an SQLException interrupts the scan, the error is printed and the counts gathered so far
   * are returned.
   *
   * @param wikt_parsed_conn connection to the database of the parsed Wiktionary
   * @return map from a language to the number of translation entries found for that language
   */
  public static Map<LanguageType, Integer> countTranslationPerLanguage(Connect wikt_parsed_conn) {
    // translation -> lang -> count

    Statement s = null;
    ResultSet rs = null;

    int n_unknown_lang_pos = 0; // translation records whose lang_pos could not be resolved

    int n_total = Statistics.Count(wikt_parsed_conn, "translation");
    long t_start = System.currentTimeMillis();

    Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>();
    LanguageType native_lang = wikt_parsed_conn.getNativeLanguage();

    try {
      s = wikt_parsed_conn.conn.createStatement();
      rs = s.executeQuery("SELECT id,lang_pos_id,meaning_summary FROM translation");
      int n_cur = 0;
      while (rs.next()) {
        n_cur++;
        int id = rs.getInt("id");
        TLangPOS lang_pos = TLangPOS.getByID(wikt_parsed_conn, rs.getInt("lang_pos_id"));

        // Check for null before the first dereference; otherwise an unresolved lang_pos would
        // raise a NullPointerException instead of being counted as unknown.
        if (null == lang_pos) {
          n_unknown_lang_pos++;
          continue;
        }

        String meaning_summary = Encodings.bytesToUTF8(rs.getBytes("meaning_summary"));

        TLang tlang = lang_pos.getLang();
        if (null != tlang) {
          LanguageType lt = tlang.getLanguage(); // see: Wiktionary:About Translingual
          if (native_lang != lt && LanguageType.mul != lt) {
            StringBuilder msg = new StringBuilder();
            msg.append(
                    "Error (TranslationTableAll.countTranslationPerLanguage()): There is a translation box from a foreign language, code=")
                .append(lt.getCode());
            TPage p = lang_pos.getPage();
            if (null != p) {
              msg.append(", page_title=").append(p.getPageTitle());
            }
            // Always terminate the line, even when the page is unknown.
            System.out.println(msg.toString());
          }
        }

        TTranslation trans =
            new TTranslation(id, lang_pos, meaning_summary, null); // meaning = null

        TTranslationEntry[] t_entries = TTranslationEntry.getByTranslation(wikt_parsed_conn, trans);

        for (TTranslationEntry entry : t_entries) {
          LanguageType lang = entry.getLang().getLanguage();
          Integer n = m_lang_n.get(lang);
          m_lang_n.put(lang, null == n ? 1 : n + 1);
        }

        if (DEBUG && 0 == n_cur % 1000) {
          // Progress report: elapsed time and a linear estimate of the remaining time.
          long t_cur = System.currentTimeMillis() - t_start; // elapsed, in ms
          // time for 1 record = t_cur / n_cur; result in min, since /(60*1000)
          long t_remain = (long) ((n_total - n_cur) * t_cur / (60f * 1000f * (float) (n_cur)));
          t_cur = (long) (t_cur / (60f * 1000f)); // elapsed, in min

          TPage tpage = lang_pos.getPage();
          if (null != tpage) {
            System.out.println(
                n_cur
                    + ": "
                    + tpage.getPageTitle()
                    + ", duration: "
                    + t_cur
                    + " min, remain: "
                    + t_remain
                    + " min");
          }
        }
      }
    } catch (SQLException ex) {
      System.out.println(
          "SQLException (TranslationTableAll.countTranslationPerLanguage()): " + ex.getMessage());
    } finally {
      if (rs != null) {
        try {
          rs.close();
        } catch (SQLException ignored) {
          // Best-effort cleanup: nothing useful can be done if closing fails.
        }
      }
      if (s != null) {
        try {
          s.close();
        } catch (SQLException ignored) {
          // Best-effort cleanup: nothing useful can be done if closing fails.
        }
      }
    }

    System.out.println(
        "\nTotal translation boxes (translated meanings of words): "
            + n_total
            + "\n\nUnknown<ref>'''Unknown''' - words which have translations but have unknown language code and POS</ref>: "
            + n_unknown_lang_pos);

    return m_lang_n;
  }