public void testGetEncodings() { System.out.println("getEncodings"); Map expResult = null; Map result = Encodings.getEncodings(); for (Object o : result.keySet()) { // System.out.println(o.toString()); } }
public void testFromTo() { System.out.println("FromTo"); long t_start, t_end; float t_work; t_start = System.currentTimeMillis(); String text = "text"; for (int i = 0; i < 300000; i++) { String encode_from = "UTF8"; String encode_to = "ISO8859_1"; text = Encodings.FromTo(text, encode_from, encode_to); text = Encodings.FromTo(text, encode_to, encode_from); } t_end = System.currentTimeMillis(); t_work = (t_end - t_start) / 1000f; // in sec System.out.println("FromTo() total time: " + t_work + "sec."); }
/** * Counts number of translations of native word's meaning into each foreign language by selecting * all records from the table 'translation' from the database of the parsed Wiktionary.<br> * <br> * SELECT * FROM translation; * * @param connect connection to the database of the parsed Wiktionary * @return map from the language into a number of translation boxes which contain synonyms, * antonyms, etc. in English (etc.) */ public static Map<LanguageType, Integer> countTranslationPerLanguage(Connect wikt_parsed_conn) { // translation -> lang -> count Statement s = null; ResultSet rs = null; long t_start; int n_unknown_lang_pos = 0; // translations into unknown languages int n_total = Statistics.Count(wikt_parsed_conn, "translation"); // System.out.println("Total translation boxes (translated meanings of words): " + n_total); t_start = System.currentTimeMillis(); Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>(); LanguageType native_lang = wikt_parsed_conn.getNativeLanguage(); try { s = wikt_parsed_conn.conn.createStatement(); StringBuilder str_sql = new StringBuilder(); // SELECT id,lang_pos_id,meaning_summary,meaning_id FROM translation str_sql.append("SELECT id,lang_pos_id,meaning_summary FROM translation"); s.executeQuery(str_sql.toString()); rs = s.getResultSet(); int n_cur = 0; while (rs.next()) { n_cur++; int id = rs.getInt("id"); TLangPOS lang_pos = TLangPOS.getByID(wikt_parsed_conn, rs.getInt("lang_pos_id")); String meaning_summary = Encodings.bytesToUTF8(rs.getBytes("meaning_summary")); TLang tlang = lang_pos.getLang(); LanguageType lt = tlang.getLanguage(); // see: Wiktionary:About Translingual if (null != tlang && native_lang != lt && LanguageType.mul != lt) { System.out.print( "Error (TranslationTableAll.countTranslationPerLanguage()): There is a translation box from a foreign language, code=" + tlang.getLanguage().getCode()); TPage p = lang_pos.getPage(); if (null != p) System.out.println(", page_title=" + p.getPageTitle()); } if (null != lang_pos) { TTranslation trans = new TTranslation(id, lang_pos, meaning_summary, null); // meaning = null TTranslationEntry[] t_entries = TTranslationEntry.getByTranslation(wikt_parsed_conn, trans); for (TTranslationEntry entry : t_entries) { LanguageType lang = entry.getLang().getLanguage(); if (m_lang_n.containsKey(lang)) { int n = m_lang_n.get(lang); m_lang_n.put(lang, n + 1); } else m_lang_n.put(lang, 1); } if (DEBUG && 0 == n_cur % 1000) { // % 100 // if(n_cur > 333) // break; long t_cur, t_remain; t_cur = System.currentTimeMillis() - t_start; t_remain = (long) ((n_total - n_cur) * t_cur / (60f * 1000f * (float) (n_cur))); // where time for 1 page = t_cur / n_cur // in min, since /(60*1000) t_cur = (long) (t_cur / (60f * 1000f)); // t_cur = t_cur/(60f*1000f)); TPage tpage = lang_pos.getPage(); if (null != tpage) { System.out.println( n_cur + ": " + tpage.getPageTitle() + ", duration: " + t_cur + // t_cur/(60f*1000f) + " min, remain: " + t_remain + " min"); } } } else n_unknown_lang_pos++; } } catch (SQLException ex) { System.out.println( "SQLException (TranslationTableAll.countTranslationPerLanguage()): " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } // long t_end; // float t_work; // t_end = System.currentTimeMillis(); // t_work = (t_end - t_start)/1000f; // in sec System.out.println( // "\nTime sec:" + t_work + "\nTotal translation boxes (translated meanings of words): " + n_total + "\n\nUnknown<ref>'''Unknown''' - words which have translations but have unknown language code and POS</ref>: " + n_unknown_lang_pos); return m_lang_n; }