protected void setUp() throws Exception { super.setUp(); ruwikt_conn = new Connect(context, LanguageType.ru); ruwikt_conn.openDatabase(); db = ruwikt_conn.getDB(); TLang.createFastMaps(db); TPOS.createFastMaps(db); // once upon a time: use Wiktionary parsed db }
public static void main(String[] args) { // Connect to wikt_parsed database Connect wikt_parsed_conn = new Connect(); // Russian // LanguageType native_lang = LanguageType.ru; // wikt_parsed_conn.Open(Connect.RUWIKT_HOST, Connect.RUWIKT_PARSED_DB, Connect.RUWIKT_USER, // Connect.RUWIKT_PASS, LanguageType.ru); // English LanguageType native_lang = LanguageType.en; wikt_parsed_conn.Open( Connect.ENWIKT_HOST, Connect.ENWIKT_PARSED_DB, Connect.ENWIKT_USER, Connect.ENWIKT_PASS, LanguageType.en); TLang.createFastMaps(wikt_parsed_conn); TPOS.createFastMaps(wikt_parsed_conn); // TRelationType.createFastMaps(wikt_parsed_conn); String db_name = wikt_parsed_conn.getDBName(); System.out.println("\n== Statistics of translations in the Wiktionary parsed database =="); CommonPrinter.printHeader(db_name); Map<LanguageType, Integer> m = TranslationTableAll.countTranslationPerLanguage(wikt_parsed_conn); wikt_parsed_conn.Close(); System.out.println(); int total_trans = CommonPrinter.printSomethingPerLanguage(native_lang, m); System.out.println("Total translations: " + total_trans); System.out.println("\nThere are translations into " + m.size() + " languages."); CommonPrinter.printFooter(); }
/** * Counts number of translations of native word's meaning into each foreign language by selecting * all records from the table 'translation' from the database of the parsed Wiktionary.<br> * <br> * SELECT * FROM translation; * * @param connect connection to the database of the parsed Wiktionary * @return map from the language into a number of translation boxes which contain synonyms, * antonyms, etc. in English (etc.) */ public static Map<LanguageType, Integer> countTranslationPerLanguage(Connect wikt_parsed_conn) { // translation -> lang -> count Statement s = null; ResultSet rs = null; long t_start; int n_unknown_lang_pos = 0; // translations into unknown languages int n_total = Statistics.Count(wikt_parsed_conn, "translation"); // System.out.println("Total translation boxes (translated meanings of words): " + n_total); t_start = System.currentTimeMillis(); Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>(); LanguageType native_lang = wikt_parsed_conn.getNativeLanguage(); try { s = wikt_parsed_conn.conn.createStatement(); StringBuilder str_sql = new StringBuilder(); // SELECT id,lang_pos_id,meaning_summary,meaning_id FROM translation str_sql.append("SELECT id,lang_pos_id,meaning_summary FROM translation"); s.executeQuery(str_sql.toString()); rs = s.getResultSet(); int n_cur = 0; while (rs.next()) { n_cur++; int id = rs.getInt("id"); TLangPOS lang_pos = TLangPOS.getByID(wikt_parsed_conn, rs.getInt("lang_pos_id")); String meaning_summary = Encodings.bytesToUTF8(rs.getBytes("meaning_summary")); TLang tlang = lang_pos.getLang(); LanguageType lt = tlang.getLanguage(); // see: Wiktionary:About Translingual if (null != tlang && native_lang != lt && LanguageType.mul != lt) { System.out.print( "Error (TranslationTableAll.countTranslationPerLanguage()): There is a translation box from a foreign language, code=" + tlang.getLanguage().getCode()); TPage p = lang_pos.getPage(); if (null != p) System.out.println(", page_title=" + p.getPageTitle()); } 
if (null != lang_pos) { TTranslation trans = new TTranslation(id, lang_pos, meaning_summary, null); // meaning = null TTranslationEntry[] t_entries = TTranslationEntry.getByTranslation(wikt_parsed_conn, trans); for (TTranslationEntry entry : t_entries) { LanguageType lang = entry.getLang().getLanguage(); if (m_lang_n.containsKey(lang)) { int n = m_lang_n.get(lang); m_lang_n.put(lang, n + 1); } else m_lang_n.put(lang, 1); } if (DEBUG && 0 == n_cur % 1000) { // % 100 // if(n_cur > 333) // break; long t_cur, t_remain; t_cur = System.currentTimeMillis() - t_start; t_remain = (long) ((n_total - n_cur) * t_cur / (60f * 1000f * (float) (n_cur))); // where time for 1 page = t_cur / n_cur // in min, since /(60*1000) t_cur = (long) (t_cur / (60f * 1000f)); // t_cur = t_cur/(60f*1000f)); TPage tpage = lang_pos.getPage(); if (null != tpage) { System.out.println( n_cur + ": " + tpage.getPageTitle() + ", duration: " + t_cur + // t_cur/(60f*1000f) + " min, remain: " + t_remain + " min"); } } } else n_unknown_lang_pos++; } } catch (SQLException ex) { System.out.println( "SQLException (TranslationTableAll.countTranslationPerLanguage()): " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } // long t_end; // float t_work; // t_end = System.currentTimeMillis(); // t_work = (t_end - t_start)/1000f; // in sec System.out.println( // "\nTime sec:" + t_work + "\nTotal translation boxes (translated meanings of words): " + n_total + "\n\nUnknown<ref>'''Unknown''' - words which have translations but have unknown language code and POS</ref>: " + n_unknown_lang_pos); return m_lang_n; }
/**
 * Releases the fixture: closes the Wiktionary connection held in {@code ruwikt_conn} (if one
 * was created) and then runs the superclass tear-down.
 *
 * <p>The connection is closed before {@code super.tearDown()} is called, and the superclass
 * call sits in a {@code finally} block, so neither step is skipped when the other one throws.
 *
 * @throws Exception propagated from closing the connection or from the superclass tear-down
 */
protected void tearDown() throws Exception {
    try {
        // Guard against a fixture whose connection was never created.
        if (ruwikt_conn != null) {
            ruwikt_conn.close();
        }
    } finally {
        super.tearDown();
    }
}