/** * Deletes all records from the table 'label_category', loads names of labels categories from * LabelCategory.java, sorts by name, fills the table. */ public static void recreateTable(Connect connect) { System.out.println("Recreating the table `label_category`..."); Map<Integer, LabelCategory> _id2category = fillLocalMaps(); UtilSQL.deleteAllRecordsResetAutoIncrement(connect, "label_category"); fillDB(connect, _id2category); { int db_current_size = wikokit.base.wikipedia.sql.Statistics.Count(connect, "label_category"); assert (db_current_size == LabelCategory.size()); // ~ NNN label categories entered by hand } }
/**
 * Builds the in-memory lookup maps between label categories and their IDs in the table
 * 'label_category' ({@code category2id} and {@code id2category}).<br>
 * <br>
 * For every category returned by {@code LabelCategory.getAllLabelCats()} the ID is looked up by
 * name in the database; categories whose lookup yields 0 are reported and left out of both
 * maps. If the table is empty, an error is printed and the existing maps are left untouched.<br>
 * <br>
 * REM: during a creation of Wiktionary parsed database the function recreateTable() should be
 * called before this function.
 *
 * @param connect connection to the database holding the table 'label_category'
 */
public static void createFastMaps(Connect connect) {
  System.out.println("Loading table `label_category`...");

  final int size = Statistics.Count(connect, "label_category");
  if (0 == size) {
    System.out.println(
        "Error (wikt_parsed TLabelCategory.createFastMaps()):: The table `label_category` is empty!");
    return;
  }

  // Empty the previous maps before dropping them, so that anyone still holding a reference to
  // the old instances does not keep seeing stale entries.
  if (category2id != null && !category2id.isEmpty()) {
    category2id.clear();
  }
  if (id2category != null && !id2category.isEmpty()) {
    id2category.clear();
  }
  category2id = new LinkedHashMap<>(size);
  id2category = new LinkedHashMap<>(size);

  for (LabelCategory category : LabelCategory.getAllLabelCats()) {
    int categoryId = getIDByName(connect, category.getName());
    if (0 != categoryId) {
      category2id.put(category, categoryId);
      id2category.put(categoryId, category);
    } else {
      // ID 0 means the name was not resolved in the table; skip this category.
      System.out.println(
          "Error (wikt_parsed TLabelCategory.createFastMaps()):: There is an empty label category name, check the table `label_category`!");
    }
  }

  // A size mismatch between the code and the table is only reported, not treated as fatal.
  if (LabelCategory.size() != size) {
    System.out.println(
        "Warning (wikt_parsed TLabelCategory.createFastMaps()):: LabelCategory.size ("
            + LabelCategory.size()
            + ") is not equal to size of table 'label_category'("
            + size
            + "). Is the database outdated?");
  }
}
/** * Counts number of translations of native word's meaning into each foreign language by selecting * all records from the table 'translation' from the database of the parsed Wiktionary.<br> * <br> * SELECT * FROM translation; * * @param connect connection to the database of the parsed Wiktionary * @return map from the language into a number of translation boxes which contain synonyms, * antonyms, etc. in English (etc.) */ public static Map<LanguageType, Integer> countTranslationPerLanguage(Connect wikt_parsed_conn) { // translation -> lang -> count Statement s = null; ResultSet rs = null; long t_start; int n_unknown_lang_pos = 0; // translations into unknown languages int n_total = Statistics.Count(wikt_parsed_conn, "translation"); // System.out.println("Total translation boxes (translated meanings of words): " + n_total); t_start = System.currentTimeMillis(); Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>(); LanguageType native_lang = wikt_parsed_conn.getNativeLanguage(); try { s = wikt_parsed_conn.conn.createStatement(); StringBuilder str_sql = new StringBuilder(); // SELECT id,lang_pos_id,meaning_summary,meaning_id FROM translation str_sql.append("SELECT id,lang_pos_id,meaning_summary FROM translation"); s.executeQuery(str_sql.toString()); rs = s.getResultSet(); int n_cur = 0; while (rs.next()) { n_cur++; int id = rs.getInt("id"); TLangPOS lang_pos = TLangPOS.getByID(wikt_parsed_conn, rs.getInt("lang_pos_id")); String meaning_summary = Encodings.bytesToUTF8(rs.getBytes("meaning_summary")); TLang tlang = lang_pos.getLang(); LanguageType lt = tlang.getLanguage(); // see: Wiktionary:About Translingual if (null != tlang && native_lang != lt && LanguageType.mul != lt) { System.out.print( "Error (TranslationTableAll.countTranslationPerLanguage()): There is a translation box from a foreign language, code=" + tlang.getLanguage().getCode()); TPage p = lang_pos.getPage(); if (null != p) System.out.println(", page_title=" + p.getPageTitle()); } 
if (null != lang_pos) { TTranslation trans = new TTranslation(id, lang_pos, meaning_summary, null); // meaning = null TTranslationEntry[] t_entries = TTranslationEntry.getByTranslation(wikt_parsed_conn, trans); for (TTranslationEntry entry : t_entries) { LanguageType lang = entry.getLang().getLanguage(); if (m_lang_n.containsKey(lang)) { int n = m_lang_n.get(lang); m_lang_n.put(lang, n + 1); } else m_lang_n.put(lang, 1); } if (DEBUG && 0 == n_cur % 1000) { // % 100 // if(n_cur > 333) // break; long t_cur, t_remain; t_cur = System.currentTimeMillis() - t_start; t_remain = (long) ((n_total - n_cur) * t_cur / (60f * 1000f * (float) (n_cur))); // where time for 1 page = t_cur / n_cur // in min, since /(60*1000) t_cur = (long) (t_cur / (60f * 1000f)); // t_cur = t_cur/(60f*1000f)); TPage tpage = lang_pos.getPage(); if (null != tpage) { System.out.println( n_cur + ": " + tpage.getPageTitle() + ", duration: " + t_cur + // t_cur/(60f*1000f) + " min, remain: " + t_remain + " min"); } } } else n_unknown_lang_pos++; } } catch (SQLException ex) { System.out.println( "SQLException (TranslationTableAll.countTranslationPerLanguage()): " + ex.getMessage()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; } if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; } } // long t_end; // float t_work; // t_end = System.currentTimeMillis(); // t_work = (t_end - t_start)/1000f; // in sec System.out.println( // "\nTime sec:" + t_work + "\nTotal translation boxes (translated meanings of words): " + n_total + "\n\nUnknown<ref>'''Unknown''' - words which have translations but have unknown language code and POS</ref>: " + n_unknown_lang_pos); return m_lang_n; }