예제 #1
0
  /**
   * Rebuilds the table 'label_category' from scratch: the local id-to-category map is obtained
   * from fillLocalMaps(), all existing records of the table are removed by
   * UtilSQL.deleteAllRecordsResetAutoIncrement() and the table is populated again by fillDB().
   *
   * <p>When assertions are enabled (-ea), the number of rows in the table is checked against
   * LabelCategory.size() afterwards.
   *
   * @param connect connection to the database which contains the table 'label_category'
   */
  public static void recreateTable(Connect connect) {
    System.out.println("Recreating the table `label_category`...");

    final Map<Integer, LabelCategory> categoriesById = fillLocalMaps();

    UtilSQL.deleteAllRecordsResetAutoIncrement(connect, "label_category");
    fillDB(connect, categoriesById);

    // sanity check: the table is expected to hold one row per known label category
    final int rowsInTable =
        wikokit.base.wikipedia.sql.Statistics.Count(connect, "label_category");
    assert rowsInTable == LabelCategory.size();
  }
예제 #2
0
  /**
   * Rebuilds the two in-memory lookup maps of label categories (label category -> table ID and
   * table ID -> label category) using the IDs stored in the table 'label_category'.<br>
   * <br>
   * Nothing is changed if the table is empty; an error is printed instead. A label category for
   * which getIDByName() yields 0 is reported and left out of both maps. A warning is printed when
   * the number of rows in the table differs from LabelCategory.size().<br>
   * <br>
   * REM: during a creation of Wiktionary parsed database the function recreateTable() should be
   * called (before this function).
   *
   * @param connect connection to the database which contains the table 'label_category'
   */
  public static void createFastMaps(Connect connect) {
    System.out.println("Loading table `label_category`...");

    final int tableSize = Statistics.Count(connect, "label_category");
    if (tableSize == 0) {
      System.out.println(
          "Error (wikt_parsed TLabelCategory.createFastMaps()):: The table `label_category` is empty!");
      return;
    }

    // the previous maps are emptied in place before fresh ones take over
    if (category2id != null && !category2id.isEmpty()) {
      category2id.clear();
    }
    if (id2category != null && !id2category.isEmpty()) {
      id2category.clear();
    }
    category2id = new LinkedHashMap<>(tableSize);
    id2category = new LinkedHashMap<>(tableSize);

    for (LabelCategory category : LabelCategory.getAllLabelCats()) {
      final int categoryId = getIDByName(connect, category.getName());

      if (categoryId != 0) {
        category2id.put(category, categoryId);
        id2category.put(categoryId, category);
      } else {
        System.out.println(
            "Error (wikt_parsed TLabelCategory.createFastMaps()):: There is an empty label category name, check the table `label_category`!");
      }
    }

    if (LabelCategory.size() != tableSize) {
      System.out.println(
          "Warning (wikt_parsed TLabelCategory.createFastMaps()):: LabelCategory.size ("
              + LabelCategory.size()
              + ") is not equal to size of table 'label_category'("
              + tableSize
              + "). Is the database outdated?");
    }
  }
예제 #3
0
  /**
   * Counts the translation entries per language by walking over all records of the table
   * 'translation' in the database of the parsed Wiktionary.<br>
   * <br>
   * SELECT id,lang_pos_id,meaning_summary FROM translation;<br>
   * <br>
   * For every record the translation entries are fetched by TTranslationEntry.getByTranslation()
   * and the counter of each entry's language is incremented. Records for which
   * TLangPOS.getByID() returns null are skipped and only counted as "Unknown" in the summary
   * printed at the end. A record whose language is neither the native language of the connection
   * nor LanguageType.mul is reported as an error, but its entries are still counted.<br>
   * <br>
   * If an SQLException occurs (including one raised while closing the statement or the result
   * set), it is reported to stdout and the counts gathered so far are returned.
   *
   * @param wikt_parsed_conn connection to the database of the parsed Wiktionary
   * @return map from a language to the number of translation entries in this language (never
   *     null, may be empty)
   */
  public static Map<LanguageType, Integer> countTranslationPerLanguage(Connect wikt_parsed_conn) {
    int n_unknown_lang_pos = 0; // translation records whose lang_pos_id could not be resolved

    int n_total = Statistics.Count(wikt_parsed_conn, "translation");
    long t_start = System.currentTimeMillis();

    Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>();
    LanguageType native_lang = wikt_parsed_conn.getNativeLanguage();

    String str_sql = "SELECT id,lang_pos_id,meaning_summary FROM translation";

    // try-with-resources closes the result set and then the statement on every exit path
    try (Statement s = wikt_parsed_conn.conn.createStatement();
        ResultSet rs = s.executeQuery(str_sql)) {

      int n_cur = 0;
      while (rs.next()) {
        n_cur++;
        int id = rs.getInt("id");
        TLangPOS lang_pos = TLangPOS.getByID(wikt_parsed_conn, rs.getInt("lang_pos_id"));
        String meaning_summary = Encodings.bytesToUTF8(rs.getBytes("meaning_summary"));

        // must be checked before lang_pos is dereferenced, otherwise the "unknown" counter
        // can never be reached (a NullPointerException would be thrown first)
        if (null == lang_pos) {
          n_unknown_lang_pos++;
          continue;
        }

        TLang tlang = lang_pos.getLang();
        if (null != tlang) {
          LanguageType lt = tlang.getLanguage(); // see: Wiktionary:About Translingual
          if (native_lang != lt && LanguageType.mul != lt) {
            TPage p = lang_pos.getPage();
            // the line is always terminated, even when the page is unknown
            System.out.println(
                "Error (TranslationTableAll.countTranslationPerLanguage()): There is a translation box from a foreign language, code="
                    + lt.getCode()
                    + (null != p ? ", page_title=" + p.getPageTitle() : ""));
          }
        }

        TTranslation trans =
            new TTranslation(id, lang_pos, meaning_summary, null); // meaning = null

        TTranslationEntry[] t_entries = TTranslationEntry.getByTranslation(wikt_parsed_conn, trans);

        for (TTranslationEntry entry : t_entries) {
          LanguageType lang = entry.getLang().getLanguage();
          Integer n = m_lang_n.get(lang);
          m_lang_n.put(lang, null == n ? 1 : n + 1);
        }

        if (DEBUG && 0 == n_cur % 1000) {
          long t_elapsed = System.currentTimeMillis() - t_start; // in msec

          // time for 1 record = t_elapsed / n_cur; in min, since /(60*1000)
          long t_remain =
              (long) ((n_total - n_cur) * t_elapsed / (60f * 1000f * (float) (n_cur)));
          long t_cur = (long) (t_elapsed / (60f * 1000f));

          TPage tpage = lang_pos.getPage();
          if (null != tpage) {
            System.out.println(
                n_cur
                    + ": "
                    + tpage.getPageTitle()
                    + ", duration: "
                    + t_cur
                    + " min, remain: "
                    + t_remain
                    + " min");
          }
        }
      }
    } catch (SQLException ex) {
      // best effort: report the problem and return whatever has been counted so far
      System.out.println(
          "SQLException (TranslationTableAll.countTranslationPerLanguage()): " + ex.getMessage());
    }

    System.out.println(
        "\nTotal translation boxes (translated meanings of words): "
            + n_total
            + "\n\nUnknown<ref>'''Unknown''' - words which have translations but have unknown language code and POS</ref>: "
            + n_unknown_lang_pos);

    return m_lang_n;
  }