  // Checks whether the url at find_bgn is preceded by "@import"; if so, downloads the referenced
  // stylesheet, writes it into bfr (after the source text up to the @import), and returns the
  // position just after the terminating ';'; otherwise returns Bry_find_.Not_found.
  private int Import_url_chk(
     byte[] rel_url_prefix,
     byte[] src,
     int src_len,
     int old_pos,
     int find_bgn,
     byte[] url_raw,
     Bry_bfr bfr) {
   if (find_bgn < Bry_import_len) return Bry_find_.Not_found;
   if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import))
     return Bry_find_.Not_found;
   byte[] css_url = url_raw;
   int css_url_len = css_url.length;
    if (css_url_len > 0 && css_url[0] == Byte_ascii.Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
      if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page"
        css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css"
    }
    // NOTE: must replace spaces with underlines else download will fail;
    // EX: https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
    css_url = Bry_.Replace(css_url, Byte_ascii.Space, Byte_ascii.Underline);
   byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
   String css_src_str = String_.new_u8(css_src_bry);
   download_wkr.Download_xrg()
       .Prog_fmt_hdr_(
           usr_dlg.Log_many(
               GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
   byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
   if (css_trg_bry == null) {
     usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
     return Bry_find_.Not_found; // css not found
   }
   bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
   bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
    // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
    if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1)
      css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl);
   bfr.Add(css_trg_bry).Add_byte_nl();
   bfr.Add_byte_nl();
   int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
   return semic_pos + Int_.Const_dlm_len;
 }
Example #2
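  // Decodes the posted site/page strings, resolves the wiki and title, looks up the page's
  // Wikidata document, and redirects the current page to the document's qid; returns false
  // (after warning the user) at the first failure.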
 private static boolean Navigate(
     Gfo_usr_dlg usr_dlg,
     Xoae_app app,
     Wdata_wiki_mgr wdata_mgr,
     Xoae_page page,
     byte[] site_bry,
     byte[] page_bry) {
    page_bry = Xoa_app_.Utl__encoder_mgr().Http_url().Decode(page_bry); // NOTE: space is converted to + on postback to url; decode
   byte[] wiki_domain = Xow_wiki_alias.Parse_wmf_key(site_bry);
   if (wiki_domain == null) {
     usr_dlg.Warn_many("", "", "site_bry parse failed; site_bry:~{0}", String_.new_u8(site_bry));
     return false;
   }
   Xowe_wiki wiki = app.Wiki_mgr().Get_by_key_or_make(wiki_domain);
   if (wiki == null) {
     usr_dlg.Warn_many(
         "", "", "wiki_domain does not exist; wiki_domain:~{0}", String_.new_u8(wiki_domain));
     return false;
   }
   Xoa_ttl wdata_ttl = Xoa_ttl.parse_(wiki, page_bry);
   if (wdata_ttl == null) {
     usr_dlg.Warn_many("", "", "ttl is invalid; ttl:~{0}", String_.new_u8(page_bry));
     return false;
   }
   Wdata_doc doc = wdata_mgr.Pages_get(wiki, wdata_ttl);
   if (doc == null) {
     usr_dlg.Warn_many(
         "", "", "ttl cannot be found in wikidata; ttl:~{0}", String_.new_u8(wdata_ttl.Raw()));
     return false;
   }
   byte[] qid_bry = doc.Qid();
   Xoae_page qid_page = wdata_mgr.Wdata_wiki().Data_mgr().Redirect(page, qid_bry);
   if (qid_page.Missing()) {
     usr_dlg.Warn_many(
         "", "", "qid cannot be found in wikidata; qid:~{0}", String_.new_u8(qid_bry));
     return false;
   }
   return true;
 }
Example #3
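  // For each file item in the list, fills in original-file metadata (when no hdump data is
  // present) and then renders the image via Xof_file_wkr.Show_img; exits early when file
  // support is disabled or the user cancels.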
 public void Fsdb_search_by_list(
     List_adp itms, Xow_wiki cur_wiki, Xoa_page page, Xog_js_wkr js_wkr) {
   if (!fsdb_enabled) return;
   int len = itms.Count();
   Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
   Xow_wiki wiki = page.Commons_mgr().Source_wiki_or(cur_wiki);
   Xou_cache_mgr cache_mgr = wiki.App().User().User_db_mgr().Cache_mgr();
   for (int i = 0; i < len; i++) {
     if (usr_dlg.Canceled()) return;
     Xof_fsdb_itm fsdb = (Xof_fsdb_itm) itms.Get_at(i);
     if (fsdb.Hdump_mode() == Xof_fsdb_itm.Hdump_mode__null) {
       Xof_orig_itm orig = wiki.File__orig_mgr().Find_by_ttl_or_null(fsdb.Lnki_ttl(), i, len);
       if (orig != Xof_orig_itm.Null) { // orig exists;
         gplx.xowa.files.repos.Xof_repo_itm repo =
             wiki.File__repo_mgr().Get_trg_by_id_or_null(orig.Repo(), fsdb.Lnki_ttl(), Bry_.Empty);
         if (repo == null) continue;
         fsdb.Init_at_orig(
             orig.Repo(),
             repo.Wiki_domain(),
             orig.Ttl(),
             orig.Ext(),
             orig.W(),
             orig.H(),
             orig.Redirect());
       }
     }
     fsdb.Init_at_xfer(i, len);
     Xof_file_wkr.Show_img(
         fsdb,
         usr_dlg,
         wiki.File__bin_mgr(),
         wiki.File__mnt_mgr(),
         cache_mgr,
         wiki.File__repo_mgr(),
         js_wkr,
         img_size,
         url_bldr,
         page);
   }
 }
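  // Rewrites every "url(...)" reference in a CSS source: inlines @import stylesheets, converts
  // image references to cleaned local file names, and adds each unique cleaned url to list for
  // later download; returns the rewritten CSS, or the original src if anything fails.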
 public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) {
   try {
     int src_len = src.length;
     int prv_pos = 0;
     Bry_bfr bfr = Bry_bfr_.New_w_size(src_len);
     Hash_adp img_hash = Hash_adp_bry.cs();
     while (true) {
       int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos);
        if (url_pos == Bry_find_.Not_found) { // no more "url("; exit
          bfr.Add_mid(src, prv_pos, src_len);
          break;
        }
       int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
       byte bgn_byte = src[bgn_pos];
       byte end_byte = Byte_ascii.Null;
       boolean quoted = true;
       switch (bgn_byte) { // find end_byte
         case Byte_ascii.Quote:
         case Byte_ascii.Apos: // quoted; end_byte is ' or "
           end_byte = bgn_byte;
           ++bgn_pos;
           break;
         default: // not quoted; end byte is ")"
           end_byte = Byte_ascii.Paren_end;
           quoted = false;
           break;
       }
       int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
        if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found
         usr_dlg.Warn_many(
             GRP_KEY,
             "parse.invalid_url.end_missing",
             "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'",
             prv_pos,
             String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
         bfr.Add_mid(src, prv_pos, src_len);
         break;
       }
       if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
         usr_dlg.Warn_many(
             GRP_KEY,
             "parse.invalid_url.empty",
             "'url(' is empty: bgn='~{0}' end='~{1}'",
             prv_pos,
             String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
         bfr.Add_mid(src, prv_pos, bgn_pos);
         prv_pos = bgn_pos;
         continue;
       }
       byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos);
       int img_raw_len = img_raw.length;
       if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64
         bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String
         prv_pos = end_pos;
         continue;
       }
        // check for embedded stylesheets via @import tag
        int import_url_end = Import_url_chk(rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr);
       if (import_url_end != Bry_find_.Not_found) {
         prv_pos = import_url_end;
         continue;
       }
       byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len);
       if (img_cleaned == null) { // could not clean img
         usr_dlg.Warn_many(
             GRP_KEY,
             "parse.invalid_url.clean_failed",
             "could not extract valid http src: bgn='~{0}' end='~{1}'",
             prv_pos,
             String_.new_u8(img_raw));
         bfr.Add_mid(src, prv_pos, bgn_pos);
         prv_pos = bgn_pos;
         continue;
       }
       if (!img_hash.Has(img_cleaned)) { // only add unique items for download;
         img_hash.Add_as_key_and_val(img_cleaned);
         list.Add(String_.new_u8(img_cleaned));
       }
        // NOTE: must call Bry_.Copy else img_cleaned will change *inside* hash
        img_cleaned = Replace_invalid_chars(Bry_.Copy(img_cleaned));
       bfr.Add_mid(src, prv_pos, bgn_pos);
       if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
       bfr.Add(img_cleaned);
       if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
       prv_pos = end_pos;
     }
     return bfr.To_bry_and_clear();
   } catch (Exception e) {
     usr_dlg.Warn_many(
         "",
         "",
         "failed to convert local_urls: ~{0} ~{1}",
         String_.new_u8(rel_url_prefix),
         Err_.Message_gplx_full(e));
     return src;
   }
 }
Example #5
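  // Builds a page_filter table of redirect pages whose targets are missing, deletes the matching
  // rows from each data db's text, cat_link, and search_link tables, then deletes the page rows
  // themselves and vacuums the databases.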
 @Override
 public void Cmd_run() {
   wiki.Init_by_wiki();
   Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
   Db_conn core_db_conn = core_db.Conn();
   Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
   usr_dlg.Plog_many("", "", "creating page_filter");
   if (!core_db_conn.Meta_tbl_exists("page_filter")) {
     core_db_conn.Meta_tbl_create(
         Dbmeta_tbl_itm.New(
             "page_filter",
             new Dbmeta_fld_itm[] {
               Dbmeta_fld_itm.new_int("page_id").Primary_y_(),
               Dbmeta_fld_itm.new_int("page_text_db_id")
             },
             Dbmeta_idx_itm.new_normal_by_tbl(
                 "page_filter", "db_id__page", "page_text_db_id", "page_id"),
             Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "page_id", "page_id")));
   }
   core_db_conn.Exec_sql_plog_ntx(
       "finding missing redirects",
       String_.Concat_lines_nl_skip_last(
           "INSERT INTO page_filter (page_id, page_text_db_id)",
           "SELECT  ptr.page_id, ptr.page_text_db_id",
           "FROM    page ptr",
           "        LEFT JOIN page orig ON ptr.page_redirect_id = orig.page_id",
           "WHERE   ptr.page_is_redirect = 1",
           "AND     orig.page_id IS NULL",
           "UNION",
           "SELECT  ptr.page_id, ptr.page_text_db_id",
           "FROM    page ptr",
           "WHERE   ptr.page_is_redirect = 1",
           "AND     ptr.page_redirect_id = -1",
           ";"));
   try {
     Xow_db_file[] db_file_ary =
         core_db.Tbl__db().Select_all(wiki.Data__core_mgr().Props(), wiki.Fsys_mgr().Root_dir());
     int len = db_file_ary.length;
     for (int i = 0; i < len; ++i) {
       boolean db_file_is_text = Bool_.N, db_file_is_cat = Bool_.N, db_file_is_search = Bool_.N;
       Xow_db_file db_file = db_file_ary[i];
       switch (db_file.Tid()) {
         case Xow_db_file_.Tid__core:
         case Xow_db_file_.Tid__wiki_solo:
         case Xow_db_file_.Tid__text_solo:
            if (wiki.Data__core_mgr().Props().Layout_text().Tid_is_lot())
              continue; // if mode is lot, then "core" db does not have text, cat, search; skip; DATE:2016-01-31
           db_file_is_text = db_file_is_cat = db_file_is_search = Bool_.Y;
           break;
         case Xow_db_file_.Tid__text:
           db_file_is_text = Bool_.Y;
           break;
         case Xow_db_file_.Tid__cat:
           db_file_is_cat = Bool_.Y;
           break;
         case Xow_db_file_.Tid__search_core:
           db_file_is_search = Bool_.Y;
           break;
       }
       int db_id = db_file.Id();
       if (db_file_is_text)
         Run_sql(
             core_db_conn,
             db_file.Url(),
             db_id,
             "deleting text: " + db_id,
             "DELETE FROM <data_db>text WHERE page_id IN (SELECT page_id FROM page_filter WHERE page_text_db_id = {0});");
       if (db_file_is_cat)
         Run_sql(
             core_db_conn,
             db_file.Url(),
             db_id,
             "deleting cat: " + db_id,
             "DELETE FROM <data_db>cat_link WHERE cl_from IN (SELECT page_id FROM page_filter);");
       if (db_file_is_search)
         Run_sql(
             core_db_conn,
             db_file.Url(),
             db_id,
             "deleting search:" + db_id,
             "DELETE FROM <data_db>search_link WHERE page_id IN (SELECT page_id FROM page_filter);");
       if (db_file_is_text || db_file_is_cat || db_file_is_search) db_file.Conn().Env_vacuum();
     }
   } catch (Exception e) {
     Gfo_usr_dlg_.Instance.Warn_many(
         "", "", "fatal error during page deletion: err=~{0}", Err_.Message_gplx_log(e));
   }
   core_db_conn.Exec_sql_plog_ntx(
       "deleting from table: page",
       "DELETE FROM page WHERE page_id IN (SELECT page_id FROM page_filter);");
   // core_db_conn.Meta_tbl_delete("page_filter");
   core_db_conn.Env_vacuum();
   usr_dlg.Plog_many("", "", "");
 }