private int Import_url_chk( byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) { if (find_bgn < Bry_import_len) return Bry_find_.Not_found; if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_find_.Not_found; byte[] css_url = url_raw; int css_url_len = css_url.length; if (css_url_len > 0 && css_url[0] == Byte_ascii .Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03 if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page" css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css" } css_url = Bry_.Replace( css_url, Byte_ascii.Space, Byte_ascii .Underline); // NOTE: must replace spaces with underlines else download will fail; // EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e // Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08 byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url); String css_src_str = String_.new_u8(css_src_bry); download_wkr.Download_xrg() .Prog_fmt_hdr_( usr_dlg.Log_many( GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str)); byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str); if (css_trg_bry == null) { usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str); return Bry_find_.Not_found; // css not found } bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl(); bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl(); if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace( css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; // PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06 bfr.Add(css_trg_bry).Add_byte_nl(); bfr.Add_byte_nl(); int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len); return semic_pos + Int_.Const_dlm_len; }
private static boolean Navigate( Gfo_usr_dlg usr_dlg, Xoae_app app, Wdata_wiki_mgr wdata_mgr, Xoae_page page, byte[] site_bry, byte[] page_bry) { page_bry = Xoa_app_.Utl__encoder_mgr() .Http_url() .Decode(page_bry); // NOTE: space is converted to + on postback to url; decode byte[] wiki_domain = Xow_wiki_alias.Parse_wmf_key(site_bry); if (wiki_domain == null) { usr_dlg.Warn_many("", "", "site_bry parse failed; site_bry:~{0}", String_.new_u8(site_bry)); return false; } Xowe_wiki wiki = app.Wiki_mgr().Get_by_key_or_make(wiki_domain); if (wiki == null) { usr_dlg.Warn_many( "", "", "wiki_domain does not exist; wiki_domain:~{0}", String_.new_u8(wiki_domain)); return false; } Xoa_ttl wdata_ttl = Xoa_ttl.parse_(wiki, page_bry); if (wdata_ttl == null) { usr_dlg.Warn_many("", "", "ttl is invalid; ttl:~{0}", String_.new_u8(page_bry)); return false; } Wdata_doc doc = wdata_mgr.Pages_get(wiki, wdata_ttl); if (doc == null) { usr_dlg.Warn_many( "", "", "ttl cannot be found in wikidata; ttl:~{0}", String_.new_u8(wdata_ttl.Raw())); return false; } byte[] qid_bry = doc.Qid(); Xoae_page qid_page = wdata_mgr.Wdata_wiki().Data_mgr().Redirect(page, qid_bry); if (qid_page.Missing()) { usr_dlg.Warn_many( "", "", "qid cannot be found in wikidata; qid:~{0}", String_.new_u8(qid_bry)); return false; } return true; }
public void Fsdb_search_by_list( List_adp itms, Xow_wiki cur_wiki, Xoa_page page, Xog_js_wkr js_wkr) { if (!fsdb_enabled) return; int len = itms.Count(); Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance; Xow_wiki wiki = page.Commons_mgr().Source_wiki_or(cur_wiki); Xou_cache_mgr cache_mgr = wiki.App().User().User_db_mgr().Cache_mgr(); for (int i = 0; i < len; i++) { if (usr_dlg.Canceled()) return; Xof_fsdb_itm fsdb = (Xof_fsdb_itm) itms.Get_at(i); if (fsdb.Hdump_mode() == Xof_fsdb_itm.Hdump_mode__null) { Xof_orig_itm orig = wiki.File__orig_mgr().Find_by_ttl_or_null(fsdb.Lnki_ttl(), i, len); if (orig != Xof_orig_itm.Null) { // orig exists; gplx.xowa.files.repos.Xof_repo_itm repo = wiki.File__repo_mgr().Get_trg_by_id_or_null(orig.Repo(), fsdb.Lnki_ttl(), Bry_.Empty); if (repo == null) continue; fsdb.Init_at_orig( orig.Repo(), repo.Wiki_domain(), orig.Ttl(), orig.Ext(), orig.W(), orig.H(), orig.Redirect()); } } fsdb.Init_at_xfer(i, len); Xof_file_wkr.Show_img( fsdb, usr_dlg, wiki.File__bin_mgr(), wiki.File__mnt_mgr(), cache_mgr, wiki.File__repo_mgr(), js_wkr, img_size, url_bldr, page); } }
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) { try { int src_len = src.length; int prv_pos = 0; Bry_bfr bfr = Bry_bfr_.New_w_size(src_len); Hash_adp img_hash = Hash_adp_bry.cs(); while (true) { int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos); if (url_pos == Bry_find_.Not_found) { bfr.Add_mid(src, prv_pos, src_len); break; } // no more "url("; exit; int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url(" byte bgn_byte = src[bgn_pos]; byte end_byte = Byte_ascii.Null; boolean quoted = true; switch (bgn_byte) { // find end_byte case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or " end_byte = bgn_byte; ++bgn_pos; break; default: // not quoted; end byte is ")" end_byte = Byte_ascii.Paren_end; quoted = false; break; } int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len); if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, src_len); break; } if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length; if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64 bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String prv_pos = end_pos; continue; } int import_url_end = Import_url_chk( rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag if (import_url_end != Bry_find_.Not_found) { prv_pos = import_url_end; continue; } byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len); if (img_cleaned == null) { // could not clean img usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } if (!img_hash.Has(img_cleaned)) { // only add unique items for download; img_hash.Add_as_key_and_val(img_cleaned); list.Add(String_.new_u8(img_cleaned)); } img_cleaned = Replace_invalid_chars( Bry_.Copy( img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change // *inside* hash bfr.Add_mid(src, prv_pos, bgn_pos); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); bfr.Add(img_cleaned); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); prv_pos = end_pos; } return bfr.To_bry_and_clear(); } catch (Exception e) { usr_dlg.Warn_many( "", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e)); return src; } }
@Override public void Cmd_run() { wiki.Init_by_wiki(); Xow_db_file core_db = wiki.Data__core_mgr().Db__core(); Db_conn core_db_conn = core_db.Conn(); Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance; usr_dlg.Plog_many("", "", "creating page_filter"); if (!core_db_conn.Meta_tbl_exists("page_filter")) { core_db_conn.Meta_tbl_create( Dbmeta_tbl_itm.New( "page_filter", new Dbmeta_fld_itm[] { Dbmeta_fld_itm.new_int("page_id").Primary_y_(), Dbmeta_fld_itm.new_int("page_text_db_id") }, Dbmeta_idx_itm.new_normal_by_tbl( "page_filter", "db_id__page", "page_text_db_id", "page_id"), Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "page_id", "page_id"))); } core_db_conn.Exec_sql_plog_ntx( "finding missing redirects", String_.Concat_lines_nl_skip_last( "INSERT INTO page_filter (page_id, page_text_db_id)", "SELECT ptr.page_id, ptr.page_text_db_id", "FROM page ptr", " LEFT JOIN page orig ON ptr.page_redirect_id = orig.page_id", "WHERE ptr.page_is_redirect = 1", "AND orig.page_id IS NULL", "UNION", "SELECT ptr.page_id, ptr.page_text_db_id", "FROM page ptr", "WHERE ptr.page_is_redirect = 1", "AND ptr.page_redirect_id = -1", ";")); try { Xow_db_file[] db_file_ary = core_db.Tbl__db().Select_all(wiki.Data__core_mgr().Props(), wiki.Fsys_mgr().Root_dir()); int len = db_file_ary.length; for (int i = 0; i < len; ++i) { boolean db_file_is_text = Bool_.N, db_file_is_cat = Bool_.N, db_file_is_search = Bool_.N; Xow_db_file db_file = db_file_ary[i]; switch (db_file.Tid()) { case Xow_db_file_.Tid__core: case Xow_db_file_.Tid__wiki_solo: case Xow_db_file_.Tid__text_solo: if (wiki.Data__core_mgr().Props().Layout_text().Tid_is_lot()) continue; // if mode is lot, then "core" db does not have text, cat, search; skip; // DATE:2016-01-31 db_file_is_text = db_file_is_cat = db_file_is_search = Bool_.Y; break; case Xow_db_file_.Tid__text: db_file_is_text = Bool_.Y; break; case Xow_db_file_.Tid__cat: db_file_is_cat = Bool_.Y; break; case Xow_db_file_.Tid__search_core: db_file_is_search = Bool_.Y; break; } int db_id = db_file.Id(); if (db_file_is_text) Run_sql( core_db_conn, db_file.Url(), db_id, "deleting text: " + db_id, "DELETE FROM <data_db>text WHERE page_id IN (SELECT page_id FROM page_filter WHERE page_text_db_id = {0});"); if (db_file_is_cat) Run_sql( core_db_conn, db_file.Url(), db_id, "deleting cat: " + db_id, "DELETE FROM <data_db>cat_link WHERE cl_from IN (SELECT page_id FROM page_filter);"); if (db_file_is_search) Run_sql( core_db_conn, db_file.Url(), db_id, "deleting search:" + db_id, "DELETE FROM <data_db>search_link WHERE page_id IN (SELECT page_id FROM page_filter);"); if (db_file_is_text || db_file_is_cat || db_file_is_search) db_file.Conn().Env_vacuum(); } } catch (Exception e) { Gfo_usr_dlg_.Instance.Warn_many( "", "", "fatal error during page deletion: err=~{0}", Err_.Message_gplx_log(e)); } core_db_conn.Exec_sql_plog_ntx( "deleting from table: page", "DELETE FROM page WHERE page_id IN (SELECT page_id FROM page_filter);"); // core_db_conn.Meta_tbl_delete("page_filter"); core_db_conn.Env_vacuum(); usr_dlg.Plog_many("", "", ""); }