public static String Resolve_file(boolean use_file_protocol, Io_url root_dir, String file) { String rv = file; // resolve relative urls; EX: "./a.js" -> "/xowa/wiki/simple.wikipedia.org/bin/script/a.js" if (String_.Has_at_bgn(rv, "./")) { // remove "./" rv = String_.Mid(rv, 2); if (use_file_protocol) rv = root_dir.To_http_file_str() + rv; else { // if fsys_url && wnt, replace "\" with "/" if (Op_sys.Cur().Tid_is_wnt()) rv = String_.Replace(rv, Op_sys.Lnx.Fsys_dir_spr_str(), Op_sys.Wnt.Fsys_dir_spr_str()); rv = root_dir.Xto_api() + Op_sys.Cur().Fsys_dir_spr_str() + rv; } } return rv; }
private int Import_url_chk( byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) { if (find_bgn < Bry_import_len) return Bry_find_.Not_found; if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_find_.Not_found; byte[] css_url = url_raw; int css_url_len = css_url.length; if (css_url_len > 0 && css_url[0] == Byte_ascii .Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03 if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page" css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css" } css_url = Bry_.Replace( css_url, Byte_ascii.Space, Byte_ascii .Underline); // NOTE: must replace spaces with underlines else download will fail; // EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e // Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08 byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url); String css_src_str = String_.new_u8(css_src_bry); download_wkr.Download_xrg() .Prog_fmt_hdr_( usr_dlg.Log_many( GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str)); byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str); if (css_trg_bry == null) { usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str); return Bry_find_.Not_found; // css not found } bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl(); bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl(); if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace( css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; // PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06 bfr.Add(css_trg_bry).Add_byte_nl(); bfr.Add_byte_nl(); int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len); return semic_pos + Int_.Const_dlm_len; }
public static Fsdb_db_mgr new_src_bin_db_mgr(Xow_wiki wiki, String version) { String domain_str = wiki.Domain_str(); Fsdb_db_mgr rv = null; Io_url url = null; if (String_.Eq(version, "v1")) { url = wiki.Fsys_mgr() .File_dir() .OwnerDir() .GenSubDir(domain_str + "-prv"); // v1: EX: /xowa/file/en.wikipedia.org-prv/ rv = new Fsdb_db_mgr__v1(url); } else if (String_.Eq(version, "v2")) { url = wiki.Fsys_mgr().Root_dir().GenSubDir("prv"); // v2: EX: /xowa/wiki/en.wikipedia.org/prv/ rv = Fsdb_db_mgr_.new_detect(wiki, url, url); // note that v2 is prioritized over v1 } else throw Err_.new_wo_type("fsdb.make:unknown fsdb_type", "version", version); if (rv == null) throw Err_.new_wo_type( "fsdb.make:source fsdb not found", "version", version, "url", url.Raw()); return rv; }
private void Download_exec(Xodb_tbl_oimg_xfer_itm fsdb) { Io_stream_rdr src_rdr = src_bin_mgr.Find_as_rdr(Xof_exec_tid.Tid_wiki_page, fsdb); try { if (src_rdr == Io_stream_rdr_.Noop) { // download failed ++exec_fail; usr_dlg.Warn_many( "", "", "failed: ttl=~{0}", String_.Format("[[File:{0}|{1}px]]", fsdb.Orig_ttl(), fsdb.Html_w())); Print_progress(fsdb); } else { // download passed long src_rdr_len = src_rdr.Len(); int lnki_tier_id = fsdb.Lnki_tier_id(); if (src_rdr_len > download_size_max && !Int_.In(lnki_tier_id, download_keep_tier_ids)) { usr_dlg.Warn_many( "", "", "skipped; ttl=~{0} w=~{1} size=~{2} tier=~{3}", fsdb.Orig_ttl(), fsdb.Lnki_w(), src_rdr_len, lnki_tier_id); return; } if (trg_bin_fil == null) // no trg_bin_fil Make_trg_bin_file(Bool_.Y, fsdb, src_rdr_len); else if (trg_bin_fil.Bin_len() + src_rdr_len > trg_bin_db_max) // or trg_bin_fil is out of space Make_trg_bin_file(Bool_.N, fsdb, src_rdr_len); else if (prv_lnki_tier_id != lnki_tier_id) { // or tier has changed if (prv_lnki_tier_id != -1 && !bin_db_mgr.Schema_is_1()) // do not increment dbs for v1 Make_trg_bin_file(Bool_.Y, fsdb, src_rdr_len); prv_lnki_tier_id = lnki_tier_id; } trg_bin_updater.Save_bin(trg_mnt_itm, trg_atr_fil, trg_bin_fil, fsdb, src_rdr, src_rdr_len); } } finally { src_rdr.Rls(); } }
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) { try { int src_len = src.length; int prv_pos = 0; Bry_bfr bfr = Bry_bfr_.New_w_size(src_len); Hash_adp img_hash = Hash_adp_bry.cs(); while (true) { int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos); if (url_pos == Bry_find_.Not_found) { bfr.Add_mid(src, prv_pos, src_len); break; } // no more "url("; exit; int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url(" byte bgn_byte = src[bgn_pos]; byte end_byte = Byte_ascii.Null; boolean quoted = true; switch (bgn_byte) { // find end_byte case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or " end_byte = bgn_byte; ++bgn_pos; break; default: // not quoted; end byte is ")" end_byte = Byte_ascii.Paren_end; quoted = false; break; } int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len); if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, src_len); break; } if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length; if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64 bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String prv_pos = end_pos; continue; } int import_url_end = Import_url_chk( rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag if (import_url_end != Bry_find_.Not_found) { prv_pos = import_url_end; continue; } byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len); if (img_cleaned == null) { // could not clean img usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } if (!img_hash.Has(img_cleaned)) { // only add unique items for download; img_hash.Add_as_key_and_val(img_cleaned); list.Add(String_.new_u8(img_cleaned)); } img_cleaned = Replace_invalid_chars( Bry_.Copy( img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change // *inside* hash bfr.Add_mid(src, prv_pos, bgn_pos); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); bfr.Add(img_cleaned); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); prv_pos = end_pos; } return bfr.To_bry_and_clear(); } catch (Exception e) { usr_dlg.Warn_many( "", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e)); return src; } }
String Replace_invalid_chars_str(String raw_str) { return String_.new_u8(Replace_invalid_chars(Bry_.new_u8(raw_str))); }
@Override public void Cmd_bgn(Xob_bldr bldr) { wiki.Init_assert(); this.poll_interval = poll_mgr.Poll_interval(); this.bin_db_mgr = new Xob_bin_db_mgr(ns_ids); // src_bin_mgr if (src_bin_mgr__fsdb_version != null) { this.src_fsdb_wkr = Xof_bin_wkr__fsdb_sql.new_(wiki.File__mnt_mgr()); src_bin_mgr.Wkrs__add(src_fsdb_wkr); src_fsdb_wkr.Mnt_mgr().Ctor_by_load(new_src_bin_db_mgr(wiki, src_bin_mgr__fsdb_version)); src_fsdb_wkr.Mnt_mgr() .Mnts__get_main() .Txn_bgn(); // NOTE: txn on atr speeds up from 50 -> 300; DATE:2015-03-21 if (src_bin_mgr__fsdb_skip_wkrs != null) { src_fsdb_wkr.Skip_mgr_init( src_fsdb_wkr.Mnt_mgr().Mnts__get_main().Cfg_mgr(), src_bin_mgr__fsdb_skip_wkrs); } if (src_bin_mgr__cache_enabled) { usr_dlg.Prog_many("", "", "src_bin_mgr.cache.bgn"); src_fsdb_wkr.Mnt_mgr().Mnts__get_main().Atr_mgr().Db__core().Fil_cache_enabled_y_(); usr_dlg.Prog_many("", "", "src_bin_mgr.cache.end"); } } if (src_bin_mgr__wmf_enabled) { Xof_bin_wkr__http_wmf wmf_wkr = Xof_bin_wkr__http_wmf.new_(wiki); src_bin_mgr.Wkrs__add(wmf_wkr); wmf_wkr.Fail_timeout_( 0); // 1000; NOTE: set Fail_timeout here; DATE:2014-06-21; NOTE: do not put in ctor, or // else will be 1st wkr; DATE:2014-06-28 } // trg_mnt_itm this.trg_bin_db_max = app.Api_root().Bldr().Wiki().Import().File_db_max(); Io_url trg_file_dir_v1 = String_.Eq(trg_bin_mgr__fsdb_version, "v1") ? wiki.Fsys_mgr().File_dir().GenNewNameOnly(wiki.Domain_str() + "-prv") : wiki.Fsys_mgr().File_dir(); // NOTE: convoluted way of setting trg to -prv if // trg_bin_mgr__fsdb_version_v1 is set; otherwise set to // "en.wikipedia.org" which will noop; DATE:2015-12-02 Fsdb_db_mgr trg_db_mgr = Fsdb_db_mgr_.new_detect(wiki, wiki.Fsys_mgr().Root_dir(), trg_file_dir_v1); if (trg_db_mgr == null) trg_db_mgr = Fsdb_db_mgr__v2_bldr.Get_or_make(wiki, Bool_.Y); Fsm_mnt_mgr trg_mnt_mgr = new Fsm_mnt_mgr(); trg_mnt_mgr.Ctor_by_load(trg_db_mgr); trg_mnt_mgr.Mnts__get_insert_idx_( Fsm_mnt_mgr .Mnt_idx_main); // NOTE: do not delete; mnt_mgr default to Mnt_idx_user; DATE:2014-04-25 this.trg_mnt_itm = trg_mnt_mgr.Mnts__get_insert(); Fsm_mnt_mgr.Patch( trg_mnt_itm.Cfg_mgr() .Tbl()); // NOTE: always patch again; fsdb_make may be run separately without lnki_temp; // DATE:2014-04-26 this.trg_atr_fil = trg_mnt_itm.Atr_mgr().Db__core(); this.trg_cfg_mgr = trg_mnt_itm.Cfg_mgr(); bin_db_mgr.Init_by_mnt_mgr(trg_mnt_mgr); trg_atr_fil.Conn().Txn_bgn("bldr__fsdb_make__trg_atr_fil"); if (!trg_atr_fil.Conn() .Eq(trg_cfg_mgr.Tbl().Conn())) // need to create txn for v1; DATE:2015-07-04 trg_cfg_mgr.Tbl().Conn().Txn_bgn("bldr__fsdb_make__trg_cfg_fil"); // bldr_db Xob_db_file bldr_db = Xob_db_file.New__file_make(wiki.Fsys_mgr().Root_dir()); this.bldr_conn = bldr_db.Conn(); this.bldr_cfg_tbl = bldr_db.Tbl__cfg(); // NOTE: cfg and atr is in same db; use it bldr_cfg_tbl.Conn().Txn_bgn("bldr__fsdb_make__bldr_cfg_tbl"); }