/**
 * Processes one css file in place: loads it, passes it through
 * {@link #Convert_to_local_urls}, downloads the collected urls into the css
 * file's owner directory, and saves the converted bytes back to the same file.
 *
 * @param wiki_domain domain of the wiki; stored on this instance and also
 *                    appended to {@code Bry_fwd_slashes} to form the relative-url prefix
 * @param css_fil     css file to read and then overwrite
 */
public void Chk(byte[] wiki_domain, Io_url css_fil) {
    this.wiki_domain = wiki_domain;

    // urls discovered during conversion are accumulated here for the download step
    List_adp download_list = List_adp_.New();

    byte[] css_src = Io_mgr.Instance.LoadFilBry(css_fil);
    byte[] url_prefix = Bry_.Add(Bry_fwd_slashes, wiki_domain);
    byte[] css_trg = Convert_to_local_urls(url_prefix, css_src, download_list);

    // download into the css file's own directory, then overwrite the css file
    Download_fils(css_fil.OwnerDir(), download_list.To_str_ary());
    Io_mgr.Instance.SaveFilBry(css_fil, css_trg);
}
@Override public void Cmd_run() { Init_bldr_bmks(); this.time_bgn = System_.Ticks(); int total_pending = Xob_xfer_regy_tbl.Select_total_pending(bldr_conn); // if (total_pending > 250000 && src_bin_mgr__fsdb_version == null) usr_dlg.Note_many("", "", "total pending: ~{0}", total_pending); List_adp list = List_adp_.New(); boolean loop = true; while (loop) { byte rslt = Select_fsdb_itms(list); switch (rslt) { case Select_rv_stop: if (bin_db_mgr.Tier_id_is_last(tier_id_val)) loop = false; else { ++tier_id_val; page_id_val = 0; continue; } break; case Select_rv_next_page: ++page_id_val; lnki_id_val = 0; continue; case Select_rv_process: break; } if (!loop) break; // no more ttls found int len = list.Count(); usr_dlg.Prog_many("", "", "fetched pages: ~{0}", len); for (int i = 0; i < len; ++i) { Xodb_tbl_oimg_xfer_itm fsdb = (Xodb_tbl_oimg_xfer_itm) list.Get_at(i); Download_itm(fsdb); if (exit_now || exec_count >= exec_count_max || exec_fail >= exec_fail_max || page_id_val >= page_id_end) { this.Txn_sav(); return; } } } exec_done = true; }
private byte Select_fsdb_itms(List_adp list) { list.Clear(); boolean pages_found = false, links_found = false; DataRdr rdr = Xob_xfer_regy_tbl.Select_by_tier_page(bldr_conn, tier_id_val, page_id_val, select_interval); try { while (rdr.MoveNextPeer()) { pages_found = true; // at least one page found; set true Xodb_tbl_oimg_xfer_itm itm = Xodb_tbl_oimg_xfer_itm.new_rdr_(rdr); if (itm.Lnki_page_id() == page_id_val // same page_id && itm.Lnki_id() <= lnki_id_val // ... but lnki_id < last ) continue; // ... ignore; note that select is by page_id, not page_id + link_id; needed // else restarts would not resume exactly at same point; links_found = true; list.Add(itm); } } finally { rdr.Rls(); } if (pages_found && !links_found) return Select_rv_next_page; // pages found, but all links processed else if (!pages_found) return Select_rv_stop; // no more pages found else return Select_rv_process; // pages and links found }
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) { try { int src_len = src.length; int prv_pos = 0; Bry_bfr bfr = Bry_bfr_.New_w_size(src_len); Hash_adp img_hash = Hash_adp_bry.cs(); while (true) { int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos); if (url_pos == Bry_find_.Not_found) { bfr.Add_mid(src, prv_pos, src_len); break; } // no more "url("; exit; int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url(" byte bgn_byte = src[bgn_pos]; byte end_byte = Byte_ascii.Null; boolean quoted = true; switch (bgn_byte) { // find end_byte case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or " end_byte = bgn_byte; ++bgn_pos; break; default: // not quoted; end byte is ")" end_byte = Byte_ascii.Paren_end; quoted = false; break; } int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len); if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, src_len); break; } if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length; if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64 bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String prv_pos = end_pos; continue; } int import_url_end = Import_url_chk( rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag if (import_url_end != Bry_find_.Not_found) { prv_pos = import_url_end; continue; } byte[] 
img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len); if (img_cleaned == null) { // could not clean img usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } if (!img_hash.Has(img_cleaned)) { // only add unique items for download; img_hash.Add_as_key_and_val(img_cleaned); list.Add(String_.new_u8(img_cleaned)); } img_cleaned = Replace_invalid_chars( Bry_.Copy( img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change // *inside* hash bfr.Add_mid(src, prv_pos, bgn_pos); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); bfr.Add(img_cleaned); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); prv_pos = end_pos; } return bfr.To_bry_and_clear(); } catch (Exception e) { usr_dlg.Warn_many( "", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e)); return src; } }