private void Download_itm(Xodb_tbl_oimg_xfer_itm fsdb) { try { tier_id_val = fsdb.Lnki_tier_id(); page_id_val = fsdb.Lnki_page_id(); lnki_id_val = fsdb.Lnki_id(); fsdb.Orig_repo_name_( fsdb.Orig_repo_id() == Xof_repo_tid_.Tid__local ? wiki.Domain_bry() : Xow_domain_itm_.Bry__commons); Download_exec(fsdb); ++exec_count; if (exec_count % progress_interval == 0) Print_progress(fsdb); if (exec_count % poll_interval == 0) poll_mgr.Poll(); if (exec_count % commit_interval == 0) Txn_sav(); if (exec_count % delete_interval == 0) Delete_files(); } catch (Exception exc) { ++exec_fail; usr_dlg.Warn_many( "", "", "download error; ttl=~{0} w=~{1} err=~{2}", fsdb.Orig_ttl(), fsdb.Lnki_w(), Err_.Message_gplx_full(exc)); } }
public static Fsdb_db_mgr new_src_bin_db_mgr(Xow_wiki wiki, String version) { String domain_str = wiki.Domain_str(); Fsdb_db_mgr rv = null; Io_url url = null; if (String_.Eq(version, "v1")) { url = wiki.Fsys_mgr() .File_dir() .OwnerDir() .GenSubDir(domain_str + "-prv"); // v1: EX: /xowa/file/en.wikipedia.org-prv/ rv = new Fsdb_db_mgr__v1(url); } else if (String_.Eq(version, "v2")) { url = wiki.Fsys_mgr().Root_dir().GenSubDir("prv"); // v2: EX: /xowa/wiki/en.wikipedia.org/prv/ rv = Fsdb_db_mgr_.new_detect(wiki, url, url); // note that v2 is prioritized over v1 } else throw Err_.new_wo_type("fsdb.make:unknown fsdb_type", "version", version); if (rv == null) throw Err_.new_wo_type( "fsdb.make:source fsdb not found", "version", version, "url", url.Raw()); return rv; }
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) { try { int src_len = src.length; int prv_pos = 0; Bry_bfr bfr = Bry_bfr_.New_w_size(src_len); Hash_adp img_hash = Hash_adp_bry.cs(); while (true) { int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos); if (url_pos == Bry_find_.Not_found) { bfr.Add_mid(src, prv_pos, src_len); break; } // no more "url("; exit; int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url(" byte bgn_byte = src[bgn_pos]; byte end_byte = Byte_ascii.Null; boolean quoted = true; switch (bgn_byte) { // find end_byte case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or " end_byte = bgn_byte; ++bgn_pos; break; default: // not quoted; end byte is ")" end_byte = Byte_ascii.Paren_end; quoted = false; break; } int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len); if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, src_len); break; } if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length; if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64 bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String prv_pos = end_pos; continue; } int import_url_end = Import_url_chk( rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag if (import_url_end != Bry_find_.Not_found) { prv_pos = import_url_end; continue; } byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len); if (img_cleaned == null) { // could not clean img usr_dlg.Warn_many( GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw)); bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue; } if (!img_hash.Has(img_cleaned)) { // only add unique items for download; img_hash.Add_as_key_and_val(img_cleaned); list.Add(String_.new_u8(img_cleaned)); } img_cleaned = Replace_invalid_chars( Bry_.Copy( img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change // *inside* hash bfr.Add_mid(src, prv_pos, bgn_pos); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); bfr.Add(img_cleaned); if (!quoted) bfr.Add_byte(Byte_ascii.Quote); prv_pos = end_pos; } return bfr.To_bry_and_clear(); } catch (Exception e) { usr_dlg.Warn_many( "", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e)); return src; } }