/** * Creates the new file/folder name according to template string * * @param template the template * @param show the TV show * @param episodes the TV show episodes; nullable for TV show root foldername * @return the string */ public static String createDestination( String template, TvShow show, List<TvShowEpisode> episodes) { String newDestination = template; TvShowEpisode firstEp = null; // replace token show title ($N) if (newDestination.contains("$N")) { newDestination = replaceToken(newDestination, "$N", show.getTitle()); } // parse out episode depended tokens - for multi EP naming if (!episodes.isEmpty()) { Matcher matcher = multiEpisodeTokenPattern.matcher(template); String episodeTokens = ""; if (matcher.find()) { episodeTokens = matcher.group(0); } String combinedEpisodeParts = ""; for (TvShowEpisode episode : episodes) { String episodePart = episodeTokens; // remember first episode for media file tokens if (firstEp == null) { firstEp = episode; } // Season w/o leading zeros ($1) if (episodePart.contains("$1")) { episodePart = replaceToken(episodePart, "$1", String.valueOf(episode.getSeason())); } // Season leading zeros ($2) if (episodePart.contains("$2")) { episodePart = replaceToken(episodePart, "$2", lz(episode.getSeason())); } // DVD-Season w/o leading zeros ($3) if (episodePart.contains("$3")) { episodePart = replaceToken(episodePart, "$3", String.valueOf(episode.getDvdSeason())); } // DVD-Season leading zeros ($4) if (episodePart.contains("$4")) { episodePart = replaceToken(episodePart, "$4", lz(episode.getDvdSeason())); } // episode number if (episodePart.contains("$E")) { episodePart = replaceToken(episodePart, "$E", lz(episode.getEpisode())); } // DVD-episode number if (episodePart.contains("$D")) { episodePart = replaceToken(episodePart, "$D", lz(episode.getDvdEpisode())); } // episode title if (episodePart.contains("$T")) { episodePart = replaceToken(episodePart, "$T", episode.getTitle()); } combinedEpisodeParts += episodePart + " "; } // and now fill in the (multiple) episode parts if (StringUtils.isNotBlank(episodeTokens)) { newDestination = newDestination.replace(episodeTokens, combinedEpisodeParts); } } else { // we're in either TV show folder or season folder generation; // strip out episode tokens newDestination = newDestination.replace("$E", ""); newDestination = newDestination.replace("$T", ""); } // replace token year ($Y) if (newDestination.contains("$Y")) { if (show.getYear().equals("0")) { newDestination = newDestination.replace("$Y", ""); } else { newDestination = replaceToken(newDestination, "$Y", show.getYear()); } } if (firstEp != null && firstEp.getMediaFiles(MediaFileType.VIDEO).size() > 0) { MediaFile mf = firstEp.getMediaFiles(MediaFileType.VIDEO).get(0); // replace token resolution ($R) if (newDestination.contains("$R")) { newDestination = replaceToken(newDestination, "$R", mf.getVideoResolution()); } // replace token audio codec + channels ($A) if (newDestination.contains("$A")) { newDestination = replaceToken( newDestination, "$A", mf.getAudioCodec() + (mf.getAudioCodec().isEmpty() ? "" : "-") + mf.getAudioChannels()); } // replace token video codec + format ($V) if (newDestination.contains("$V")) { newDestination = replaceToken( newDestination, "$V", mf.getVideoCodec() + (mf.getVideoCodec().isEmpty() ? "" : "-") + mf.getVideoFormat()); } // replace token video format ($F) if (newDestination.contains("$F")) { newDestination = replaceToken(newDestination, "$F", mf.getVideoFormat()); } } else { // no mediafiles; remove at least token (if available) newDestination = newDestination.replace("$R", ""); newDestination = newDestination.replace("$A", ""); newDestination = newDestination.replace("$V", ""); newDestination = newDestination.replace("$F", ""); } // replace empty brackets newDestination = newDestination.replaceAll("\\(\\)", ""); newDestination = newDestination.replaceAll("\\[\\]", ""); // if there are multiple file separators in a row - strip them out if (SystemUtils.IS_OS_WINDOWS) { // we need to mask it in windows newDestination = newDestination.replaceAll("\\\\{2,}", "\\\\"); newDestination = newDestination.replaceAll("^\\\\", ""); } else { newDestination = newDestination.replaceAll(File.separator + "{2,}", File.separator); newDestination = newDestination.replaceAll("^" + File.separator, ""); } // ASCII replacement if (SETTINGS.isAsciiReplacement()) { newDestination = StrgUtils.convertToAscii(newDestination, false); } // trim out unnecessary whitespaces newDestination = newDestination.trim(); // any whitespace replacements? if (SETTINGS.isRenamerSpaceSubstitution()) { newDestination = newDestination.replaceAll(" ", SETTINGS.getRenamerSpaceReplacement()); } // replace trailing dots and spaces newDestination = newDestination.replaceAll("[ \\.]+$", ""); return newDestination.trim(); }
@Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("getMetadata() " + options.toString()); if (options.getType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getType()); } String id = ""; if (StringUtils.isNotBlank(options.getId(providerInfo.getId()))) { id = options.getId(providerInfo.getId()); } if (StringUtils.isBlank(id) && options.getResult() != null) { if (StringUtils.isEmpty(options.getResult().getId())) { id = StrgUtils.substr(options.getResult().getUrl(), "id=(.*?)"); } else { id = options.getResult().getId(); } } // we can not scrape without zelluloid id and url if (StringUtils.isBlank(id) && StringUtils.isBlank(options.getResult().getUrl())) { throw new Exception("cannot scrape without id and url"); } String detailurl = BASE_URL + "/filme/index.php3?id=" + id; if (StringUtils.isBlank(id)) { detailurl = options.getResult().getUrl(); } MediaMetadata md = new MediaMetadata(providerInfo.getId()); Url url; try { url = new CachedUrl(detailurl); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, PAGE_ENCODING, ""); in.close(); // parse title String title = doc.getElementsByAttributeValue("property", "og:title").attr("content").trim(); md.setTitle(title); // parse plot String plot = doc.getElementsByAttributeValue("class", "bigtext").text(); md.setPlot(plot); md.setTagline(plot.length() > 150 ? plot.substring(0, 150) : plot); // parse poster Elements el = doc.getElementsByAttributeValueStarting("src", "/images/poster"); if (el.size() == 1) { // Poster MediaArtwork ma = new MediaArtwork(providerInfo.getId(), MediaArtwork.MediaArtworkType.POSTER); ma.setPreviewUrl(BASE_URL + el.get(0).attr("src")); ma.setDefaultUrl(BASE_URL + el.get(0).attr("src")); ma.setLanguage(options.getLanguage().getLanguage()); md.addMediaArt(ma); } // parse year el = doc.getElementsByAttributeValueContaining("href", "az.php3?j="); if (el.size() == 1) { try { md.setYear(Integer.parseInt(el.get(0).text())); } catch (Exception ignored) { } } // parse cinema release el = doc.getElementsByAttributeValueContaining("href", "?v=w"); if (el.size() > 0) { try { SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy"); Date d = sdf.parse(el.get(0).text()); md.setReleaseDate(d); } catch (Exception e) { LOGGER.warn("cannot parse cinema release date: " + el.get(0).text()); } } // parse original title md.setOriginalTitle(StrgUtils.substr(doc.toString(), "Originaltitel: (.*?)\\<")); if (StringUtils.isEmpty(md.getOriginalTitle())) { md.setOriginalTitle(md.getTitle()); } // parse runtime String rt = (StrgUtils.substr(doc.toString(), "ca. (.*?) min")); if (!rt.isEmpty()) { try { md.setRuntime(Integer.valueOf(rt)); } catch (Exception e2) { LOGGER.warn("cannot convert runtime: " + rt); } } // parse genres el = doc.getElementsByAttributeValueContaining("href", "az.php3?g="); for (Element g : el) { String gid = g.attr("href").substring(g.attr("href").lastIndexOf('=') + 1); md.addGenre(getTmmGenre(gid)); } // parse cert // FSK: ab 12, $230 Mio. Budget String fsk = StrgUtils.substr(doc.toString(), "FSK: (.*?)[,<]"); if (!fsk.isEmpty()) { md.addCertification(Certification.findCertification(fsk)); } // parse rating Elements ratings = doc.getElementsByAttributeValue("class", "ratingBarTable"); if (ratings.size() == 2) { // get user rating Element e = ratings.get(1); // <div>87%</div> String r = e.getElementsByTag("div").text().replace("%", ""); try { md.setRating(Float.valueOf(r) / 10); // only 0-10 } catch (Exception e2) { LOGGER.warn("cannot convert rating: " + r); } } // details page doc = null; String detailsUrl = BASE_URL + "/filme/details.php3?id=" + id; try { url = new CachedUrl(detailsUrl); in = url.getInputStream(); doc = Jsoup.parse(in, PAGE_ENCODING, ""); in.close(); } catch (Exception e) { LOGGER.error("failed to get details: " + e.getMessage()); } if (doc != null) { Element tab = doc.getElementById("ccdetails"); int header = 0; String lastRole = ""; for (Element tr : tab.getElementsByTag("tr")) { if (tr.toString().contains("dyngfx")) { // header gfx if (tr.toString().contains("Besetzung")) { header = 1; } else if (tr.toString().contains("Crew")) { header = 2; } else if (tr.toString().contains("Produktion")) { // company, not producers header = 3; } else if (tr.toString().contains("Verleih")) { header = 4; } else if (tr.toString().contains("Alternativtitel")) { header = 5; } continue; } else { // no header gfx, so data MediaCastMember mcm = new MediaCastMember(); el = tr.getElementsByTag("td"); if (header == 1) { // actors if (el.size() == 2) { String role = "" + el.get(0).text().trim(); // text() decodes to \u00a0 if (role.equals("\u00a0") || StringUtils.isBlank(role)) { continue; } mcm.setCharacter(role); mcm.setName(el.get(1).getElementsByTag("a").text()); mcm.setId( StrgUtils.substr(el.get(1).getElementsByTag("a").attr("href"), "id=(\\d+)")); mcm.setType(MediaCastMember.CastType.ACTOR); md.addCastMember(mcm); // parsing actor pages would we too heavy here just for actor images.. } } else if (header == 2) { // crew if (el.size() == 2) { String crewrole = el.get(0).html().trim(); mcm.setName(el.get(1).getElementsByTag("a").text()); if (crewrole.equals(" ")) { crewrole = lastRole; // pop previous } else { lastRole = crewrole; // push new } mcm.setPart(crewrole); switch (crewrole) { case "Regie": mcm.setType(MediaCastMember.CastType.DIRECTOR); break; case "Drehbuch": mcm.setType(MediaCastMember.CastType.WRITER); break; case "Produktion": mcm.setType(MediaCastMember.CastType.PRODUCER); break; default: mcm.setType(MediaCastMember.CastType.OTHER); break; } mcm.setId( StrgUtils.substr(el.get(1).getElementsByTag("a").attr("href"), "id=(\\d+)")); md.addCastMember(mcm); } } else if (header == 3) { // production md.addProductionCompany(el.get(0).text()); } } } } // get links page doc = null; String linksUrl = BASE_URL + "/filme/links.php3?id=" + id; try { url = new CachedUrl(linksUrl); in = url.getInputStream(); doc = Jsoup.parse(in, PAGE_ENCODING, ""); in.close(); } catch (Exception e) { LOGGER.error("failed to get links page: " + e.getMessage()); } if (doc != null) { el = doc.getElementsByAttributeValueContaining("href", "german.imdb.com"); if (el != null && el.size() > 0) { String imdb = StrgUtils.substr(el.get(0).attr("href"), "(tt\\d{7})"); if (imdb.isEmpty()) { imdb = "tt" + StrgUtils.substr(el.get(0).attr("href"), "\\?(\\d+)"); } md.setId(MediaMetadata.IMDB, imdb); } } } catch (Exception e) { LOGGER.error("Error parsing " + detailurl); throw e; } return md; }
private static String generateName( String template, TvShow tvShow, MediaFile mf, boolean forFile) { String filename = ""; List<TvShowEpisode> eps = TvShowList.getInstance().getTvEpisodesByFile(tvShow, mf.getFile()); if (eps == null || eps.size() == 0) { // this should not happen, but unluckily ODB does it sometimes; try a second time to get the // episode try { Thread.sleep(250); } catch (Exception ex) { } eps = TvShowList.getInstance().getTvEpisodesByFile(tvShow, mf.getFile()); } if (eps == null || eps.size() == 0) { return ""; } if (StringUtils.isBlank(template)) { filename = createDestination(SETTINGS.getRenamerFilename(), tvShow, eps); } else { filename = createDestination(template, tvShow, eps); } // since we can use this method for folders too, use the next options solely for files if (forFile) { if (mf.getType().equals(MediaFileType.THUMB)) { if (SETTINGS.isUseRenamerThumbPostfix()) { filename = filename + "-thumb"; } // else let the filename as is } if (mf.getType().equals(MediaFileType.FANART)) { filename = filename + "-fanart"; } if (mf.getType().equals(MediaFileType.TRAILER)) { filename = filename + "-trailer"; } if (mf.getType().equals(MediaFileType.VIDEO_EXTRA)) { String name = mf.getBasename(); Pattern p = Pattern.compile("(?i).*([ _.-]extras[ _.-]).*"); Matcher m = p.matcher(name); if (m.matches()) { name = name.substring(m.end(1)); // everything behind } // if not, MF must be within /extras/ folder - use name 1:1 filename = filename + "-extras-" + name; } if (mf.getType().equals(MediaFileType.SUBTITLE)) { List<MediaFileSubtitle> subtitles = mf.getSubtitles(); if (subtitles != null && subtitles.size() > 0) { MediaFileSubtitle mfs = mf.getSubtitles().get(0); if (mfs != null) { if (!mfs.getLanguage().isEmpty()) { filename = filename + "." + mfs.getLanguage(); } if (mfs.isForced()) { filename = filename + ".forced"; } } else { // TODO: meh, we didn't have an actual MF yet - need to parse filename ourselves (like // movie). But with a recent scan of files/DB this // should not occur. } } } } // end forFile // ASCII replacement if (SETTINGS.isAsciiReplacement()) { filename = StrgUtils.convertToAscii(filename, false); } filename = filename + "." + mf.getExtension(); // readd original extension return filename; }
@Override public List<MediaSearchResult> search(MediaSearchOptions options) throws Exception { LOGGER.debug("search() " + options.toString()); if (options.getMediaType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getMediaType()); } int year = 0; if (options.getYear() != 0) { year = options.getYear(); } ArrayList<MediaSearchResult> resultList = new ArrayList<>(); String searchUrl = ""; String searchTerm = ""; String imdb = ""; // only title search if (StringUtils.isNotEmpty(options.getQuery())) { searchTerm = options.getQuery(); searchUrl = BASE_URL + "/suche/index.php3?qstring=" + URLEncoder.encode(searchTerm, "UTF-8"); LOGGER.debug("search for : " + searchTerm); } else { LOGGER.debug("empty searchString"); return resultList; } searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm); Document doc = null; try { Url url = new CachedUrl(searchUrl); InputStream in = url.getInputStream(); doc = Jsoup.parse(in, PAGE_ENCODING, ""); in.close(); } catch (Exception e) { LOGGER.error("failed to search for " + searchTerm + ": " + e.getMessage()); } if (doc == null || doc.text().contains("Interner Fehler")) { // FIXME: we are using the one which comes with zelluloid - NOT the global one SearchTitleWithGoogle gs = new SearchTitleWithGoogle(); List<MediaSearchResult> gr = gs.search("zelluloid.de", this.getProviderInfo(), options); for (MediaSearchResult msr : gr) { // filter google results - only movie links if (msr.getUrl().contains("/filme/index.php3")) { String id = StrgUtils.substr(msr.getUrl(), "id=(.*)"); msr.setId(id); resultList.add(msr); } } return resultList; } // only look for movie links // Elements filme = doc.getElementsByAttributeValueStarting("href", "hit.php"); // <TR><TD ALIGN=CENTER><IMG SRC="/gfx/icoMovie.gif" WIDTH=26 HEIGHT=26 // ALT="Film"></TD><TD><B><a // href="hit.php3?hit=3700de0676109950820a042115e98d99-movie-886-23126993-2" // class="normLight">Twelve // Monkeys</B> <nobr>(1995)</nobr></a><div class="smallBlur">R: Terry Gilliam</div></TD> Elements filme = doc.getElementsByTag("tr"); for (Element tr : filme) { // no nesting trs if (tr.getElementsByTag("tr").size() > 1) { continue; } // only tr with movie links Elements as = tr.getElementsByAttributeValueStarting("href", "hit.php3?hit="); if (as.isEmpty()) { continue; } // and only movies if (tr.text().contains("TV-Serie")) { continue; } try { Element a = as.first(); String id = StrgUtils.substr(a.attr("href"), "-movie-(.*?)-"); MediaSearchResult sr = new MediaSearchResult(providerInfo.getId(), options.getMediaType()); sr.setId(id); if (StringUtils.isEmpty(sr.getTitle())) { if (a.html().contains("nobr")) { sr.setTitle(a.ownText()); } else { sr.setTitle(a.text()); } } LOGGER.debug("found movie " + sr.getTitle()); sr.setOriginalTitle(a.getElementsByTag("span").text()); try { sr.setYear( Integer.parseInt( StrgUtils.substr( tr.getElementsByTag("nobr").text(), ".*(\\d{4}).*"))); // any 4 digit } catch (Exception ignored) { } sr.setUrl(BASE_URL + "/filme/index.php3?id=" + id); // sr.setPosterUrl(BASE_URL + "/images" + StrgUtils.substr(a.toString(), // "images(.*?)\\"")); if (imdb.equals(sr.getIMDBId())) { // perfect match sr.setScore(1); } else { // compare score based on names float score = MetadataUtil.calculateScore(searchTerm, sr.getTitle()); if (yearDiffers(year, sr.getYear())) { float diff = (float) Math.abs(year - sr.getYear()) / 100; LOGGER.debug( "parsed year does not match search result year - downgrading score by " + diff); score -= diff; } sr.setScore(score); } resultList.add(sr); } catch (Exception e) { LOGGER.warn("error parsing movie result: " + e.getMessage()); } } LOGGER.debug("found " + resultList.size() + " search results"); // didn't we find anything? we may have been redirected to the details page if (resultList.isEmpty()) { if (!doc.getElementsByTag("title").text().contains("Suche nach")) { // redirected to detail page MediaSearchResult msr = new MediaSearchResult(providerInfo.getId(), options.getMediaType()); Elements el = doc.getElementsByAttributeValueStarting("href", "index.php3?id="); if (el.size() > 0) { msr.setId(StrgUtils.substr(el.get(0).attr("href"), "id=(\\d+)")); } msr.setTitle(StrgUtils.substr(doc.getElementsByTag("title").text(), "(.*?)\\|").trim()); el = doc.getElementsByAttributeValueContaining("href", "az.php3?j="); if (el.size() == 1) { try { msr.setYear(Integer.parseInt(el.get(0).text())); } catch (Exception ignored) { } } resultList.add(msr); } return resultList; } Collections.sort(resultList); Collections.reverse(resultList); return resultList; }