/**
 * Capitalizes the first non-punctuation token that follows a quote token which itself comes
 * directly after a comma token.
 *
 * <p>Quote tokens are tracked in order of appearance and alternate between "opening" and
 * "closing"; only an opening quote preceded by {@code ","} arms the capitalization.
 *
 * @param line line to fix
 * @param so {@link SubtitleObject} used to split the line into tokens
 * @return the tokens re-joined with single spaces
 */
private static String fixQuoteCapitalization(String line, SubtitleObject so) {
    final String[] tokens = so.split(RegexEnum.SPACE, line);
    boolean nextQuoteOpens = true;
    boolean capitalizeNext = false;
    String previous = null;

    for (int idx = 0; idx < tokens.length; idx++) {
        final String token = tokens[idx];
        final boolean afterComma = ",".equals(previous);

        if (token.equals("\"")) {
            // Every quote flips the opening/closing state; only an opening quote that
            // directly follows a comma requests capitalization of the upcoming word.
            if (afterComma && nextQuoteOpens) {
                capitalizeNext = true;
            }
            nextQuoteOpens = !nextQuoteOpens;
        } else if (!afterComma && capitalizeNext && !StringUtil.isPunctuation(token)) {
            tokens[idx] = StringUtil.capitalize(token);
            capitalizeNext = false;
        }

        // Remember the unmodified token for the next iteration's comma check.
        previous = token;
    }

    return String.join(" ", tokens);
}
/** * Fix all words that should be capitalized upon preceding a certain string. * * @param line line to fix * @param so {@link SubtitleObject} * @return fixed line */ private static String fixPrecedingShouldCapitalize(String line, SubtitleObject so) { String[] split = so.split(RegexEnum.SPACE, line); String prevPrev = null, prev = null, current = null; boolean shouldCapitalize = false; for (int i = 0; i < split.length; ++i) { prevPrev = prev; prev = current; current = split[i]; if (prev != null && PRECEDING_SHOULD_CAPITALIZE.contains(prev)) { shouldCapitalize = true; if (prev.equals(".") && prevPrev != null && prevPrev.equals(".")) { // ellipses shouldCapitalize = false; } else if (!current.equals("\"") && !RegexUtil.matches(RegexEnum.MATCH_DIGIT_AND_S, current)) { if (!StringUtil.startsUpperCase(current)) { split[i] = StringUtil.capitalize(current); } shouldCapitalize = false; } } else if (shouldCapitalize) { if (!StringUtil.isPunctuation(current)) { split[i] = StringUtil.capitalize(current); shouldCapitalize = false; } } } return String.join(" ", split); }
/**
 * Entry point of the {@link FixCapitalization} fix.
 *
 * <p>Returns the line untouched when {@code Fixer.exclude} rejects it. Otherwise runs, in
 * order: proper nouns, all-uppercase words, capitalization after trigger tokens, first-word
 * capitalization (only if the incoming line was already equal to its capitalized form),
 * quote capitalization (only if the line contains a double quote), and title fixing. After
 * every step {@code SubtitleObject.clearMapIfChanged} is called with the previous baseline
 * and the new text, and its return value becomes the baseline for the next step.
 *
 * @param line line to fix
 * @param so {@link SubtitleObject}
 * @return fixed line
 */
public static String fix(String line, SubtitleObject so) {
    if (Fixer.exclude(line, so, MIN_TOKENS, ' ')) {
        return line;
    }

    final String originalLine = line;
    // Remember whether the untouched input already started capitalized, because the
    // proper-noun pass below may lower-case the first word.
    final boolean startedCapitalized = originalLine.equals(StringUtil.capitalize(originalLine));

    String baseline = originalLine;
    String fixed = fixProperNouns(originalLine, so);
    baseline = SubtitleObject.clearMapIfChanged(so, baseline, fixed);

    fixed = fixAllUppercase(fixed, originalLine, so);
    baseline = SubtitleObject.clearMapIfChanged(so, baseline, fixed);

    fixed = fixPrecedingShouldCapitalize(fixed, so);
    baseline = SubtitleObject.clearMapIfChanged(so, baseline, fixed);

    if (startedCapitalized) {
        fixed = StringUtil.capitalize(fixed);
        baseline = SubtitleObject.clearMapIfChanged(so, baseline, fixed);
    }

    if (fixed.contains("\"")) {
        fixed = fixQuoteCapitalization(fixed, so);
        baseline = SubtitleObject.clearMapIfChanged(so, baseline, fixed);
    }

    fixed = fixTitle(fixed, originalLine, so);
    // Last step: the call is kept, but its result is no longer needed as a baseline.
    SubtitleObject.clearMapIfChanged(so, baseline, fixed);

    return fixed;
}
/**
 * Upper-cases tokens that should be all uppercase and lower-cases tokens that are currently
 * all uppercase but should not be.
 *
 * <p>Per token, the first matching rule applies:
 * <ol>
 *   <li>token is in {@code ALL_UPPERCASE}: replaced by its upper-case form;</li>
 *   <li>token is in {@code POTENTIAL_ALL_UPPERCASE}: upper-cased only when the first index of
 *       its upper-case form in {@code originalLine} equals the first index of the token in
 *       {@code line};</li>
 *   <li>token is longer than one character, already equals its upper-case form, and
 *       {@code StringUtil.containsLettersAndNumbers} is false: lower-cased.</li>
 * </ol>
 *
 * <p>Case conversion uses {@link java.util.Locale#ROOT} so the result does not depend on the
 * JVM's default locale (e.g. under a Turkish locale {@code "fbi"} would otherwise become
 * {@code "FBİ"} and {@code "HI"} would become {@code "hı"}).
 *
 * @param line line to fix
 * @param originalLine original line, before any capitalization fix was applied
 * @param so {@link SubtitleObject} used to split the line into tokens
 * @return the tokens re-joined with single spaces
 */
private static String fixAllUppercase(String line, String originalLine, SubtitleObject so) {
    String[] split = so.split(RegexEnum.SPACE, line);
    for (int i = 0; i < split.length; ++i) {
        String current = split[i];
        // Computed once; every branch below needs the upper-case form.
        String upper = current.toUpperCase(java.util.Locale.ROOT);

        if (ALL_UPPERCASE.contains(current)) {
            split[i] = upper;
        } else if (POTENTIAL_ALL_UPPERCASE.contains(current)) {
            // Only restore upper case if the original text had it at the same position.
            if (originalLine.indexOf(upper) == line.indexOf(current)) {
                split[i] = upper;
            }
        } else if (current.length() > 1
                && current.equals(upper)
                && !StringUtil.containsLettersAndNumbers(current)) {
            split[i] = current.toLowerCase(java.util.Locale.ROOT);
        }
    }
    return String.join(" ", split);
}
/**
 * Fixes the capitalization of proper nouns.
 *
 * <p>Tokens that are punctuation or for which {@code StringUtil.containsLettersAndNumbers} is
 * true are skipped. For the rest:
 * <ul>
 *   <li>a token longer than one character that starts upper-case and has exactly one
 *       upper-case character is lower-cased unless it is in {@code PROPER_NOUNS} or
 *       {@code POTENTIAL_PROPER_NOUNS};</li>
 *   <li>any other token that is in {@code PROPER_NOUNS} is capitalized via
 *       {@code StringUtil.capitalize}.</li>
 * </ul>
 *
 * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the
 * JVM's default locale (e.g. under a Turkish locale {@code "It"} would otherwise become
 * {@code "ıt"}).
 *
 * @param line line to fix
 * @param so {@link SubtitleObject} used to split the line into tokens
 * @return the tokens re-joined with single spaces
 */
private static String fixProperNouns(String line, SubtitleObject so) {
    String[] split = so.split(RegexEnum.SPACE, line);
    for (int i = 0; i < split.length; ++i) {
        String current = split[i];
        if (StringUtil.isPunctuation(current) || StringUtil.containsLettersAndNumbers(current)) {
            continue;
        }

        boolean capitalizedWord = StringUtil.startsUpperCase(current)
                && current.length() > 1
                && StringUtil.countUppercase(current) == 1;

        if (capitalizedWord) {
            // A capitalized word that is not a known (or potential) proper noun is demoted.
            if (!PROPER_NOUNS.contains(current) && !POTENTIAL_PROPER_NOUNS.contains(current)) {
                split[i] = current.toLowerCase(java.util.Locale.ROOT);
            }
        } else if (PROPER_NOUNS.contains(current)) {
            split[i] = StringUtil.capitalize(current);
        }
    }
    return String.join(" ", split);
}